gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2021 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass detects the loops that iterate a constant number of times,
  21    adds a canonical induction variable (step -1, tested against 0)
  22    and replaces the exit test.  This enables the less powerful rtl
  23    level analysis to use this information.
  24
  25    This might spoil the code in some cases (by increasing register pressure).
  26    Note that in the case the new variable is not needed, ivopts will get rid
  27    of it, so it might only be a problem when there are no other linear induction
  28    variables.  In that case the created optimization possibilities are likely
  29    to pay off.
  30
  31    We also perform
  32      - complete unrolling (or peeling) when the loop rolls few enough
  33        times
  34      - simple peeling (i.e. copying a few initial iterations before the loop)
  35        when an estimate of the number of iterations is known (typically from
  36        the profile info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-fold.h"
  52 #include "tree-eh.h"
  53 #include "gimple-iterator.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "tree-inline.h"
  63 #include "tree-cfgcleanup.h"
  64 #include "builtins.h"
  65 #include "tree-ssa-sccvn.h"
  66 #include "dbgcnt.h"
  67
  68 /* Specifies the types of loops that may be unrolled.  */
  69
  70 enum unroll_level
  71 {
  72   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  73                            iteration.  */
  74   UL_NO_GROWTH,         /* Only loops whose unrolling will not increase
  75                            code size.  */
  76   UL_ALL                /* All suitable loops.  */
  77 };
  78
  79 /* Adds a canonical induction variable to LOOP, counting down from NITER + 1
  80    with step -1.  EXIT is the exit edge whose condition is replaced by a test
  81    of the new IV against zero.  The SSA versions of the new IV before and
  82    after the increment are stored in VAR_BEFORE and VAR_AFTER if not NULL.  */
  83
  84 void
  85 create_canonical_iv (class loop *loop, edge exit, tree niter,
  86                      tree *var_before = NULL, tree *var_after = NULL)
  87 {
  88   edge in;
  89   tree type, var;
  90   gcond *cond;
  91   gimple_stmt_iterator incr_at;
  92   enum tree_code cmp;
  93
  94   if (dump_file && (dump_flags & TDF_DETAILS))
  95     {
  96       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  97       print_generic_expr (dump_file, niter, TDF_SLIM);
  98       fprintf (dump_file, " iterations.\n");
  99     }
 100   /* COND is the exit test; IN is the other edge leaving its block.  */
 101   cond = as_a <gcond *> (last_stmt (exit->src));
 102   in = EDGE_SUCC (exit->src, 0);
 103   if (in == exit)
 104     in = EDGE_SUCC (exit->src, 1);
 105
 106   /* Note that we do not need to worry about overflows, since
 107      the type of niter is always unsigned and all comparisons are
 108      just for equality/nonequality -- i.e. everything works
 109      with modular arithmetic.  */
 110
 111   type = TREE_TYPE (niter);
 112   niter = fold_build2 (PLUS_EXPR, type,
 113                        niter,
 114                        build_int_cst (type, 1));
 115   incr_at = gsi_last_bb (in->src);
 116   create_iv (niter,
 117              build_int_cst (type, -1),
 118              NULL_TREE, loop,
 119              &incr_at, false, var_before, &var);
 120   if (var_after)
 121     *var_after = var;
 122   /* The exit is taken once the updated IV reaches zero.  */
 123   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 124   gimple_cond_set_code (cond, cmp);
 125   gimple_cond_set_lhs (cond, var);
 126   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 127   update_stmt (cond);
 128 }
 129
 130 /* Describes the size of a loop as detected by tree_estimate_loop_size.  */
 131 struct loop_size
 132 {
 133   /* Number of instructions in the loop.  */
 134   int overall;
 135
 136   /* Number of instructions that will be likely optimized out in
 137      peeled iterations of loop (i.e. computation based on induction
 138      variable where induction variable starts at known constant.)  */
 139   int eliminated_by_peeling;
 140
 141   /* Same statistics for last iteration of loop: it is smaller because
 142      instructions after exit are not executed.  */
 143   int last_iteration;
 144   int last_iteration_eliminated_by_peeling;
 145
 146   /* True if some IV computation will become constant.  */
 147   bool constant_iv;
 148
 149   /* Number of call stmts on the hot path that are not inexpensive (as
 150      decided by gimple_inexpensive_call_p) and are pure or const.  */
 151   int num_pure_calls_on_hot_path;
 152   /* Number of call stmts on the hot path that are not inexpensive and are
 153      neither pure nor const.  */
 154   int num_non_pure_calls_on_hot_path;
 155   /* Number of non-debug stmts on the hot path other than the calls above.  */
 156   int non_call_stmts_on_hot_path;
 157   /* Number of branches on the hot path; the calls above are counted too.  */
 158   int num_branches_on_hot_path;
 159 };
 160
 161 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 162
 163 static bool
 164 constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 165 {
 166   if (CONSTANT_CLASS_P (op))
 167     return true;
 168
 169   /* We can still fold accesses to constant arrays when the index is known.  */
 170   if (TREE_CODE (op) != SSA_NAME)
 171     {
 172       tree base = op;
 173
 174       /* First take a quick look whether there is a constant array inside.  */
 175       while (handled_component_p (base))
 176         base = TREE_OPERAND (base, 0);
 177       if ((DECL_P (base)
 178            && ctor_for_folding (base) != error_mark_node)
 179           || CONSTANT_CLASS_P (base))
 180         {
 181           /* If so, see if we understand all the indices.  */
 182           base = op;
 183           while (handled_component_p (base))
 184             {
 185               if (TREE_CODE (base) == ARRAY_REF
 186                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 187                 return false;
 188               base = TREE_OPERAND (base, 0);
 189             }
 190           return true;
 191         }
 192       return false;
 193     }
 194
 195   /* An SSA name is constant if STMT is in LOOP and its evolution is known.  */
 196   if (loop_containing_stmt (stmt) != loop)
 197     return false;
 198   tree ev = analyze_scalar_evolution (loop, op);
 199   if (chrec_contains_undetermined (ev)
 200       || chrec_contains_symbols (ev))
 201     return false;
 202   return true;
 203 }
 204
 205 /* Computes an estimated number of insns in LOOP and counts calls, branches
 206    and other statements on its hot path.
 207    EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
 208    iteration of the loop.  EDGE_TO_CANCEL (if non-NULL) is a non-exit edge
 209    eliminated in the last iteration of loop.  Return results in SIZE, estimate
 210    benefits for complete unrolling exiting by EXIT.  Stop estimating after
 211    UPPER_BOUND is met and return true in this case; otherwise return false.  */
 212
 213 static bool
 214 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
 215                          struct loop_size *size, int upper_bound)
 216 {
 217   basic_block *body = get_loop_body (loop);
 218   gimple_stmt_iterator gsi;
 219   unsigned int i;
 220   bool after_exit;
 221   vec<basic_block> path = get_loop_hot_path (loop);
 222
 223   size->overall = 0;
 224   size->eliminated_by_peeling = 0;
 225   size->last_iteration = 0;
 226   size->last_iteration_eliminated_by_peeling = 0;
 227   size->num_pure_calls_on_hot_path = 0;
 228   size->num_non_pure_calls_on_hot_path = 0;
 229   size->non_call_stmts_on_hot_path = 0;
 230   size->num_branches_on_hot_path = 0;
 231   size->constant_iv = 0;
 232
 233   if (dump_file && (dump_flags & TDF_DETAILS))
 234     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 235   for (i = 0; i < loop->num_nodes; i++)
 236     {
 237       if (edge_to_cancel && body[i] != edge_to_cancel->src
 238           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 239         after_exit = true;
 240       else
 241         after_exit = false;
 242       if (dump_file && (dump_flags & TDF_DETAILS))
 243         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 244                  after_exit);
 245
 246       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 247         {
 248           gimple *stmt = gsi_stmt (gsi);
 249           int num = estimate_num_insns (stmt, &eni_size_weights);
 250           bool likely_eliminated = false;
 251           bool likely_eliminated_last = false;
 252           bool likely_eliminated_peeled = false;
 253
 254           if (dump_file && (dump_flags & TDF_DETAILS))
 255             {
 256               fprintf (dump_file, "  size: %3i ", num);
 257               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 258             }
 259
 260           /* Look for reasons why we might optimize this stmt away.  */
 261
 262           if (!gimple_has_side_effects (stmt))
 263             {
 264               /* Exit conditional.  */
 265               if (exit && body[i] == exit->src
 266                   && stmt == last_stmt (exit->src))
 267                 {
 268                   if (dump_file && (dump_flags & TDF_DETAILS))
 269                     fprintf (dump_file, "   Exit condition will be eliminated "
 270                              "in peeled copies.\n");
 271                   likely_eliminated_peeled = true;
 272                 }
 273               if (edge_to_cancel && body[i] == edge_to_cancel->src
 274                   && stmt == last_stmt (edge_to_cancel->src))
 275                 {
 276                   if (dump_file && (dump_flags & TDF_DETAILS))
 277                     fprintf (dump_file, "   Exit condition will be eliminated "
 278                              "in last copy.\n");
 279                   likely_eliminated_last = true;
 280                 }
 281               /* Sets of IV variables.  */
 282               if (gimple_code (stmt) == GIMPLE_ASSIGN
 283                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 284                 {
 285                   if (dump_file && (dump_flags & TDF_DETAILS))
 286                     fprintf (dump_file, "   Induction variable computation will"
 287                              " be folded away.\n");
 288                   likely_eliminated = true;
 289                 }
 290               /* Assignments of IV variables.  */
 291               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 292                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 293                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 294                                                   stmt, loop)
 295                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 296                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 297                                                       stmt, loop))
 298                        && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
 299                 {
 300                   size->constant_iv = true;
 301                   if (dump_file && (dump_flags & TDF_DETAILS))
 302                     fprintf (dump_file,
 303                              "   Constant expression will be folded away.\n");
 304                   likely_eliminated = true;
 305                 }
 306               /* Conditionals.  */
 307               else if ((gimple_code (stmt) == GIMPLE_COND
 308                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 309                                                    loop)
 310                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 311                                                    loop)
 312                         /* We don't simplify all constant compares so make sure
 313                            they are not both constant already.  See PR70288.  */
 314                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 315                             || ! is_gimple_min_invariant
 316                                  (gimple_cond_rhs (stmt))))
 317                        || (gimple_code (stmt) == GIMPLE_SWITCH
 318                            && constant_after_peeling (gimple_switch_index (
 319                                                         as_a <gswitch *>
 320                                                           (stmt)),
 321                                                       stmt, loop)
 322                            && ! is_gimple_min_invariant
 323                                    (gimple_switch_index
 324                                       (as_a <gswitch *> (stmt)))))
 325                 {
 326                   if (dump_file && (dump_flags & TDF_DETAILS))
 327                     fprintf (dump_file, "   Constant conditional.\n");
 328                   likely_eliminated = true;
 329                 }
 330             }
 331
 332           size->overall += num;
 333           if (likely_eliminated || likely_eliminated_peeled)
 334             size->eliminated_by_peeling += num;
 335           if (!after_exit)
 336             {
 337               size->last_iteration += num;
 338               if (likely_eliminated || likely_eliminated_last)
 339                 size->last_iteration_eliminated_by_peeling += num;
 340             }
 341           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 342               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 343             {
 344               free (body);
 345               path.release ();
 346               return true;
 347             }
 348         }
 349     }
 350   while (path.length ())
 351     {
 352       basic_block bb = path.pop ();
 353       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 354         {
 355           gimple *stmt = gsi_stmt (gsi);
 356           if (gimple_code (stmt) == GIMPLE_CALL
 357               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 358             {
 359               int flags = gimple_call_flags (stmt);
 360               if (flags & (ECF_PURE | ECF_CONST))
 361                 size->num_pure_calls_on_hot_path++;
 362               else
 363                 size->num_non_pure_calls_on_hot_path++;
 364               size->num_branches_on_hot_path ++;
 365             }
 366           /* Count inexpensive calls as non-calls, because they will likely
 367              expand inline.  */
 368           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 369             size->non_call_stmts_on_hot_path++;
 370           if (((gimple_code (stmt) == GIMPLE_COND
 371                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 372                     || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 373                                                 loop)))
 374                || (gimple_code (stmt) == GIMPLE_SWITCH
 375                    && !constant_after_peeling (gimple_switch_index (
 376                                                  as_a <gswitch *> (stmt)),
 377                                                stmt, loop)))
 378               && (!exit || bb != exit->src))
 379             size->num_branches_on_hot_path++;
 380         }
 381     }
 382   path.release ();
 383   if (dump_file && (dump_flags & TDF_DETAILS))
 384     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 385              size->eliminated_by_peeling, size->last_iteration,
 386              size->last_iteration_eliminated_by_peeling);
 387
 388   free (body);
 389   return false;
 390 }
 391
 392 /* Estimate number of insns of completely unrolled loop.
 393    It is NUNROLL copies of the loop body plus the last iteration, taking
 394    into account the fact that in the last copy everything after the exit
 395    conditional is dead and that some instructions will be eliminated after
 396    peeling.
 397
 398    Loop body is likely going to simplify further, this is difficult
 399    to guess, we just decrease the result by 1/3.  The result is at least 1.  */
 400
 401 static unsigned HOST_WIDE_INT
 402 estimated_unrolled_size (struct loop_size *size,
 403                          unsigned HOST_WIDE_INT nunroll)
 404 {
 405   HOST_WIDE_INT unr_insns = ((nunroll)
 406                              * (HOST_WIDE_INT) (size->overall
 407                                                 - size->eliminated_by_peeling));
 408   if (!nunroll)
 409     unr_insns = 0;
 410   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 411
 412   unr_insns = unr_insns * 2 / 3;
 413   if (unr_insns <= 0)
 414     unr_insns = 1;
 415
 416   return unr_insns;
 417 }
 418
 419 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 420    body that can be removed to make the loop always exit and that at
 421    the same time does not make any code potentially executed
 422    during the last iteration dead.  Return that edge, or NULL if none.
 423
 424    After complete unrolling we still may get rid of the conditional
 425    on the exit in the last copy even if we have no idea what it does.
 426    This is quite a common case for loops of the form
 427
 428      int a[5];
 429      for (i=0;i<b;i++)
 430        a[i]=0;
 431
 432    Here we prove the loop to iterate 5 times but we do not know
 433    it from the induction variable.
 434
 435    For now we handle only the simple case where there is an exit condition
 436    just before the latch block and the latch block contains no statements
 437    with side effects that may otherwise terminate the execution of the loop
 438    (such as by EH or by terminating the program or longjmp).
 439
 440    In the general case we may want to cancel the paths leading to statements
 441    loop-niter identified as having undefined effect in the last iteration.
 442    The other cases are hopefully rare and will be cleaned up later.  */
 443
 444 static edge
 445 loop_edge_to_cancel (class loop *loop)
 446 {
 447   unsigned i;
 448   edge edge_to_cancel;
 449   gimple_stmt_iterator gsi;
 450
 451   /* We want only one predecessor of the loop latch.  */
 452   if (EDGE_COUNT (loop->latch->preds) > 1)
 453     return NULL;
 454
 455   auto_vec<edge> exits = get_loop_exit_edges (loop);
 456
 457   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 458     {
 459        /* Find the other edge than the loop exit
 460           leaving the conditional.  */
 461        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 462          continue;
 463        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 464          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 465        else
 466          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 467
 468       /* We only can handle conditionals.  */
 469       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 470         continue;
 471
 472       /* We should never have conditionals in the loop latch.  */
 473       gcc_assert (edge_to_cancel->dest != loop->header);
 474
 475       /* Check that it leads to the loop latch.  */
 476       if (edge_to_cancel->dest != loop->latch)
 477         continue;
 478
 479       /* Verify that the code in loop latch does nothing that may end program
 480          execution without really reaching the exit.  This may include
 481          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 482       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 483         if (gimple_has_side_effects (gsi_stmt (gsi)))
 484            return NULL;
 485       return edge_to_cancel;
 486     }
 487   return NULL;
 488 }
 489
 490 /* Remove all tests for exits that are known to be taken after LOOP was
 491    peeled NPEELED times.  Put a __builtin_unreachable call before every
 492    statement known to not be executed.  Return true if anything changed.  */
 493
 494 static bool
 495 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 496 {
 497   class nb_iter_bound *elt;
 498   bool changed = false;
 499
 500   for (elt = loop->bounds; elt; elt = elt->next)
 501     {
 502       /* If statement is known to be undefined after peeling, insert a
 503          call to __builtin_unreachable before it and split the block
 504          at that call.  */
 505       if (!elt->is_exit
 506           && wi::ltu_p (elt->bound, npeeled))
 507         {
 508           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 509           gcall *stmt = gimple_build_call
 510               (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 511           gimple_set_location (stmt, gimple_location (elt->stmt));
 512           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 513           split_block (gimple_bb (stmt), stmt);
 514           changed = true;
 515           if (dump_file && (dump_flags & TDF_DETAILS))
 516             {
 517               fprintf (dump_file, "Forced statement unreachable: ");
 518               print_gimple_stmt (dump_file, elt->stmt, 0);
 519             }
 520         }
 521       /* If we know the exit will be taken after peeling, make its
 522          condition constant so that the exit edge is always followed.  */
 522       else if (elt->is_exit
 523                && wi::leu_p (elt->bound, npeeled))
 524         {
 525           basic_block bb = gimple_bb (elt->stmt);
 526           edge exit_edge = EDGE_SUCC (bb, 0);
 527
 528           if (dump_file && (dump_flags & TDF_DETAILS))
 529             {
 530               fprintf (dump_file, "Forced exit to be taken: ");
 531               print_gimple_stmt (dump_file, elt->stmt, 0);
 532             }
 533           if (!loop_exit_edge_p (loop, exit_edge))
 534             exit_edge = EDGE_SUCC (bb, 1);
 535           exit_edge->probability = profile_probability::always ();
 536           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 537           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 538           if (exit_edge->flags & EDGE_TRUE_VALUE)
 539             gimple_cond_make_true (cond_stmt);
 540           else
 541             gimple_cond_make_false (cond_stmt);
 542           update_stmt (cond_stmt);
 543           changed = true;
 544         }
 545     }
 546   return changed;
 547 }
 548
 549 /* Remove all exits that are known to be never taken because of the loop bound
 550    discovered.  Return true if any exit test was changed.  */
 551
 552 static bool
 553 remove_redundant_iv_tests (class loop *loop)
 554 {
 555   class nb_iter_bound *elt;
 556   bool changed = false;
 557
 558   if (!loop->any_upper_bound)
 559     return false;
 560   for (elt = loop->bounds; elt; elt = elt->next)
 561     {
 562       /* Exit is pointless if it won't be taken before the loop reaches
 563          its upper bound.  */
 564       if (elt->is_exit && loop->any_upper_bound
 565           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 566         {
 567           basic_block bb = gimple_bb (elt->stmt);
 568           edge exit_edge = EDGE_SUCC (bb, 0);
 569           class tree_niter_desc niter;
 570
 571           if (!loop_exit_edge_p (loop, exit_edge))
 572             exit_edge = EDGE_SUCC (bb, 1);
 573
 574           /* Only when we know the actual number of iterations, not
 575              just a bound, we can remove the exit.  */
 576           if (!number_of_iterations_exit (loop, exit_edge,
 577                                           &niter, false, false)
 578               || !integer_onep (niter.assumptions)
 579               || !integer_zerop (niter.may_be_zero)
 580               || !niter.niter
 581               || TREE_CODE (niter.niter) != INTEGER_CST
 582               || !wi::ltu_p (loop->nb_iterations_upper_bound,
 583                              wi::to_widest (niter.niter)))
 584             continue;
 585
 586           if (dump_file && (dump_flags & TDF_DETAILS))
 587             {
 588               fprintf (dump_file, "Removed pointless exit: ");
 589               print_gimple_stmt (dump_file, elt->stmt, 0);
 590             }
 591           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 592           if (exit_edge->flags & EDGE_TRUE_VALUE)
 593             gimple_cond_make_false (cond_stmt);
 594           else
 595             gimple_cond_make_true (cond_stmt);
 596           update_stmt (cond_stmt);
 597           changed = true;
 598         }
 599     }
 600   return changed;
 601 }
 602
 603 /* Stores loops that will be unlooped, their unroll counts (in parallel) and
 604    edges that will be removed after we process the whole loop tree.  */
 605 static vec<loop_p> loops_to_unloop;
 606 static vec<int> loops_to_unloop_nunroll;
 607 static vec<edge> edges_to_remove;
 608 /* Stores loops that have been peeled.  */
 609 static bitmap peeled_loops;
 610
 611 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 612    on the latch edge.
 613    We do it after all unrolling since unlooping moves basic blocks
 614    across loop boundaries, trashing loop closed SSA form as well
 615    as SCEV info needed to be intact during unrolling.
 616
 617    IRRED_INVALIDATED is used to record whether information about
 618    irreducible regions may become invalid as a result
 619    of the transformation.
 620    LOOP_CLOSED_SSA_INVALIDATED is used to record the case
 621    when we need to go into loop closed SSA form.  */
 622
 623 static void
 624 unloop_loops (bitmap loop_closed_ssa_invalidated,
 625               bool *irred_invalidated)
 626 {
 627   while (loops_to_unloop.length ())
 628     {
 629       class loop *loop = loops_to_unloop.pop ();
 630       int n_unroll = loops_to_unloop_nunroll.pop ();
 631       basic_block latch = loop->latch;
 632       edge latch_edge = loop_latch_edge (loop);
 633       int flags = latch_edge->flags;
 634       location_t locus = latch_edge->goto_locus;
 635       gcall *stmt;
 636       gimple_stmt_iterator gsi;
 637
 638       remove_exits_and_undefined_stmts (loop, n_unroll);
 639
 640       /* Unloop destroys the latch edge.  */
 641       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 642
 643       /* Create a new basic block for the latch edge destination and wire
 644          it in.  */
 645       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 646       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 647       latch_edge->probability = profile_probability::never ();
 648       latch_edge->flags |= flags;
 649       latch_edge->goto_locus = locus;
 650
 651       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 652       latch_edge->dest->count = profile_count::zero ();
 653       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 654
 655       gsi = gsi_start_bb (latch_edge->dest);
 656       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 657     }
 658   loops_to_unloop.release ();
 659   loops_to_unloop_nunroll.release ();
 660
 661   /* Remove edges in peeled copies.  Given remove_path removes dominated
 662      regions we need to cope with removal of already removed paths.  */
 663   unsigned i;
 664   edge e;
 665   auto_vec<int, 20> src_bbs;
 666   src_bbs.reserve_exact (edges_to_remove.length ());
 667   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 668     src_bbs.quick_push (e->src->index);
 669   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 670     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
 671       {
 672         bool ok = remove_path (e, irred_invalidated,
 673                                loop_closed_ssa_invalidated);
 674         gcc_assert (ok);
 675       }
 676   edges_to_remove.release ();
 677 }
 678
 679 /* Tries to unroll LOOP completely, i.e. NITER times.
 680    UL determines which loops we are allowed to unroll.
 681    EXIT is the exit of the loop that should be eliminated.
 682    MAXITER specifies a bound on the number of iterations, -1 if it is
 683    not known or too large for HOST_WIDE_INT.  The location
 684    LOCUS corresponding to the loop is used when emitting
 685    a summary of the unroll to the dump file.  */
 686
 687 static bool
 688 try_unroll_loop_completely (class loop *loop,
 689                             edge exit, tree niter, bool may_be_zero,
 690                             enum unroll_level ul,
 691                             HOST_WIDE_INT maxiter,
 692                             dump_user_location_t locus, bool allow_peel)
 693 {
 694   unsigned HOST_WIDE_INT n_unroll = 0;
 695   bool n_unroll_found = false;
 696   edge edge_to_cancel = NULL;
 697
 698   /* See if we proved number of iterations to be low constant.
 699
 700      EXIT is an edge that will be removed in all but last iteration of
 701      the loop.
 702
 703      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
 704      of the unrolled sequence and is expected to make the final loop not
 705      rolling.
 706
 707      If the number of execution of loop is determined by standard induction
 708      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 709      from the iv test.  */
 710   if (tree_fits_uhwi_p (niter))
 711     {
 712       n_unroll = tree_to_uhwi (niter);
 713       n_unroll_found = true;
 714       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 715       if (edge_to_cancel == exit)
 716         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 717     }
 718   /* We do not know the number of iterations and thus we cannot eliminate
 719      the EXIT edge.  */
 720   else
 721     exit = NULL;
 722
 723   /* See if we can improve our estimate by using recorded loop bounds.  */
 724   if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
 725       && maxiter >= 0
 726       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 727     {
 728       n_unroll = maxiter;
 729       n_unroll_found = true;
 730       /* Loop terminates before the IV variable test, so we cannot
 731          remove it in the last iteration.  */
 732       edge_to_cancel = NULL;
 733     }
 734
 735   if (!n_unroll_found)
 736     return false;
 737
 738   if (!loop->unroll
 739       && n_unroll > (unsigned) param_max_completely_peel_times)
 740     {
 741       if (dump_file && (dump_flags & TDF_DETAILS))
 742         fprintf (dump_file, "Not unrolling loop %d "
 743                  "(--param max-completely-peel-times limit reached).\n",
 744                  loop->num);
 745       return false;
 746     }
 747
 748   if (!edge_to_cancel)
 749     edge_to_cancel = loop_edge_to_cancel (loop);
 750
 751   if (n_unroll)
 752     {
 753       if (ul == UL_SINGLE_ITER)
 754         return false;
 755
 756       if (loop->unroll)
 757         {
 758           /* If the unrolling factor is too large, bail out.  */
 759           if (n_unroll > (unsigned)loop->unroll)
 760             {
 761               if (dump_file && (dump_flags & TDF_DETAILS))
 762                 fprintf (dump_file,
 763                          "Not unrolling loop %d: "
 764                          "user didn't want it unrolled completely.\n",
 765                          loop->num);
 766               return false;
 767             }
 768         }
 769       else
 770         {
 771           struct loop_size size;
 772           /* EXIT can be removed only if we are sure it passes first N_UNROLL
 773              iterations.  */
 774           bool remove_exit = (exit && niter
 775                               && TREE_CODE (niter) == INTEGER_CST
 776                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
 777           bool large
 778             = tree_estimate_loop_size
 779                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 780                  param_max_completely_peeled_insns);
 781           if (large)
 782             {
 783               if (dump_file && (dump_flags & TDF_DETAILS))
 784                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 785                          loop->num);
 786               return false;
 787             }
 788
 789           unsigned HOST_WIDE_INT ninsns = size.overall;
 790           unsigned HOST_WIDE_INT unr_insns
 791             = estimated_unrolled_size (&size, n_unroll);
 792           if (dump_file && (dump_flags & TDF_DETAILS))
 793             {
 794               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 795               fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 796                        (int) unr_insns);
 797             }
 798
 799           /* If the code is going to shrink, we don't need to be extra
 800              cautious on guessing if the unrolling is going to be
 801              profitable.  */
 802           if (unr_insns
 803               /* If there is IV variable that will become constant, we
 804                  save one instruction in the loop prologue we do not
 805                  account otherwise.  */
 806               <= ninsns + (size.constant_iv != false))
 807             ;
 808           /* We unroll only inner loops, because we do not consider it
 809              profitable otherwise.  We still can cancel loopback edge
 810              of not rolling loop; this is always a good idea.  */
 811           else if (ul == UL_NO_GROWTH)
 812             {
 813               if (dump_file && (dump_flags & TDF_DETAILS))
 814                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 815                          loop->num);
 816               return false;
 817             }
 818           /* Outer loops tend to be less interesting candidates for
 819              complete unrolling unless we can do a lot of propagation
 820              into the inner loop body.  For now we disable outer loop
 821              unrolling when the code would grow.  */
 822           else if (loop->inner)
 823             {
 824               if (dump_file && (dump_flags & TDF_DETAILS))
 825                 fprintf (dump_file, "Not unrolling loop %d: "
 826                          "it is not innermost and code would grow.\n",
 827                          loop->num);
 828               return false;
 829             }
 830           /* If there is call on a hot path through the loop, then
 831              there is most probably not much to optimize.  */
 832           else if (size.num_non_pure_calls_on_hot_path)
 833             {
 834               if (dump_file && (dump_flags & TDF_DETAILS))
 835                 fprintf (dump_file, "Not unrolling loop %d: "
 836                          "contains call and code would grow.\n",
 837                          loop->num);
 838               return false;
 839             }
 840           /* If there is pure/const call in the function, then we can
 841              still optimize the unrolled loop body if it contains some
 842              other interesting code than the calls and code storing or
 843              cumulating the return value.  */
 844           else if (size.num_pure_calls_on_hot_path
 845                    /* One IV increment, one test, one ivtmp store and
 846                       one useful stmt.  That is about minimal loop
 847                       doing pure call.  */
 848                    && (size.non_call_stmts_on_hot_path
 849                        <= 3 + size.num_pure_calls_on_hot_path))
 850             {
 851               if (dump_file && (dump_flags & TDF_DETAILS))
 852                 fprintf (dump_file, "Not unrolling loop %d: "
 853                          "contains just pure calls and code would grow.\n",
 854                          loop->num);
 855               return false;
 856             }
 857           /* Complete unrolling is major win when control flow is
 858              removed and one big basic block is created.  If the loop
 859              contains control flow the optimization may still be a win
 860              because of eliminating the loop overhead but it also may
 861              blow the branch predictor tables.  Limit number of
 862              branches on the hot path through the peeled sequence.  */
 863           else if (size.num_branches_on_hot_path * (int)n_unroll
 864                    > param_max_peel_branches)
 865             {
 866               if (dump_file && (dump_flags & TDF_DETAILS))
 867                 fprintf (dump_file, "Not unrolling loop %d: "
 868                          "number of branches on hot path in the unrolled "
 869                          "sequence reaches --param max-peel-branches limit.\n",
 870                          loop->num);
 871               return false;
 872             }
 873           else if (unr_insns
 874                    > (unsigned) param_max_completely_peeled_insns)
 875             {
 876               if (dump_file && (dump_flags & TDF_DETAILS))
 877                 fprintf (dump_file, "Not unrolling loop %d: "
 878                          "number of insns in the unrolled sequence reaches "
 879                          "--param max-completely-peeled-insns limit.\n",
 880                          loop->num);
 881               return false;
 882             }
 883         }
 884
 885       if (!dbg_cnt (gimple_unroll))
 886         return false;
 887
 888       initialize_original_copy_tables ();
 889       auto_sbitmap wont_exit (n_unroll + 1);
 890       if (exit && niter
 891           && TREE_CODE (niter) == INTEGER_CST
 892           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 893         {
 894           bitmap_ones (wont_exit);
 895           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 896               || edge_to_cancel)
 897             bitmap_clear_bit (wont_exit, 0);
 898         }
 899       else
 900         {
 901           exit = NULL;
 902           bitmap_clear (wont_exit);
 903         }
 904       if (may_be_zero)
 905         bitmap_clear_bit (wont_exit, 1);
 906
 907       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 908                                                  n_unroll, wont_exit,
 909                                                  exit, &edges_to_remove,
 910                                                  DLTHE_FLAG_UPDATE_FREQ
 911                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 912         {
 913           free_original_copy_tables ();
 914           if (dump_file && (dump_flags & TDF_DETAILS))
 915             fprintf (dump_file, "Failed to duplicate the loop\n");
 916           return false;
 917         }
 918
 919       free_original_copy_tables ();
 920     }
 921
 922   /* Remove the conditional from the last copy of the loop.  */
 923   if (edge_to_cancel)
 924     {
 925       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
 926       force_edge_cold (edge_to_cancel, true);
 927       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 928         gimple_cond_make_false (cond);
 929       else
 930         gimple_cond_make_true (cond);
 931       update_stmt (cond);
 932       /* Do not remove the path, as doing so may remove outer loop and
 933          confuse bookkeeping code in tree_unroll_loops_completely.  */
 934     }
 935
 936   /* Store the loop for later unlooping and exit removal.  */
 937   loops_to_unloop.safe_push (loop);
 938   loops_to_unloop_nunroll.safe_push (n_unroll);
 939
 940   if (dump_enabled_p ())
 941     {
 942       if (!n_unroll)
 943         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 944                          "loop turned into non-loop; it never loops\n");
 945       else
 946         {
 947           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 948                            "loop with %d iterations completely unrolled",
 949                            (int) n_unroll);
 950           if (loop->header->count.initialized_p ())
 951             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 952                          " (header execution count %d)",
 953                          (int)loop->header->count.to_gcov_type ());
 954           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 955         }
 956     }
 957
 958   if (dump_file && (dump_flags & TDF_DETAILS))
 959     {
 960       if (exit)
 961         fprintf (dump_file, "Exit condition of peeled iterations was "
 962                  "eliminated.\n");
 963       if (edge_to_cancel)
 964         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
 965       else
 966         fprintf (dump_file, "Latch of last iteration was marked by "
 967                  "__builtin_unreachable ().\n");
 968     }
 969
 970   return true;
 971 }
 972
 973 /* Return number of instructions after peeling.  */
 974 static unsigned HOST_WIDE_INT
 975 estimated_peeled_sequence_size (struct loop_size *size,
 976                                 unsigned HOST_WIDE_INT npeel)
 977 {
 978   return MAX (npeel * (HOST_WIDE_INT) (size->overall
 979                                        - size->eliminated_by_peeling), 1);
 980 }
 981
 982 /* If the loop is expected to iterate N times and is
 983    small enough, duplicate the loop body N+1 times before
 984    the loop itself.  This way the hot path will never
 985    enter the loop.
 986    Parameters are the same as for try_unroll_loops_completely */
 987
 988 static bool
 989 try_peel_loop (class loop *loop,
 990                edge exit, tree niter, bool may_be_zero,
 991                HOST_WIDE_INT maxiter)
 992 {
 993   HOST_WIDE_INT npeel;
 994   struct loop_size size;
 995   int peeled_size;
 996
 997   if (!flag_peel_loops
 998       || param_max_peel_times <= 0
 999       || !peeled_loops)
1000     return false;
1001
1002   if (bitmap_bit_p (peeled_loops, loop->num))
1003     {
1004       if (dump_file)
1005         fprintf (dump_file, "Not peeling: loop is already peeled\n");
1006       return false;
1007     }
1008
1009   /* We don't peel loops that will be unrolled as this can duplicate a
1010      loop more times than the user requested.  */
1011   if (loop->unroll)
1012     {
1013       if (dump_file)
1014         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1015       return false;
1016     }
1017
1018   /* Peel only innermost loops.
1019      While the code is perfectly capable of peeling non-innermost loops,
1020      the heuristics would probably need some improvements. */
1021   if (loop->inner)
1022     {
1023       if (dump_file)
1024         fprintf (dump_file, "Not peeling: outer loop\n");
1025       return false;
1026     }
1027
1028   if (!optimize_loop_for_speed_p (loop))
1029     {
1030       if (dump_file)
1031         fprintf (dump_file, "Not peeling: cold loop\n");
1032       return false;
1033     }
1034
1035   /* Check if there is an estimate on the number of iterations.  */
1036   npeel = estimated_loop_iterations_int (loop);
1037   if (npeel < 0)
1038     npeel = likely_max_loop_iterations_int (loop);
1039   if (npeel < 0)
1040     {
1041       if (dump_file)
1042         fprintf (dump_file, "Not peeling: number of iterations is not "
1043                  "estimated\n");
1044       return false;
1045     }
1046   if (maxiter >= 0 && maxiter <= npeel)
1047     {
1048       if (dump_file)
1049         fprintf (dump_file, "Not peeling: upper bound is known so can "
1050                  "unroll completely\n");
1051       return false;
1052     }
1053
1054   /* We want to peel estimated number of iterations + 1 (so we never
1055      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1056      and be sure to avoid overflows.  */
1057   if (npeel > param_max_peel_times - 1)
1058     {
1059       if (dump_file)
1060         fprintf (dump_file, "Not peeling: rolls too much "
1061                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1062       return false;
1063     }
1064   npeel++;
1065
1066   /* Check peeled loops size.  */
1067   tree_estimate_loop_size (loop, exit, NULL, &size,
1068                            param_max_peeled_insns);
1069   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1070       > param_max_peeled_insns)
1071     {
1072       if (dump_file)
1073         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1074                  "(%i insns > --param max-peel-insns)", peeled_size);
1075       return false;
1076     }
1077
1078   if (!dbg_cnt (gimple_unroll))
1079     return false;
1080
1081   /* Duplicate possibly eliminating the exits.  */
1082   initialize_original_copy_tables ();
1083   auto_sbitmap wont_exit (npeel + 1);
1084   if (exit && niter
1085       && TREE_CODE (niter) == INTEGER_CST
1086       && wi::leu_p (npeel, wi::to_widest (niter)))
1087     {
1088       bitmap_ones (wont_exit);
1089       bitmap_clear_bit (wont_exit, 0);
1090     }
1091   else
1092     {
1093       exit = NULL;
1094       bitmap_clear (wont_exit);
1095     }
1096   if (may_be_zero)
1097     bitmap_clear_bit (wont_exit, 1);
1098   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1099                                              npeel, wont_exit,
1100                                              exit, &edges_to_remove,
1101                                              DLTHE_FLAG_UPDATE_FREQ))
1102     {
1103       free_original_copy_tables ();
1104       return false;
1105     }
1106   free_original_copy_tables ();
1107   if (dump_file && (dump_flags & TDF_DETAILS))
1108     {
1109       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1110                loop->num, (int) npeel);
1111     }
1112   if (loop->any_estimate)
1113     {
1114       if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1115         loop->nb_iterations_estimate -= npeel;
1116       else
1117         loop->nb_iterations_estimate = 0;
1118     }
1119   if (loop->any_upper_bound)
1120     {
1121       if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
1122         loop->nb_iterations_upper_bound -= npeel;
1123       else
1124         loop->nb_iterations_upper_bound = 0;
1125     }
1126   if (loop->any_likely_upper_bound)
1127     {
1128       if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
1129         loop->nb_iterations_likely_upper_bound -= npeel;
1130       else
1131         {
1132           loop->any_estimate = true;
1133           loop->nb_iterations_estimate = 0;
1134           loop->nb_iterations_likely_upper_bound = 0;
1135         }
1136     }
1137   profile_count entry_count = profile_count::zero ();
1138
1139   edge e;
1140   edge_iterator ei;
1141   FOR_EACH_EDGE (e, ei, loop->header->preds)
1142     if (e->src != loop->latch)
1143       {
1144         if (e->src->count.initialized_p ())
1145           entry_count += e->src->count;
1146         gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1147       }
1148   profile_probability p;
1149   p = entry_count.probability_in (loop->header->count);
1150   scale_loop_profile (loop, p, 0);
1151   bitmap_set_bit (peeled_loops, loop->num);
1152   return true;
1153 }
1154 /* Adds a canonical induction variable to LOOP if suitable.
1155    CREATE_IV is true if we may create a new iv.  UL determines
1156    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1157    to determine the number of iterations of a loop by direct evaluation.
1158    Returns true if cfg is changed.   */
1159
1160 static bool
1161 canonicalize_loop_induction_variables (class loop *loop,
1162                                        bool create_iv, enum unroll_level ul,
1163                                        bool try_eval, bool allow_peel)
1164 {
1165   edge exit = NULL;
1166   tree niter;
1167   HOST_WIDE_INT maxiter;
1168   bool modified = false;
1169   dump_user_location_t locus;
1170   class tree_niter_desc niter_desc;
1171   bool may_be_zero = false;
1172
1173   /* For unrolling allow conditional constant or zero iterations, thus
1174      perform loop-header copying on-the-fly.  */
1175   exit = single_exit (loop);
1176   niter = chrec_dont_know;
1177   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1178     {
1179       niter = niter_desc.niter;
1180       may_be_zero
1181         = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1182     }
1183   if (TREE_CODE (niter) == INTEGER_CST)
1184     locus = last_stmt (exit->src);
1185   else
1186     {
1187       /* For non-constant niter fold may_be_zero into niter again.  */
1188       if (may_be_zero)
1189         {
1190           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1191             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1192                                  niter_desc.may_be_zero,
1193                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1194           else
1195             niter = chrec_dont_know;
1196           may_be_zero = false;
1197         }
1198
1199       /* If the loop has more than one exit, try checking all of them
1200          for # of iterations determinable through scev.  */
1201       if (!exit)
1202         niter = find_loop_niter (loop, &exit);
1203
1204       /* Finally if everything else fails, try brute force evaluation.  */
1205       if (try_eval
1206           && (chrec_contains_undetermined (niter)
1207               || TREE_CODE (niter) != INTEGER_CST))
1208         niter = find_loop_niter_by_eval (loop, &exit);
1209
1210       if (exit)
1211         locus = last_stmt (exit->src);
1212
1213       if (TREE_CODE (niter) != INTEGER_CST)
1214         exit = NULL;
1215     }
1216
1217   /* We work exceptionally hard here to estimate the bound
1218      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1219   if (niter && TREE_CODE (niter) == INTEGER_CST)
1220     {
1221       auto_vec<edge> exits = get_loop_exit_edges  (loop);
1222       record_niter_bound (loop, wi::to_widest (niter),
1223                           exit == single_likely_exit (loop, exits), true);
1224     }
1225
1226   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1227   maxiter = max_loop_iterations_int (loop);
1228
1229   if (dump_file && (dump_flags & TDF_DETAILS)
1230       && TREE_CODE (niter) == INTEGER_CST)
1231     {
1232       fprintf (dump_file, "Loop %d iterates ", loop->num);
1233       print_generic_expr (dump_file, niter, TDF_SLIM);
1234       fprintf (dump_file, " times.\n");
1235     }
1236   if (dump_file && (dump_flags & TDF_DETAILS)
1237       && maxiter >= 0)
1238     {
1239       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1240                (int)maxiter);
1241     }
1242   if (dump_file && (dump_flags & TDF_DETAILS)
1243       && likely_max_loop_iterations_int (loop) >= 0)
1244     {
1245       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1246                loop->num, (int)likely_max_loop_iterations_int (loop));
1247     }
1248
1249   /* Remove exits that are known to be never taken based on loop bound.
1250      Needs to be called after compilation of max_loop_iterations_int that
1251      populates the loop bounds.  */
1252   modified |= remove_redundant_iv_tests (loop);
1253
1254   if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1255                                   maxiter, locus, allow_peel))
1256     return true;
1257
1258   if (create_iv
1259       && niter && !chrec_contains_undetermined (niter)
1260       && exit && just_once_each_iteration_p (loop, exit->src))
1261     {
1262       tree iv_niter = niter;
1263       if (may_be_zero)
1264         {
1265           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1266             iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1267                                     niter_desc.may_be_zero,
1268                                     build_int_cst (TREE_TYPE (iv_niter), 0),
1269                                     iv_niter);
1270           else
1271             iv_niter = NULL_TREE;
1272         }
1273       if (iv_niter)
1274         create_canonical_iv (loop, exit, iv_niter);
1275     }
1276
1277   if (ul == UL_ALL)
1278     modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1279
1280   return modified;
1281 }
1282
1283 /* The main entry point of the pass.  Adds canonical induction variables
1284    to the suitable loops.  */
1285
1286 unsigned int
1287 canonicalize_induction_variables (void)
1288 {
1289   class loop *loop;
1290   bool changed = false;
1291   bool irred_invalidated = false;
1292   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1293
1294   estimate_numbers_of_iterations (cfun);
1295
1296   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1297     {
1298       changed |= canonicalize_loop_induction_variables (loop,
1299                                                         true, UL_SINGLE_ITER,
1300                                                         true, false);
1301     }
1302   gcc_assert (!need_ssa_update_p (cfun));
1303
1304   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1305   if (irred_invalidated
1306       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1307     mark_irreducible_loops ();
1308
1309   /* Clean up the information about numbers of iterations, since brute force
1310      evaluation could reveal new information.  */
1311   free_numbers_of_iterations_estimates (cfun);
1312   scev_reset ();
1313
1314   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1315     {
1316       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1317       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1318     }
1319   BITMAP_FREE (loop_closed_ssa_invalidated);
1320
1321   if (changed)
1322     return TODO_cleanup_cfg;
1323   return 0;
1324 }
1325
1326 /* Process loops from innermost to outer, stopping at the innermost
1327    loop we unrolled.  */
1328
1329 static bool
1330 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1331                                 bitmap father_bbs, class loop *loop)
1332 {
1333   class loop *loop_father;
1334   bool changed = false;
1335   class loop *inner;
1336   enum unroll_level ul;
1337   unsigned num = number_of_loops (cfun);
1338
1339   /* Process inner loops first.  Don't walk loops added by the recursive
1340      calls because SSA form is not up-to-date.  They can be handled in the
1341      next iteration.  */
1342   bitmap child_father_bbs = NULL;
1343   for (inner = loop->inner; inner != NULL; inner = inner->next)
1344     if ((unsigned) inner->num < num)
1345       {
1346         if (!child_father_bbs)
1347           child_father_bbs = BITMAP_ALLOC (NULL);
1348         if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1349                                             child_father_bbs, inner))
1350           {
1351             bitmap_ior_into (father_bbs, child_father_bbs);
1352             bitmap_clear (child_father_bbs);
1353             changed = true;
1354           }
1355       }
1356   if (child_father_bbs)
1357     BITMAP_FREE (child_father_bbs);
1358
1359   /* If we changed an inner loop we cannot process outer loops in this
1360      iteration because SSA form is not up-to-date.  Continue with
1361      siblings of outer loops instead.  */
1362   if (changed)
1363     {
1364       /* If we are recorded as father clear all other fathers that
1365          are necessarily covered already to avoid redundant work.  */
1366       if (bitmap_bit_p (father_bbs, loop->header->index))
1367         {
1368           bitmap_clear (father_bbs);
1369           bitmap_set_bit (father_bbs, loop->header->index);
1370         }
1371       return true;
1372     }
1373
1374   /* Don't unroll #pragma omp simd loops until the vectorizer
1375      attempts to vectorize those.  */
1376   if (loop->force_vectorize)
1377     return false;
1378
1379   /* Try to unroll this loop.  */
1380   loop_father = loop_outer (loop);
1381   if (!loop_father)
1382     return false;
1383
1384   if (loop->unroll > 1)
1385     ul = UL_ALL;
1386   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1387       /* Unroll outermost loops only if asked to do so or they do
1388          not cause code growth.  */
1389       && (unroll_outer || loop_outer (loop_father)))
1390     ul = UL_ALL;
1391   else
1392     ul = UL_NO_GROWTH;
1393
1394   if (canonicalize_loop_induction_variables
1395         (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
1396     {
1397       /* If we'll continue unrolling, we need to propagate constants
1398          within the new basic blocks to fold away induction variable
1399          computations; otherwise, the size might blow up before the
1400          iteration is complete and the IR eventually cleaned up.  */
1401       if (loop_outer (loop_father))
1402         {
1403           /* Once we process our father we will have processed
1404              the fathers of our children as well, so avoid doing
1405              redundant work and clear fathers we've gathered sofar.  */
1406           bitmap_clear (father_bbs);
1407           bitmap_set_bit (father_bbs, loop_father->header->index);
1408         }
1409       else if (unroll_outer)
1410         /* Trigger scalar cleanup once any outermost loop gets unrolled.  */
1411         cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1412
1413       return true;
1414     }
1415
1416   return false;
1417 }
1418
1419 /* Unroll LOOPS completely if they iterate just few times.  Unless
1420    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1421    size of the code does not increase.  */
1422
1423 static unsigned int
1424 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1425 {
1426   bitmap father_bbs = BITMAP_ALLOC (NULL);
1427   bool changed;
1428   int iteration = 0;
1429   bool irred_invalidated = false;
1430
1431   estimate_numbers_of_iterations (cfun);
1432
1433   do
1434     {
1435       changed = false;
1436       bitmap loop_closed_ssa_invalidated = NULL;
1437
1438       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1439         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1440
1441       free_numbers_of_iterations_estimates (cfun);
1442       estimate_numbers_of_iterations (cfun);
1443
1444       changed = tree_unroll_loops_completely_1 (may_increase_size,
1445                                                 unroll_outer, father_bbs,
1446                                                 current_loops->tree_root);
1447       if (changed)
1448         {
1449           unsigned i;
1450
1451           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1452
1453           /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
1454           if (loop_closed_ssa_invalidated
1455               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1456             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1457                                           TODO_update_ssa);
1458           else
1459             update_ssa (TODO_update_ssa);
1460
1461           /* father_bbs is a bitmap of loop father header BB indices.
1462              Translate that to what non-root loops these BBs belong to now.  */
1463           bitmap_iterator bi;
1464           bitmap fathers = BITMAP_ALLOC (NULL);
1465           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1466             {
1467               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1468               if (! unrolled_loop_bb)
1469                 continue;
1470               if (loop_outer (unrolled_loop_bb->loop_father))
1471                 bitmap_set_bit (fathers,
1472                                 unrolled_loop_bb->loop_father->num);
1473             }
1474           bitmap_clear (father_bbs);
1475           /* Propagate the constants within the new basic blocks.  */
1476           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1477             {
1478               loop_p father = get_loop (cfun, i);
1479               bitmap exit_bbs = BITMAP_ALLOC (NULL);
1480               loop_exit *exit = father->exits->next;
1481               while (exit->e)
1482                 {
1483                   bitmap_set_bit (exit_bbs, exit->e->dest->index);
1484                   exit = exit->next;
1485                 }
1486               do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1487             }
1488           BITMAP_FREE (fathers);
1489
1490           /* This will take care of removing completely unrolled loops
1491              from the loop structures so we can continue unrolling now
1492              innermost loops.  */
1493           if (cleanup_tree_cfg ())
1494             update_ssa (TODO_update_ssa_only_virtuals);
1495
1496           /* Clean up the information about numbers of iterations, since
1497              complete unrolling might have invalidated it.  */
1498           scev_reset ();
1499           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1500             verify_loop_closed_ssa (true);
1501         }
1502       if (loop_closed_ssa_invalidated)
1503         BITMAP_FREE (loop_closed_ssa_invalidated);
1504     }
1505   while (changed
1506          && ++iteration <= param_max_unroll_iterations);
1507
1508   BITMAP_FREE (father_bbs);
1509
1510   if (irred_invalidated
1511       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1512     mark_irreducible_loops ();
1513
1514   return 0;
1515 }
1516
1517 /* Canonical induction variable creation pass.  */
1518
1519 namespace {
1520
1521 const pass_data pass_data_iv_canon =
1522 {
1523   GIMPLE_PASS, /* type */
1524   "ivcanon", /* name */
1525   OPTGROUP_LOOP, /* optinfo_flags */
1526   TV_TREE_LOOP_IVCANON, /* tv_id */
1527   ( PROP_cfg | PROP_ssa ), /* properties_required */
1528   0, /* properties_provided */
1529   0, /* properties_destroyed */
1530   0, /* todo_flags_start */
1531   0, /* todo_flags_finish */
1532 };
1533
1534 class pass_iv_canon : public gimple_opt_pass
1535 {
1536 public:
1537   pass_iv_canon (gcc::context *ctxt)
1538     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1539   {}
1540
1541   /* opt_pass methods: */
1542   virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1543   virtual unsigned int execute (function *fun);
1544
1545 }; // class pass_iv_canon
1546
1547 unsigned int
1548 pass_iv_canon::execute (function *fun)
1549 {
1550   if (number_of_loops (fun) <= 1)
1551     return 0;
1552
1553   return canonicalize_induction_variables ();
1554 }
1555
1556 } // anon namespace
1557
1558 gimple_opt_pass *
1559 make_pass_iv_canon (gcc::context *ctxt)
1560 {
1561   return new pass_iv_canon (ctxt);
1562 }
1563
1564 /* Complete unrolling of loops.  */
1565
1566 namespace {
1567
1568 const pass_data pass_data_complete_unroll =
1569 {
1570   GIMPLE_PASS, /* type */
1571   "cunroll", /* name */
1572   OPTGROUP_LOOP, /* optinfo_flags */
1573   TV_COMPLETE_UNROLL, /* tv_id */
1574   ( PROP_cfg | PROP_ssa ), /* properties_required */
1575   0, /* properties_provided */
1576   0, /* properties_destroyed */
1577   0, /* todo_flags_start */
1578   0, /* todo_flags_finish */
1579 };
1580
1581 class pass_complete_unroll : public gimple_opt_pass
1582 {
1583 public:
1584   pass_complete_unroll (gcc::context *ctxt)
1585     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1586   {}
1587
1588   /* opt_pass methods: */
1589   virtual unsigned int execute (function *);
1590
1591 }; // class pass_complete_unroll
1592
1593 unsigned int
1594 pass_complete_unroll::execute (function *fun)
1595 {
1596   if (number_of_loops (fun) <= 1)
1597     return 0;
1598
1599   /* If we ever decide to run loop peeling more than once, we will need to
1600      track loops already peeled in loop structures themselves to avoid
1601      re-peeling the same loop multiple times.  */
1602   if (flag_peel_loops)
1603     peeled_loops = BITMAP_ALLOC (NULL);
1604   unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size,
1605                                                    true);
1606   if (peeled_loops)
1607     {
1608       BITMAP_FREE (peeled_loops);
1609       peeled_loops = NULL;
1610     }
1611   return val;
1612 }
1613
1614 } // anon namespace
1615
1616 gimple_opt_pass *
1617 make_pass_complete_unroll (gcc::context *ctxt)
1618 {
1619   return new pass_complete_unroll (ctxt);
1620 }
1621
1622 /* Complete unrolling of inner loops.  */
1623
1624 namespace {
1625
1626 const pass_data pass_data_complete_unrolli =
1627 {
1628   GIMPLE_PASS, /* type */
1629   "cunrolli", /* name */
1630   OPTGROUP_LOOP, /* optinfo_flags */
1631   TV_COMPLETE_UNROLL, /* tv_id */
1632   ( PROP_cfg | PROP_ssa ), /* properties_required */
1633   0, /* properties_provided */
1634   0, /* properties_destroyed */
1635   0, /* todo_flags_start */
1636   0, /* todo_flags_finish */
1637 };
1638
1639 class pass_complete_unrolli : public gimple_opt_pass
1640 {
1641 public:
1642   pass_complete_unrolli (gcc::context *ctxt)
1643     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1644   {}
1645
1646   /* opt_pass methods: */
1647   virtual bool gate (function *) { return optimize >= 2; }
1648   virtual unsigned int execute (function *);
1649
1650 }; // class pass_complete_unrolli
1651
1652 unsigned int
1653 pass_complete_unrolli::execute (function *fun)
1654 {
1655   unsigned ret = 0;
1656
1657   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1658   if (number_of_loops (fun) > 1)
1659     {
1660       scev_initialize ();
1661       ret = tree_unroll_loops_completely (optimize >= 3, false);
1662       scev_finalize ();
1663     }
1664   loop_optimizer_finalize ();
1665
1666   return ret;
1667 }
1668
1669 } // anon namespace
1670
1671 gimple_opt_pass *
1672 make_pass_complete_unrolli (gcc::context *ctxt)
1673 {
1674   return new pass_complete_unrolli (ctxt);
1675 }
1676
1677