gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.
  68
  69    For the targets supporting low-overhead loops, IVOPTs has to take care of
  70    the loops which will probably be transformed in RTL doloop optimization,
  71    to try to make selected IV candidate set optimal.  The process of doloop
  72    support includes:
  73
   1) Analyze whether the current loop will be transformed to a doloop; if so,
      find and mark its compare type IV use as the doloop use (iv_group field
      doloop_p), and set flag doloop_use_p of ivopts_data to notify subsequent
      processing of the doloop.  See analyze_and_mark_doloop_use and its
      callees for the details.  The target hook predict_doloop_p can be used
      for target specific checks.
  79
  80    2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
  81       set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
  82       like biv.  For cost determination between doloop IV cand and IV use, the
  83       target hooks doloop_cost_for_generic and doloop_cost_for_address are
  84       provided to add on extra costs for generic type and address type IV use.
  85       Zero cost is assigned to the pair between doloop IV cand and doloop IV
  86       use, and bound zero is set for IV elimination.
  87
  88    3) With the cost setting in step 2), the current cost model based IV
  89       selection algorithm will process as usual, pick up doloop dedicated IV if
  90       profitable.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "rtl.h"
  97 #include "tree.h"
  98 #include "gimple.h"
  99 #include "cfghooks.h"
 100 #include "tree-pass.h"
 101 #include "memmodel.h"
 102 #include "tm_p.h"
 103 #include "ssa.h"
 104 #include "expmed.h"
 105 #include "insn-config.h"
 106 #include "emit-rtl.h"
 107 #include "recog.h"
 108 #include "cgraph.h"
 109 #include "gimple-pretty-print.h"
 110 #include "alias.h"
 111 #include "fold-const.h"
 112 #include "stor-layout.h"
 113 #include "tree-eh.h"
 114 #include "gimplify.h"
 115 #include "gimple-iterator.h"
 116 #include "gimplify-me.h"
 117 #include "tree-cfg.h"
 118 #include "tree-ssa-loop-ivopts.h"
 119 #include "tree-ssa-loop-manip.h"
 120 #include "tree-ssa-loop-niter.h"
 121 #include "tree-ssa-loop.h"
 122 #include "explow.h"
 123 #include "expr.h"
 124 #include "tree-dfa.h"
 125 #include "tree-ssa.h"
 126 #include "cfgloop.h"
 127 #include "tree-scalar-evolution.h"
 128 #include "tree-affine.h"
 129 #include "tree-ssa-propagate.h"
 130 #include "tree-ssa-address.h"
 131 #include "builtins.h"
 132 #include "tree-vectorizer.h"
 133 #include "dbgcnt.h"
 134
 135 /* For lang_hooks.types.type_for_mode.  */
 136 #include "langhooks.h"
 137
 138 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 139    cost of different addressing modes.  This should be moved to a TBD
 140    interface between the GIMPLE and RTL worlds.  */
 141
/* The infinite cost.  A comp_cost whose runtime cost equals this value is
   treated as infinite (see comp_cost::infinite_cost_p), and the comp_cost
   arithmetic operators assert that finite results stay below it.  */
#define INFTY 1000000000
 144
 145 /* Returns the expected number of loop iterations for LOOP.
 146    The average trip count is computed from profile data if it
 147    exists. */
 148
 149 static inline HOST_WIDE_INT
 150 avg_loop_niter (class loop *loop)
 151 {
 152   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 153   if (niter == -1)
 154     {
 155       niter = likely_max_stmt_executions_int (loop);
 156
 157       if (niter == -1 || niter > param_avg_loop_niter)
 158         return param_avg_loop_niter;
 159     }
 160
 161   return niter;
 162 }
 163
 164 struct iv_use;
 165
 166 /* Representation of the induction variable.  */
 167 struct iv
 168 {
 169   tree base;            /* Initial value of the iv.  */
 170   tree base_object;     /* A memory object to that the induction variable points.  */
 171   tree step;            /* Step of the iv (constant only).  */
 172   tree ssa_name;        /* The ssa name with the value.  */
 173   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 174   bool biv_p;           /* Is it a biv?  */
 175   bool no_overflow;     /* True if the iv doesn't overflow.  */
 176   bool have_address_use;/* For biv, indicate if it's used in any address
 177                            type use.  */
 178 };
 179
 180 /* Per-ssa version information (induction variable descriptions, etc.).  */
 181 struct version_info
 182 {
 183   tree name;            /* The ssa name.  */
 184   struct iv *iv;        /* Induction variable description.  */
 185   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 186                            an expression that is not an induction variable.  */
 187   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 188   unsigned inv_id;      /* Id of an invariant.  */
 189 };
 190
 191 /* Types of uses.  */
 192 enum use_type
 193 {
 194   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 195   USE_REF_ADDRESS,      /* Use is an address for an explicit memory
 196                            reference.  */
 197   USE_PTR_ADDRESS,      /* Use is a pointer argument to a function in
 198                            cases where the expansion of the function
 199                            will turn the argument into a normal address.  */
 200   USE_COMPARE           /* Use is a compare.  */
 201 };
 202
 203 /* Cost of a computation.  */
 204 class comp_cost
 205 {
 206 public:
 207   comp_cost (): cost (0), complexity (0), scratch (0)
 208   {}
 209
 210   comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
 211     : cost (cost), complexity (complexity), scratch (scratch)
 212   {}
 213
 214   /* Returns true if COST is infinite.  */
 215   bool infinite_cost_p ();
 216
 217   /* Adds costs COST1 and COST2.  */
 218   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 219
 220   /* Adds COST to the comp_cost.  */
 221   comp_cost operator+= (comp_cost cost);
 222
 223   /* Adds constant C to this comp_cost.  */
 224   comp_cost operator+= (HOST_WIDE_INT c);
 225
 226   /* Subtracts constant C to this comp_cost.  */
 227   comp_cost operator-= (HOST_WIDE_INT c);
 228
 229   /* Divide the comp_cost by constant C.  */
 230   comp_cost operator/= (HOST_WIDE_INT c);
 231
 232   /* Multiply the comp_cost by constant C.  */
 233   comp_cost operator*= (HOST_WIDE_INT c);
 234
 235   /* Subtracts costs COST1 and COST2.  */
 236   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 237
 238   /* Subtracts COST from this comp_cost.  */
 239   comp_cost operator-= (comp_cost cost);
 240
 241   /* Returns true if COST1 is smaller than COST2.  */
 242   friend bool operator< (comp_cost cost1, comp_cost cost2);
 243
 244   /* Returns true if COST1 and COST2 are equal.  */
 245   friend bool operator== (comp_cost cost1, comp_cost cost2);
 246
 247   /* Returns true if COST1 is smaller or equal than COST2.  */
 248   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 249
 250   int64_t cost;         /* The runtime cost.  */
 251   unsigned complexity;  /* The estimate of the complexity of the code for
 252                            the computation (in no concrete units --
 253                            complexity field should be larger for more
 254                            complex expressions and addressing modes).  */
 255   int64_t scratch;      /* Scratch used during cost computation.  */
 256 };
 257
/* The zero cost: every field is zero-initialized by the default
   constructor.  */
static const comp_cost no_cost;
/* The infinite cost: runtime cost and scratch are INFTY, complexity is
   zero.  */
static const comp_cost infinite_cost (INFTY, 0, INFTY);
 260
 261 bool
 262 comp_cost::infinite_cost_p ()
 263 {
 264   return cost == INFTY;
 265 }
 266
 267 comp_cost
 268 operator+ (comp_cost cost1, comp_cost cost2)
 269 {
 270   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 271     return infinite_cost;
 272
 273   gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
 274   cost1.cost += cost2.cost;
 275   cost1.complexity += cost2.complexity;
 276
 277   return cost1;
 278 }
 279
 280 comp_cost
 281 operator- (comp_cost cost1, comp_cost cost2)
 282 {
 283   if (cost1.infinite_cost_p ())
 284     return infinite_cost;
 285
 286   gcc_assert (!cost2.infinite_cost_p ());
 287   gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
 288
 289   cost1.cost -= cost2.cost;
 290   cost1.complexity -= cost2.complexity;
 291
 292   return cost1;
 293 }
 294
 295 comp_cost
 296 comp_cost::operator+= (comp_cost cost)
 297 {
 298   *this = *this + cost;
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator+= (HOST_WIDE_INT c)
 304 {
 305   if (c >= INFTY)
 306     this->cost = INFTY;
 307
 308   if (infinite_cost_p ())
 309     return *this;
 310
 311   gcc_assert (this->cost + c < infinite_cost.cost);
 312   this->cost += c;
 313
 314   return *this;
 315 }
 316
 317 comp_cost
 318 comp_cost::operator-= (HOST_WIDE_INT c)
 319 {
 320   if (infinite_cost_p ())
 321     return *this;
 322
 323   gcc_assert (this->cost - c < infinite_cost.cost);
 324   this->cost -= c;
 325
 326   return *this;
 327 }
 328
 329 comp_cost
 330 comp_cost::operator/= (HOST_WIDE_INT c)
 331 {
 332   gcc_assert (c != 0);
 333   if (infinite_cost_p ())
 334     return *this;
 335
 336   this->cost /= c;
 337
 338   return *this;
 339 }
 340
 341 comp_cost
 342 comp_cost::operator*= (HOST_WIDE_INT c)
 343 {
 344   if (infinite_cost_p ())
 345     return *this;
 346
 347   gcc_assert (this->cost * c < infinite_cost.cost);
 348   this->cost *= c;
 349
 350   return *this;
 351 }
 352
 353 comp_cost
 354 comp_cost::operator-= (comp_cost cost)
 355 {
 356   *this = *this - cost;
 357   return *this;
 358 }
 359
 360 bool
 361 operator< (comp_cost cost1, comp_cost cost2)
 362 {
 363   if (cost1.cost == cost2.cost)
 364     return cost1.complexity < cost2.complexity;
 365
 366   return cost1.cost < cost2.cost;
 367 }
 368
 369 bool
 370 operator== (comp_cost cost1, comp_cost cost2)
 371 {
 372   return cost1.cost == cost2.cost
 373     && cost1.complexity == cost2.complexity;
 374 }
 375
 376 bool
 377 operator<= (comp_cost cost1, comp_cost cost2)
 378 {
 379   return cost1 < cost2 || cost1 == cost2;
 380 }
 381
 382 struct iv_inv_expr_ent;
 383
 384 /* The candidate - cost pair.  */
 385 class cost_pair
 386 {
 387 public:
 388   struct iv_cand *cand; /* The candidate.  */
 389   comp_cost cost;       /* The cost.  */
 390   enum tree_code comp;  /* For iv elimination, the comparison.  */
 391   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 392                            preserved when representing iv_use with iv_cand.  */
 393   bitmap inv_exprs;     /* The list of newly created invariant expressions
 394                            when representing iv_use with iv_cand.  */
 395   tree value;           /* For final value elimination, the expression for
 396                            the final value of the iv.  For iv elimination,
 397                            the new bound to compare with.  */
 398 };
 399
 400 /* Use.  */
 401 struct iv_use
 402 {
 403   unsigned id;          /* The id of the use.  */
 404   unsigned group_id;    /* The group id the use belongs to.  */
 405   enum use_type type;   /* Type of the use.  */
 406   tree mem_type;        /* The memory type to use when testing whether an
 407                            address is legitimate, and what the address's
 408                            cost is.  */
 409   struct iv *iv;        /* The induction variable it is based on.  */
 410   gimple *stmt;         /* Statement in that it occurs.  */
 411   tree *op_p;           /* The place where it occurs.  */
 412
 413   tree addr_base;       /* Base address with const offset stripped.  */
 414   poly_uint64_pod addr_offset;
 415                         /* Const offset stripped from base address.  */
 416 };
 417
 418 /* Group of uses.  */
 419 struct iv_group
 420 {
 421   /* The id of the group.  */
 422   unsigned id;
 423   /* Uses of the group are of the same type.  */
 424   enum use_type type;
 425   /* The set of "related" IV candidates, plus the important ones.  */
 426   bitmap related_cands;
 427   /* Number of IV candidates in the cost_map.  */
 428   unsigned n_map_members;
 429   /* The costs wrto the iv candidates.  */
 430   class cost_pair *cost_map;
 431   /* The selected candidate for the group.  */
 432   struct iv_cand *selected;
 433   /* To indicate this is a doloop use group.  */
 434   bool doloop_p;
 435   /* Uses in the group.  */
 436   vec<struct iv_use *> vuses;
 437 };
 438
 439 /* The position where the iv is computed.  */
 440 enum iv_position
 441 {
 442   IP_NORMAL,            /* At the end, just before the exit condition.  */
 443   IP_END,               /* At the end of the latch block.  */
 444   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 445   IP_AFTER_USE,         /* Immediately after a specific use.  */
 446   IP_ORIGINAL           /* The original biv.  */
 447 };
 448
 449 /* The induction variable candidate.  */
 450 struct iv_cand
 451 {
 452   unsigned id;          /* The number of the candidate.  */
 453   bool important;       /* Whether this is an "important" candidate, i.e. such
 454                            that it should be considered by all uses.  */
 455   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 456   gimple *incremented_at;/* For original biv, the statement where it is
 457                            incremented.  */
 458   tree var_before;      /* The variable used for it before increment.  */
 459   tree var_after;       /* The variable used for it after increment.  */
 460   struct iv *iv;        /* The value of the candidate.  NULL for
 461                            "pseudocandidate" used to indicate the possibility
 462                            to replace the final value of an iv by direct
 463                            computation of the value.  */
 464   unsigned cost;        /* Cost of the candidate.  */
 465   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 466   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 467                               where it is incremented.  */
 468   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 469                            iv_cand.  */
 470   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 471                            hanlde it as a new invariant expression which will
 472                            be hoisted out of loop.  */
 473   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 474                            smaller type.  */
 475   bool doloop_p;        /* Whether this is a doloop candidate.  */
 476 };
 477
 478 /* Hashtable entry for common candidate derived from iv uses.  */
 479 class iv_common_cand
 480 {
 481 public:
 482   tree base;
 483   tree step;
 484   /* IV uses from which this common candidate is derived.  */
 485   auto_vec<struct iv_use *> uses;
 486   hashval_t hash;
 487 };
 488
 489 /* Hashtable helpers.  */
 490
 491 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 492 {
 493   static inline hashval_t hash (const iv_common_cand *);
 494   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 495 };
 496
 497 /* Hash function for possible common candidates.  */
 498
 499 inline hashval_t
 500 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 501 {
 502   return ccand->hash;
 503 }
 504
 505 /* Hash table equality function for common candidates.  */
 506
 507 inline bool
 508 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 509                               const iv_common_cand *ccand2)
 510 {
 511   return (ccand1->hash == ccand2->hash
 512           && operand_equal_p (ccand1->base, ccand2->base, 0)
 513           && operand_equal_p (ccand1->step, ccand2->step, 0)
 514           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 515               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 516 }
 517
 518 /* Loop invariant expression hashtable entry.  */
 519
 520 struct iv_inv_expr_ent
 521 {
 522   /* Tree expression of the entry.  */
 523   tree expr;
 524   /* Unique indentifier.  */
 525   int id;
 526   /* Hash value.  */
 527   hashval_t hash;
 528 };
 529
 530 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 531
 532 static int
 533 sort_iv_inv_expr_ent (const void *a, const void *b)
 534 {
 535   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 536   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 537
 538   unsigned id1 = (*e1)->id;
 539   unsigned id2 = (*e2)->id;
 540
 541   if (id1 < id2)
 542     return -1;
 543   else if (id1 > id2)
 544     return 1;
 545   else
 546     return 0;
 547 }
 548
 549 /* Hashtable helpers.  */
 550
 551 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 552 {
 553   static inline hashval_t hash (const iv_inv_expr_ent *);
 554   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 555 };
 556
 557 /* Return true if uses of type TYPE represent some form of address.  */
 558
 559 inline bool
 560 address_p (use_type type)
 561 {
 562   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
 563 }
 564
 565 /* Hash function for loop invariant expressions.  */
 566
 567 inline hashval_t
 568 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 569 {
 570   return expr->hash;
 571 }
 572
 573 /* Hash table equality function for expressions.  */
 574
 575 inline bool
 576 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 577                            const iv_inv_expr_ent *expr2)
 578 {
 579   return expr1->hash == expr2->hash
 580          && operand_equal_p (expr1->expr, expr2->expr, 0);
 581 }
 582
 583 struct ivopts_data
 584 {
 585   /* The currently optimized loop.  */
 586   class loop *current_loop;
 587   location_t loop_loc;
 588
 589   /* Numbers of iterations for all exits of the current loop.  */
 590   hash_map<edge, tree_niter_desc *> *niters;
 591
 592   /* Number of registers used in it.  */
 593   unsigned regs_used;
 594
 595   /* The size of version_info array allocated.  */
 596   unsigned version_info_size;
 597
 598   /* The array of information for the ssa names.  */
 599   struct version_info *version_info;
 600
 601   /* The hashtable of loop invariant expressions created
 602      by ivopt.  */
 603   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 604
 605   /* The bitmap of indices in version_info whose value was changed.  */
 606   bitmap relevant;
 607
 608   /* The uses of induction variables.  */
 609   vec<iv_group *> vgroups;
 610
 611   /* The candidates.  */
 612   vec<iv_cand *> vcands;
 613
 614   /* A bitmap of important candidates.  */
 615   bitmap important_candidates;
 616
 617   /* Cache used by tree_to_aff_combination_expand.  */
 618   hash_map<tree, name_expansion *> *name_expansion_cache;
 619
 620   /* The hashtable of common candidates derived from iv uses.  */
 621   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 622
 623   /* The common candidates.  */
 624   vec<iv_common_cand *> iv_common_cands;
 625
 626   /* Hash map recording base object information of tree exp.  */
 627   hash_map<tree, tree> *base_object_map;
 628
 629   /* The maximum invariant variable id.  */
 630   unsigned max_inv_var_id;
 631
 632   /* The maximum invariant expression id.  */
 633   unsigned max_inv_expr_id;
 634
 635   /* Number of no_overflow BIVs which are not used in memory address.  */
 636   unsigned bivs_not_used_in_addr;
 637
 638   /* Obstack for iv structure.  */
 639   struct obstack iv_obstack;
 640
 641   /* Whether to consider just related and important candidates when replacing a
 642      use.  */
 643   bool consider_all_candidates;
 644
 645   /* Are we optimizing for speed?  */
 646   bool speed;
 647
 648   /* Whether the loop body includes any function calls.  */
 649   bool body_includes_call;
 650
 651   /* Whether the loop body can only be exited via single exit.  */
 652   bool loop_single_exit_p;
 653
 654   /* Whether the loop has doloop comparison use.  */
 655   bool doloop_use_p;
 656 };
 657
 658 /* An assignment of iv candidates to uses.  */
 659
 660 class iv_ca
 661 {
 662 public:
 663   /* The number of uses covered by the assignment.  */
 664   unsigned upto;
 665
 666   /* Number of uses that cannot be expressed by the candidates in the set.  */
 667   unsigned bad_groups;
 668
 669   /* Candidate assigned to a use, together with the related costs.  */
 670   class cost_pair **cand_for_group;
 671
 672   /* Number of times each candidate is used.  */
 673   unsigned *n_cand_uses;
 674
 675   /* The candidates used.  */
 676   bitmap cands;
 677
 678   /* The number of candidates in the set.  */
 679   unsigned n_cands;
 680
 681   /* The number of invariants needed, including both invariant variants and
 682      invariant expressions.  */
 683   unsigned n_invs;
 684
 685   /* Total cost of expressing uses.  */
 686   comp_cost cand_use_cost;
 687
 688   /* Total cost of candidates.  */
 689   int64_t cand_cost;
 690
 691   /* Number of times each invariant variable is used.  */
 692   unsigned *n_inv_var_uses;
 693
 694   /* Number of times each invariant expression is used.  */
 695   unsigned *n_inv_expr_uses;
 696
 697   /* Total cost of the assignment.  */
 698   comp_cost cost;
 699 };
 700
 701 /* Difference of two iv candidate assignments.  */
 702
 703 struct iv_ca_delta
 704 {
 705   /* Changed group.  */
 706   struct iv_group *group;
 707
 708   /* An old assignment (for rollback purposes).  */
 709   class cost_pair *old_cp;
 710
 711   /* A new assignment.  */
 712   class cost_pair *new_cp;
 713
 714   /* Next change in the list.  */
 715   struct iv_ca_delta *next;
 716 };
 717
/* Bound on the number of candidates below which all candidates are
   considered.  Reads param_iv_consider_all_candidates_bound as an unsigned
   value.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences than this, we just give up (it is quite
   unlikely that optimizing such a loop would help, and it would take ages).
   Reads param_iv_max_considered_uses as an unsigned value.  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  Reads param_iv_always_prune_cand_set_bound
   as an unsigned value.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)
 734
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration; the definition is later in this file.  */
static comp_cost force_expr_to_var_cost (tree, bool);
 741
 742 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 743
 744 edge
 745 single_dom_exit (class loop *loop)
 746 {
 747   edge exit = single_exit (loop);
 748
 749   if (!exit)
 750     return NULL;
 751
 752   if (!just_once_each_iteration_p (loop, exit->src))
 753     return NULL;
 754
 755   return exit;
 756 }
 757
 758 /* Dumps information about the induction variable IV to FILE.  Don't dump
 759    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 760    preceding spaces indicated by INDENT_LEVEL.  */
 761
 762 void
 763 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 764 {
 765   const char *p;
 766   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 767
 768   if (indent_level > 4)
 769     indent_level = 4;
 770   p = spaces + 8 - (indent_level << 1);
 771
 772   fprintf (file, "%sIV struct:\n", p);
 773   if (iv->ssa_name && dump_name)
 774     {
 775       fprintf (file, "%s  SSA_NAME:\t", p);
 776       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 777       fprintf (file, "\n");
 778     }
 779
 780   fprintf (file, "%s  Type:\t", p);
 781   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 782   fprintf (file, "\n");
 783
 784   fprintf (file, "%s  Base:\t", p);
 785   print_generic_expr (file, iv->base, TDF_SLIM);
 786   fprintf (file, "\n");
 787
 788   fprintf (file, "%s  Step:\t", p);
 789   print_generic_expr (file, iv->step, TDF_SLIM);
 790   fprintf (file, "\n");
 791
 792   if (iv->base_object)
 793     {
 794       fprintf (file, "%s  Object:\t", p);
 795       print_generic_expr (file, iv->base_object, TDF_SLIM);
 796       fprintf (file, "\n");
 797     }
 798
 799   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 800
 801   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 802            p, iv->no_overflow ? "No-overflow" : "Overflow");
 803 }
 804
 805 /* Dumps information about the USE to FILE.  */
 806
 807 void
 808 dump_use (FILE *file, struct iv_use *use)
 809 {
 810   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 811   fprintf (file, "    At stmt:\t");
 812   print_gimple_stmt (file, use->stmt, 0);
 813   fprintf (file, "    At pos:\t");
 814   if (use->op_p)
 815     print_generic_expr (file, *use->op_p, TDF_SLIM);
 816   fprintf (file, "\n");
 817   dump_iv (file, use->iv, false, 2);
 818 }
 819
 820 /* Dumps information about the uses to FILE.  */
 821
 822 void
 823 dump_groups (FILE *file, struct ivopts_data *data)
 824 {
 825   unsigned i, j;
 826   struct iv_group *group;
 827
 828   for (i = 0; i < data->vgroups.length (); i++)
 829     {
 830       group = data->vgroups[i];
 831       fprintf (file, "Group %d:\n", group->id);
 832       if (group->type == USE_NONLINEAR_EXPR)
 833         fprintf (file, "  Type:\tGENERIC\n");
 834       else if (group->type == USE_REF_ADDRESS)
 835         fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
 836       else if (group->type == USE_PTR_ADDRESS)
 837         fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
 838       else
 839         {
 840           gcc_assert (group->type == USE_COMPARE);
 841           fprintf (file, "  Type:\tCOMPARE\n");
 842         }
 843       for (j = 0; j < group->vuses.length (); j++)
 844         dump_use (file, group->vuses[j]);
 845     }
 846 }
 847
 848 /* Dumps information about induction variable candidate CAND to FILE.  */
 849
 850 void
 851 dump_cand (FILE *file, struct iv_cand *cand)
 852 {
 853   struct iv *iv = cand->iv;
 854
 855   fprintf (file, "Candidate %d:\n", cand->id);
 856   if (cand->inv_vars)
 857     {
 858       fprintf (file, "  Depend on inv.vars: ");
 859       dump_bitmap (file, cand->inv_vars);
 860     }
 861   if (cand->inv_exprs)
 862     {
 863       fprintf (file, "  Depend on inv.exprs: ");
 864       dump_bitmap (file, cand->inv_exprs);
 865     }
 866
 867   if (cand->var_before)
 868     {
 869       fprintf (file, "  Var befor: ");
 870       print_generic_expr (file, cand->var_before, TDF_SLIM);
 871       fprintf (file, "\n");
 872     }
 873   if (cand->var_after)
 874     {
 875       fprintf (file, "  Var after: ");
 876       print_generic_expr (file, cand->var_after, TDF_SLIM);
 877       fprintf (file, "\n");
 878     }
 879
 880   switch (cand->pos)
 881     {
 882     case IP_NORMAL:
 883       fprintf (file, "  Incr POS: before exit test\n");
 884       break;
 885
 886     case IP_BEFORE_USE:
 887       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 888       break;
 889
 890     case IP_AFTER_USE:
 891       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 892       break;
 893
 894     case IP_END:
 895       fprintf (file, "  Incr POS: at end\n");
 896       break;
 897
 898     case IP_ORIGINAL:
 899       fprintf (file, "  Incr POS: orig biv\n");
 900       break;
 901     }
 902
 903   dump_iv (file, iv, false, 1);
 904 }
 905
 906 /* Returns the info for ssa version VER.  */
 907
 908 static inline struct version_info *
 909 ver_info (struct ivopts_data *data, unsigned ver)
 910 {
 911   return data->version_info + ver;
 912 }
 913
 914 /* Returns the info for ssa name NAME.  */
 915
 916 static inline struct version_info *
 917 name_info (struct ivopts_data *data, tree name)
 918 {
 919   return ver_info (data, SSA_NAME_VERSION (name));
 920 }
 921
 922 /* Tells whether STMT comes after the point in LOOP at which IP_NORMAL
 923    induction variables are emitted.  */
 924
 925 static bool
 926 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
 927 {
 928   basic_block ip_bb = ip_normal_pos (loop);
 929   basic_block stmt_bb = gimple_bb (stmt);
 930
 931   gcc_assert (ip_bb);
 932
 933   /* Any statement of the latch block counts as being after.  */
 934   if (stmt_bb == loop->latch)
 935     return true;
 936
 937   /* Otherwise only the last statement of the IP_NORMAL block does.  */
 938   return stmt_bb == ip_bb && stmt == last_stmt (ip_bb);
 939 }
 940
 941 /* Tells whether STMT is after the statement at which the original
 942    induction variable CAND is incremented.  When TRUE_IF_EQUAL is set,
 943    STMT being at that very position also yields true.  */
 944
 945 static bool
 946 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 947 {
 948   gimple *inc = cand->incremented_at;
 949   basic_block inc_bb = gimple_bb (inc), bb = gimple_bb (stmt);
 950
 951   /* A block not dominated by the increment's block is never after it.  */
 952   if (!dominated_by_p (CDI_DOMINATORS, bb, inc_bb))
 953     return false;
 954
 955   if (bb != inc_bb)
 956     return true;
 957
 958   /* Within one block the comparison of statement uids decides.  */
 959   unsigned stmt_uid = gimple_uid (stmt), inc_uid = gimple_uid (inc);
 960   return true_if_equal ? stmt_uid >= inc_uid : stmt_uid > inc_uid;
 961 }
 962
 963 /* Returns true if STMT is after the place where the induction variable
 964    CAND is incremented in LOOP.  The decision is dispatched on CAND->pos;
        a position not listed in the switch ends in gcc_unreachable.  */
 965
 966 static bool
 967 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
 968 {
 969   switch (cand->pos)
 970     {
 971     case IP_END:
           /* No statement is reported as being after an IP_END increment.  */
 972       return false;
 973
 974     case IP_NORMAL:
 975       return stmt_after_ip_normal_pos (loop, stmt);
 976
 977     case IP_ORIGINAL:
 978     case IP_AFTER_USE:
           /* STMT at the increment position itself does not count as after.  */
 979       return stmt_after_inc_pos (cand, stmt, false);
 980
 981     case IP_BEFORE_USE:
           /* Here STMT at the increment position does count as after.  */
 982       return stmt_after_inc_pos (cand, stmt, true);
 983
 984     default:
 985       gcc_unreachable ();
 986     }
 987 }
 988
 989 /* Callback handed to walk_tree by contains_abnormal_ssa_name_p.  Returns
        the node at *TP when it is an SSA name flagged as occurring in an
        abnormal PHI, and NULL_TREE otherwise.  *WALK_SUBTREES is cleared for
        nodes that are not EXPR_P.  The third argument is unused.  */
 990
 991 static tree
 992 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
 993 {
 994   tree node = *tp;
 995
 996   if (TREE_CODE (node) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (node))
 997     return node;
 998
 999   if (!EXPR_P (node))
1000     *walk_subtrees = 0;
1001   return NULL_TREE;
1002 }
1003
1004 /* Returns true if EXPR contains a ssa name that occurs in an
1005    abnormal phi node.  EXPR is traversed with
        walk_tree_without_duplicates using contains_abnormal_ssa_name_p_1 as
        the callback; a non-NULL walk result means such a name was found.  */
1006
1007 bool
1008 contains_abnormal_ssa_name_p (tree expr)
1009 {
1010   return walk_tree_without_duplicates
1011            (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1012 }
1013
1014 /* Returns the structure describing number of iterations determined from
1015    EXIT of DATA->current_loop, or NULL if something goes wrong.  The
        answer, including the NULL of a failed analysis, is stored in
        DATA->niters (created on first use) keyed by EXIT, and a stored
        entry is returned as is on a later call.  */
1016
1017 static class tree_niter_desc *
1018 niter_for_exit (struct ivopts_data *data, edge exit)
1019 {
1020   class tree_niter_desc *desc;
1021   tree_niter_desc **slot;
1022
1023   if (!data->niters)
1024     {
           /* First query: create the map; it cannot hold EXIT yet.  */
1025       data->niters = new hash_map<edge, tree_niter_desc *>;
1026       slot = NULL;
1027     }
1028   else
1029     slot = data->niters->get (exit);
1030
1031   if (!slot)
1032     {
1033       /* Try to determine number of iterations.  We cannot safely work with ssa
1034          names that appear in phi nodes on abnormal edges, so that we do not
1035          create overlapping life ranges for them (PR 27283).  */
1036       desc = XNEW (class tree_niter_desc);
1037       if (!number_of_iterations_exit (data->current_loop,
1038                                       exit, desc, true)
1039           || contains_abnormal_ssa_name_p (desc->niter))
1040         {
               /* Unusable description: release it and report failure.  */
1041           XDELETE (desc);
1042           desc = NULL;
1043         }
           /* The outcome is stored even when it is NULL.  */
1044       data->niters->put (exit, desc);
1045     }
1046   else
1047     desc = *slot;
1048
1049   return desc;
1050 }
1051
1052 /* Describes the number of iterations of DATA->current_loop as seen from
1053    its single dominating exit.  Yields NULL when the loop has no such
1054    exit, and otherwise whatever niter_for_exit reports for it.  */
1055
1056 static class tree_niter_desc *
1057 niter_for_single_dom_exit (struct ivopts_data *data)
1058 {
1059   edge dom_exit = single_dom_exit (data->current_loop);
1060   class tree_niter_desc *desc = NULL;
1061
1062   if (dom_exit)
1063     desc = niter_for_exit (data, dom_exit);
1064   return desc;
1065 }
1066
1067 /* Initializes data structures used by the iv optimization pass, stored
1068    in DATA.  The containers are allocated here, while the niters map, the
        name expansion cache and the base object map start out as NULL.  */
1069
1070 static void
1071 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1072 {
       /* The table has room for twice the current number of SSA names;
          presumably headroom for names created later -- TODO confirm.  */
1073   data->version_info_size = 2 * num_ssa_names;
1074   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1075   data->relevant = BITMAP_ALLOC (NULL);
1076   data->important_candidates = BITMAP_ALLOC (NULL);
1077   data->max_inv_var_id = 0;
1078   data->max_inv_expr_id = 0;
1079   data->niters = NULL;
1080   data->vgroups.create (20);
1081   data->vcands.create (20);
1082   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1083   data->name_expansion_cache = NULL;
1084   data->base_object_map = NULL;
1085   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1086   data->iv_common_cands.create (20);
       /* Note that decl_rtl_to_reset is not a member of DATA.  */
1087   decl_rtl_to_reset.create (20);
1088   gcc_obstack_init (&data->iv_obstack);
1089 }
1090
1091 /* walk_tree callback for determine_base_object.  WDATA points to a tree
        that accumulates the result: it receives the first base object found,
        and is overwritten with integer_zero_node when a second one turns up,
        at which point the walk is stopped by returning a non-NULL value.  */
1092
1093 static tree
1094 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1095 {
1096   tree_code code = TREE_CODE (*tp);
1097   tree obj = NULL_TREE;
1098   if (code == ADDR_EXPR)
1099     {
           /* With no base address the ADDR_EXPR itself is the object.  A base
              other than a MEM_REF yields its address converted to
              ptr_type_node.  A MEM_REF base records nothing for this node.  */
1100       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1101       if (!base)
1102         obj = *tp;
1103       else if (TREE_CODE (base) != MEM_REF)
1104         obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1105     }
       /* A pointer-typed SSA name is an object in its own right.  */
1106   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1107         obj = fold_convert (ptr_type_node, *tp);
1108
1109   if (!obj)
1110     {
           /* Nothing found at this node; only EXPR_P nodes are walked
              further.  */
1111       if (!EXPR_P (*tp))
1112         *walk_subtrees = 0;
1113
1114       return NULL_TREE;
1115     }
1116   /* Record special node for multiple base objects and stop.  */
1117   if (*static_cast<tree *> (wdata))
1118     {
1119       *static_cast<tree *> (wdata) = integer_zero_node;
1120       return integer_zero_node;
1121     }
1122   /* Record the base object and continue looking.  */
1123   *static_cast<tree *> (wdata) = obj;
1124   return NULL_TREE;
1125 }
1126
1127 /* Returns a memory object to that EXPR points with caching.  Return NULL if we
1128    are able to determine that it does not point to any such object; specially
1129    return integer_zero_node if EXPR contains multiple base objects.  The
        cache is DATA->base_object_map, created on the first call and keyed
        by EXPR; NULL results are stored in it as well.  */
1130
1131 static tree
1132 determine_base_object (struct ivopts_data *data, tree expr)
1133 {
1134   tree *slot, obj = NULL_TREE;
1135   if (data->base_object_map)
1136     {
           /* A stored answer is returned without walking EXPR again.  */
1137       if ((slot = data->base_object_map->get(expr)) != NULL)
1138         return *slot;
1139     }
1140   else
1141     data->base_object_map = new hash_map<tree, tree>;
1142
       /* The callback leaves its result in OBJ; the value returned by the
          walk itself is deliberately ignored.  */
1143   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1144   data->base_object_map->put (expr, obj);
1145   return obj;
1146 }
1147
1148 /* Tells whether EXPR, looked at through its additive operations, holds an
1149    address expression whose operand is not a DECL_P node.  */
1150
1151 static bool
1152 contain_complex_addr_expr (tree expr)
1153 {
1154   STRIP_NOPS (expr);
1155
1156   const enum tree_code code = TREE_CODE (expr);
1157
1158   if (code == ADDR_EXPR)
1159     return !DECL_P (TREE_OPERAND (expr, 0));
1160
1161   /* Only POINTER_PLUS_EXPR, PLUS_EXPR and MINUS_EXPR are searched; any
1162      other code answers false right away.  */
1163   if (code != POINTER_PLUS_EXPR
1164       && code != PLUS_EXPR
1165       && code != MINUS_EXPR)
1166     return false;
1167
1168   /* Both operands are inspected unconditionally and the two answers
1169      are then combined.  */
1170   bool in_op0 = contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1171   bool in_op1 = contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1172
1173   return in_op0 | in_op1;
1174 }
1175
1176 /* Allocates an induction variable with given initial value BASE and step STEP.
1177    NO_OVERFLOW implies the iv doesn't overflow.  The structure comes from
        DATA->iv_obstack and STEP must not be NULL_TREE.  The returned iv has
        biv_p and have_address_use cleared, nonlin_use NULL and ssa_name
        NULL_TREE; its base_object is computed from the (possibly lowered)
        base.  DATA->current_loop is the loop the overflow check consults.  */
1178
1179 static struct iv *
1180 alloc_iv (struct ivopts_data *data, tree base, tree step,
1181           bool no_overflow = false)
1182 {
1183   tree expr = base;
1184   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1185                                               sizeof (struct iv));
1186   gcc_assert (step != NULL_TREE);
1187
1188   /* Lower address expression in base except ones with DECL_P as operand.
1189      By doing this:
1190        1) More accurate cost can be computed for address expressions;
1191        2) Duplicate candidates won't be created for bases in different
1192           forms, like &a[0] and &a.  */
1193   STRIP_NOPS (expr);
1194   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1195       || contain_complex_addr_expr (expr))
1196     {
           /* Round-trip the stripped expression through an affine combination
              and convert the result back to the type BASE had.  */
1197       aff_tree comb;
1198       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1199       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1200     }
1201
1202   iv->base = base;
1203   iv->base_object = determine_base_object (data, base);
1204   iv->step = step;
1205   iv->biv_p = false;
1206   iv->nonlin_use = NULL;
1207   iv->ssa_name = NULL_TREE;
       /* When the caller did not vouch for it, no_overflow can still be
          established by iv_can_overflow_p answering false.  */
1208   if (!no_overflow
1209        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1210                               base, step))
1211     no_overflow = true;
1212   iv->no_overflow = no_overflow;
1213   iv->have_address_use = false;
1214
1215   return iv;
1216 }
1217
1218 /* Records the ssa name IV as an induction variable with initial value BASE
1219    and step STEP.  NO_OVERFLOW says the iv is known not to overflow.  */
1220
1221 static void
1222 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1223         bool no_overflow)
1224 {
1225   struct version_info *vi = name_info (data, iv);
1226
1227   gcc_assert (!vi->iv);
1228   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1229   struct iv *desc = alloc_iv (data, base, step, no_overflow);
1230   desc->ssa_name = iv;
1231   vi->iv = desc;
1232 }
1233
1234 /* Finds induction variable declaration for VAR.  Returns NULL when the
        type of VAR is neither a pointer nor an integral type.  If nothing is
        recorded for VAR yet and its defining statement has no block or lies
        outside DATA->current_loop, VAR is first recorded as an iv with base
        VAR and a zero step that is flagged as not overflowing.  The result
        is whatever is recorded for VAR afterwards, which may still be NULL.  */
1235
1236 static struct iv *
1237 get_iv (struct ivopts_data *data, tree var)
1238 {
1239   basic_block bb;
1240   tree type = TREE_TYPE (var);
1241
1242   if (!POINTER_TYPE_P (type)
1243       && !INTEGRAL_TYPE_P (type))
1244     return NULL;
1245
1246   if (!name_info (data, var)->iv)
1247     {
1248       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1249
1250       if (!bb
1251           || !flow_bb_inside_loop_p (data->current_loop, bb))
1252         {
               /* The zero step of a pointer is built in sizetype rather than
                  in the pointer type.  */
1253           if (POINTER_TYPE_P (type))
1254             type = sizetype;
1255           set_iv (data, var, var, build_int_cst (type, 0), true);
1256         }
1257     }
1258
1259   return name_info (data, var)->iv;
1260 }
1261
1262 /* Picks the first ssa name met in EXPR, or NULL if there is none.  */
1263
1264 static tree
1265 extract_single_var_from_expr (tree expr)
1266 {
1267   if (!expr || is_gimple_min_invariant (expr))
1268     return NULL;
1269
1270   const enum tree_code code = TREE_CODE (expr);
1271
1272   /* The operands of an expression node are searched recursively from
1273      first to last, and the first hit wins.  */
1274   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1275     {
1276       const int nops = TREE_OPERAND_LENGTH (expr);
1277
1278       for (int op = 0; op < nops; op++)
1279         {
1280           tree var = extract_single_var_from_expr (TREE_OPERAND (expr, op));
1281
1282           if (var)
1283             return var;
1284         }
1285     }
1286   return code == SSA_NAME ? expr : NULL;
1287 }
1288
1289 /* Finds basic ivs.  The PHI nodes in the header of DATA->current_loop are
        examined, and each one accepted as a simple iv with a nonzero step is
        recorded through set_iv.  Returns true if at least one iv was
        recorded.  */
1290
1291 static bool
1292 find_bivs (struct ivopts_data *data)
1293 {
1294   gphi *phi;
1295   affine_iv iv;
1296   tree step, type, base, stop;
1297   bool found = false;
1298   class loop *loop = data->current_loop;
1299   gphi_iterator psi;
1300
1301   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1302     {
1303       phi = psi.phi ();
1304
           /* Results occurring in abnormal PHIs and virtual operands are
              never treated as ivs.  */
1305       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1306         continue;
1307
1308       if (virtual_operand_p (PHI_RESULT (phi)))
1309         continue;
1310
1311       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1312         continue;
1313
           /* A zero step means the value does not change.  */
1314       if (integer_zerop (iv.step))
1315         continue;
1316
1317       step = iv.step;
           /* The base is taken from the PHI argument on the preheader edge,
              not from the description filled in by simple_iv.  */
1318       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1319       /* Stop expanding iv base at the first ssa var referred by iv step.
1320          Ideally we should stop at any ssa var, because that's expensive
1321          and unusual to happen, we just do it on the first one.
1322
1323          See PR64705 for the rationale.  */
1324       stop = extract_single_var_from_expr (step);
1325       base = expand_simple_operations (base, stop);
1326       if (contains_abnormal_ssa_name_p (base)
1327           || contains_abnormal_ssa_name_p (step))
1328         continue;
1329
1330       type = TREE_TYPE (PHI_RESULT (phi));
1331       base = fold_convert (type, base);
1332       if (step)
1333         {
               /* Pointer ivs carry their step in the pointer offset type.  */
1334           if (POINTER_TYPE_P (type))
1335             step = convert_to_ptrofftype (step);
1336           else
1337             step = fold_convert (type, step);
1338         }
1339
1340       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1341       found = true;
1342     }
1343
1344   return found;
1345 }
1346
1347 /* Marks basic ivs.  For every header PHI of DATA->current_loop that has an
        iv, the value arriving over the latch edge is looked at.  If that
        value has an iv too and is defined in a block that belongs directly
        to the current loop and is not flagged BB_IRREDUCIBLE_LOOP, both ivs
        get biv_p set.  DATA->bivs_not_used_in_addr is reset and then
        incremented once for each iv so marked that has no_overflow set.  */
1348
1349 static void
1350 mark_bivs (struct ivopts_data *data)
1351 {
1352   gphi *phi;
1353   gimple *def;
1354   tree var;
1355   struct iv *iv, *incr_iv;
1356   class loop *loop = data->current_loop;
1357   basic_block incr_bb;
1358   gphi_iterator psi;
1359
1360   data->bivs_not_used_in_addr = 0;
1361   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1362     {
1363       phi = psi.phi ();
1364
1365       iv = get_iv (data, PHI_RESULT (phi));
1366       if (!iv)
1367         continue;
1368
1369       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1370       def = SSA_NAME_DEF_STMT (var);
1371       /* Don't mark iv peeled from other one as biv.  */
1372       if (def
1373           && gimple_code (def) == GIMPLE_PHI
1374           && gimple_bb (def) == loop->header)
1375         continue;
1376
1377       incr_iv = get_iv (data, var);
1378       if (!incr_iv)
1379         continue;
1380
1381       /* If the increment is in the subloop, ignore it.  */
1382       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1383       if (incr_bb->loop_father != data->current_loop
1384           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1385         continue;
1386
1387       iv->biv_p = true;
1388       incr_iv->biv_p = true;
           /* The PHI result and the incremented value are counted
              separately.  */
1389       if (iv->no_overflow)
1390         data->bivs_not_used_in_addr++;
1391       if (incr_iv->no_overflow)
1392         data->bivs_not_used_in_addr++;
1393     }
1394 }
1395
1396 /* Checks whether STMT defines a linear induction variable and stores its
1397    parameters to IV.  IV->base and IV->step are cleared on entry.  Only a
        GIMPLE_ASSIGN whose lhs is an SSA name can qualify.  False is
        returned as well when the expanded base or the step mentions a name
        occurring in an abnormal PHI, or when STMT could throw.  */
1398
1399 static bool
1400 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1401 {
1402   tree lhs, stop;
1403   class loop *loop = data->current_loop;
1404
1405   iv->base = NULL_TREE;
1406   iv->step = NULL_TREE;
1407
1408   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1409     return false;
1410
1411   lhs = gimple_assign_lhs (stmt);
1412   if (TREE_CODE (lhs) != SSA_NAME)
1413     return false;
1414
       /* The iv is analyzed with respect to LOOP, as used in the loop that
          actually contains STMT.  */
1415   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1416     return false;
1417
1418   /* Stop expanding iv base at the first ssa var referred by iv step.
1419      Ideally we should stop at any ssa var, because that's expensive
1420      and unusual to happen, we just do it on the first one.
1421
1422      See PR64705 for the rationale.  */
1423   stop = extract_single_var_from_expr (iv->step);
1424   iv->base = expand_simple_operations (iv->base, stop);
1425   if (contains_abnormal_ssa_name_p (iv->base)
1426       || contains_abnormal_ssa_name_p (iv->step))
1427     return false;
1428
1429   /* If STMT could throw, then do not consider STMT as defining a GIV.
1430      While this will suppress optimizations, we cannot safely delete this
1431      GIV and associated statements, even if it appears it is not used.  */
1432   if (stmt_could_throw_p (cfun, stmt))
1433     return false;
1434
1435   return true;
1436 }
1437
1438 /* Records the lhs of STMT as a general iv when STMT defines one.  */
1439
1440 static void
1441 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1442 {
1443   affine_iv desc;
1444
1445   /* On success DESC carries the base, step and overflow information.  */
1446   if (find_givs_in_stmt_scev (data, stmt, &desc))
1447     set_iv (data, gimple_assign_lhs (stmt),
1448             desc.base, desc.step, desc.no_overflow);
1449 }
1450
1451 /* Looks for general ivs in every statement of basic block BB.  */
1452
1453 static void
1454 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1455 {
1456   gimple_stmt_iterator gsi = gsi_start_bb (bb);
1457
1458   for (; !gsi_end_p (gsi); gsi_next (&gsi))
1459     find_givs_in_stmt (data, gsi_stmt (gsi));
1460 }
1461
1462 /* Looks for general ivs in all blocks of DATA->current_loop.  */
1463
1464 static void
1465 find_givs (struct ivopts_data *data)
1466 {
1467   class loop *loop = data->current_loop;
1468   basic_block *blocks = get_loop_body_in_dom_order (loop);
1469
1470   /* Visit the blocks in the order the array lists them.  */
1471   for (unsigned idx = 0; idx < loop->num_nodes; idx++)
1472     find_givs_in_bb (data, blocks[idx]);
1473   free (blocks);
1474 }
1475
1476 /* Determines, for the ssa names of DATA->current_loop, which of them are
1477    induction variables along with their initial value and step.  Returns
        false when no basic iv is found, in which case nothing else is done.
        With detailed dumping enabled, the iteration count of the single
        dominating exit and all ivs with a nonzero step are printed.  */
1478
1479 static bool
1480 find_induction_variables (struct ivopts_data *data)
1481 {
1482   if (!find_bivs (data))
1483     return false;
1484
1485   find_givs (data);
1486   mark_bivs (data);
1487
1488   /* Everything past this point only produces detailed dump output.  */
1489   if (!dump_file || !(dump_flags & TDF_DETAILS))
1490     return true;
1491
1492   class tree_niter_desc *desc = niter_for_single_dom_exit (data);
1493
1494   if (desc)
1495     {
1496       fprintf (dump_file, "  number of iterations ");
1497       print_generic_expr (dump_file, desc->niter, TDF_SLIM);
1498       if (!integer_zerop (desc->may_be_zero))
1499         {
1500           fprintf (dump_file, "; zero if ");
1501           print_generic_expr (dump_file, desc->may_be_zero, TDF_SLIM);
1502         }
1503       fprintf (dump_file, "\n");
1504     }
1505
1506   fprintf (dump_file, "\n<Induction Vars>:\n");
1507   unsigned ver;
1508   bitmap_iterator bi;
1509   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, ver, bi)
1510     {
1511       struct iv *iv = ver_info (data, ver)->iv;
1512       if (iv && iv->step && !integer_zerop (iv->step))
1513         dump_iv (dump_file, iv, true, 0);
1514     }
1515
1516   return true;
1517 }
1518
1519 /* Creates a zero-initialized iv_use describing a use of kind TYPE at
1520    *USE_P in STMT with value IV, appends it to GROUP and returns it.  The
1521    use's id is its index in GROUP's use vector.  MEM_TYPE, ADDR_BASE and
1522    ADDR_OFFSET are stored as given; they describe the addressed memory,
1523    the stripped iv base and the stripped constant offset.  */
1524
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527             gimple *stmt, enum use_type type, tree mem_type,
1528             tree addr_base, poly_uint64 addr_offset)
1529 {
1530   struct iv_use *new_use = XCNEW (struct iv_use);
1531
1532   new_use->group_id = group->id;
1533   new_use->id = group->vuses.length ();
1534   new_use->stmt = stmt;
1535   new_use->op_p = use_p;
1536   new_use->iv = iv;
1537   new_use->type = type;
1538   new_use->mem_type = mem_type;
1539   new_use->addr_offset = addr_offset;
1540   new_use->addr_base = addr_base;
1541
1542   group->vuses.safe_push (new_use);
1543   return new_use;
1544 }
1545
1546 /* Records OP as a loop-level invariant if it is one.  NONLINEAR_USE
1547    tells that the invariant is used in a way that gets no special
1548    handling; the flag is accumulated across calls.  */
1549
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1552 {
1553   /* Only real (non-virtual) SSA names are tracked.  */
1554   if (TREE_CODE (op) != SSA_NAME || virtual_operand_p (op))
1555     return;
1556
1557   /* A name whose defining statement sits in a block of the current
1558      loop is not invariant here.  */
1559   basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1560
1561   if (def_bb && flow_bb_inside_loop_p (data->current_loop, def_bb))
1562     return;
1563
1564   struct version_info *vi = name_info (data, op);
1565
1566   vi->name = op;
1567   vi->has_nonlin_use |= nonlinear_use;
1568   if (!vi->inv_id)
1569     vi->inv_id = ++data->max_inv_var_id;
1570   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1571 }
1572
1573 /* Creates a new, empty group of uses of kind TYPE and appends it to DATA.  */
1574
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1577 {
1578   struct iv_group *grp = XCNEW (struct iv_group);
1579
1580   grp->type = type;
1581   grp->doloop_p = false;
1582   grp->related_cands = BITMAP_ALLOC (NULL);
1583   grp->vuses.create (1);
1584   /* The id doubles as the group's index in DATA->vgroups.  */
1585   grp->id = data->vgroups.length ();
1586   data->vgroups.safe_push (grp);
1587   return grp;
1588 }
1589
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591    New group will be created if there is no existing group for the use.
1592    MEM_TYPE is the type of memory being addressed, or NULL if this
1593    isn't an address reference.  Returns the newly recorded use.  */
1594
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597                   struct iv *iv, gimple *stmt, enum use_type type,
1598                   tree mem_type)
1599 {
1600   tree addr_base = NULL;
1601   struct iv_group *group = NULL;
1602   poly_uint64 addr_offset = 0;
1603
1604   /* A non address type use always goes into a new group (GROUP stays
          NULL).  For an address type use, first look for an existing address
          group it can join.  */
1605   if (address_p (type))
1606     {
1607       unsigned int i;
1608
1609       addr_base = strip_offset (iv->base, &addr_offset);
1610       for (i = 0; i < data->vgroups.length (); i++)
1611         {
1612           struct iv_use *use;
1613
1614           group = data->vgroups[i];
               /* The first use stands for the whole group; this presumes
                  that every recorded group holds at least one use.  */
1615           use = group->vuses[0];
1616           if (!address_p (use->type))
1617             continue;
1618
1619           /* Check if it has the same stripped base and step.  */
1620           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621               && operand_equal_p (iv->step, use->iv->step, 0)
1622               && operand_equal_p (addr_base, use->addr_base, 0))
1623             break;
1624         }
           /* After a search without a match GROUP still points at the last
              group inspected, so it has to be reset.  */
1625       if (i == data->vgroups.length ())
1626         group = NULL;
1627     }
1628
1629   if (!group)
1630     group = record_group (data, type);
1631
1632   return record_use (group, use_p, iv, stmt, type, mem_type,
1633                      addr_base, addr_offset);
1634 }
1635
1636 /* Checks whether the use OP is interesting and if so, records it.  Returns
        the USE_NONLINEAR_EXPR use of OP's iv, creating it on the first call
        and returning the stored one afterwards.  Returns NULL when OP is not
        an SSA name, has no iv, or has an iv with a zero step; in the last
        case OP is passed to record_invariant as nonlinearly used.  */
1637
1638 static struct iv_use *
1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1640 {
1641   struct iv *iv;
1642   gimple *stmt;
1643   struct iv_use *use;
1644
1645   if (TREE_CODE (op) != SSA_NAME)
1646     return NULL;
1647
1648   iv = get_iv (data, op);
1649   if (!iv)
1650     return NULL;
1651
       /* A use was already recorded for this iv; hand it out again.  */
1652   if (iv->nonlin_use)
1653     {
1654       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655       return iv->nonlin_use;
1656     }
1657
1658   if (integer_zerop (iv->step))
1659     {
1660       record_invariant (data, op, true);
1661       return NULL;
1662     }
1663
       /* The use is attached to the statement defining OP, and no operand
          location is recorded for it.  */
1664   stmt = SSA_NAME_DEF_STMT (op);
1665   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666
1667   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668   iv->nonlin_use = use;
1669   return use;
1670 }
1671
1672 /* Indicate how compare type iv_use can be handled.  The value is computed
        by extract_cond_operands.  */
1673 enum comp_iv_rewrite
1674 {
       /* None of the cases below applies.  find_interesting_uses_cond then
          records no compare type use and treats the operands as plain
          uses.  */
1675   COMP_IV_NA,
1676   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1677   COMP_IV_EXPR,
1678   /* We may rewrite compare type iv_uses on both sides of comparison by
1679      expressing value of each iv_use.  */
1680   COMP_IV_EXPR_2,
1681   /* We may rewrite compare type iv_use by expressing value of the iv_use
1682      or by eliminating it with other iv_cand.  */
1683   COMP_IV_ELIM
1684 };
1685
1686 /* Given a condition in statement STMT, classifies how the compare can be
1687    rewritten and returns the matching comp_iv_rewrite value.  CONTROL_VAR
1688    and BOUND are set to the locations of the two operands, IV_VAR and
1689    IV_BOUND to the corresponding induction variable descriptions (NULL
1690    when get_iv finds none for an SSA name operand); each of the four
1691    output pointers may itself be NULL.  When exactly one side has a nonzero
1692    step, the operands are swapped if needed so that it is CONTROL_VAR.
        The result is COMP_IV_EXPR_2 if both sides have a nonzero step,
        COMP_IV_ELIM if one side has a nonzero step and the other a zero
        step, COMP_IV_EXPR if one side has a nonzero step and the other has
        no iv at all, and COMP_IV_NA otherwise.  */
1693
1694 static enum comp_iv_rewrite
1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696                        tree **control_var, tree **bound,
1697                        struct iv **iv_var, struct iv **iv_bound)
1698 {
1699   /* The objects returned when COND has constant operands.  */
1700   static struct iv const_iv;
1701   static tree zero;
1702   tree *op0 = &zero, *op1 = &zero;
1703   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705
       /* A GIMPLE_COND supplies its lhs and rhs; any other statement is
          accessed as an assignment with two rhs operands.  */
1706   if (gimple_code (stmt) == GIMPLE_COND)
1707     {
1708       gcond *cond_stmt = as_a <gcond *> (stmt);
1709       op0 = gimple_cond_lhs_ptr (cond_stmt);
1710       op1 = gimple_cond_rhs_ptr (cond_stmt);
1711     }
1712   else
1713     {
1714       op0 = gimple_assign_rhs1_ptr (stmt);
1715       op1 = gimple_assign_rhs2_ptr (stmt);
1716     }
1717
       /* The static placeholders are (re)initialized on every call.  */
1718   zero = integer_zero_node;
1719   const_iv.step = integer_zero_node;
1720
       /* Operands that are not SSA names keep the zero-step placeholder.  */
1721   if (TREE_CODE (*op0) == SSA_NAME)
1722     iv0 = get_iv (data, *op0);
1723   if (TREE_CODE (*op1) == SSA_NAME)
1724     iv1 = get_iv (data, *op1);
1725
1726   /* If both sides of comparison are IVs.  We can express ivs on both end.  */
1727   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728     {
1729       rewrite_type = COMP_IV_EXPR_2;
1730       goto end;
1731     }
1732
1733   /* If none side of comparison is IV.  */
1734   if ((!iv0 || integer_zerop (iv0->step))
1735       && (!iv1 || integer_zerop (iv1->step)))
1736     goto end;
1737
1738   /* Control variable may be on the other side.  */
1739   if (!iv0 || integer_zerop (iv0->step))
1740     {
1741       std::swap (op0, op1);
1742       std::swap (iv0, iv1);
1743     }
1744   /* If one side is IV and the other side isn't loop invariant.  */
1745   if (!iv1)
1746     rewrite_type = COMP_IV_EXPR;
1747   /* If one side is IV and the other side is loop invariant.  */
1748   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749     rewrite_type = COMP_IV_ELIM;
1750
1751 end:
       /* The outputs are filled in on every path, including COMP_IV_NA.  */
1752   if (control_var)
1753     *control_var = op0;
1754   if (iv_var)
1755     *iv_var = iv0;
1756   if (bound)
1757     *bound = op1;
1758   if (iv_bound)
1759     *iv_bound = iv1;
1760
1761   return rewrite_type;
1762 }
1763
1764 /* Records the condition in STMT as a compare type use when
1765    extract_cond_operands finds it rewritable.  */
1766
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769 {
1770   tree *lhs_p, *rhs_p;
1771   struct iv *lhs_iv, *rhs_iv;
1772   const enum comp_iv_rewrite kind
1773     = extract_cond_operands (data, stmt, &lhs_p, &rhs_p, &lhs_iv, &rhs_iv);
1774
1775   if (kind == COMP_IV_NA)
1776     {
1777       /* Not a rewritable compare; treat both operands as plain uses.  */
1778       find_interesting_uses_op (data, *lhs_p);
1779       find_interesting_uses_op (data, *rhs_p);
1780       return;
1781     }
1782
1783   record_group_use (data, lhs_p, lhs_iv, stmt, USE_COMPARE, NULL_TREE);
1784   /* With ivs on both sides the second operand gets its own use.  */
1785   if (kind == COMP_IV_EXPR_2)
1786     record_group_use (data, rhs_p, rhs_iv, stmt, USE_COMPARE, NULL_TREE);
1787 }
1788
1789 /* Returns the outermost loop EXPR is obviously invariant in
1790    relative to the loop LOOP, i.e. if all its operands are defined
1791    outside of the returned loop.  Returns NULL if EXPR is not
1792    even obviously invariant in LOOP.  Gimple minimal invariants and SSA
        names whose defining statement has no block yield
        current_loops->tree_root.  */
1793
1794 class loop *
1795 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1796 {
1797   basic_block def_bb;
1798   unsigned i, len;
1799
1800   if (is_gimple_min_invariant (expr))
1801     return current_loops->tree_root;
1802
1803   if (TREE_CODE (expr) == SSA_NAME)
1804     {
1805       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806       if (def_bb)
1807         {
1808           if (flow_bb_inside_loop_p (loop, def_bb))
1809             return NULL;
               /* The answer is the superloop of LOOP one level deeper than
                  the loop holding the definition.  */
1810           return superloop_at_depth (loop,
1811                                      loop_depth (def_bb->loop_father) + 1);
1812         }
1813
1814       return current_loops->tree_root;
1815     }
1816
       /* Anything else that is not an expression node is not considered
          invariant here.  */
1817   if (!EXPR_P (expr))
1818     return NULL;
1819
       /* For an expression, take the deepest of the loops found for its
          operands, skipping operands that are absent.  */
1820   unsigned maxdepth = 0;
1821   len = TREE_OPERAND_LENGTH (expr);
1822   for (i = 0; i < len; i++)
1823     {
1824       class loop *ivloop;
1825       if (!TREE_OPERAND (expr, i))
1826         continue;
1827
1828       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829       if (!ivloop)
1830         return NULL;
1831       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832     }
1833
1834   return superloop_at_depth (loop, maxdepth);
1835 }
1836
1837 /* Tells whether EXPR is obviously invariant in LOOP, meaning that each
1838    of its operands is defined outside of LOOP.  LOOP must not be the
1839    function body (its depth is asserted to be positive).  */
1840
1841 bool
1842 expr_invariant_in_loop_p (class loop *loop, tree expr)
1843 {
1844   gcc_assert (loop_depth (loop) > 0);
1845
1846   if (is_gimple_min_invariant (expr))
1847     return true;
1848
1849   if (TREE_CODE (expr) == SSA_NAME)
1850     {
1851       /* An SSA name is invariant unless its defining statement has a
1852          block and that block is inside LOOP.  */
1853       basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1854
1855       return !(def_bb && flow_bb_inside_loop_p (loop, def_bb));
1856     }
1857
1858   if (!EXPR_P (expr))
1859     return false;
1860
1861   /* An expression is invariant when every operand that is present is.  */
1862   const unsigned nops = TREE_OPERAND_LENGTH (expr);
1863
1864   for (unsigned op = 0; op < nops; op++)
1865     {
1866       tree operand = TREE_OPERAND (expr, op);
1867
1868       if (operand && !expr_invariant_in_loop_p (loop, operand))
1869         return false;
1870     }
1871   return true;
1872 }
1873
1874 /* Given expression EXPR which computes inductive values with respect
1875    to loop recorded in DATA, this function returns biv from which EXPR
1876    is derived by tracing definition chains of ssa variables in EXPR.
        Returns NULL when no such biv is found.  The operands of an
        expression node are searched first.  An SSA name whose iv has a
        nonzero step but is not itself a biv is traced through the arguments
        of its defining PHI or the operands of its defining assignment.  */
1877
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880 {
1881   struct iv *iv;
1882   unsigned i, n;
1883   tree e2, e1;
1884   enum tree_code code;
1885   gimple *stmt;
1886
1887   if (expr == NULL_TREE)
1888     return NULL;
1889
1890   if (is_gimple_min_invariant (expr))
1891     return NULL;
1892
1893   code = TREE_CODE (expr);
1894   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895     {
           /* The first operand that leads to a biv decides the result.  */
1896       n = TREE_OPERAND_LENGTH (expr);
1897       for (i = 0; i < n; i++)
1898         {
1899           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900           if (iv)
1901             return iv;
1902         }
1903     }
1904
1905   /* Stop if it's not ssa name.  */
1906   if (code != SSA_NAME)
1907     return NULL;
1908
       /* A name without an iv or with a zero step derives from no biv; a
          name that is a biv is the answer itself.  */
1909   iv = get_iv (data, expr);
1910   if (!iv || integer_zerop (iv->step))
1911     return NULL;
1912   else if (iv->biv_p)
1913     return iv;
1914
1915   stmt = SSA_NAME_DEF_STMT (expr);
1916   if (gphi *phi = dyn_cast <gphi *> (stmt))
1917     {
1918       ssa_op_iter iter;
1919       use_operand_p use_p;
1920       basic_block phi_bb = gimple_bb (phi);
1921
1922       /* Skip loop header PHI that doesn't define biv.  */
1923       if (phi_bb->loop_father == data->current_loop)
1924         return NULL;
1925
1926       if (virtual_operand_p (gimple_phi_result (phi)))
1927         return NULL;
1928
           /* Try the PHI arguments in turn.  */
1929       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930         {
1931           tree use = USE_FROM_PTR (use_p);
1932           iv = find_deriving_biv_for_expr (data, use);
1933           if (iv)
1934             return iv;
1935         }
1936       return NULL;
1937     }
1938   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939     return NULL;
1940
1941   e1 = gimple_assign_rhs1 (stmt);
1942   code = gimple_assign_rhs_code (stmt);
       /* A single-operand right-hand side is traced as a whole.  */
1943   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944     return find_deriving_biv_for_expr (data, e1);
1945
1946   switch (code)
1947     {
1948     case MULT_EXPR:
1949     case PLUS_EXPR:
1950     case MINUS_EXPR:
1951     case POINTER_PLUS_EXPR:
1952       /* Increments, decrements and multiplications by a constant
1953          are simple.  */
           /* The second operand is tried before the first one.  */
1954       e2 = gimple_assign_rhs2 (stmt);
1955       iv = find_deriving_biv_for_expr (data, e2);
1956       if (iv)
1957         return iv;
1958       gcc_fallthrough ();
1959
1960     CASE_CONVERT:
1961       /* Casts are simple.  */
1962       return find_deriving_biv_for_expr (data, e1);
1963
1964     default:
1965       break;
1966     }
1967
1968   return NULL;
1969 }
1970
1971 /* Record BIV, its predecessor and successor that they are used in
1972    address type uses.  */
1973
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976 {
1977   unsigned i;
1978   tree type, base_1, base_2;
1979   bitmap_iterator bi;
1980
1981   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982       || biv->have_address_use || !biv->no_overflow)
1983     return;
1984
1985   type = TREE_TYPE (biv->base);
1986   if (!INTEGRAL_TYPE_P (type))
1987     return;
1988
1989   biv->have_address_use = true;
1990   data->bivs_not_used_in_addr--;
1991   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993     {
1994       struct iv *iv = ver_info (data, i)->iv;
1995
1996       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997           || iv->have_address_use || !iv->no_overflow)
1998         continue;
1999
2000       if (type != TREE_TYPE (iv->base)
2001           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002         continue;
2003
2004       if (!operand_equal_p (biv->step, iv->step, 0))
2005         continue;
2006
2007       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008       if (operand_equal_p (base_1, iv->base, 0)
2009           || operand_equal_p (base_2, biv->base, 0))
2010         {
2011           iv->have_address_use = true;
2012           data->bivs_not_used_in_addr--;
2013         }
2014     }
2015 }
2016
2017 /* Cumulates the steps of indices into DATA and replaces their values with the
2018    initial ones.  Returns false when the value of the index cannot be determined.
2019    Callback for for_each_index.  */
2020
2021 struct ifs_ivopts_data
2022 {
2023   struct ivopts_data *ivopts_data;
2024   gimple *stmt;
2025   tree step;
2026 };
2027
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2030 {
2031   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032   struct iv *iv;
2033   bool use_overflow_semantics = false;
2034   tree step, iv_base, iv_step, lbound, off;
2035   class loop *loop = dta->ivopts_data->current_loop;
2036
2037   /* If base is a component ref, require that the offset of the reference
2038      be invariant.  */
2039   if (TREE_CODE (base) == COMPONENT_REF)
2040     {
2041       off = component_ref_field_offset (base);
2042       return expr_invariant_in_loop_p (loop, off);
2043     }
2044
2045   /* If base is array, first check whether we will be able to move the
2046      reference out of the loop (in order to take its address in strength
2047      reduction).  In order for this to work we need both lower bound
2048      and step to be loop invariants.  */
2049   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050     {
2051       /* Moreover, for a range, the size needs to be invariant as well.  */
2052       if (TREE_CODE (base) == ARRAY_RANGE_REF
2053           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054         return false;
2055
2056       step = array_ref_element_size (base);
2057       lbound = array_ref_low_bound (base);
2058
2059       if (!expr_invariant_in_loop_p (loop, step)
2060           || !expr_invariant_in_loop_p (loop, lbound))
2061         return false;
2062     }
2063
2064   if (TREE_CODE (*idx) != SSA_NAME)
2065     return true;
2066
2067   iv = get_iv (dta->ivopts_data, *idx);
2068   if (!iv)
2069     return false;
2070
2071   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2072           *&x[0], which is not folded and does not trigger the
2073           ARRAY_REF path below.  */
2074   *idx = iv->base;
2075
2076   if (integer_zerop (iv->step))
2077     return true;
2078
2079   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080     {
2081       step = array_ref_element_size (base);
2082
2083       /* We only handle addresses whose step is an integer constant.  */
2084       if (TREE_CODE (step) != INTEGER_CST)
2085         return false;
2086     }
2087   else
2088     /* The step for pointer arithmetics already is 1 byte.  */
2089     step = size_one_node;
2090
2091   iv_base = iv->base;
2092   iv_step = iv->step;
2093   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094     use_overflow_semantics = true;
2095
2096   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097                             sizetype, &iv_base, &iv_step, dta->stmt,
2098                             use_overflow_semantics))
2099     {
2100       /* The index might wrap.  */
2101       return false;
2102     }
2103
2104   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106
2107   if (dta->ivopts_data->bivs_not_used_in_addr)
2108     {
2109       if (!iv->biv_p)
2110         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111
2112       record_biv_for_address_use (dta->ivopts_data, iv);
2113     }
2114   return true;
2115 }
2116
2117 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2118    object is passed to it in DATA.  */
2119
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122                 void *vdata)
2123 {
2124   struct ivopts_data *data = (struct ivopts_data *) vdata;
2125   find_interesting_uses_op (data, *idx);
2126   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127     {
2128       find_interesting_uses_op (data, array_ref_element_size (base));
2129       find_interesting_uses_op (data, array_ref_low_bound (base));
2130     }
2131   return true;
2132 }
2133
2134 /* If we can prove that TOP = cst * BOT for some constant cst,
2135    store cst to MUL and return true.  Otherwise return false.
2136    The returned value is always sign-extended, regardless of the
2137    signedness of TOP and BOT.  */
2138
2139 static bool
2140 constant_multiple_of (tree top, tree bot, widest_int *mul)
2141 {
2142   tree mby;
2143   enum tree_code code;
2144   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2145   widest_int res, p0, p1;
2146
2147   STRIP_NOPS (top);
2148   STRIP_NOPS (bot);
2149
2150   if (operand_equal_p (top, bot, 0))
2151     {
2152       *mul = 1;
2153       return true;
2154     }
2155
2156   code = TREE_CODE (top);
2157   switch (code)
2158     {
2159     case MULT_EXPR:
2160       mby = TREE_OPERAND (top, 1);
2161       if (TREE_CODE (mby) != INTEGER_CST)
2162         return false;
2163
2164       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2165         return false;
2166
2167       *mul = wi::sext (res * wi::to_widest (mby), precision);
2168       return true;
2169
2170     case PLUS_EXPR:
2171     case MINUS_EXPR:
2172       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2173           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2174         return false;
2175
2176       if (code == MINUS_EXPR)
2177         p1 = -p1;
2178       *mul = wi::sext (p0 + p1, precision);
2179       return true;
2180
2181     case INTEGER_CST:
2182       if (TREE_CODE (bot) != INTEGER_CST)
2183         return false;
2184
2185       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2186       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2187       if (p1 == 0)
2188         return false;
2189       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2190       return res == 0;
2191
2192     default:
2193       if (POLY_INT_CST_P (top)
2194           && POLY_INT_CST_P (bot)
2195           && constant_multiple_p (wi::to_poly_widest (top),
2196                                   wi::to_poly_widest (bot), mul))
2197         return true;
2198
2199       return false;
2200     }
2201 }
2202
2203 /* Return true if memory reference REF with step STEP may be unaligned.  */
2204
2205 static bool
2206 may_be_unaligned_p (tree ref, tree step)
2207 {
2208   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2209      thus they are not misaligned.  */
2210   if (TREE_CODE (ref) == TARGET_MEM_REF)
2211     return false;
2212
2213   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2214   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2215     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2216
2217   unsigned HOST_WIDE_INT bitpos;
2218   unsigned int ref_align;
2219   get_object_alignment_1 (ref, &ref_align, &bitpos);
2220   if (ref_align < align
2221       || (bitpos % align) != 0
2222       || (bitpos % BITS_PER_UNIT) != 0)
2223     return true;
2224
2225   unsigned int trailing_zeros = tree_ctz (step);
2226   if (trailing_zeros < HOST_BITS_PER_INT
2227       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2228     return true;
2229
2230   return false;
2231 }
2232
2233 /* Return true if EXPR may be non-addressable.   */
2234
2235 bool
2236 may_be_nonaddressable_p (tree expr)
2237 {
2238   switch (TREE_CODE (expr))
2239     {
2240     case VAR_DECL:
2241       /* Check if it's a register variable.  */
2242       return DECL_HARD_REGISTER (expr);
2243
2244     case TARGET_MEM_REF:
2245       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2246          target, thus they are always addressable.  */
2247       return false;
2248
2249     case MEM_REF:
2250       /* Likewise for MEM_REFs, modulo the storage order.  */
2251       return REF_REVERSE_STORAGE_ORDER (expr);
2252
2253     case BIT_FIELD_REF:
2254       if (REF_REVERSE_STORAGE_ORDER (expr))
2255         return true;
2256       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2257
2258     case COMPONENT_REF:
2259       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2260         return true;
2261       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2262              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263
2264     case ARRAY_REF:
2265     case ARRAY_RANGE_REF:
2266       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2267         return true;
2268       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269
2270     case VIEW_CONVERT_EXPR:
2271       /* This kind of view-conversions may wrap non-addressable objects
2272          and make them look addressable.  After some processing the
2273          non-addressability may be uncovered again, causing ADDR_EXPRs
2274          of inappropriate objects to be built.  */
2275       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2276           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2277         return true;
2278       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2279
2280     CASE_CONVERT:
2281       return true;
2282
2283     default:
2284       break;
2285     }
2286
2287   return false;
2288 }
2289
2290 /* Finds addresses in *OP_P inside STMT.  */
2291
2292 static void
2293 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2294                                tree *op_p)
2295 {
2296   tree base = *op_p, step = size_zero_node;
2297   struct iv *civ;
2298   struct ifs_ivopts_data ifs_ivopts_data;
2299
2300   /* Do not play with volatile memory references.  A bit too conservative,
2301      perhaps, but safe.  */
2302   if (gimple_has_volatile_ops (stmt))
2303     goto fail;
2304
2305   /* Ignore bitfields for now.  Not really something terribly complicated
2306      to handle.  TODO.  */
2307   if (TREE_CODE (base) == BIT_FIELD_REF)
2308     goto fail;
2309
2310   base = unshare_expr (base);
2311
2312   if (TREE_CODE (base) == TARGET_MEM_REF)
2313     {
2314       tree type = build_pointer_type (TREE_TYPE (base));
2315       tree astep;
2316
2317       if (TMR_BASE (base)
2318           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2319         {
2320           civ = get_iv (data, TMR_BASE (base));
2321           if (!civ)
2322             goto fail;
2323
2324           TMR_BASE (base) = civ->base;
2325           step = civ->step;
2326         }
2327       if (TMR_INDEX2 (base)
2328           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2329         {
2330           civ = get_iv (data, TMR_INDEX2 (base));
2331           if (!civ)
2332             goto fail;
2333
2334           TMR_INDEX2 (base) = civ->base;
2335           step = civ->step;
2336         }
2337       if (TMR_INDEX (base)
2338           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2339         {
2340           civ = get_iv (data, TMR_INDEX (base));
2341           if (!civ)
2342             goto fail;
2343
2344           TMR_INDEX (base) = civ->base;
2345           astep = civ->step;
2346
2347           if (astep)
2348             {
2349               if (TMR_STEP (base))
2350                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2351
2352               step = fold_build2 (PLUS_EXPR, type, step, astep);
2353             }
2354         }
2355
2356       if (integer_zerop (step))
2357         goto fail;
2358       base = tree_mem_ref_addr (type, base);
2359     }
2360   else
2361     {
2362       ifs_ivopts_data.ivopts_data = data;
2363       ifs_ivopts_data.stmt = stmt;
2364       ifs_ivopts_data.step = size_zero_node;
2365       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2366           || integer_zerop (ifs_ivopts_data.step))
2367         goto fail;
2368       step = ifs_ivopts_data.step;
2369
2370       /* Check that the base expression is addressable.  This needs
2371          to be done after substituting bases of IVs into it.  */
2372       if (may_be_nonaddressable_p (base))
2373         goto fail;
2374
2375       /* Moreover, on strict alignment platforms, check that it is
2376          sufficiently aligned.  */
2377       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2378         goto fail;
2379
2380       base = build_fold_addr_expr (base);
2381
2382       /* Substituting bases of IVs into the base expression might
2383          have caused folding opportunities.  */
2384       if (TREE_CODE (base) == ADDR_EXPR)
2385         {
2386           tree *ref = &TREE_OPERAND (base, 0);
2387           while (handled_component_p (*ref))
2388             ref = &TREE_OPERAND (*ref, 0);
2389           if (TREE_CODE (*ref) == MEM_REF)
2390             {
2391               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2392                                       TREE_OPERAND (*ref, 0),
2393                                       TREE_OPERAND (*ref, 1));
2394               if (tem)
2395                 *ref = tem;
2396             }
2397         }
2398     }
2399
2400   civ = alloc_iv (data, base, step);
2401   /* Fail if base object of this memory reference is unknown.  */
2402   if (civ->base_object == NULL_TREE)
2403     goto fail;
2404
2405   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2406   return;
2407
2408 fail:
2409   for_each_index (op_p, idx_record_use, data);
2410 }
2411
2412 /* Finds and records invariants used in STMT.  */
2413
2414 static void
2415 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2416 {
2417   ssa_op_iter iter;
2418   use_operand_p use_p;
2419   tree op;
2420
2421   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2422     {
2423       op = USE_FROM_PTR (use_p);
2424       record_invariant (data, op, false);
2425     }
2426 }
2427
2428 /* CALL calls an internal function.  If operand *OP_P will become an
2429    address when the call is expanded, return the type of the memory
2430    being addressed, otherwise return null.  */
2431
2432 static tree
2433 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2434 {
2435   switch (gimple_call_internal_fn (call))
2436     {
2437     case IFN_MASK_LOAD:
2438     case IFN_MASK_LOAD_LANES:
2439     case IFN_LEN_LOAD:
2440       if (op_p == gimple_call_arg_ptr (call, 0))
2441         return TREE_TYPE (gimple_call_lhs (call));
2442       return NULL_TREE;
2443
2444     case IFN_MASK_STORE:
2445     case IFN_MASK_STORE_LANES:
2446     case IFN_LEN_STORE:
2447       if (op_p == gimple_call_arg_ptr (call, 0))
2448         return TREE_TYPE (gimple_call_arg (call, 3));
2449       return NULL_TREE;
2450
2451     default:
2452       return NULL_TREE;
2453     }
2454 }
2455
2456 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2457    Return true if the operand will become an address when STMT
2458    is expanded and record the associated address use if so.  */
2459
2460 static bool
2461 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2462                        struct iv *iv)
2463 {
2464   /* Fail if base object of this memory reference is unknown.  */
2465   if (iv->base_object == NULL_TREE)
2466     return false;
2467
2468   tree mem_type = NULL_TREE;
2469   if (gcall *call = dyn_cast <gcall *> (stmt))
2470     if (gimple_call_internal_p (call))
2471       mem_type = get_mem_type_for_internal_fn (call, op_p);
2472   if (mem_type)
2473     {
2474       iv = alloc_iv (data, iv->base, iv->step);
2475       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2476       return true;
2477     }
2478   return false;
2479 }
2480
2481 /* Finds interesting uses of induction variables in the statement STMT.  */
2482
2483 static void
2484 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2485 {
2486   struct iv *iv;
2487   tree op, *lhs, *rhs;
2488   ssa_op_iter iter;
2489   use_operand_p use_p;
2490   enum tree_code code;
2491
2492   find_invariants_stmt (data, stmt);
2493
2494   if (gimple_code (stmt) == GIMPLE_COND)
2495     {
2496       find_interesting_uses_cond (data, stmt);
2497       return;
2498     }
2499
2500   if (is_gimple_assign (stmt))
2501     {
2502       lhs = gimple_assign_lhs_ptr (stmt);
2503       rhs = gimple_assign_rhs1_ptr (stmt);
2504
2505       if (TREE_CODE (*lhs) == SSA_NAME)
2506         {
2507           /* If the statement defines an induction variable, the uses are not
2508              interesting by themselves.  */
2509
2510           iv = get_iv (data, *lhs);
2511
2512           if (iv && !integer_zerop (iv->step))
2513             return;
2514         }
2515
2516       code = gimple_assign_rhs_code (stmt);
2517       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2518           && (REFERENCE_CLASS_P (*rhs)
2519               || is_gimple_val (*rhs)))
2520         {
2521           if (REFERENCE_CLASS_P (*rhs))
2522             find_interesting_uses_address (data, stmt, rhs);
2523           else
2524             find_interesting_uses_op (data, *rhs);
2525
2526           if (REFERENCE_CLASS_P (*lhs))
2527             find_interesting_uses_address (data, stmt, lhs);
2528           return;
2529         }
2530       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2531         {
2532           find_interesting_uses_cond (data, stmt);
2533           return;
2534         }
2535
2536       /* TODO -- we should also handle address uses of type
2537
2538          memory = call (whatever);
2539
2540          and
2541
2542          call (memory).  */
2543     }
2544
2545   if (gimple_code (stmt) == GIMPLE_PHI
2546       && gimple_bb (stmt) == data->current_loop->header)
2547     {
2548       iv = get_iv (data, PHI_RESULT (stmt));
2549
2550       if (iv && !integer_zerop (iv->step))
2551         return;
2552     }
2553
2554   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2555     {
2556       op = USE_FROM_PTR (use_p);
2557
2558       if (TREE_CODE (op) != SSA_NAME)
2559         continue;
2560
2561       iv = get_iv (data, op);
2562       if (!iv)
2563         continue;
2564
2565       if (!find_address_like_use (data, stmt, use_p->use, iv))
2566         find_interesting_uses_op (data, op);
2567     }
2568 }
2569
2570 /* Finds interesting uses of induction variables outside of loops
2571    on loop exit edge EXIT.  */
2572
2573 static void
2574 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2575 {
2576   gphi *phi;
2577   gphi_iterator psi;
2578   tree def;
2579
2580   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2581     {
2582       phi = psi.phi ();
2583       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2584       if (!virtual_operand_p (def))
2585         find_interesting_uses_op (data, def);
2586     }
2587 }
2588
2589 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2590    mode for memory reference represented by USE.  */
2591
2592 static GTY (()) vec<rtx, va_gc> *addr_list;
2593
2594 static bool
2595 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2596 {
2597   rtx reg, addr;
2598   unsigned list_index;
2599   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2600   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2601
2602   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2603   if (list_index >= vec_safe_length (addr_list))
2604     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2605
2606   addr = (*addr_list)[list_index];
2607   if (!addr)
2608     {
2609       addr_mode = targetm.addr_space.address_mode (as);
2610       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2611       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2612       (*addr_list)[list_index] = addr;
2613     }
2614   else
2615     addr_mode = GET_MODE (addr);
2616
2617   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2618   return (memory_address_addr_space_p (mem_mode, addr, as));
2619 }
2620
2621 /* Comparison function to sort group in ascending order of addr_offset.  */
2622
2623 static int
2624 group_compare_offset (const void *a, const void *b)
2625 {
2626   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2627   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2628
2629   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2630 }
2631
2632 /* Check if small groups should be split.  Return true if no group
2633    contains more than two uses with distinct addr_offsets.  Return
2634    false otherwise.  We want to split such groups because:
2635
2636      1) Small groups don't have much benefit and may interfer with
2637         general candidate selection.
2638      2) Size for problem with only small groups is usually small and
2639         general algorithm can handle it well.
2640
2641    TODO -- Above claim may not hold when we want to merge memory
2642    accesses with conseuctive addresses.  */
2643
2644 static bool
2645 split_small_address_groups_p (struct ivopts_data *data)
2646 {
2647   unsigned int i, j, distinct = 1;
2648   struct iv_use *pre;
2649   struct iv_group *group;
2650
2651   for (i = 0; i < data->vgroups.length (); i++)
2652     {
2653       group = data->vgroups[i];
2654       if (group->vuses.length () == 1)
2655         continue;
2656
2657       gcc_assert (address_p (group->type));
2658       if (group->vuses.length () == 2)
2659         {
2660           if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2661                                       group->vuses[1]->addr_offset) > 0)
2662             std::swap (group->vuses[0], group->vuses[1]);
2663         }
2664       else
2665         group->vuses.qsort (group_compare_offset);
2666
2667       if (distinct > 2)
2668         continue;
2669
2670       distinct = 1;
2671       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2672         {
2673           if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2674             {
2675               pre = group->vuses[j];
2676               distinct++;
2677             }
2678
2679           if (distinct > 2)
2680             break;
2681         }
2682     }
2683
2684   return (distinct <= 2);
2685 }
2686
2687 /* For each group of address type uses, this function further groups
2688    these uses according to the maximum offset supported by target's
2689    [base + offset] addressing mode.  */
2690
2691 static void
2692 split_address_groups (struct ivopts_data *data)
2693 {
2694   unsigned int i, j;
2695   /* Always split group.  */
2696   bool split_p = split_small_address_groups_p (data);
2697
2698   for (i = 0; i < data->vgroups.length (); i++)
2699     {
2700       struct iv_group *new_group = NULL;
2701       struct iv_group *group = data->vgroups[i];
2702       struct iv_use *use = group->vuses[0];
2703
2704       use->id = 0;
2705       use->group_id = group->id;
2706       if (group->vuses.length () == 1)
2707         continue;
2708
2709       gcc_assert (address_p (use->type));
2710
2711       for (j = 1; j < group->vuses.length ();)
2712         {
2713           struct iv_use *next = group->vuses[j];
2714           poly_int64 offset = next->addr_offset - use->addr_offset;
2715
2716           /* Split group if aksed to, or the offset against the first
2717              use can't fit in offset part of addressing mode.  IV uses
2718              having the same offset are still kept in one group.  */
2719           if (maybe_ne (offset, 0)
2720               && (split_p || !addr_offset_valid_p (use, offset)))
2721             {
2722               if (!new_group)
2723                 new_group = record_group (data, group->type);
2724               group->vuses.ordered_remove (j);
2725               new_group->vuses.safe_push (next);
2726               continue;
2727             }
2728
2729           next->id = j;
2730           next->group_id = group->id;
2731           j++;
2732         }
2733     }
2734 }
2735
2736 /* Finds uses of the induction variables that are interesting.  */
2737
2738 static void
2739 find_interesting_uses (struct ivopts_data *data)
2740 {
2741   basic_block bb;
2742   gimple_stmt_iterator bsi;
2743   basic_block *body = get_loop_body (data->current_loop);
2744   unsigned i;
2745   edge e;
2746
2747   for (i = 0; i < data->current_loop->num_nodes; i++)
2748     {
2749       edge_iterator ei;
2750       bb = body[i];
2751
2752       FOR_EACH_EDGE (e, ei, bb->succs)
2753         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2754             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2755           find_interesting_uses_outside (data, e);
2756
2757       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2758         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2759       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2760         if (!is_gimple_debug (gsi_stmt (bsi)))
2761           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2762     }
2763   free (body);
2764
2765   split_address_groups (data);
2766
2767   if (dump_file && (dump_flags & TDF_DETAILS))
2768     {
2769       fprintf (dump_file, "\n<IV Groups>:\n");
2770       dump_groups (dump_file, data);
2771       fprintf (dump_file, "\n");
2772     }
2773 }
2774
2775 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2776    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2777    we are at the top-level of the processed address.  */
2778
2779 static tree
2780 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2781                 poly_int64 *offset)
2782 {
2783   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2784   enum tree_code code;
2785   tree type, orig_type = TREE_TYPE (expr);
2786   poly_int64 off0, off1;
2787   HOST_WIDE_INT st;
2788   tree orig_expr = expr;
2789
2790   STRIP_NOPS (expr);
2791
2792   type = TREE_TYPE (expr);
2793   code = TREE_CODE (expr);
2794   *offset = 0;
2795
2796   switch (code)
2797     {
2798     case POINTER_PLUS_EXPR:
2799     case PLUS_EXPR:
2800     case MINUS_EXPR:
2801       op0 = TREE_OPERAND (expr, 0);
2802       op1 = TREE_OPERAND (expr, 1);
2803
2804       op0 = strip_offset_1 (op0, false, false, &off0);
2805       op1 = strip_offset_1 (op1, false, false, &off1);
2806
2807       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2808       if (op0 == TREE_OPERAND (expr, 0)
2809           && op1 == TREE_OPERAND (expr, 1))
2810         return orig_expr;
2811
2812       if (integer_zerop (op1))
2813         expr = op0;
2814       else if (integer_zerop (op0))
2815         {
2816           if (code == MINUS_EXPR)
2817             expr = fold_build1 (NEGATE_EXPR, type, op1);
2818           else
2819             expr = op1;
2820         }
2821       else
2822         expr = fold_build2 (code, type, op0, op1);
2823
2824       return fold_convert (orig_type, expr);
2825
2826     case MULT_EXPR:
2827       op1 = TREE_OPERAND (expr, 1);
2828       if (!cst_and_fits_in_hwi (op1))
2829         return orig_expr;
2830
2831       op0 = TREE_OPERAND (expr, 0);
2832       op0 = strip_offset_1 (op0, false, false, &off0);
2833       if (op0 == TREE_OPERAND (expr, 0))
2834         return orig_expr;
2835
2836       *offset = off0 * int_cst_value (op1);
2837       if (integer_zerop (op0))
2838         expr = op0;
2839       else
2840         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2841
2842       return fold_convert (orig_type, expr);
2843
2844     case ARRAY_REF:
2845     case ARRAY_RANGE_REF:
2846       if (!inside_addr)
2847         return orig_expr;
2848
2849       step = array_ref_element_size (expr);
2850       if (!cst_and_fits_in_hwi (step))
2851         break;
2852
2853       st = int_cst_value (step);
2854       op1 = TREE_OPERAND (expr, 1);
2855       op1 = strip_offset_1 (op1, false, false, &off1);
2856       *offset = off1 * st;
2857
2858       if (top_compref
2859           && integer_zerop (op1))
2860         {
2861           /* Strip the component reference completely.  */
2862           op0 = TREE_OPERAND (expr, 0);
2863           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2864           *offset += off0;
2865           return op0;
2866         }
2867       break;
2868
2869     case COMPONENT_REF:
2870       {
2871         tree field;
2872
2873         if (!inside_addr)
2874           return orig_expr;
2875
2876         tmp = component_ref_field_offset (expr);
2877         field = TREE_OPERAND (expr, 1);
2878         if (top_compref
2879             && cst_and_fits_in_hwi (tmp)
2880             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2881           {
2882             HOST_WIDE_INT boffset, abs_off;
2883
2884             /* Strip the component reference completely.  */
2885             op0 = TREE_OPERAND (expr, 0);
2886             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2887             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2888             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2889             if (boffset < 0)
2890               abs_off = -abs_off;
2891
2892             *offset = off0 + int_cst_value (tmp) + abs_off;
2893             return op0;
2894           }
2895       }
2896       break;
2897
2898     case ADDR_EXPR:
2899       op0 = TREE_OPERAND (expr, 0);
2900       op0 = strip_offset_1 (op0, true, true, &off0);
2901       *offset += off0;
2902
2903       if (op0 == TREE_OPERAND (expr, 0))
2904         return orig_expr;
2905
2906       expr = build_fold_addr_expr (op0);
2907       return fold_convert (orig_type, expr);
2908
2909     case MEM_REF:
2910       /* ???  Offset operand?  */
2911       inside_addr = false;
2912       break;
2913
2914     default:
2915       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2916         return build_int_cst (orig_type, 0);
2917       return orig_expr;
2918     }
2919
2920   /* Default handling of expressions for that we want to recurse into
2921      the first operand.  */
2922   op0 = TREE_OPERAND (expr, 0);
2923   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2924   *offset += off0;
2925
2926   if (op0 == TREE_OPERAND (expr, 0)
2927       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2928     return orig_expr;
2929
2930   expr = copy_node (expr);
2931   TREE_OPERAND (expr, 0) = op0;
2932   if (op1)
2933     TREE_OPERAND (expr, 1) = op1;
2934
2935   /* Inside address, we might strip the top level component references,
2936      thus changing type of the expression.  Handling of ADDR_EXPR
2937      will fix that.  */
2938   expr = fold_convert (orig_type, expr);
2939
2940   return expr;
2941 }
2942
2943 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2944
2945 tree
2946 strip_offset (tree expr, poly_uint64_pod *offset)
2947 {
2948   poly_int64 off;
2949   tree core = strip_offset_1 (expr, false, false, &off);
2950   *offset = off;
2951   return core;
2952 }
2953
2954 /* Returns variant of TYPE that can be used as base for different uses.
2955    We return unsigned type with the same precision, which avoids problems
2956    with overflows.  */
2957
2958 static tree
2959 generic_type_for (tree type)
2960 {
2961   if (POINTER_TYPE_P (type))
2962     return unsigned_type_for (type);
2963
2964   if (TYPE_UNSIGNED (type))
2965     return type;
2966
2967   return unsigned_type_for (type);
2968 }
2969
2970 /* Private data for walk_tree.  */
2971
2972 struct walk_tree_data
2973 {
2974   bitmap *inv_vars;
2975   struct ivopts_data *idata;
2976 };
2977
2978 /* Callback function for walk_tree, it records invariants and symbol
2979    reference in *EXPR_P.  DATA is the structure storing result info.  */
2980
2981 static tree
2982 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2983 {
2984   tree op = *expr_p;
2985   struct version_info *info;
2986   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2987
2988   if (TREE_CODE (op) != SSA_NAME)
2989     return NULL_TREE;
2990
2991   info = name_info (wdata->idata, op);
2992   /* Because we expand simple operations when finding IVs, loop invariant
2993      variable that isn't referred by the original loop could be used now.
2994      Record such invariant variables here.  */
2995   if (!info->iv)
2996     {
2997       struct ivopts_data *idata = wdata->idata;
2998       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2999
3000       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3001         {
3002           tree steptype = TREE_TYPE (op);
3003           if (POINTER_TYPE_P (steptype))
3004             steptype = sizetype;
3005           set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3006           record_invariant (idata, op, false);
3007         }
3008     }
3009   if (!info->inv_id || info->has_nonlin_use)
3010     return NULL_TREE;
3011
3012   if (!*wdata->inv_vars)
3013     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3014   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3015
3016   return NULL_TREE;
3017 }
3018
3019 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
3020    store it.  */
3021
3022 static inline void
3023 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3024 {
3025   struct walk_tree_data wdata;
3026
3027   if (!inv_vars)
3028     return;
3029
3030   wdata.idata = data;
3031   wdata.inv_vars = inv_vars;
3032   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3033 }
3034
3035 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3036    will be recorded if it doesn't exist yet.  Given below two exprs:
3037      inv_expr + cst1, inv_expr + cst2
3038    It's hard to make decision whether constant part should be stripped
3039    or not.  We choose to not strip based on below facts:
3040      1) We need to count ADD cost for constant part if it's stripped,
3041         which isn't always trivial where this functions is called.
3042      2) Stripping constant away may be conflict with following loop
3043         invariant hoisting pass.
3044      3) Not stripping constant away results in more invariant exprs,
3045         which usually leads to decision preferring lower reg pressure.  */
3046
3047 static iv_inv_expr_ent *
3048 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3049 {
3050   STRIP_NOPS (inv_expr);
3051
3052   if (poly_int_tree_p (inv_expr)
3053       || TREE_CODE (inv_expr) == SSA_NAME)
3054     return NULL;
3055
3056   /* Don't strip constant part away as we used to.  */
3057
3058   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3059   struct iv_inv_expr_ent ent;
3060   ent.expr = inv_expr;
3061   ent.hash = iterative_hash_expr (inv_expr, 0);
3062   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3063
3064   if (!*slot)
3065     {
3066       *slot = XNEW (struct iv_inv_expr_ent);
3067       (*slot)->expr = inv_expr;
3068       (*slot)->hash = ent.hash;
3069       (*slot)->id = ++data->max_inv_expr_id;
3070     }
3071
3072   return *slot;
3073 }
3074
3075 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3076    position to POS.  If USE is not NULL, the candidate is set as related to
3077    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3078    replacement of the final value of the iv by a direct computation.  */
3079
3080 static struct iv_cand *
3081 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3082                  enum iv_position pos, struct iv_use *use,
3083                  gimple *incremented_at, struct iv *orig_iv = NULL,
3084                  bool doloop = false)
3085 {
3086   unsigned i;
3087   struct iv_cand *cand = NULL;
3088   tree type, orig_type;
3089
3090   gcc_assert (base && step);
3091
3092   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3093      live, but the ivopts code may replace a real pointer with one
3094      pointing before or after the memory block that is then adjusted
3095      into the memory block during the loop.  FIXME: It would likely be
3096      better to actually force the pointer live and still use ivopts;
3097      for example, it would be enough to write the pointer into memory
3098      and keep it there until after the loop.  */
3099   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3100     return NULL;
3101
3102   /* For non-original variables, make sure their values are computed in a type
3103      that does not invoke undefined behavior on overflows (since in general,
3104      we cannot prove that these induction variables are non-wrapping).  */
3105   if (pos != IP_ORIGINAL)
3106     {
3107       orig_type = TREE_TYPE (base);
3108       type = generic_type_for (orig_type);
3109       if (type != orig_type)
3110         {
3111           base = fold_convert (type, base);
3112           step = fold_convert (type, step);
3113         }
3114     }
3115
3116   for (i = 0; i < data->vcands.length (); i++)
3117     {
3118       cand = data->vcands[i];
3119
3120       if (cand->pos != pos)
3121         continue;
3122
3123       if (cand->incremented_at != incremented_at
3124           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3125               && cand->ainc_use != use))
3126         continue;
3127
3128       if (operand_equal_p (base, cand->iv->base, 0)
3129           && operand_equal_p (step, cand->iv->step, 0)
3130           && (TYPE_PRECISION (TREE_TYPE (base))
3131               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3132         break;
3133     }
3134
3135   if (i == data->vcands.length ())
3136     {
3137       cand = XCNEW (struct iv_cand);
3138       cand->id = i;
3139       cand->iv = alloc_iv (data, base, step);
3140       cand->pos = pos;
3141       if (pos != IP_ORIGINAL)
3142         {
3143           if (doloop)
3144             cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3145           else
3146             cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3147           cand->var_after = cand->var_before;
3148         }
3149       cand->important = important;
3150       cand->incremented_at = incremented_at;
3151       cand->doloop_p = doloop;
3152       data->vcands.safe_push (cand);
3153
3154       if (!poly_int_tree_p (step))
3155         {
3156           find_inv_vars (data, &step, &cand->inv_vars);
3157
3158           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3159           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3160           if (inv_expr != NULL)
3161             {
3162               cand->inv_exprs = cand->inv_vars;
3163               cand->inv_vars = NULL;
3164               if (cand->inv_exprs)
3165                 bitmap_clear (cand->inv_exprs);
3166               else
3167                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3168
3169               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3170             }
3171         }
3172
3173       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3174         cand->ainc_use = use;
3175       else
3176         cand->ainc_use = NULL;
3177
3178       cand->orig_iv = orig_iv;
3179       if (dump_file && (dump_flags & TDF_DETAILS))
3180         dump_cand (dump_file, cand);
3181     }
3182
3183   cand->important |= important;
3184   cand->doloop_p |= doloop;
3185
3186   /* Relate candidate to the group for which it is added.  */
3187   if (use)
3188     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3189
3190   return cand;
3191 }
3192
3193 /* Returns true if incrementing the induction variable at the end of the LOOP
3194    is allowed.
3195
3196    The purpose is to avoid splitting latch edge with a biv increment, thus
3197    creating a jump, possibly confusing other optimization passes and leaving
3198    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3199    available (so we do not have a better alternative), or if the latch edge
3200    is already nonempty.  */
3201
3202 static bool
3203 allow_ip_end_pos_p (class loop *loop)
3204 {
3205   if (!ip_normal_pos (loop))
3206     return true;
3207
3208   if (!empty_block_p (ip_end_pos (loop)))
3209     return true;
3210
3211   return false;
3212 }
3213
3214 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3215    Important field is set to IMPORTANT.  */
3216
3217 static void
3218 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3219                         bool important, struct iv_use *use)
3220 {
3221   basic_block use_bb = gimple_bb (use->stmt);
3222   machine_mode mem_mode;
3223   unsigned HOST_WIDE_INT cstepi;
3224
3225   /* If we insert the increment in any position other than the standard
3226      ones, we must ensure that it is incremented once per iteration.
3227      It must not be in an inner nested loop, or one side of an if
3228      statement.  */
3229   if (use_bb->loop_father != data->current_loop
3230       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3231       || stmt_can_throw_internal (cfun, use->stmt)
3232       || !cst_and_fits_in_hwi (step))
3233     return;
3234
3235   cstepi = int_cst_value (step);
3236
3237   mem_mode = TYPE_MODE (use->mem_type);
3238   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3239         || USE_STORE_PRE_INCREMENT (mem_mode))
3240        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3241       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3242            || USE_STORE_PRE_DECREMENT (mem_mode))
3243           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3244     {
3245       enum tree_code code = MINUS_EXPR;
3246       tree new_base;
3247       tree new_step = step;
3248
3249       if (POINTER_TYPE_P (TREE_TYPE (base)))
3250         {
3251           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3252           code = POINTER_PLUS_EXPR;
3253         }
3254       else
3255         new_step = fold_convert (TREE_TYPE (base), new_step);
3256       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3257       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3258                        use->stmt);
3259     }
3260   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3261         || USE_STORE_POST_INCREMENT (mem_mode))
3262        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3263       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3264            || USE_STORE_POST_DECREMENT (mem_mode))
3265           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3266     {
3267       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3268                        use->stmt);
3269     }
3270 }
3271
3272 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3273    position to POS.  If USE is not NULL, the candidate is set as related to
3274    it.  The candidate computation is scheduled before exit condition and at
3275    the end of loop.  */
3276
3277 static void
3278 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3279                struct iv_use *use, struct iv *orig_iv = NULL,
3280                bool doloop = false)
3281 {
3282   if (ip_normal_pos (data->current_loop))
3283     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3284                      doloop);
3285   /* Exclude doloop candidate here since it requires decrement then comparison
3286      and jump, the IP_END position doesn't match.  */
3287   if (!doloop && ip_end_pos (data->current_loop)
3288       && allow_ip_end_pos_p (data->current_loop))
3289     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3290 }
3291
3292 /* Adds standard iv candidates.  */
3293
3294 static void
3295 add_standard_iv_candidates (struct ivopts_data *data)
3296 {
3297   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3298
3299   /* The same for a double-integer type if it is still fast enough.  */
3300   if (TYPE_PRECISION
3301         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3302       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3303     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3304                    build_int_cst (long_integer_type_node, 1), true, NULL);
3305
3306   /* The same for a double-integer type if it is still fast enough.  */
3307   if (TYPE_PRECISION
3308         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3309       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3310     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3311                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3312 }
3313
3314
3315 /* Adds candidates bases on the old induction variable IV.  */
3316
3317 static void
3318 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3319 {
3320   gimple *phi;
3321   tree def;
3322   struct iv_cand *cand;
3323
3324   /* Check if this biv is used in address type use.  */
3325   if (iv->no_overflow  && iv->have_address_use
3326       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3327       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3328     {
3329       tree base = fold_convert (sizetype, iv->base);
3330       tree step = fold_convert (sizetype, iv->step);
3331
3332       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3333       add_candidate (data, base, step, true, NULL, iv);
3334       /* Add iv cand of the original type only if it has nonlinear use.  */
3335       if (iv->nonlin_use)
3336         add_candidate (data, iv->base, iv->step, true, NULL);
3337     }
3338   else
3339     add_candidate (data, iv->base, iv->step, true, NULL);
3340
3341   /* The same, but with initial value zero.  */
3342   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3343     add_candidate (data, size_int (0), iv->step, true, NULL);
3344   else
3345     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3346                    iv->step, true, NULL);
3347
3348   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3349   if (gimple_code (phi) == GIMPLE_PHI)
3350     {
3351       /* Additionally record the possibility of leaving the original iv
3352          untouched.  */
3353       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3354       /* Don't add candidate if it's from another PHI node because
3355          it's an affine iv appearing in the form of PEELED_CHREC.  */
3356       phi = SSA_NAME_DEF_STMT (def);
3357       if (gimple_code (phi) != GIMPLE_PHI)
3358         {
3359           cand = add_candidate_1 (data,
3360                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3361                                   SSA_NAME_DEF_STMT (def));
3362           if (cand)
3363             {
3364               cand->var_before = iv->ssa_name;
3365               cand->var_after = def;
3366             }
3367         }
3368       else
3369         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3370     }
3371 }
3372
3373 /* Adds candidates based on the old induction variables.  */
3374
3375 static void
3376 add_iv_candidate_for_bivs (struct ivopts_data *data)
3377 {
3378   unsigned i;
3379   struct iv *iv;
3380   bitmap_iterator bi;
3381
3382   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3383     {
3384       iv = ver_info (data, i)->iv;
3385       if (iv && iv->biv_p && !integer_zerop (iv->step))
3386         add_iv_candidate_for_biv (data, iv);
3387     }
3388 }
3389
3390 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3391
3392 static void
3393 record_common_cand (struct ivopts_data *data, tree base,
3394                     tree step, struct iv_use *use)
3395 {
3396   class iv_common_cand ent;
3397   class iv_common_cand **slot;
3398
3399   ent.base = base;
3400   ent.step = step;
3401   ent.hash = iterative_hash_expr (base, 0);
3402   ent.hash = iterative_hash_expr (step, ent.hash);
3403
3404   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3405   if (*slot == NULL)
3406     {
3407       *slot = new iv_common_cand ();
3408       (*slot)->base = base;
3409       (*slot)->step = step;
3410       (*slot)->uses.create (8);
3411       (*slot)->hash = ent.hash;
3412       data->iv_common_cands.safe_push ((*slot));
3413     }
3414
3415   gcc_assert (use != NULL);
3416   (*slot)->uses.safe_push (use);
3417   return;
3418 }
3419
3420 /* Comparison function used to sort common candidates.  */
3421
3422 static int
3423 common_cand_cmp (const void *p1, const void *p2)
3424 {
3425   unsigned n1, n2;
3426   const class iv_common_cand *const *const ccand1
3427     = (const class iv_common_cand *const *)p1;
3428   const class iv_common_cand *const *const ccand2
3429     = (const class iv_common_cand *const *)p2;
3430
3431   n1 = (*ccand1)->uses.length ();
3432   n2 = (*ccand2)->uses.length ();
3433   return n2 - n1;
3434 }
3435
3436 /* Adds IV candidates based on common candidated recorded.  */
3437
3438 static void
3439 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3440 {
3441   unsigned i, j;
3442   struct iv_cand *cand_1, *cand_2;
3443
3444   data->iv_common_cands.qsort (common_cand_cmp);
3445   for (i = 0; i < data->iv_common_cands.length (); i++)
3446     {
3447       class iv_common_cand *ptr = data->iv_common_cands[i];
3448
3449       /* Only add IV candidate if it's derived from multiple uses.  */
3450       if (ptr->uses.length () <= 1)
3451         break;
3452
3453       cand_1 = NULL;
3454       cand_2 = NULL;
3455       if (ip_normal_pos (data->current_loop))
3456         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3457                                   false, IP_NORMAL, NULL, NULL);
3458
3459       if (ip_end_pos (data->current_loop)
3460           && allow_ip_end_pos_p (data->current_loop))
3461         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3462                                   false, IP_END, NULL, NULL);
3463
3464       /* Bind deriving uses and the new candidates.  */
3465       for (j = 0; j < ptr->uses.length (); j++)
3466         {
3467           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3468           if (cand_1)
3469             bitmap_set_bit (group->related_cands, cand_1->id);
3470           if (cand_2)
3471             bitmap_set_bit (group->related_cands, cand_2->id);
3472         }
3473     }
3474
3475   /* Release data since it is useless from this point.  */
3476   data->iv_common_cand_tab->empty ();
3477   data->iv_common_cands.truncate (0);
3478 }
3479
3480 /* Adds candidates based on the value of USE's iv.  */
3481
3482 static void
3483 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3484 {
3485   poly_uint64 offset;
3486   tree base;
3487   struct iv *iv = use->iv;
3488   tree basetype = TREE_TYPE (iv->base);
3489
3490   /* Don't add candidate for iv_use with non integer, pointer or non-mode
3491      precision types, instead, add candidate for the corresponding scev in
3492      unsigned type with the same precision.  See PR93674 for more info.  */
3493   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3494       || !type_has_mode_precision_p (basetype))
3495     {
3496       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3497                                                  TYPE_UNSIGNED (basetype));
3498       add_candidate (data, fold_convert (basetype, iv->base),
3499                      fold_convert (basetype, iv->step), false, NULL);
3500       return;
3501     }
3502
3503   add_candidate (data, iv->base, iv->step, false, use);
3504
3505   /* Record common candidate for use in case it can be shared by others.  */
3506   record_common_cand (data, iv->base, iv->step, use);
3507
3508   /* Record common candidate with initial value zero.  */
3509   basetype = TREE_TYPE (iv->base);
3510   if (POINTER_TYPE_P (basetype))
3511     basetype = sizetype;
3512   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3513
3514   /* Compare the cost of an address with an unscaled index with the cost of
3515     an address with a scaled index and add candidate if useful.  */
3516   poly_int64 step;
3517   if (use != NULL
3518       && poly_int_tree_p (iv->step, &step)
3519       && address_p (use->type))
3520     {
3521       poly_int64 new_step;
3522       unsigned int fact = preferred_mem_scale_factor
3523         (use->iv->base,
3524          TYPE_MODE (use->mem_type),
3525          optimize_loop_for_speed_p (data->current_loop));
3526
3527       if (fact != 1
3528           && multiple_p (step, fact, &new_step))
3529         add_candidate (data, size_int (0),
3530                        wide_int_to_tree (sizetype, new_step),
3531                        true, NULL);
3532     }
3533
3534   /* Record common candidate with constant offset stripped in base.
3535      Like the use itself, we also add candidate directly for it.  */
3536   base = strip_offset (iv->base, &offset);
3537   if (maybe_ne (offset, 0U) || base != iv->base)
3538     {
3539       record_common_cand (data, base, iv->step, use);
3540       add_candidate (data, base, iv->step, false, use);
3541     }
3542
3543   /* Record common candidate with base_object removed in base.  */
3544   base = iv->base;
3545   STRIP_NOPS (base);
3546   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3547     {
3548       tree step = iv->step;
3549
3550       STRIP_NOPS (step);
3551       base = TREE_OPERAND (base, 1);
3552       step = fold_convert (sizetype, step);
3553       record_common_cand (data, base, step, use);
3554       /* Also record common candidate with offset stripped.  */
3555       base = strip_offset (base, &offset);
3556       if (maybe_ne (offset, 0U))
3557         record_common_cand (data, base, step, use);
3558     }
3559
3560   /* At last, add auto-incremental candidates.  Make such variables
3561      important since other iv uses with same base object may be based
3562      on it.  */
3563   if (use != NULL && address_p (use->type))
3564     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3565 }
3566
3567 /* Adds candidates based on the uses.  */
3568
3569 static void
3570 add_iv_candidate_for_groups (struct ivopts_data *data)
3571 {
3572   unsigned i;
3573
3574   /* Only add candidate for the first use in group.  */
3575   for (i = 0; i < data->vgroups.length (); i++)
3576     {
3577       struct iv_group *group = data->vgroups[i];
3578
3579       gcc_assert (group->vuses[0] != NULL);
3580       add_iv_candidate_for_use (data, group->vuses[0]);
3581     }
3582   add_iv_candidate_derived_from_uses (data);
3583 }
3584
3585 /* Record important candidates and add them to related_cands bitmaps.  */
3586
3587 static void
3588 record_important_candidates (struct ivopts_data *data)
3589 {
3590   unsigned i;
3591   struct iv_group *group;
3592
3593   for (i = 0; i < data->vcands.length (); i++)
3594     {
3595       struct iv_cand *cand = data->vcands[i];
3596
3597       if (cand->important)
3598         bitmap_set_bit (data->important_candidates, i);
3599     }
3600
3601   data->consider_all_candidates = (data->vcands.length ()
3602                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3603
3604   /* Add important candidates to groups' related_cands bitmaps.  */
3605   for (i = 0; i < data->vgroups.length (); i++)
3606     {
3607       group = data->vgroups[i];
3608       bitmap_ior_into (group->related_cands, data->important_candidates);
3609     }
3610 }
3611
3612 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3613    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3614    we allocate a simple list to every use.  */
3615
3616 static void
3617 alloc_use_cost_map (struct ivopts_data *data)
3618 {
3619   unsigned i, size, s;
3620
3621   for (i = 0; i < data->vgroups.length (); i++)
3622     {
3623       struct iv_group *group = data->vgroups[i];
3624
3625       if (data->consider_all_candidates)
3626         size = data->vcands.length ();
3627       else
3628         {
3629           s = bitmap_count_bits (group->related_cands);
3630
3631           /* Round up to the power of two, so that moduling by it is fast.  */
3632           size = s ? (1 << ceil_log2 (s)) : 1;
3633         }
3634
3635       group->n_map_members = size;
3636       group->cost_map = XCNEWVEC (class cost_pair, size);
3637     }
3638 }
3639
3640 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3641    on invariants INV_VARS and that the value used in expressing it is
3642    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3643
3644 static void
3645 set_group_iv_cost (struct ivopts_data *data,
3646                    struct iv_group *group, struct iv_cand *cand,
3647                    comp_cost cost, bitmap inv_vars, tree value,
3648                    enum tree_code comp, bitmap inv_exprs)
3649 {
3650   unsigned i, s;
3651
3652   if (cost.infinite_cost_p ())
3653     {
3654       BITMAP_FREE (inv_vars);
3655       BITMAP_FREE (inv_exprs);
3656       return;
3657     }
3658
3659   if (data->consider_all_candidates)
3660     {
3661       group->cost_map[cand->id].cand = cand;
3662       group->cost_map[cand->id].cost = cost;
3663       group->cost_map[cand->id].inv_vars = inv_vars;
3664       group->cost_map[cand->id].inv_exprs = inv_exprs;
3665       group->cost_map[cand->id].value = value;
3666       group->cost_map[cand->id].comp = comp;
3667       return;
3668     }
3669
3670   /* n_map_members is a power of two, so this computes modulo.  */
3671   s = cand->id & (group->n_map_members - 1);
3672   for (i = s; i < group->n_map_members; i++)
3673     if (!group->cost_map[i].cand)
3674       goto found;
3675   for (i = 0; i < s; i++)
3676     if (!group->cost_map[i].cand)
3677       goto found;
3678
3679   gcc_unreachable ();
3680
3681 found:
3682   group->cost_map[i].cand = cand;
3683   group->cost_map[i].cost = cost;
3684   group->cost_map[i].inv_vars = inv_vars;
3685   group->cost_map[i].inv_exprs = inv_exprs;
3686   group->cost_map[i].value = value;
3687   group->cost_map[i].comp = comp;
3688 }
3689
3690 /* Gets cost of (GROUP, CAND) pair.  */
3691
3692 static class cost_pair *
3693 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3694                    struct iv_cand *cand)
3695 {
3696   unsigned i, s;
3697   class cost_pair *ret;
3698
3699   if (!cand)
3700     return NULL;
3701
3702   if (data->consider_all_candidates)
3703     {
3704       ret = group->cost_map + cand->id;
3705       if (!ret->cand)
3706         return NULL;
3707
3708       return ret;
3709     }
3710
3711   /* n_map_members is a power of two, so this computes modulo.  */
3712   s = cand->id & (group->n_map_members - 1);
3713   for (i = s; i < group->n_map_members; i++)
3714     if (group->cost_map[i].cand == cand)
3715       return group->cost_map + i;
3716     else if (group->cost_map[i].cand == NULL)
3717       return NULL;
3718   for (i = 0; i < s; i++)
3719     if (group->cost_map[i].cand == cand)
3720       return group->cost_map + i;
3721     else if (group->cost_map[i].cand == NULL)
3722       return NULL;
3723
3724   return NULL;
3725 }
3726
3727 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3728 static rtx
3729 produce_memory_decl_rtl (tree obj, int *regno)
3730 {
3731   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3732   machine_mode address_mode = targetm.addr_space.address_mode (as);
3733   rtx x;
3734
3735   gcc_assert (obj);
3736   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3737     {
3738       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3739       x = gen_rtx_SYMBOL_REF (address_mode, name);
3740       SET_SYMBOL_REF_DECL (x, obj);
3741       x = gen_rtx_MEM (DECL_MODE (obj), x);
3742       set_mem_addr_space (x, as);
3743       targetm.encode_section_info (obj, x, true);
3744     }
3745   else
3746     {
3747       x = gen_raw_REG (address_mode, (*regno)++);
3748       x = gen_rtx_MEM (DECL_MODE (obj), x);
3749       set_mem_addr_space (x, as);
3750     }
3751
3752   return x;
3753 }
3754
3755 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3756    walk_tree.  DATA contains the actual fake register number.  */
3757
3758 static tree
3759 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3760 {
3761   tree obj = NULL_TREE;
3762   rtx x = NULL_RTX;
3763   int *regno = (int *) data;
3764
3765   switch (TREE_CODE (*expr_p))
3766     {
3767     case ADDR_EXPR:
3768       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3769            handled_component_p (*expr_p);
3770            expr_p = &TREE_OPERAND (*expr_p, 0))
3771         continue;
3772       obj = *expr_p;
3773       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3774         x = produce_memory_decl_rtl (obj, regno);
3775       break;
3776
3777     case SSA_NAME:
3778       *ws = 0;
3779       obj = SSA_NAME_VAR (*expr_p);
3780       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3781       if (!obj)
3782         return NULL_TREE;
3783       if (!DECL_RTL_SET_P (obj))
3784         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3785       break;
3786
3787     case VAR_DECL:
3788     case PARM_DECL:
3789     case RESULT_DECL:
3790       *ws = 0;
3791       obj = *expr_p;
3792
3793       if (DECL_RTL_SET_P (obj))
3794         break;
3795
3796       if (DECL_MODE (obj) == BLKmode)
3797         x = produce_memory_decl_rtl (obj, regno);
3798       else
3799         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3800
3801       break;
3802
3803     default:
3804       break;
3805     }
3806
3807   if (x)
3808     {
3809       decl_rtl_to_reset.safe_push (obj);
3810       SET_DECL_RTL (obj, x);
3811     }
3812
3813   return NULL_TREE;
3814 }
3815
3816 /* Predict whether the given loop will be transformed in the RTL
3817    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3818    This is only for target independent checks, see targetm.predict_doloop_p
3819    for the target dependent ones.
3820
3821    Note that according to some initial investigation, some checks like costly
3822    niter check and invalid stmt scanning don't have much gains among general
3823    cases, so keep this as simple as possible first.
3824
3825    Some RTL specific checks seems unable to be checked in gimple, if any new
3826    checks or easy checks _are_ missing here, please add them.  */
3827
3828 static bool
3829 generic_predict_doloop_p (struct ivopts_data *data)
3830 {
3831   class loop *loop = data->current_loop;
3832
3833   /* Call target hook for target dependent checks.  */
3834   if (!targetm.predict_doloop_p (loop))
3835     {
3836       if (dump_file && (dump_flags & TDF_DETAILS))
3837         fprintf (dump_file, "Predict doloop failure due to"
3838                             " target specific checks.\n");
3839       return false;
3840     }
3841
3842   /* Similar to doloop_optimize, check iteration description to know it's
3843      suitable or not.  Keep it as simple as possible, feel free to extend it
3844      if you find any multiple exits cases matter.  */
3845   edge exit = single_dom_exit (loop);
3846   class tree_niter_desc *niter_desc;
3847   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3848     {
3849       if (dump_file && (dump_flags & TDF_DETAILS))
3850         fprintf (dump_file, "Predict doloop failure due to"
3851                             " unexpected niters.\n");
3852       return false;
3853     }
3854
3855   /* Similar to doloop_optimize, check whether iteration count too small
3856      and not profitable.  */
3857   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3858   if (est_niter == -1)
3859     est_niter = get_likely_max_loop_iterations_int (loop);
3860   if (est_niter >= 0 && est_niter < 3)
3861     {
3862       if (dump_file && (dump_flags & TDF_DETAILS))
3863         fprintf (dump_file,
3864                  "Predict doloop failure due to"
3865                  " too few iterations (%u).\n",
3866                  (unsigned int) est_niter);
3867       return false;
3868     }
3869
3870   return true;
3871 }
3872
3873 /* Determines cost of the computation of EXPR.  */
3874
3875 static unsigned
3876 computation_cost (tree expr, bool speed)
3877 {
3878   rtx_insn *seq;
3879   rtx rslt;
3880   tree type = TREE_TYPE (expr);
3881   unsigned cost;
3882   /* Avoid using hard regs in ways which may be unsupported.  */
3883   int regno = LAST_VIRTUAL_REGISTER + 1;
3884   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3885   enum node_frequency real_frequency = node->frequency;
3886
3887   node->frequency = NODE_FREQUENCY_NORMAL;
3888   crtl->maybe_hot_insn_p = speed;
3889   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3890   start_sequence ();
3891   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3892   seq = get_insns ();
3893   end_sequence ();
3894   default_rtl_profile ();
3895   node->frequency = real_frequency;
3896
3897   cost = seq_cost (seq, speed);
3898   if (MEM_P (rslt))
3899     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3900                           TYPE_ADDR_SPACE (type), speed);
3901   else if (!REG_P (rslt))
3902     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3903
3904   return cost;
3905 }
3906
3907 /* Returns variable containing the value of candidate CAND at statement AT.  */
3908
3909 static tree
3910 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3911 {
3912   if (stmt_after_increment (loop, cand, stmt))
3913     return cand->var_after;
3914   else
3915     return cand->var_before;
3916 }
3917
3918 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3919    same precision that is at least as wide as the precision of TYPE, stores
3920    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3921    type of A and B.  */
3922
3923 static tree
3924 determine_common_wider_type (tree *a, tree *b)
3925 {
3926   tree wider_type = NULL;
3927   tree suba, subb;
3928   tree atype = TREE_TYPE (*a);
3929
3930   if (CONVERT_EXPR_P (*a))
3931     {
3932       suba = TREE_OPERAND (*a, 0);
3933       wider_type = TREE_TYPE (suba);
3934       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3935         return atype;
3936     }
3937   else
3938     return atype;
3939
3940   if (CONVERT_EXPR_P (*b))
3941     {
3942       subb = TREE_OPERAND (*b, 0);
3943       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3944         return atype;
3945     }
3946   else
3947     return atype;
3948
3949   *a = suba;
3950   *b = subb;
3951   return wider_type;
3952 }
3953
3954 /* Determines the expression by that USE is expressed from induction variable
3955    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3956    decomposed form.  The invariant part is stored in AFF_INV; while variant
3957    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3958    non-null.  Returns false if USE cannot be expressed using CAND.  */
3959
3960 static bool
3961 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3962                        struct iv_cand *cand, class aff_tree *aff_inv,
3963                        class aff_tree *aff_var, widest_int *prat = NULL)
3964 {
3965   tree ubase = use->iv->base, ustep = use->iv->step;
3966   tree cbase = cand->iv->base, cstep = cand->iv->step;
3967   tree common_type, uutype, var, cstep_common;
3968   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3969   aff_tree aff_cbase;
3970   widest_int rat;
3971
3972   /* We must have a precision to express the values of use.  */
3973   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3974     return false;
3975
3976   var = var_at_stmt (loop, cand, at);
3977   uutype = unsigned_type_for (utype);
3978
3979   /* If the conversion is not noop, perform it.  */
3980   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3981     {
3982       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3983           && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3984         {
3985           tree inner_base, inner_step, inner_type;
3986           inner_base = TREE_OPERAND (cbase, 0);
3987           if (CONVERT_EXPR_P (cstep))
3988             inner_step = TREE_OPERAND (cstep, 0);
3989           else
3990             inner_step = cstep;
3991
3992           inner_type = TREE_TYPE (inner_base);
3993           /* If candidate is added from a biv whose type is smaller than
3994              ctype, we know both candidate and the biv won't overflow.
3995              In this case, it's safe to skip the convertion in candidate.
3996              As an example, (unsigned short)((unsigned long)A) equals to
3997              (unsigned short)A, if A has a type no larger than short.  */
3998           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3999             {
4000               cbase = inner_base;
4001               cstep = inner_step;
4002             }
4003         }
4004       cbase = fold_convert (uutype, cbase);
4005       cstep = fold_convert (uutype, cstep);
4006       var = fold_convert (uutype, var);
4007     }
4008
4009   /* Ratio is 1 when computing the value of biv cand by itself.
4010      We can't rely on constant_multiple_of in this case because the
4011      use is created after the original biv is selected.  The call
4012      could fail because of inconsistent fold behavior.  See PR68021
4013      for more information.  */
4014   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4015     {
4016       gcc_assert (is_gimple_assign (use->stmt));
4017       gcc_assert (use->iv->ssa_name == cand->var_after);
4018       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4019       rat = 1;
4020     }
4021   else if (!constant_multiple_of (ustep, cstep, &rat))
4022     return false;
4023
4024   if (prat)
4025     *prat = rat;
4026
4027   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4028      type, we achieve better folding by computing their difference in this
4029      wider type, and cast the result to UUTYPE.  We do not need to worry about
4030      overflows, as all the arithmetics will in the end be performed in UUTYPE
4031      anyway.  */
4032   common_type = determine_common_wider_type (&ubase, &cbase);
4033
4034   /* use = ubase - ratio * cbase + ratio * var.  */
4035   tree_to_aff_combination (ubase, common_type, aff_inv);
4036   tree_to_aff_combination (cbase, common_type, &aff_cbase);
4037   tree_to_aff_combination (var, uutype, aff_var);
4038
4039   /* We need to shift the value if we are after the increment.  */
4040   if (stmt_after_increment (loop, cand, at))
4041     {
4042       aff_tree cstep_aff;
4043
4044       if (common_type != uutype)
4045         cstep_common = fold_convert (common_type, cstep);
4046       else
4047         cstep_common = cstep;
4048
4049       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4050       aff_combination_add (&aff_cbase, &cstep_aff);
4051     }
4052
4053   aff_combination_scale (&aff_cbase, -rat);
4054   aff_combination_add (aff_inv, &aff_cbase);
4055   if (common_type != uutype)
4056     aff_combination_convert (aff_inv, uutype);
4057
4058   aff_combination_scale (aff_var, rat);
4059   return true;
4060 }
4061
4062 /* Determines the expression by that USE is expressed from induction variable
4063    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4064    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
4065
4066 static bool
4067 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4068                      struct iv_cand *cand, class aff_tree *aff)
4069 {
4070   aff_tree aff_var;
4071
4072   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4073     return false;
4074
4075   aff_combination_add (aff, &aff_var);
4076   return true;
4077 }
4078
4079 /* Return the type of USE.  */
4080
4081 static tree
4082 get_use_type (struct iv_use *use)
4083 {
4084   tree base_type = TREE_TYPE (use->iv->base);
4085   tree type;
4086
4087   if (use->type == USE_REF_ADDRESS)
4088     {
4089       /* The base_type may be a void pointer.  Create a pointer type based on
4090          the mem_ref instead.  */
4091       type = build_pointer_type (TREE_TYPE (*use->op_p));
4092       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4093                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4094     }
4095   else
4096     type = base_type;
4097
4098   return type;
4099 }
4100
4101 /* Determines the expression by that USE is expressed from induction variable
4102    CAND at statement AT in LOOP.  The computation is unshared.  */
4103
4104 static tree
4105 get_computation_at (class loop *loop, gimple *at,
4106                     struct iv_use *use, struct iv_cand *cand)
4107 {
4108   aff_tree aff;
4109   tree type = get_use_type (use);
4110
4111   if (!get_computation_aff (loop, at, use, cand, &aff))
4112     return NULL_TREE;
4113   unshare_aff_combination (&aff);
4114   return fold_convert (type, aff_combination_to_tree (&aff));
4115 }
4116
4117 /* Like get_computation_at, but try harder, even if the computation
4118    is more expensive.  Intended for debug stmts.  */
4119
4120 static tree
4121 get_debug_computation_at (class loop *loop, gimple *at,
4122                           struct iv_use *use, struct iv_cand *cand)
4123 {
4124   if (tree ret = get_computation_at (loop, at, use, cand))
4125     return ret;
4126
4127   tree ubase = use->iv->base, ustep = use->iv->step;
4128   tree cbase = cand->iv->base, cstep = cand->iv->step;
4129   tree var;
4130   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4131   widest_int rat;
4132
4133   /* We must have a precision to express the values of use.  */
4134   if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4135     return NULL_TREE;
4136
4137   /* Try to handle the case that get_computation_at doesn't,
4138      try to express
4139      use = ubase + (var - cbase) / ratio.  */
4140   if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4141                              &rat))
4142     return NULL_TREE;
4143
4144   bool neg_p = false;
4145   if (wi::neg_p (rat))
4146     {
4147       if (TYPE_UNSIGNED (ctype))
4148         return NULL_TREE;
4149       neg_p = true;
4150       rat = wi::neg (rat);
4151     }
4152
4153   /* If both IVs can wrap around and CAND doesn't have a power of two step,
4154      it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
4155      the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4156      uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4157      ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
4158   if (!use->iv->no_overflow
4159       && !cand->iv->no_overflow
4160       && !integer_pow2p (cstep))
4161     return NULL_TREE;
4162
4163   int bits = wi::exact_log2 (rat);
4164   if (bits == -1)
4165     bits = wi::floor_log2 (rat) + 1;
4166   if (!cand->iv->no_overflow
4167       && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4168     return NULL_TREE;
4169
4170   var = var_at_stmt (loop, cand, at);
4171
4172   if (POINTER_TYPE_P (ctype))
4173     {
4174       ctype = unsigned_type_for (ctype);
4175       cbase = fold_convert (ctype, cbase);
4176       cstep = fold_convert (ctype, cstep);
4177       var = fold_convert (ctype, var);
4178     }
4179
4180   if (stmt_after_increment (loop, cand, at))
4181     var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4182                        unshare_expr (cstep));
4183
4184   var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4185   var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4186                      wide_int_to_tree (TREE_TYPE (var), rat));
4187   if (POINTER_TYPE_P (utype))
4188     {
4189       var = fold_convert (sizetype, var);
4190       if (neg_p)
4191         var = fold_build1 (NEGATE_EXPR, sizetype, var);
4192       var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4193     }
4194   else
4195     {
4196       var = fold_convert (utype, var);
4197       var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4198                          ubase, var);
4199     }
4200   return var;
4201 }
4202
4203 /* Adjust the cost COST for being in loop setup rather than loop body.
4204    If we're optimizing for space, the loop setup overhead is constant;
4205    if we're optimizing for speed, amortize it over the per-iteration cost.
4206    If ROUND_UP_P is true, the result is round up rather than to zero when
4207    optimizing for speed.  */
4208 static int64_t
4209 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4210                    bool round_up_p = false)
4211 {
4212   if (cost == INFTY)
4213     return cost;
4214   else if (optimize_loop_for_speed_p (data->current_loop))
4215     {
4216       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4217       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4218     }
4219   else
4220     return cost;
4221 }
4222
4223 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4224    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4225    calculating the operands of EXPR.  Returns true if successful, and returns
4226    the cost in COST.  */
4227
4228 static bool
4229 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4230                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4231 {
4232   comp_cost res;
4233   tree op1 = TREE_OPERAND (expr, 1);
4234   tree cst = TREE_OPERAND (mult, 1);
4235   tree multop = TREE_OPERAND (mult, 0);
4236   int m = exact_log2 (int_cst_value (cst));
4237   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4238   int as_cost, sa_cost;
4239   bool mult_in_op1;
4240
4241   if (!(m >= 0 && m < maxm))
4242     return false;
4243
4244   STRIP_NOPS (op1);
4245   mult_in_op1 = operand_equal_p (op1, mult, 0);
4246
4247   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4248
4249   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4250      use that in preference to a shift insn followed by an add insn.  */
4251   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4252              ? shiftadd_cost (speed, mode, m)
4253              : (mult_in_op1
4254                 ? shiftsub1_cost (speed, mode, m)
4255                 : shiftsub0_cost (speed, mode, m)));
4256
4257   res = comp_cost (MIN (as_cost, sa_cost), 0);
4258   res += (mult_in_op1 ? cost0 : cost1);
4259
4260   STRIP_NOPS (multop);
4261   if (!is_gimple_val (multop))
4262     res += force_expr_to_var_cost (multop, speed);
4263
4264   *cost = res;
4265   return true;
4266 }
4267
4268 /* Estimates cost of forcing expression EXPR into a variable.  */
4269
4270 static comp_cost
4271 force_expr_to_var_cost (tree expr, bool speed)
4272 {
4273   static bool costs_initialized = false;
4274   static unsigned integer_cost [2];
4275   static unsigned symbol_cost [2];
4276   static unsigned address_cost [2];
4277   tree op0, op1;
4278   comp_cost cost0, cost1, cost;
4279   machine_mode mode;
4280   scalar_int_mode int_mode;
4281
4282   if (!costs_initialized)
4283     {
4284       tree type = build_pointer_type (integer_type_node);
4285       tree var, addr;
4286       rtx x;
4287       int i;
4288
4289       var = create_tmp_var_raw (integer_type_node, "test_var");
4290       TREE_STATIC (var) = 1;
4291       x = produce_memory_decl_rtl (var, NULL);
4292       SET_DECL_RTL (var, x);
4293
4294       addr = build1 (ADDR_EXPR, type, var);
4295
4296
4297       for (i = 0; i < 2; i++)
4298         {
4299           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4300                                                              2000), i);
4301
4302           symbol_cost[i] = computation_cost (addr, i) + 1;
4303
4304           address_cost[i]
4305             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4306           if (dump_file && (dump_flags & TDF_DETAILS))
4307             {
4308               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4309               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4310               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4311               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4312               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4313               fprintf (dump_file, "\n");
4314             }
4315         }
4316
4317       costs_initialized = true;
4318     }
4319
4320   STRIP_NOPS (expr);
4321
4322   if (SSA_VAR_P (expr))
4323     return no_cost;
4324
4325   if (is_gimple_min_invariant (expr))
4326     {
4327       if (poly_int_tree_p (expr))
4328         return comp_cost (integer_cost [speed], 0);
4329
4330       if (TREE_CODE (expr) == ADDR_EXPR)
4331         {
4332           tree obj = TREE_OPERAND (expr, 0);
4333
4334           if (VAR_P (obj)
4335               || TREE_CODE (obj) == PARM_DECL
4336               || TREE_CODE (obj) == RESULT_DECL)
4337             return comp_cost (symbol_cost [speed], 0);
4338         }
4339
4340       return comp_cost (address_cost [speed], 0);
4341     }
4342
4343   switch (TREE_CODE (expr))
4344     {
4345     case POINTER_PLUS_EXPR:
4346     case PLUS_EXPR:
4347     case MINUS_EXPR:
4348     case MULT_EXPR:
4349     case TRUNC_DIV_EXPR:
4350     case BIT_AND_EXPR:
4351     case BIT_IOR_EXPR:
4352     case LSHIFT_EXPR:
4353     case RSHIFT_EXPR:
4354       op0 = TREE_OPERAND (expr, 0);
4355       op1 = TREE_OPERAND (expr, 1);
4356       STRIP_NOPS (op0);
4357       STRIP_NOPS (op1);
4358       break;
4359
4360     CASE_CONVERT:
4361     case NEGATE_EXPR:
4362     case BIT_NOT_EXPR:
4363       op0 = TREE_OPERAND (expr, 0);
4364       STRIP_NOPS (op0);
4365       op1 = NULL_TREE;
4366       break;
4367     /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
4368        introduce COND_EXPR for IV base, need to support better cost estimation
4369        for this COND_EXPR and tcc_comparison.  */
4370     case COND_EXPR:
4371       op0 = TREE_OPERAND (expr, 1);
4372       STRIP_NOPS (op0);
4373       op1 = TREE_OPERAND (expr, 2);
4374       STRIP_NOPS (op1);
4375       break;
4376     case LT_EXPR:
4377     case LE_EXPR:
4378     case GT_EXPR:
4379     case GE_EXPR:
4380     case EQ_EXPR:
4381     case NE_EXPR:
4382     case UNORDERED_EXPR:
4383     case ORDERED_EXPR:
4384     case UNLT_EXPR:
4385     case UNLE_EXPR:
4386     case UNGT_EXPR:
4387     case UNGE_EXPR:
4388     case UNEQ_EXPR:
4389     case LTGT_EXPR:
4390     case MAX_EXPR:
4391     case MIN_EXPR:
4392       op0 = TREE_OPERAND (expr, 0);
4393       STRIP_NOPS (op0);
4394       op1 = TREE_OPERAND (expr, 1);
4395       STRIP_NOPS (op1);
4396       break;
4397
4398     default:
4399       /* Just an arbitrary value, FIXME.  */
4400       return comp_cost (target_spill_cost[speed], 0);
4401     }
4402
4403   if (op0 == NULL_TREE
4404       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4405     cost0 = no_cost;
4406   else
4407     cost0 = force_expr_to_var_cost (op0, speed);
4408
4409   if (op1 == NULL_TREE
4410       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4411     cost1 = no_cost;
4412   else
4413     cost1 = force_expr_to_var_cost (op1, speed);
4414
4415   mode = TYPE_MODE (TREE_TYPE (expr));
4416   switch (TREE_CODE (expr))
4417     {
4418     case POINTER_PLUS_EXPR:
4419     case PLUS_EXPR:
4420     case MINUS_EXPR:
4421     case NEGATE_EXPR:
4422       cost = comp_cost (add_cost (speed, mode), 0);
4423       if (TREE_CODE (expr) != NEGATE_EXPR)
4424         {
4425           tree mult = NULL_TREE;
4426           comp_cost sa_cost;
4427           if (TREE_CODE (op1) == MULT_EXPR)
4428             mult = op1;
4429           else if (TREE_CODE (op0) == MULT_EXPR)
4430             mult = op0;
4431
4432           if (mult != NULL_TREE
4433               && is_a <scalar_int_mode> (mode, &int_mode)
4434               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4435               && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4436                                     speed, &sa_cost))
4437             return sa_cost;
4438         }
4439       break;
4440
4441     CASE_CONVERT:
4442       {
4443         tree inner_mode, outer_mode;
4444         outer_mode = TREE_TYPE (expr);
4445         inner_mode = TREE_TYPE (op0);
4446         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4447                                        TYPE_MODE (inner_mode), speed), 0);
4448       }
4449       break;
4450
4451     case MULT_EXPR:
4452       if (cst_and_fits_in_hwi (op0))
4453         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4454                                              mode, speed), 0);
4455       else if (cst_and_fits_in_hwi (op1))
4456         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4457                                              mode, speed), 0);
4458       else
4459         return comp_cost (target_spill_cost [speed], 0);
4460       break;
4461
4462     case TRUNC_DIV_EXPR:
4463       /* Division by power of two is usually cheap, so we allow it.  Forbid
4464          anything else.  */
4465       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4466         cost = comp_cost (add_cost (speed, mode), 0);
4467       else
4468         cost = comp_cost (target_spill_cost[speed], 0);
4469       break;
4470
4471     case BIT_AND_EXPR:
4472     case BIT_IOR_EXPR:
4473     case BIT_NOT_EXPR:
4474     case LSHIFT_EXPR:
4475     case RSHIFT_EXPR:
4476       cost = comp_cost (add_cost (speed, mode), 0);
4477       break;
4478     case COND_EXPR:
4479       op0 = TREE_OPERAND (expr, 0);
4480       STRIP_NOPS (op0);
4481       if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4482           || CONSTANT_CLASS_P (op0))
4483         cost = no_cost;
4484       else
4485         cost = force_expr_to_var_cost (op0, speed);
4486       break;
4487     case LT_EXPR:
4488     case LE_EXPR:
4489     case GT_EXPR:
4490     case GE_EXPR:
4491     case EQ_EXPR:
4492     case NE_EXPR:
4493     case UNORDERED_EXPR:
4494     case ORDERED_EXPR:
4495     case UNLT_EXPR:
4496     case UNLE_EXPR:
4497     case UNGT_EXPR:
4498     case UNGE_EXPR:
4499     case UNEQ_EXPR:
4500     case LTGT_EXPR:
4501     case MAX_EXPR:
4502     case MIN_EXPR:
4503       /* Simply use add cost for now, FIXME if there is some more accurate cost
4504          evaluation way.  */
4505       cost = comp_cost (add_cost (speed, mode), 0);
4506       break;
4507
4508     default:
4509       gcc_unreachable ();
4510     }
4511
4512   cost += cost0;
4513   cost += cost1;
4514   return cost;
4515 }
4516
4517 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4518    invariants the computation depends on.  */
4519
4520 static comp_cost
4521 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4522 {
4523   if (!expr)
4524     return no_cost;
4525
4526   find_inv_vars (data, &expr, inv_vars);
4527   return force_expr_to_var_cost (expr, data->speed);
4528 }
4529
/* The function get_address_cost_ainc, defined below after its helper types,
   returns the cost of an auto-modifying address expression of the shape
   base + offset.  AINC_STEP is the step size of the address IV.
   AINC_OFFSET is the offset of the address expression.  The address
   expression has ADDR_MODE in address space AS.  The memory access has
   MEM_MODE.  SPEED tells whether we are optimizing for speed or for
   size.  */
4535
/* Kinds of auto-modifying address, used to index the costs array of
   struct ainc_cost_data.  */
enum ainc_type
{
  /* Pre increment.  */
  AINC_PRE_INC,
  /* Pre decrement.  */
  AINC_PRE_DEC,
  /* Post increment.  */
  AINC_POST_INC,
  /* Post decrement.  */
  AINC_POST_DEC,
  /* Also the number of auto increment types.  */
  AINC_NONE
};
4544
4545 struct ainc_cost_data
4546 {
4547   int64_t costs[AINC_NONE];
4548 };
4549
4550 static comp_cost
4551 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4552                        machine_mode addr_mode, machine_mode mem_mode,
4553                        addr_space_t as, bool speed)
4554 {
4555   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4556       && !USE_STORE_PRE_DECREMENT (mem_mode)
4557       && !USE_LOAD_POST_DECREMENT (mem_mode)
4558       && !USE_STORE_POST_DECREMENT (mem_mode)
4559       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4560       && !USE_STORE_PRE_INCREMENT (mem_mode)
4561       && !USE_LOAD_POST_INCREMENT (mem_mode)
4562       && !USE_STORE_POST_INCREMENT (mem_mode))
4563     return infinite_cost;
4564
4565   static vec<ainc_cost_data *> ainc_cost_data_list;
4566   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4567   if (idx >= ainc_cost_data_list.length ())
4568     {
4569       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4570
4571       gcc_assert (nsize > idx);
4572       ainc_cost_data_list.safe_grow_cleared (nsize, true);
4573     }
4574
4575   ainc_cost_data *data = ainc_cost_data_list[idx];
4576   if (data == NULL)
4577     {
4578       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4579
4580       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4581       data->costs[AINC_PRE_DEC] = INFTY;
4582       data->costs[AINC_POST_DEC] = INFTY;
4583       data->costs[AINC_PRE_INC] = INFTY;
4584       data->costs[AINC_POST_INC] = INFTY;
4585       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4586           || USE_STORE_PRE_DECREMENT (mem_mode))
4587         {
4588           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4589
4590           if (memory_address_addr_space_p (mem_mode, addr, as))
4591             data->costs[AINC_PRE_DEC]
4592               = address_cost (addr, mem_mode, as, speed);
4593         }
4594       if (USE_LOAD_POST_DECREMENT (mem_mode)
4595           || USE_STORE_POST_DECREMENT (mem_mode))
4596         {
4597           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4598
4599           if (memory_address_addr_space_p (mem_mode, addr, as))
4600             data->costs[AINC_POST_DEC]
4601               = address_cost (addr, mem_mode, as, speed);
4602         }
4603       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4604           || USE_STORE_PRE_INCREMENT (mem_mode))
4605         {
4606           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4607
4608           if (memory_address_addr_space_p (mem_mode, addr, as))
4609             data->costs[AINC_PRE_INC]
4610               = address_cost (addr, mem_mode, as, speed);
4611         }
4612       if (USE_LOAD_POST_INCREMENT (mem_mode)
4613           || USE_STORE_POST_INCREMENT (mem_mode))
4614         {
4615           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4616
4617           if (memory_address_addr_space_p (mem_mode, addr, as))
4618             data->costs[AINC_POST_INC]
4619               = address_cost (addr, mem_mode, as, speed);
4620         }
4621       ainc_cost_data_list[idx] = data;
4622     }
4623
4624   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4625   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4626     return comp_cost (data->costs[AINC_POST_INC], 0);
4627   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4628     return comp_cost (data->costs[AINC_POST_DEC], 0);
4629   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4630     return comp_cost (data->costs[AINC_PRE_INC], 0);
4631   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4632     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4633
4634   return infinite_cost;
4635 }
4636
4637 /* Return cost of computing USE's address expression by using CAND.
4638    AFF_INV and AFF_VAR represent invariant and variant parts of the
4639    address expression, respectively.  If AFF_INV is simple, store
4640    the loop invariant variables which are depended by it in INV_VARS;
4641    if AFF_INV is complicated, handle it as a new invariant expression
4642    and record it in INV_EXPR.  RATIO indicates multiple times between
4643    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4644    value to it indicating if this is an auto-increment address.  */
4645
4646 static comp_cost
4647 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4648                   struct iv_cand *cand, aff_tree *aff_inv,
4649                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4650                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4651                   bool *can_autoinc, bool speed)
4652 {
4653   rtx addr;
4654   bool simple_inv = true;
4655   tree comp_inv = NULL_TREE, type = aff_var->type;
4656   comp_cost var_cost = no_cost, cost = no_cost;
4657   struct mem_address parts = {NULL_TREE, integer_one_node,
4658                               NULL_TREE, NULL_TREE, NULL_TREE};
4659   machine_mode addr_mode = TYPE_MODE (type);
4660   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4661   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4662   /* Only true if ratio != 1.  */
4663   bool ok_with_ratio_p = false;
4664   bool ok_without_ratio_p = false;
4665
4666   if (!aff_combination_const_p (aff_inv))
4667     {
4668       parts.index = integer_one_node;
4669       /* Addressing mode "base + index".  */
4670       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4671       if (ratio != 1)
4672         {
4673           parts.step = wide_int_to_tree (type, ratio);
4674           /* Addressing mode "base + index << scale".  */
4675           ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4676           if (!ok_with_ratio_p)
4677             parts.step = NULL_TREE;
4678         }
4679       if (ok_with_ratio_p || ok_without_ratio_p)
4680         {
4681           if (maybe_ne (aff_inv->offset, 0))
4682             {
4683               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4684               /* Addressing mode "base + index [<< scale] + offset".  */
4685               if (!valid_mem_ref_p (mem_mode, as, &parts))
4686                 parts.offset = NULL_TREE;
4687               else
4688                 aff_inv->offset = 0;
4689             }
4690
4691           move_fixed_address_to_symbol (&parts, aff_inv);
4692           /* Base is fixed address and is moved to symbol part.  */
4693           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4694             parts.base = NULL_TREE;
4695
4696           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4697           if (parts.symbol != NULL_TREE
4698               && !valid_mem_ref_p (mem_mode, as, &parts))
4699             {
4700               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4701               parts.symbol = NULL_TREE;
4702               /* Reset SIMPLE_INV since symbol address needs to be computed
4703                  outside of address expression in this case.  */
4704               simple_inv = false;
4705               /* Symbol part is moved back to base part, it can't be NULL.  */
4706               parts.base = integer_one_node;
4707             }
4708         }
4709       else
4710         parts.index = NULL_TREE;
4711     }
4712   else
4713     {
4714       poly_int64 ainc_step;
4715       if (can_autoinc
4716           && ratio == 1
4717           && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4718         {
4719           poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4720
4721           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4722             ainc_offset += ainc_step;
4723           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4724                                         addr_mode, mem_mode, as, speed);
4725           if (!cost.infinite_cost_p ())
4726             {
4727               *can_autoinc = true;
4728               return cost;
4729             }
4730           cost = no_cost;
4731         }
4732       if (!aff_combination_zero_p (aff_inv))
4733         {
4734           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4735           /* Addressing mode "base + offset".  */
4736           if (!valid_mem_ref_p (mem_mode, as, &parts))
4737             parts.offset = NULL_TREE;
4738           else
4739             aff_inv->offset = 0;
4740         }
4741     }
4742
4743   if (simple_inv)
4744     simple_inv = (aff_inv == NULL
4745                   || aff_combination_const_p (aff_inv)
4746                   || aff_combination_singleton_var_p (aff_inv));
4747   if (!aff_combination_zero_p (aff_inv))
4748     comp_inv = aff_combination_to_tree (aff_inv);
4749   if (comp_inv != NULL_TREE)
4750     cost = force_var_cost (data, comp_inv, inv_vars);
4751   if (ratio != 1 && parts.step == NULL_TREE)
4752     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4753   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4754     var_cost += add_cost (speed, addr_mode);
4755
4756   if (comp_inv && inv_expr && !simple_inv)
4757     {
4758       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4759       /* Clear depends on.  */
4760       if (*inv_expr != NULL && inv_vars && *inv_vars)
4761         bitmap_clear (*inv_vars);
4762
4763       /* Cost of small invariant expression adjusted against loop niters
4764          is usually zero, which makes it difficult to be differentiated
4765          from candidate based on loop invariant variables.  Secondly, the
4766          generated invariant expression may not be hoisted out of loop by
4767          following pass.  We penalize the cost by rounding up in order to
4768          neutralize such effects.  */
4769       cost.cost = adjust_setup_cost (data, cost.cost, true);
4770       cost.scratch = cost.cost;
4771     }
4772
4773   cost += var_cost;
4774   addr = addr_for_mem_ref (&parts, as, false);
4775   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4776   cost += address_cost (addr, mem_mode, as, speed);
4777
4778   if (parts.symbol != NULL_TREE)
4779     cost.complexity += 1;
4780   /* Don't increase the complexity of adding a scaled index if it's
4781      the only kind of index that the target allows.  */
4782   if (parts.step != NULL_TREE && ok_without_ratio_p)
4783     cost.complexity += 1;
4784   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4785     cost.complexity += 1;
4786   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4787     cost.complexity += 1;
4788
4789   return cost;
4790 }
4791
4792 /* Scale (multiply) the computed COST (except scratch part that should be
4793    hoisted out a loop) by header->frequency / AT->frequency, which makes
4794    expected cost more accurate.  */
4795
4796 static comp_cost
4797 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4798 {
4799   if (data->speed
4800       && data->current_loop->header->count.to_frequency (cfun) > 0)
4801     {
4802       basic_block bb = gimple_bb (at);
4803       gcc_assert (cost.scratch <= cost.cost);
4804       int scale_factor = (int)(intptr_t) bb->aux;
4805       if (scale_factor == 1)
4806         return cost;
4807
4808       int64_t scaled_cost
4809         = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4810
4811       if (dump_file && (dump_flags & TDF_DETAILS))
4812         fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4813                  "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4814                  1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4815
4816       cost.cost = scaled_cost;
4817     }
4818
4819   return cost;
4820 }
4821
4822 /* Determines the cost of the computation by that USE is expressed
4823    from induction variable CAND.  If ADDRESS_P is true, we just need
4824    to create an address from it, otherwise we want to get it into
4825    register.  A set of invariants we depend on is stored in INV_VARS.
4826    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4827    addressing is likely.  If INV_EXPR is nonnull, record invariant
4828    expr entry in it.  */
4829
4830 static comp_cost
4831 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4832                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4833                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4834 {
4835   gimple *at = use->stmt;
4836   tree ubase = use->iv->base, cbase = cand->iv->base;
4837   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4838   tree comp_inv = NULL_TREE;
4839   HOST_WIDE_INT ratio, aratio;
4840   comp_cost cost;
4841   widest_int rat;
4842   aff_tree aff_inv, aff_var;
4843   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4844
4845   if (inv_vars)
4846     *inv_vars = NULL;
4847   if (can_autoinc)
4848     *can_autoinc = false;
4849   if (inv_expr)
4850     *inv_expr = NULL;
4851
4852   /* Check if we have enough precision to express the values of use.  */
4853   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4854     return infinite_cost;
4855
4856   if (address_p
4857       || (use->iv->base_object
4858           && cand->iv->base_object
4859           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4860           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4861     {
4862       /* Do not try to express address of an object with computation based
4863          on address of a different object.  This may cause problems in rtl
4864          level alias analysis (that does not expect this to be happening,
4865          as this is illegal in C), and would be unlikely to be useful
4866          anyway.  */
4867       if (use->iv->base_object
4868           && cand->iv->base_object
4869           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4870         return infinite_cost;
4871     }
4872
4873   if (!get_computation_aff_1 (data->current_loop, at, use,
4874                               cand, &aff_inv, &aff_var, &rat)
4875       || !wi::fits_shwi_p (rat))
4876     return infinite_cost;
4877
4878   ratio = rat.to_shwi ();
4879   if (address_p)
4880     {
4881       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4882                                inv_vars, inv_expr, can_autoinc, speed);
4883       cost = get_scaled_computation_cost_at (data, at, cost);
4884       /* For doloop IV cand, add on the extra cost.  */
4885       cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4886       return cost;
4887     }
4888
4889   bool simple_inv = (aff_combination_const_p (&aff_inv)
4890                      || aff_combination_singleton_var_p (&aff_inv));
4891   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4892   aff_combination_convert (&aff_inv, signed_type);
4893   if (!aff_combination_zero_p (&aff_inv))
4894     comp_inv = aff_combination_to_tree (&aff_inv);
4895
4896   cost = force_var_cost (data, comp_inv, inv_vars);
4897   if (comp_inv && inv_expr && !simple_inv)
4898     {
4899       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4900       /* Clear depends on.  */
4901       if (*inv_expr != NULL && inv_vars && *inv_vars)
4902         bitmap_clear (*inv_vars);
4903
4904       cost.cost = adjust_setup_cost (data, cost.cost);
4905       /* Record setup cost in scratch field.  */
4906       cost.scratch = cost.cost;
4907     }
4908   /* Cost of constant integer can be covered when adding invariant part to
4909      variant part.  */
4910   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4911     cost = no_cost;
4912
4913   /* Need type narrowing to represent use with cand.  */
4914   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4915     {
4916       machine_mode outer_mode = TYPE_MODE (utype);
4917       machine_mode inner_mode = TYPE_MODE (ctype);
4918       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4919     }
4920
4921   /* Turn a + i * (-c) into a - i * c.  */
4922   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4923     aratio = -ratio;
4924   else
4925     aratio = ratio;
4926
4927   if (ratio != 1)
4928     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4929
4930   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4931      instruction.  */
4932   /* Need to add up the invariant and variant parts.  */
4933   if (comp_inv && !integer_zerop (comp_inv))
4934     cost += add_cost (speed, TYPE_MODE (utype));
4935
4936   cost = get_scaled_computation_cost_at (data, at, cost);
4937
4938   /* For doloop IV cand, add on the extra cost.  */
4939   if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4940     cost += targetm.doloop_cost_for_generic;
4941
4942   return cost;
4943 }
4944
4945 /* Determines cost of computing the use in GROUP with CAND in a generic
4946    expression.  */
4947
4948 static bool
4949 determine_group_iv_cost_generic (struct ivopts_data *data,
4950                                  struct iv_group *group, struct iv_cand *cand)
4951 {
4952   comp_cost cost;
4953   iv_inv_expr_ent *inv_expr = NULL;
4954   bitmap inv_vars = NULL, inv_exprs = NULL;
4955   struct iv_use *use = group->vuses[0];
4956
4957   /* The simple case first -- if we need to express value of the preserved
4958      original biv, the cost is 0.  This also prevents us from counting the
4959      cost of increment twice -- once at this use and once in the cost of
4960      the candidate.  */
4961   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4962     cost = no_cost;
4963   else
4964     cost = get_computation_cost (data, use, cand, false,
4965                                  &inv_vars, NULL, &inv_expr);
4966
4967   if (inv_expr)
4968     {
4969       inv_exprs = BITMAP_ALLOC (NULL);
4970       bitmap_set_bit (inv_exprs, inv_expr->id);
4971     }
4972   set_group_iv_cost (data, group, cand, cost, inv_vars,
4973                      NULL_TREE, ERROR_MARK, inv_exprs);
4974   return !cost.infinite_cost_p ();
4975 }
4976
4977 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4978
4979 static bool
4980 determine_group_iv_cost_address (struct ivopts_data *data,
4981                                  struct iv_group *group, struct iv_cand *cand)
4982 {
4983   unsigned i;
4984   bitmap inv_vars = NULL, inv_exprs = NULL;
4985   bool can_autoinc;
4986   iv_inv_expr_ent *inv_expr = NULL;
4987   struct iv_use *use = group->vuses[0];
4988   comp_cost sum_cost = no_cost, cost;
4989
4990   cost = get_computation_cost (data, use, cand, true,
4991                                &inv_vars, &can_autoinc, &inv_expr);
4992
4993   if (inv_expr)
4994     {
4995       inv_exprs = BITMAP_ALLOC (NULL);
4996       bitmap_set_bit (inv_exprs, inv_expr->id);
4997     }
4998   sum_cost = cost;
4999   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5000     {
5001       if (can_autoinc)
5002         sum_cost -= cand->cost_step;
5003       /* If we generated the candidate solely for exploiting autoincrement
5004          opportunities, and it turns out it can't be used, set the cost to
5005          infinity to make sure we ignore it.  */
5006       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5007         sum_cost = infinite_cost;
5008     }
5009
5010   /* Uses in a group can share setup code, so only add setup cost once.  */
5011   cost -= cost.scratch;
5012   /* Compute and add costs for rest uses of this group.  */
5013   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5014     {
5015       struct iv_use *next = group->vuses[i];
5016
5017       /* TODO: We could skip computing cost for sub iv_use when it has the
5018          same cost as the first iv_use, but the cost really depends on the
5019          offset and where the iv_use is.  */
5020         cost = get_computation_cost (data, next, cand, true,
5021                                      NULL, &can_autoinc, &inv_expr);
5022         if (inv_expr)
5023           {
5024             if (!inv_exprs)
5025               inv_exprs = BITMAP_ALLOC (NULL);
5026
5027             bitmap_set_bit (inv_exprs, inv_expr->id);
5028           }
5029       sum_cost += cost;
5030     }
5031   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5032                      NULL_TREE, ERROR_MARK, inv_exprs);
5033
5034   return !sum_cost.infinite_cost_p ();
5035 }
5036
5037 /* Computes value of candidate CAND at position AT in iteration NITER, and
5038    stores it to VAL.  */
5039
5040 static void
5041 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5042                aff_tree *val)
5043 {
5044   aff_tree step, delta, nit;
5045   struct iv *iv = cand->iv;
5046   tree type = TREE_TYPE (iv->base);
5047   tree steptype;
5048   if (POINTER_TYPE_P (type))
5049     steptype = sizetype;
5050   else
5051     steptype = unsigned_type_for (type);
5052
5053   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5054   aff_combination_convert (&step, steptype);
5055   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5056   aff_combination_convert (&nit, steptype);
5057   aff_combination_mult (&nit, &step, &delta);
5058   if (stmt_after_increment (loop, cand, at))
5059     aff_combination_add (&delta, &step);
5060
5061   tree_to_aff_combination (iv->base, type, val);
5062   if (!POINTER_TYPE_P (type))
5063     aff_combination_convert (val, steptype);
5064   aff_combination_add (val, &delta);
5065 }
5066
5067 /* Returns period of induction variable iv.  */
5068
5069 static tree
5070 iv_period (struct iv *iv)
5071 {
5072   tree step = iv->step, period, type;
5073   tree pow2div;
5074
5075   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5076
5077   type = unsigned_type_for (TREE_TYPE (step));
5078   /* Period of the iv is lcm (step, type_range)/step -1,
5079      i.e., N*type_range/step - 1. Since type range is power
5080      of two, N == (step >> num_of_ending_zeros_binary (step),
5081      so the final result is
5082
5083        (type_range >> num_of_ending_zeros_binary (step)) - 1
5084
5085   */
5086   pow2div = num_ending_zeros (step);
5087
5088   period = build_low_bits_mask (type,
5089                                 (TYPE_PRECISION (type)
5090                                  - tree_to_uhwi (pow2div)));
5091
5092   return period;
5093 }
5094
5095 /* Returns the comparison operator used when eliminating the iv USE.  */
5096
5097 static enum tree_code
5098 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5099 {
5100   class loop *loop = data->current_loop;
5101   basic_block ex_bb;
5102   edge exit;
5103
5104   ex_bb = gimple_bb (use->stmt);
5105   exit = EDGE_SUCC (ex_bb, 0);
5106   if (flow_bb_inside_loop_p (loop, exit->dest))
5107     exit = EDGE_SUCC (ex_bb, 1);
5108
5109   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5110 }
5111
5112 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5113    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5114    calculation is performed in non-wrapping type.
5115
5116    TODO: More generally, we could test for the situation that
5117          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5118          This would require knowing the sign of OFFSET.  */
5119
5120 static bool
5121 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5122 {
5123   enum tree_code code;
5124   tree e1, e2;
5125   aff_tree aff_e1, aff_e2, aff_offset;
5126
5127   if (!nowrap_type_p (TREE_TYPE (base)))
5128     return false;
5129
5130   base = expand_simple_operations (base);
5131
5132   if (TREE_CODE (base) == SSA_NAME)
5133     {
5134       gimple *stmt = SSA_NAME_DEF_STMT (base);
5135
5136       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5137         return false;
5138
5139       code = gimple_assign_rhs_code (stmt);
5140       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5141         return false;
5142
5143       e1 = gimple_assign_rhs1 (stmt);
5144       e2 = gimple_assign_rhs2 (stmt);
5145     }
5146   else
5147     {
5148       code = TREE_CODE (base);
5149       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5150         return false;
5151       e1 = TREE_OPERAND (base, 0);
5152       e2 = TREE_OPERAND (base, 1);
5153     }
5154
5155   /* Use affine expansion as deeper inspection to prove the equality.  */
5156   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5157                                   &aff_e2, &data->name_expansion_cache);
5158   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5159                                   &aff_offset, &data->name_expansion_cache);
5160   aff_combination_scale (&aff_offset, -1);
5161   switch (code)
5162     {
5163     case PLUS_EXPR:
5164       aff_combination_add (&aff_e2, &aff_offset);
5165       if (aff_combination_zero_p (&aff_e2))
5166         return true;
5167
5168       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5169                                       &aff_e1, &data->name_expansion_cache);
5170       aff_combination_add (&aff_e1, &aff_offset);
5171       return aff_combination_zero_p (&aff_e1);
5172
5173     case POINTER_PLUS_EXPR:
5174       aff_combination_add (&aff_e2, &aff_offset);
5175       return aff_combination_zero_p (&aff_e2);
5176
5177     default:
5178       return false;
5179     }
5180 }
5181
5182 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5183    comparison with CAND.  NITER describes the number of iterations of
5184    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5185
5186    We aim to handle the following situation:
5187
5188    sometype *base, *p;
5189    int a, b, i;
5190
5191    i = a;
5192    p = p_0 = base + a;
5193
5194    do
5195      {
5196        bla (*p);
5197        p++;
5198        i++;
5199      }
5200    while (i < b);
5201
5202    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5203    We aim to optimize this to
5204
5205    p = p_0 = base + a;
5206    do
5207      {
5208        bla (*p);
5209        p++;
5210      }
5211    while (p < p_0 - a + b);
5212
5213    This preserves the correctness, since the pointer arithmetics does not
5214    overflow.  More precisely:
5215
5216    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5217       overflow in computing it or the values of p.
5218    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5219       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5220
5221 static bool
5222 iv_elimination_compare_lt (struct ivopts_data *data,
5223                            struct iv_cand *cand, enum tree_code *comp_p,
5224                            class tree_niter_desc *niter)
5225 {
5226   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5227   class aff_tree nit, tmpa, tmpb;
5228   enum tree_code comp;
5229   HOST_WIDE_INT step;
5230
5231   /* We need to know that the candidate induction variable does not overflow.
5232      While more complex analysis may be used to prove this, for now just
5233      check that the variable appears in the original program and that it
5234      is computed in a type that guarantees no overflows.  */
5235   cand_type = TREE_TYPE (cand->iv->base);
5236   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5237     return false;
5238
5239   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5240      the calculation of the BOUND could overflow, making the comparison
5241      invalid.  */
5242   if (!data->loop_single_exit_p)
5243     return false;
5244
5245   /* We need to be able to decide whether candidate is increasing or decreasing
5246      in order to choose the right comparison operator.  */
5247   if (!cst_and_fits_in_hwi (cand->iv->step))
5248     return false;
5249   step = int_cst_value (cand->iv->step);
5250
5251   /* Check that the number of iterations matches the expected pattern:
5252      a + 1 > b ? 0 : b - a - 1.  */
5253   mbz = niter->may_be_zero;
5254   if (TREE_CODE (mbz) == GT_EXPR)
5255     {
5256       /* Handle a + 1 > b.  */
5257       tree op0 = TREE_OPERAND (mbz, 0);
5258       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5259         {
5260           a = TREE_OPERAND (op0, 0);
5261           b = TREE_OPERAND (mbz, 1);
5262         }
5263       else
5264         return false;
5265     }
5266   else if (TREE_CODE (mbz) == LT_EXPR)
5267     {
5268       tree op1 = TREE_OPERAND (mbz, 1);
5269
5270       /* Handle b < a + 1.  */
5271       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5272         {
5273           a = TREE_OPERAND (op1, 0);
5274           b = TREE_OPERAND (mbz, 0);
5275         }
5276       else
5277         return false;
5278     }
5279   else
5280     return false;
5281
5282   /* Expected number of iterations is B - A - 1.  Check that it matches
5283      the actual number, i.e., that B - A - NITER = 1.  */
5284   tree_to_aff_combination (niter->niter, nit_type, &nit);
5285   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5286   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5287   aff_combination_scale (&nit, -1);
5288   aff_combination_scale (&tmpa, -1);
5289   aff_combination_add (&tmpb, &tmpa);
5290   aff_combination_add (&tmpb, &nit);
5291   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5292     return false;
5293
5294   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5295      overflow.  */
5296   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5297                         cand->iv->step,
5298                         fold_convert (TREE_TYPE (cand->iv->step), a));
5299   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5300     return false;
5301
5302   /* Determine the new comparison operator.  */
5303   comp = step < 0 ? GT_EXPR : LT_EXPR;
5304   if (*comp_p == NE_EXPR)
5305     *comp_p = comp;
5306   else if (*comp_p == EQ_EXPR)
5307     *comp_p = invert_tree_comparison (comp, false);
5308   else
5309     gcc_unreachable ();
5310
5311   return true;
5312 }
5313
5314 /* Check whether it is possible to express the condition in USE by comparison
5315    of candidate CAND.  If so, store the value compared with to BOUND, and the
5316    comparison operator to COMP.  */
5317
5318 static bool
5319 may_eliminate_iv (struct ivopts_data *data,
5320                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5321                   enum tree_code *comp)
5322 {
5323   basic_block ex_bb;
5324   edge exit;
5325   tree period;
5326   class loop *loop = data->current_loop;
5327   aff_tree bnd;
5328   class tree_niter_desc *desc = NULL;
5329
5330   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5331     return false;
5332
5333   /* For now works only for exits that dominate the loop latch.
5334      TODO: extend to other conditions inside loop body.  */
5335   ex_bb = gimple_bb (use->stmt);
5336   if (use->stmt != last_stmt (ex_bb)
5337       || gimple_code (use->stmt) != GIMPLE_COND
5338       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5339     return false;
5340
5341   exit = EDGE_SUCC (ex_bb, 0);
5342   if (flow_bb_inside_loop_p (loop, exit->dest))
5343     exit = EDGE_SUCC (ex_bb, 1);
5344   if (flow_bb_inside_loop_p (loop, exit->dest))
5345     return false;
5346
5347   desc = niter_for_exit (data, exit);
5348   if (!desc)
5349     return false;
5350
5351   /* Determine whether we can use the variable to test the exit condition.
5352      This is the case iff the period of the induction variable is greater
5353      than the number of iterations for which the exit condition is true.  */
5354   period = iv_period (cand->iv);
5355
5356   /* If the number of iterations is constant, compare against it directly.  */
5357   if (TREE_CODE (desc->niter) == INTEGER_CST)
5358     {
5359       /* See cand_value_at.  */
5360       if (stmt_after_increment (loop, cand, use->stmt))
5361         {
5362           if (!tree_int_cst_lt (desc->niter, period))
5363             return false;
5364         }
5365       else
5366         {
5367           if (tree_int_cst_lt (period, desc->niter))
5368             return false;
5369         }
5370     }
5371
5372   /* If not, and if this is the only possible exit of the loop, see whether
5373      we can get a conservative estimate on the number of iterations of the
5374      entire loop and compare against that instead.  */
5375   else
5376     {
5377       widest_int period_value, max_niter;
5378
5379       max_niter = desc->max;
5380       if (stmt_after_increment (loop, cand, use->stmt))
5381         max_niter += 1;
5382       period_value = wi::to_widest (period);
5383       if (wi::gtu_p (max_niter, period_value))
5384         {
5385           /* See if we can take advantage of inferred loop bound
5386              information.  */
5387           if (data->loop_single_exit_p)
5388             {
5389               if (!max_loop_iterations (loop, &max_niter))
5390                 return false;
5391               /* The loop bound is already adjusted by adding 1.  */
5392               if (wi::gtu_p (max_niter, period_value))
5393                 return false;
5394             }
5395           else
5396             return false;
5397         }
5398     }
5399
5400   /* For doloop IV cand, the bound would be zero.  It's safe whether
5401      may_be_zero set or not.  */
5402   if (cand->doloop_p)
5403     {
5404       *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5405       *comp = iv_elimination_compare (data, use);
5406       return true;
5407     }
5408
5409   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5410
5411   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5412                          aff_combination_to_tree (&bnd));
5413   *comp = iv_elimination_compare (data, use);
5414
5415   /* It is unlikely that computing the number of iterations using division
5416      would be more profitable than keeping the original induction variable.  */
5417   if (expression_expensive_p (*bound))
5418     return false;
5419
5420   /* Sometimes, it is possible to handle the situation that the number of
5421      iterations may be zero unless additional assumptions by using <
5422      instead of != in the exit condition.
5423
5424      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5425            base the exit condition on it.  However, that is often too
5426            expensive.  */
5427   if (!integer_zerop (desc->may_be_zero))
5428     return iv_elimination_compare_lt (data, cand, comp, desc);
5429
5430   return true;
5431 }
5432
5433  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5434     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5435
5436 static int
5437 parm_decl_cost (struct ivopts_data *data, tree bound)
5438 {
5439   tree sbound = bound;
5440   STRIP_NOPS (sbound);
5441
5442   if (TREE_CODE (sbound) == SSA_NAME
5443       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5444       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5445       && data->body_includes_call)
5446     return COSTS_N_INSNS (1);
5447
5448   return 0;
5449 }
5450
/* Determines cost of computing the use in GROUP with CAND in a condition.

   Only the first use of GROUP is examined.  Two rewrites are costed:

   -- elimination, tried only when extract_cond_operands reports
      COMP_IV_ELIM and may_eliminate_iv accepts CAND; it yields a new
      BOUND and comparison code COMP, and
   -- expressing the use through CAND via get_computation_cost, to which
      the cost of the original bound is added.

   The cheaper variant (elimination on a tie) is recorded for the pair
   with set_group_iv_cost.  Returns true unless the recorded cost is
   infinite.  */

static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
                              struct iv_group *group, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap inv_exprs = NULL;
  bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
  comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
  enum comp_iv_rewrite rewrite_type;
  iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;
  struct iv_use *use = group->vuses[0];

  /* Extract condition operands.  A compare use must always be
     classifiable, hence the assertion.  */
  rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
                                        &bound_cst, NULL, &cmp_iv);
  gcc_assert (rewrite_type != COMP_IV_NA);

  /* Try iv elimination.  ELIM_COST stays infinite if this is not
     possible.  */
  if (rewrite_type == COMP_IV_ELIM
      && may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &inv_vars_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         inv_vars_elim will have 'base' and 'n' set, which implies that both
         'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear inv_vars_elim and set
         inv_expr_elim instead.  */
      if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
        {
          inv_expr_elim = get_loop_invariant_expr (data, bound);
          bitmap_clear (inv_vars_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost -= 1;

  /* Cost of expressing the use by CAND.  If the other operand of the
     comparison is itself an iv, the invariants of its base are added to
     the set the expressed form depends on.  */
  express_cost = get_computation_cost (data, use, cand, false,
                                       &inv_vars_express, NULL,
                                       &inv_expr_express);
  if (cmp_iv != NULL)
    find_inv_vars (data, &cmp_iv->base, &inv_vars_express);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost += bound_cost;

  /* Choose the better approach, preferring the eliminated IV.  The
     bitmap of the chosen variant is passed on to set_group_iv_cost, so
     the local pointer is cleared to keep it from being freed below.  */
  if (elim_cost <= express_cost)
    {
      cost = elim_cost;
      inv_vars = inv_vars_elim;
      inv_vars_elim = NULL;
      inv_expr = inv_expr_elim;
      /* For doloop candidate/use pair, adjust to zero cost.  */
      if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
        cost = no_cost;
    }
  else
    {
      cost = express_cost;
      inv_vars = inv_vars_express;
      inv_vars_express = NULL;
      /* The elimination results are not used in this case.  */
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr = inv_expr_express;
    }

  /* Record the single invariant expression, if any, as a bitmap of its
     id.  */
  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  set_group_iv_cost (data, group, cand, cost,
                     inv_vars, bound, comp, inv_exprs);

  /* Release the bitmap of the variant that was not chosen.  */
  if (inv_vars_elim)
    BITMAP_FREE (inv_vars_elim);
  if (inv_vars_express)
    BITMAP_FREE (inv_vars_express);

  return !cost.infinite_cost_p ();
}
5560
5561 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5562    if USE cannot be represented with CAND.  */
5563
5564 static bool
5565 determine_group_iv_cost (struct ivopts_data *data,
5566                          struct iv_group *group, struct iv_cand *cand)
5567 {
5568   switch (group->type)
5569     {
5570     case USE_NONLINEAR_EXPR:
5571       return determine_group_iv_cost_generic (data, group, cand);
5572
5573     case USE_REF_ADDRESS:
5574     case USE_PTR_ADDRESS:
5575       return determine_group_iv_cost_address (data, group, cand);
5576
5577     case USE_COMPARE:
5578       return determine_group_iv_cost_cond (data, group, cand);
5579
5580     default:
5581       gcc_unreachable ();
5582     }
5583 }
5584
5585 /* Return true if get_computation_cost indicates that autoincrement is
5586    a possibility for the pair of USE and CAND, false otherwise.  */
5587
5588 static bool
5589 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5590                            struct iv_cand *cand)
5591 {
5592   if (!address_p (use->type))
5593     return false;
5594
5595   bool can_autoinc = false;
5596   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5597   return can_autoinc;
5598 }
5599
5600 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5601    use that allows autoincrement, and set their AINC_USE if possible.  */
5602
5603 static void
5604 set_autoinc_for_original_candidates (struct ivopts_data *data)
5605 {
5606   unsigned i, j;
5607
5608   for (i = 0; i < data->vcands.length (); i++)
5609     {
5610       struct iv_cand *cand = data->vcands[i];
5611       struct iv_use *closest_before = NULL;
5612       struct iv_use *closest_after = NULL;
5613       if (cand->pos != IP_ORIGINAL)
5614         continue;
5615
5616       for (j = 0; j < data->vgroups.length (); j++)
5617         {
5618           struct iv_group *group = data->vgroups[j];
5619           struct iv_use *use = group->vuses[0];
5620           unsigned uid = gimple_uid (use->stmt);
5621
5622           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5623             continue;
5624
5625           if (uid < gimple_uid (cand->incremented_at)
5626               && (closest_before == NULL
5627                   || uid > gimple_uid (closest_before->stmt)))
5628             closest_before = use;
5629
5630           if (uid > gimple_uid (cand->incremented_at)
5631               && (closest_after == NULL
5632                   || uid < gimple_uid (closest_after->stmt)))
5633             closest_after = use;
5634         }
5635
5636       if (closest_before != NULL
5637           && autoinc_possible_for_pair (data, closest_before, cand))
5638         cand->ainc_use = closest_before;
5639       else if (closest_after != NULL
5640                && autoinc_possible_for_pair (data, closest_after, cand))
5641         cand->ainc_use = closest_after;
5642     }
5643 }
5644
5645 /* Relate compare use with all candidates.  */
5646
5647 static void
5648 relate_compare_use_with_all_cands (struct ivopts_data *data)
5649 {
5650   unsigned i, count = data->vcands.length ();
5651   for (i = 0; i < data->vgroups.length (); i++)
5652     {
5653       struct iv_group *group = data->vgroups[i];
5654
5655       if (group->type == USE_COMPARE)
5656         bitmap_set_range (group->related_cands, 0, count);
5657     }
5658 }
5659
5660 /* Add one doloop dedicated IV candidate:
5661      - Base is (may_be_zero ? 1 : (niter + 1)).
5662      - Step is -1.  */
5663
5664 static void
5665 add_iv_candidate_for_doloop (struct ivopts_data *data)
5666 {
5667   tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5668   gcc_assert (niter_desc && niter_desc->assumptions);
5669
5670   tree niter = niter_desc->niter;
5671   tree ntype = TREE_TYPE (niter);
5672   gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5673
5674   tree may_be_zero = niter_desc->may_be_zero;
5675   if (may_be_zero && integer_zerop (may_be_zero))
5676     may_be_zero = NULL_TREE;
5677   if (may_be_zero)
5678     {
5679       if (COMPARISON_CLASS_P (may_be_zero))
5680         {
5681           niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5682                                build_int_cst (ntype, 0),
5683                                rewrite_to_non_trapping_overflow (niter));
5684         }
5685       /* Don't try to obtain the iteration count expression when may_be_zero is
5686          integer_nonzerop (actually iteration count is one) or else.  */
5687       else
5688         return;
5689     }
5690
5691   tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5692                            build_int_cst (ntype, 1));
5693   add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5694 }
5695
5696 /* Finds the candidates for the induction variables.  */
5697
5698 static void
5699 find_iv_candidates (struct ivopts_data *data)
5700 {
5701   /* Add commonly used ivs.  */
5702   add_standard_iv_candidates (data);
5703
5704   /* Add doloop dedicated ivs.  */
5705   if (data->doloop_use_p)
5706     add_iv_candidate_for_doloop (data);
5707
5708   /* Add old induction variables.  */
5709   add_iv_candidate_for_bivs (data);
5710
5711   /* Add induction variables derived from uses.  */
5712   add_iv_candidate_for_groups (data);
5713
5714   set_autoinc_for_original_candidates (data);
5715
5716   /* Record the important candidates.  */
5717   record_important_candidates (data);
5718
5719   /* Relate compare iv_use with all candidates.  */
5720   if (!data->consider_all_candidates)
5721     relate_compare_use_with_all_cands (data);
5722
5723   if (dump_file && (dump_flags & TDF_DETAILS))
5724     {
5725       unsigned i;
5726
5727       fprintf (dump_file, "\n<Important Candidates>:\t");
5728       for (i = 0; i < data->vcands.length (); i++)
5729         if (data->vcands[i]->important)
5730           fprintf (dump_file, " %d,", data->vcands[i]->id);
5731       fprintf (dump_file, "\n");
5732
5733       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5734       for (i = 0; i < data->vgroups.length (); i++)
5735         {
5736           struct iv_group *group = data->vgroups[i];
5737
5738           if (group->related_cands)
5739             {
5740               fprintf (dump_file, "  Group %d:\t", group->id);
5741               dump_bitmap (dump_file, group->related_cands);
5742             }
5743         }
5744       fprintf (dump_file, "\n");
5745     }
5746 }
5747
/* Determines costs of computing each use group with the iv candidates.

   After the cost map is allocated with alloc_use_cost_map, every group
   is costed against every candidate when DATA->consider_all_candidates
   is set, and otherwise only against the candidates in its
   RELATED_CANDS bitmap; in the latter case the candidates for which
   determine_group_iv_cost returns false are dropped from that bitmap.

   With detailed dumping enabled, the invariant variables, the invariant
   expressions (sorted with sort_iv_inv_expr_ent) and the finite
   group/candidate costs are printed to the dump file.  */

static void
determine_group_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand;
  struct iv_group *group;
  /* Scratch set of candidates to drop from the current group; emptied
     again after each group.  */
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      if (data->consider_all_candidates)
        {
          /* The result is ignored here: nothing is pruned in this
             mode.  */
          for (j = 0; j < data->vcands.length (); j++)
            {
              cand = data->vcands[j];
              determine_group_iv_cost (data, group, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          /* Failures are only collected during the walk; the bitmap
             being iterated is modified once the walk is finished.  */
          EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
            {
              cand = data->vcands[j];
              if (!determine_group_iv_cost (data, group, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for that the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (group->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      /* Dump invariant variables.  Only versions with a nonzero inv_id
         are listed; those without a nonlinear use are tagged as
         eliminable.  */
      fprintf (dump_file, "\n<Invariant Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          struct version_info *info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "Inv %d:\t", info->inv_id);
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, "%s\n",
                       info->has_nonlin_use ? "" : "\t(eliminable)");
            }
        }

      /* Dump invariant expressions.  The hash table entries are copied
         into a vector and sorted first.  */
      fprintf (dump_file, "\n<Invariant Expressions>:\n");
      auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());

      for (hash_table<iv_inv_expr_hasher>::iterator it
           = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
           ++it)
        list.safe_push (*it);

      list.qsort (sort_iv_inv_expr_ent);

      for (i = 0; i < list.length (); ++i)
        {
          fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
          print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n<Group-candidate Costs>:\n");

      for (i = 0; i < data->vgroups.length (); i++)
        {
          group = data->vgroups[i];

          fprintf (dump_file, "Group %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
          for (j = 0; j < group->n_map_members; j++)
            {
              /* Skip cost map slots that hold no candidate or an
                 infinite cost.  */
              if (!group->cost_map[j].cand
                  || group->cost_map[j].cost.infinite_cost_p ())
                continue;

              fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
                       group->cost_map[j].cand->id,
                       group->cost_map[j].cost.cost,
                       group->cost_map[j].cost.complexity);
              if (!group->cost_map[j].inv_exprs
                  || bitmap_empty_p (group->cost_map[j].inv_exprs))
                fprintf (dump_file, "NIL;\t");
              else
                bitmap_print (dump_file,
                              group->cost_map[j].inv_exprs, "", ";\t");
              if (!group->cost_map[j].inv_vars
                  || bitmap_empty_p (group->cost_map[j].inv_vars))
                fprintf (dump_file, "NIL;\n");
              else
                bitmap_print (dump_file,
                              group->cost_map[j].inv_vars, "", "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
5865
5866 /* Determines cost of the candidate CAND.  */
5867
5868 static void
5869 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5870 {
5871   comp_cost cost_base;
5872   int64_t cost, cost_step;
5873   tree base;
5874
5875   gcc_assert (cand->iv != NULL);
5876
5877   /* There are two costs associated with the candidate -- its increment
5878      and its initialization.  The second is almost negligible for any loop
5879      that rolls enough, so we take it just very little into account.  */
5880
5881   base = cand->iv->base;
5882   cost_base = force_var_cost (data, base, NULL);
5883   /* It will be exceptional that the iv register happens to be initialized with
5884      the proper value at no cost.  In general, there will at least be a regcopy
5885      or a const set.  */
5886   if (cost_base.cost == 0)
5887     cost_base.cost = COSTS_N_INSNS (1);
5888   /* Doloop decrement should be considered as zero cost.  */
5889   if (cand->doloop_p)
5890     cost_step = 0;
5891   else
5892     cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5893   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5894
5895   /* Prefer the original ivs unless we may gain something by replacing it.
5896      The reason is to make debugging simpler; so this is not relevant for
5897      artificial ivs created by other optimization passes.  */
5898   if ((cand->pos != IP_ORIGINAL
5899        || !SSA_NAME_VAR (cand->var_before)
5900        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5901       /* Prefer doloop as well.  */
5902       && !cand->doloop_p)
5903     cost++;
5904
5905   /* Prefer not to insert statements into latch unless there are some
5906      already (so that we do not create unnecessary jumps).  */
5907   if (cand->pos == IP_END
5908       && empty_block_p (ip_end_pos (data->current_loop)))
5909     cost++;
5910
5911   cand->cost = cost;
5912   cand->cost_step = cost_step;
5913 }
5914
5915 /* Determines costs of computation of the candidates.  */
5916
5917 static void
5918 determine_iv_costs (struct ivopts_data *data)
5919 {
5920   unsigned i;
5921
5922   if (dump_file && (dump_flags & TDF_DETAILS))
5923     {
5924       fprintf (dump_file, "<Candidate Costs>:\n");
5925       fprintf (dump_file, "  cand\tcost\n");
5926     }
5927
5928   for (i = 0; i < data->vcands.length (); i++)
5929     {
5930       struct iv_cand *cand = data->vcands[i];
5931
5932       determine_iv_cost (data, cand);
5933
5934       if (dump_file && (dump_flags & TDF_DETAILS))
5935         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5936     }
5937
5938   if (dump_file && (dump_flags & TDF_DETAILS))
5939     fprintf (dump_file, "\n");
5940 }
5941
5942 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5943    induction variables.  Note N_INVS includes both invariant variables and
5944    invariant expressions.  */
5945
5946 static unsigned
5947 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5948                               unsigned n_cands)
5949 {
5950   unsigned cost;
5951   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5952   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5953   bool speed = data->speed;
5954
5955   /* If there is a call in the loop body, the call-clobbered registers
5956      are not available for loop invariants.  */
5957   if (data->body_includes_call)
5958     available_regs = available_regs - target_clobbered_regs;
5959
5960   /* If we have enough registers.  */
5961   if (regs_needed + target_res_regs < available_regs)
5962     cost = n_new;
5963   /* If close to running out of registers, try to preserve them.  */
5964   else if (regs_needed <= available_regs)
5965     cost = target_reg_cost [speed] * regs_needed;
5966   /* If we run out of available registers but the number of candidates
5967      does not, we penalize extra registers using target_spill_cost.  */
5968   else if (n_cands <= available_regs)
5969     cost = target_reg_cost [speed] * available_regs
5970            + target_spill_cost [speed] * (regs_needed - available_regs);
5971   /* If the number of candidates runs out available registers, we penalize
5972      extra candidate registers using target_spill_cost * 2.  Because it is
5973      more expensive to spill induction variable than invariant.  */
5974   else
5975     cost = target_reg_cost [speed] * available_regs
5976            + target_spill_cost [speed] * (n_cands - available_regs) * 2
5977            + target_spill_cost [speed] * (regs_needed - n_cands);
5978
5979   /* Finally, add the number of candidates, so that we prefer eliminating
5980      induction variables if possible.  */
5981   return cost + n_cands;
5982 }
5983
5984 /* For each size of the induction variable set determine the penalty.  */
5985
5986 static void
5987 determine_set_costs (struct ivopts_data *data)
5988 {
5989   unsigned j, n;
5990   gphi *phi;
5991   gphi_iterator psi;
5992   tree op;
5993   class loop *loop = data->current_loop;
5994   bitmap_iterator bi;
5995
5996   if (dump_file && (dump_flags & TDF_DETAILS))
5997     {
5998       fprintf (dump_file, "<Global Costs>:\n");
5999       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6000       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6001       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6002       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6003     }
6004
6005   n = 0;
6006   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6007     {
6008       phi = psi.phi ();
6009       op = PHI_RESULT (phi);
6010
6011       if (virtual_operand_p (op))
6012         continue;
6013
6014       if (get_iv (data, op))
6015         continue;
6016
6017       if (!POINTER_TYPE_P (TREE_TYPE (op))
6018           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6019         continue;
6020
6021       n++;
6022     }
6023
6024   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6025     {
6026       struct version_info *info = ver_info (data, j);
6027
6028       if (info->inv_id && info->has_nonlin_use)
6029         n++;
6030     }
6031
6032   data->regs_used = n;
6033   if (dump_file && (dump_flags & TDF_DETAILS))
6034     fprintf (dump_file, "  regs_used %d\n", n);
6035
6036   if (dump_file && (dump_flags & TDF_DETAILS))
6037     {
6038       fprintf (dump_file, "  cost for size:\n");
6039       fprintf (dump_file, "  ivs\tcost\n");
6040       for (j = 0; j <= 2 * target_avail_regs; j++)
6041         fprintf (dump_file, "  %d\t%d\n", j,
6042                  ivopts_estimate_reg_pressure (data, 0, j));
6043       fprintf (dump_file, "\n");
6044     }
6045 }
6046
6047 /* Returns true if A is a cheaper cost pair than B.  */
6048
6049 static bool
6050 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6051 {
6052   if (!a)
6053     return false;
6054
6055   if (!b)
6056     return true;
6057
6058   if (a->cost < b->cost)
6059     return true;
6060
6061   if (b->cost < a->cost)
6062     return false;
6063
6064   /* In case the costs are the same, prefer the cheaper candidate.  */
6065   if (a->cand->cost < b->cand->cost)
6066     return true;
6067
6068   return false;
6069 }
6070
6071 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
6072    for more expensive, equal and cheaper respectively.  */
6073
6074 static int
6075 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6076 {
6077   if (cheaper_cost_pair (a, b))
6078     return -1;
6079   if (cheaper_cost_pair (b, a))
6080     return 1;
6081
6082   return 0;
6083 }
6084
6085 /* Returns candidate by that USE is expressed in IVS.  */
6086
6087 static class cost_pair *
6088 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6089 {
6090   return ivs->cand_for_group[group->id];
6091 }
6092
6093 /* Computes the cost field of IVS structure.  */
6094
6095 static void
6096 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6097 {
6098   comp_cost cost = ivs->cand_use_cost;
6099
6100   cost += ivs->cand_cost;
6101   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6102   ivs->cost = cost;
6103 }
6104
6105 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6106    and IVS.  */
6107
6108 static void
6109 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6110 {
6111   bitmap_iterator bi;
6112   unsigned iid;
6113
6114   if (!invs)
6115     return;
6116
6117   gcc_assert (n_inv_uses != NULL);
6118   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6119     {
6120       n_inv_uses[iid]--;
6121       if (n_inv_uses[iid] == 0)
6122         ivs->n_invs--;
6123     }
6124 }
6125
6126 /* Set USE not to be expressed by any candidate in IVS.  */
6127
6128 static void
6129 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6130                  struct iv_group *group)
6131 {
6132   unsigned gid = group->id, cid;
6133   class cost_pair *cp;
6134
6135   cp = ivs->cand_for_group[gid];
6136   if (!cp)
6137     return;
6138   cid = cp->cand->id;
6139
6140   ivs->bad_groups++;
6141   ivs->cand_for_group[gid] = NULL;
6142   ivs->n_cand_uses[cid]--;
6143
6144   if (ivs->n_cand_uses[cid] == 0)
6145     {
6146       bitmap_clear_bit (ivs->cands, cid);
6147       if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6148         ivs->n_cands--;
6149       ivs->cand_cost -= cp->cand->cost;
6150       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6151       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6152     }
6153
6154   ivs->cand_use_cost -= cp->cost;
6155   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6156   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6157   iv_ca_recount_cost (data, ivs);
6158 }
6159
6160 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6161    IVS.  */
6162
6163 static void
6164 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6165 {
6166   bitmap_iterator bi;
6167   unsigned iid;
6168
6169   if (!invs)
6170     return;
6171
6172   gcc_assert (n_inv_uses != NULL);
6173   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6174     {
6175       n_inv_uses[iid]++;
6176       if (n_inv_uses[iid] == 1)
6177         ivs->n_invs++;
6178     }
6179 }
6180
6181 /* Set cost pair for GROUP in set IVS to CP.  */
6182
6183 static void
6184 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6185               struct iv_group *group, class cost_pair *cp)
6186 {
6187   unsigned gid = group->id, cid;
6188
6189   if (ivs->cand_for_group[gid] == cp)
6190     return;
6191
6192   if (ivs->cand_for_group[gid])
6193     iv_ca_set_no_cp (data, ivs, group);
6194
6195   if (cp)
6196     {
6197       cid = cp->cand->id;
6198
6199       ivs->bad_groups--;
6200       ivs->cand_for_group[gid] = cp;
6201       ivs->n_cand_uses[cid]++;
6202       if (ivs->n_cand_uses[cid] == 1)
6203         {
6204           bitmap_set_bit (ivs->cands, cid);
6205           if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6206             ivs->n_cands++;
6207           ivs->cand_cost += cp->cand->cost;
6208           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6209           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6210         }
6211
6212       ivs->cand_use_cost += cp->cost;
6213       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6214       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6215       iv_ca_recount_cost (data, ivs);
6216     }
6217 }
6218
6219 /* Extend set IVS by expressing USE by some of the candidates in it
6220    if possible.  Consider all important candidates if candidates in
6221    set IVS don't give any result.  */
6222
6223 static void
6224 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6225                struct iv_group *group)
6226 {
6227   class cost_pair *best_cp = NULL, *cp;
6228   bitmap_iterator bi;
6229   unsigned i;
6230   struct iv_cand *cand;
6231
6232   gcc_assert (ivs->upto >= group->id);
6233   ivs->upto++;
6234   ivs->bad_groups++;
6235
6236   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6237     {
6238       cand = data->vcands[i];
6239       cp = get_group_iv_cost (data, group, cand);
6240       if (cheaper_cost_pair (cp, best_cp))
6241         best_cp = cp;
6242     }
6243
6244   if (best_cp == NULL)
6245     {
6246       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6247         {
6248           cand = data->vcands[i];
6249           cp = get_group_iv_cost (data, group, cand);
6250           if (cheaper_cost_pair (cp, best_cp))
6251             best_cp = cp;
6252         }
6253     }
6254
6255   iv_ca_set_cp (data, ivs, group, best_cp);
6256 }
6257
6258 /* Get cost for assignment IVS.  */
6259
6260 static comp_cost
6261 iv_ca_cost (class iv_ca *ivs)
6262 {
6263   /* This was a conditional expression but it triggered a bug in
6264      Sun C 5.5.  */
6265   if (ivs->bad_groups)
6266     return infinite_cost;
6267   else
6268     return ivs->cost;
6269 }
6270
6271 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6272    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
6273    respectively.  */
6274
6275 static int
6276 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6277                     struct iv_group *group, class cost_pair *old_cp,
6278                     class cost_pair *new_cp)
6279 {
6280   gcc_assert (old_cp && new_cp && old_cp != new_cp);
6281   unsigned old_n_invs = ivs->n_invs;
6282   iv_ca_set_cp (data, ivs, group, new_cp);
6283   unsigned new_n_invs = ivs->n_invs;
6284   iv_ca_set_cp (data, ivs, group, old_cp);
6285
6286   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6287 }
6288
6289 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6290    it before NEXT.  */
6291
6292 static struct iv_ca_delta *
6293 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6294                  class cost_pair *new_cp, struct iv_ca_delta *next)
6295 {
6296   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6297
6298   change->group = group;
6299   change->old_cp = old_cp;
6300   change->new_cp = new_cp;
6301   change->next = next;
6302
6303   return change;
6304 }
6305
6306 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6307    are rewritten.  */
6308
6309 static struct iv_ca_delta *
6310 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6311 {
6312   struct iv_ca_delta *last;
6313
6314   if (!l2)
6315     return l1;
6316
6317   if (!l1)
6318     return l2;
6319
6320   for (last = l1; last->next; last = last->next)
6321     continue;
6322   last->next = l2;
6323
6324   return l1;
6325 }
6326
6327 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6328
6329 static struct iv_ca_delta *
6330 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6331 {
6332   struct iv_ca_delta *act, *next, *prev = NULL;
6333
6334   for (act = delta; act; act = next)
6335     {
6336       next = act->next;
6337       act->next = prev;
6338       prev = act;
6339
6340       std::swap (act->old_cp, act->new_cp);
6341     }
6342
6343   return prev;
6344 }
6345
6346 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6347    reverted instead.  */
6348
6349 static void
6350 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6351                     struct iv_ca_delta *delta, bool forward)
6352 {
6353   class cost_pair *from, *to;
6354   struct iv_ca_delta *act;
6355
6356   if (!forward)
6357     delta = iv_ca_delta_reverse (delta);
6358
6359   for (act = delta; act; act = act->next)
6360     {
6361       from = act->old_cp;
6362       to = act->new_cp;
6363       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6364       iv_ca_set_cp (data, ivs, act->group, to);
6365     }
6366
6367   if (!forward)
6368     iv_ca_delta_reverse (delta);
6369 }
6370
6371 /* Returns true if CAND is used in IVS.  */
6372
6373 static bool
6374 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6375 {
6376   return ivs->n_cand_uses[cand->id] > 0;
6377 }
6378
6379 /* Returns number of induction variable candidates in the set IVS.  */
6380
6381 static unsigned
6382 iv_ca_n_cands (class iv_ca *ivs)
6383 {
6384   return ivs->n_cands;
6385 }
6386
6387 /* Free the list of changes DELTA.  */
6388
6389 static void
6390 iv_ca_delta_free (struct iv_ca_delta **delta)
6391 {
6392   struct iv_ca_delta *act, *next;
6393
6394   for (act = *delta; act; act = next)
6395     {
6396       next = act->next;
6397       free (act);
6398     }
6399
6400   *delta = NULL;
6401 }
6402
6403 /* Allocates new iv candidates assignment.  */
6404
6405 static class iv_ca *
6406 iv_ca_new (struct ivopts_data *data)
6407 {
6408   class iv_ca *nw = XNEW (class iv_ca);
6409
6410   nw->upto = 0;
6411   nw->bad_groups = 0;
6412   nw->cand_for_group = XCNEWVEC (class cost_pair *,
6413                                  data->vgroups.length ());
6414   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6415   nw->cands = BITMAP_ALLOC (NULL);
6416   nw->n_cands = 0;
6417   nw->n_invs = 0;
6418   nw->cand_use_cost = no_cost;
6419   nw->cand_cost = 0;
6420   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6421   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6422   nw->cost = no_cost;
6423
6424   return nw;
6425 }
6426
6427 /* Free memory occupied by the set IVS.  */
6428
6429 static void
6430 iv_ca_free (class iv_ca **ivs)
6431 {
6432   free ((*ivs)->cand_for_group);
6433   free ((*ivs)->n_cand_uses);
6434   BITMAP_FREE ((*ivs)->cands);
6435   free ((*ivs)->n_inv_var_uses);
6436   free ((*ivs)->n_inv_expr_uses);
6437   free (*ivs);
6438   *ivs = NULL;
6439 }
6440
6441 /* Dumps IVS to FILE.  */
6442
6443 static void
6444 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6445 {
6446   unsigned i;
6447   comp_cost cost = iv_ca_cost (ivs);
6448
6449   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6450            cost.complexity);
6451   fprintf (file, "  reg_cost: %d\n",
6452            ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6453   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6454            "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6455            ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6456   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6457
6458   for (i = 0; i < ivs->upto; i++)
6459     {
6460       struct iv_group *group = data->vgroups[i];
6461       class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6462       if (cp)
6463         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6464                  "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6465                  cp->cost.cost, cp->cost.complexity);
6466       else
6467         fprintf (file, "   group:%d --> ??\n", group->id);
6468     }
6469
6470   const char *pref = "";
6471   fprintf (file, "  invariant variables: ");
6472   for (i = 1; i <= data->max_inv_var_id; i++)
6473     if (ivs->n_inv_var_uses[i])
6474       {
6475         fprintf (file, "%s%d", pref, i);
6476         pref = ", ";
6477       }
6478
6479   pref = "";
6480   fprintf (file, "\n  invariant expressions: ");
6481   for (i = 1; i <= data->max_inv_expr_id; i++)
6482     if (ivs->n_inv_expr_uses[i])
6483       {
6484         fprintf (file, "%s%d", pref, i);
6485         pref = ", ";
6486       }
6487
6488   fprintf (file, "\n\n");
6489 }
6490
6491 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6492    new set, and store differences in DELTA.  Number of induction variables
6493    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6494    the function will try to find a solution with mimimal iv candidates.  */
6495
6496 static comp_cost
6497 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6498               struct iv_cand *cand, struct iv_ca_delta **delta,
6499               unsigned *n_ivs, bool min_ncand)
6500 {
6501   unsigned i;
6502   comp_cost cost;
6503   struct iv_group *group;
6504   class cost_pair *old_cp, *new_cp;
6505
6506   *delta = NULL;
6507   for (i = 0; i < ivs->upto; i++)
6508     {
6509       group = data->vgroups[i];
6510       old_cp = iv_ca_cand_for_group (ivs, group);
6511
6512       if (old_cp
6513           && old_cp->cand == cand)
6514         continue;
6515
6516       new_cp = get_group_iv_cost (data, group, cand);
6517       if (!new_cp)
6518         continue;
6519
6520       if (!min_ncand)
6521         {
6522           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6523           /* Skip if new_cp depends on more invariants.  */
6524           if (cmp_invs > 0)
6525             continue;
6526
6527           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6528           /* Skip if new_cp is not cheaper.  */
6529           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6530             continue;
6531         }
6532
6533       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6534     }
6535
6536   iv_ca_delta_commit (data, ivs, *delta, true);
6537   cost = iv_ca_cost (ivs);
6538   if (n_ivs)
6539     *n_ivs = iv_ca_n_cands (ivs);
6540   iv_ca_delta_commit (data, ivs, *delta, false);
6541
6542   return cost;
6543 }
6544
6545 /* Try narrowing set IVS by removing CAND.  Return the cost of
6546    the new set and store the differences in DELTA.  START is
6547    the candidate with which we start narrowing.  */
6548
6549 static comp_cost
6550 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6551               struct iv_cand *cand, struct iv_cand *start,
6552               struct iv_ca_delta **delta)
6553 {
6554   unsigned i, ci;
6555   struct iv_group *group;
6556   class cost_pair *old_cp, *new_cp, *cp;
6557   bitmap_iterator bi;
6558   struct iv_cand *cnd;
6559   comp_cost cost, best_cost, acost;
6560
6561   *delta = NULL;
6562   for (i = 0; i < data->vgroups.length (); i++)
6563     {
6564       group = data->vgroups[i];
6565
6566       old_cp = iv_ca_cand_for_group (ivs, group);
6567       if (old_cp->cand != cand)
6568         continue;
6569
6570       best_cost = iv_ca_cost (ivs);
6571       /* Start narrowing with START.  */
6572       new_cp = get_group_iv_cost (data, group, start);
6573
6574       if (data->consider_all_candidates)
6575         {
6576           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6577             {
6578               if (ci == cand->id || (start && ci == start->id))
6579                 continue;
6580
6581               cnd = data->vcands[ci];
6582
6583               cp = get_group_iv_cost (data, group, cnd);
6584               if (!cp)
6585                 continue;
6586
6587               iv_ca_set_cp (data, ivs, group, cp);
6588               acost = iv_ca_cost (ivs);
6589
6590               if (acost < best_cost)
6591                 {
6592                   best_cost = acost;
6593                   new_cp = cp;
6594                 }
6595             }
6596         }
6597       else
6598         {
6599           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6600             {
6601               if (ci == cand->id || (start && ci == start->id))
6602                 continue;
6603
6604               cnd = data->vcands[ci];
6605
6606               cp = get_group_iv_cost (data, group, cnd);
6607               if (!cp)
6608                 continue;
6609
6610               iv_ca_set_cp (data, ivs, group, cp);
6611               acost = iv_ca_cost (ivs);
6612
6613               if (acost < best_cost)
6614                 {
6615                   best_cost = acost;
6616                   new_cp = cp;
6617                 }
6618             }
6619         }
6620       /* Restore to old cp for use.  */
6621       iv_ca_set_cp (data, ivs, group, old_cp);
6622
6623       if (!new_cp)
6624         {
6625           iv_ca_delta_free (delta);
6626           return infinite_cost;
6627         }
6628
6629       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6630     }
6631
6632   iv_ca_delta_commit (data, ivs, *delta, true);
6633   cost = iv_ca_cost (ivs);
6634   iv_ca_delta_commit (data, ivs, *delta, false);
6635
6636   return cost;
6637 }
6638
6639 /* Try optimizing the set of candidates IVS by removing candidates different
6640    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6641    differences in DELTA.  */
6642
6643 static comp_cost
6644 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6645              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6646 {
6647   bitmap_iterator bi;
6648   struct iv_ca_delta *act_delta, *best_delta;
6649   unsigned i;
6650   comp_cost best_cost, acost;
6651   struct iv_cand *cand;
6652
6653   best_delta = NULL;
6654   best_cost = iv_ca_cost (ivs);
6655
6656   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6657     {
6658       cand = data->vcands[i];
6659
6660       if (cand == except_cand)
6661         continue;
6662
6663       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6664
6665       if (acost < best_cost)
6666         {
6667           best_cost = acost;
6668           iv_ca_delta_free (&best_delta);
6669           best_delta = act_delta;
6670         }
6671       else
6672         iv_ca_delta_free (&act_delta);
6673     }
6674
6675   if (!best_delta)
6676     {
6677       *delta = NULL;
6678       return best_cost;
6679     }
6680
6681   /* Recurse to possibly remove other unnecessary ivs.  */
6682   iv_ca_delta_commit (data, ivs, best_delta, true);
6683   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6684   iv_ca_delta_commit (data, ivs, best_delta, false);
6685   *delta = iv_ca_delta_join (best_delta, *delta);
6686   return best_cost;
6687 }
6688
6689 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6690    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6691    the corresponding cost_pair, otherwise just return BEST_CP.  */
6692
6693 static class cost_pair*
6694 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6695                         unsigned int cand_idx, struct iv_cand *old_cand,
6696                         class cost_pair *best_cp)
6697 {
6698   struct iv_cand *cand;
6699   class cost_pair *cp;
6700
6701   gcc_assert (old_cand != NULL && best_cp != NULL);
6702   if (cand_idx == old_cand->id)
6703     return best_cp;
6704
6705   cand = data->vcands[cand_idx];
6706   cp = get_group_iv_cost (data, group, cand);
6707   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6708     return cp;
6709
6710   return best_cp;
6711 }
6712
6713 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6714    which are used by more than one iv uses.  For each of those candidates,
6715    this function tries to represent iv uses under that candidate using
6716    other ones with lower local cost, then tries to prune the new set.
6717    If the new set has lower cost, It returns the new cost after recording
6718    candidate replacement in list DELTA.  */
6719
6720 static comp_cost
6721 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6722                struct iv_ca_delta **delta)
6723 {
6724   bitmap_iterator bi, bj;
6725   unsigned int i, j, k;
6726   struct iv_cand *cand;
6727   comp_cost orig_cost, acost;
6728   struct iv_ca_delta *act_delta, *tmp_delta;
6729   class cost_pair *old_cp, *best_cp = NULL;
6730
6731   *delta = NULL;
6732   orig_cost = iv_ca_cost (ivs);
6733
6734   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6735     {
6736       if (ivs->n_cand_uses[i] == 1
6737           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6738         continue;
6739
6740       cand = data->vcands[i];
6741
6742       act_delta = NULL;
6743       /*  Represent uses under current candidate using other ones with
6744           lower local cost.  */
6745       for (j = 0; j < ivs->upto; j++)
6746         {
6747           struct iv_group *group = data->vgroups[j];
6748           old_cp = iv_ca_cand_for_group (ivs, group);
6749
6750           if (old_cp->cand != cand)
6751             continue;
6752
6753           best_cp = old_cp;
6754           if (data->consider_all_candidates)
6755             for (k = 0; k < data->vcands.length (); k++)
6756               best_cp = cheaper_cost_with_cand (data, group, k,
6757                                                 old_cp->cand, best_cp);
6758           else
6759             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6760               best_cp = cheaper_cost_with_cand (data, group, k,
6761                                                 old_cp->cand, best_cp);
6762
6763           if (best_cp == old_cp)
6764             continue;
6765
6766           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6767         }
6768       /* No need for further prune.  */
6769       if (!act_delta)
6770         continue;
6771
6772       /* Prune the new candidate set.  */
6773       iv_ca_delta_commit (data, ivs, act_delta, true);
6774       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6775       iv_ca_delta_commit (data, ivs, act_delta, false);
6776       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6777
6778       if (acost < orig_cost)
6779         {
6780           *delta = act_delta;
6781           return acost;
6782         }
6783       else
6784         iv_ca_delta_free (&act_delta);
6785     }
6786
6787   return orig_cost;
6788 }
6789
6790 /* Tries to extend the sets IVS in the best possible way in order to
6791    express the GROUP.  If ORIGINALP is true, prefer candidates from
6792    the original set of IVs, otherwise favor important candidates not
6793    based on any memory object.  */
6794
6795 static bool
6796 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6797                   struct iv_group *group, bool originalp)
6798 {
6799   comp_cost best_cost, act_cost;
6800   unsigned i;
6801   bitmap_iterator bi;
6802   struct iv_cand *cand;
6803   struct iv_ca_delta *best_delta = NULL, *act_delta;
6804   class cost_pair *cp;
6805
6806   iv_ca_add_group (data, ivs, group);
6807   best_cost = iv_ca_cost (ivs);
6808   cp = iv_ca_cand_for_group (ivs, group);
6809   if (cp)
6810     {
6811       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6812       iv_ca_set_no_cp (data, ivs, group);
6813     }
6814
6815   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6816      first try important candidates not based on any memory object.  Only if
6817      this fails, try the specific ones.  Rationale -- in loops with many
6818      variables the best choice often is to use just one generic biv.  If we
6819      added here many ivs specific to the uses, the optimization algorithm later
6820      would be likely to get stuck in a local minimum, thus causing us to create
6821      too many ivs.  The approach from few ivs to more seems more likely to be
6822      successful -- starting from few ivs, replacing an expensive use by a
6823      specific iv should always be a win.  */
6824   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6825     {
6826       cand = data->vcands[i];
6827
6828       if (originalp && cand->pos !=IP_ORIGINAL)
6829         continue;
6830
6831       if (!originalp && cand->iv->base_object != NULL_TREE)
6832         continue;
6833
6834       if (iv_ca_cand_used_p (ivs, cand))
6835         continue;
6836
6837       cp = get_group_iv_cost (data, group, cand);
6838       if (!cp)
6839         continue;
6840
6841       iv_ca_set_cp (data, ivs, group, cp);
6842       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6843                                true);
6844       iv_ca_set_no_cp (data, ivs, group);
6845       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6846
6847       if (act_cost < best_cost)
6848         {
6849           best_cost = act_cost;
6850
6851           iv_ca_delta_free (&best_delta);
6852           best_delta = act_delta;
6853         }
6854       else
6855         iv_ca_delta_free (&act_delta);
6856     }
6857
6858   if (best_cost.infinite_cost_p ())
6859     {
6860       for (i = 0; i < group->n_map_members; i++)
6861         {
6862           cp = group->cost_map + i;
6863           cand = cp->cand;
6864           if (!cand)
6865             continue;
6866
6867           /* Already tried this.  */
6868           if (cand->important)
6869             {
6870               if (originalp && cand->pos == IP_ORIGINAL)
6871                 continue;
6872               if (!originalp && cand->iv->base_object == NULL_TREE)
6873                 continue;
6874             }
6875
6876           if (iv_ca_cand_used_p (ivs, cand))
6877             continue;
6878
6879           act_delta = NULL;
6880           iv_ca_set_cp (data, ivs, group, cp);
6881           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6882           iv_ca_set_no_cp (data, ivs, group);
6883           act_delta = iv_ca_delta_add (group,
6884                                        iv_ca_cand_for_group (ivs, group),
6885                                        cp, act_delta);
6886
6887           if (act_cost < best_cost)
6888             {
6889               best_cost = act_cost;
6890
6891               if (best_delta)
6892                 iv_ca_delta_free (&best_delta);
6893               best_delta = act_delta;
6894             }
6895           else
6896             iv_ca_delta_free (&act_delta);
6897         }
6898     }
6899
6900   iv_ca_delta_commit (data, ivs, best_delta, true);
6901   iv_ca_delta_free (&best_delta);
6902
6903   return !best_cost.infinite_cost_p ();
6904 }
6905
6906 /* Finds an initial assignment of candidates to uses.  */
6907
6908 static class iv_ca *
6909 get_initial_solution (struct ivopts_data *data, bool originalp)
6910 {
6911   unsigned i;
6912   class iv_ca *ivs = iv_ca_new (data);
6913
6914   for (i = 0; i < data->vgroups.length (); i++)
6915     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6916       {
6917         iv_ca_free (&ivs);
6918         return NULL;
6919       }
6920
6921   return ivs;
6922 }
6923
6924 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6925    points to a bool variable, this function tries to break local
6926    optimal fixed-point by replacing candidates in IVS if it's true.  */
6927
6928 static bool
6929 try_improve_iv_set (struct ivopts_data *data,
6930                     class iv_ca *ivs, bool *try_replace_p)
6931 {
6932   unsigned i, n_ivs;
6933   comp_cost acost, best_cost = iv_ca_cost (ivs);
6934   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6935   struct iv_cand *cand;
6936
6937   /* Try extending the set of induction variables by one.  */
6938   for (i = 0; i < data->vcands.length (); i++)
6939     {
6940       cand = data->vcands[i];
6941
6942       if (iv_ca_cand_used_p (ivs, cand))
6943         continue;
6944
6945       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6946       if (!act_delta)
6947         continue;
6948
6949       /* If we successfully added the candidate and the set is small enough,
6950          try optimizing it by removing other candidates.  */
6951       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6952         {
6953           iv_ca_delta_commit (data, ivs, act_delta, true);
6954           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6955           iv_ca_delta_commit (data, ivs, act_delta, false);
6956           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6957         }
6958
6959       if (acost < best_cost)
6960         {
6961           best_cost = acost;
6962           iv_ca_delta_free (&best_delta);
6963           best_delta = act_delta;
6964         }
6965       else
6966         iv_ca_delta_free (&act_delta);
6967     }
6968
6969   if (!best_delta)
6970     {
6971       /* Try removing the candidates from the set instead.  */
6972       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6973
6974       if (!best_delta && *try_replace_p)
6975         {
6976           *try_replace_p = false;
6977           /* So far candidate selecting algorithm tends to choose fewer IVs
6978              so that it can handle cases in which loops have many variables
6979              but the best choice is often to use only one general biv.  One
6980              weakness is it can't handle opposite cases, in which different
6981              candidates should be chosen with respect to each use.  To solve
6982              the problem, we replace candidates in a manner described by the
6983              comments of iv_ca_replace, thus give general algorithm a chance
6984              to break local optimal fixed-point in these cases.  */
6985           best_cost = iv_ca_replace (data, ivs, &best_delta);
6986         }
6987
6988       if (!best_delta)
6989         return false;
6990     }
6991
6992   iv_ca_delta_commit (data, ivs, best_delta, true);
6993   iv_ca_delta_free (&best_delta);
6994   return best_cost == iv_ca_cost (ivs);
6995 }
6996
6997 /* Attempts to find the optimal set of induction variables.  We do simple
6998    greedy heuristic -- we try to replace at most one candidate in the selected
6999    solution and remove the unused ivs while this improves the cost.  */
7000
7001 static class iv_ca *
7002 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7003 {
7004   class iv_ca *set;
7005   bool try_replace_p = true;
7006
7007   /* Get the initial solution.  */
7008   set = get_initial_solution (data, originalp);
7009   if (!set)
7010     {
7011       if (dump_file && (dump_flags & TDF_DETAILS))
7012         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7013       return NULL;
7014     }
7015
7016   if (dump_file && (dump_flags & TDF_DETAILS))
7017     {
7018       fprintf (dump_file, "Initial set of candidates:\n");
7019       iv_ca_dump (data, dump_file, set);
7020     }
7021
7022   while (try_improve_iv_set (data, set, &try_replace_p))
7023     {
7024       if (dump_file && (dump_flags & TDF_DETAILS))
7025         {
7026           fprintf (dump_file, "Improved to:\n");
7027           iv_ca_dump (data, dump_file, set);
7028         }
7029     }
7030
7031   /* If the set has infinite_cost, it can't be optimal.  */
7032   if (iv_ca_cost (set).infinite_cost_p ())
7033     {
7034       if (dump_file && (dump_flags & TDF_DETAILS))
7035         fprintf (dump_file,
7036                  "Overflow to infinite cost in try_improve_iv_set.\n");
7037       iv_ca_free (&set);
7038     }
7039   return set;
7040 }
7041
7042 static class iv_ca *
7043 find_optimal_iv_set (struct ivopts_data *data)
7044 {
7045   unsigned i;
7046   comp_cost cost, origcost;
7047   class iv_ca *set, *origset;
7048
7049   /* Determine the cost based on a strategy that starts with original IVs,
7050      and try again using a strategy that prefers candidates not based
7051      on any IVs.  */
7052   origset = find_optimal_iv_set_1 (data, true);
7053   set = find_optimal_iv_set_1 (data, false);
7054
7055   if (!origset && !set)
7056     return NULL;
7057
7058   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7059   cost = set ? iv_ca_cost (set) : infinite_cost;
7060
7061   if (dump_file && (dump_flags & TDF_DETAILS))
7062     {
7063       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7064                origcost.cost, origcost.complexity);
7065       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7066                cost.cost, cost.complexity);
7067     }
7068
7069   /* Choose the one with the best cost.  */
7070   if (origcost <= cost)
7071     {
7072       if (set)
7073         iv_ca_free (&set);
7074       set = origset;
7075     }
7076   else if (origset)
7077     iv_ca_free (&origset);
7078
7079   for (i = 0; i < data->vgroups.length (); i++)
7080     {
7081       struct iv_group *group = data->vgroups[i];
7082       group->selected = iv_ca_cand_for_group (set, group)->cand;
7083     }
7084
7085   return set;
7086 }
7087
7088 /* Creates a new induction variable corresponding to CAND.  */
7089
7090 static void
7091 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7092 {
7093   gimple_stmt_iterator incr_pos;
7094   tree base;
7095   struct iv_use *use;
7096   struct iv_group *group;
7097   bool after = false;
7098
7099   gcc_assert (cand->iv != NULL);
7100
7101   switch (cand->pos)
7102     {
7103     case IP_NORMAL:
7104       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7105       break;
7106
7107     case IP_END:
7108       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7109       after = true;
7110       break;
7111
7112     case IP_AFTER_USE:
7113       after = true;
7114       /* fall through */
7115     case IP_BEFORE_USE:
7116       incr_pos = gsi_for_stmt (cand->incremented_at);
7117       break;
7118
7119     case IP_ORIGINAL:
7120       /* Mark that the iv is preserved.  */
7121       name_info (data, cand->var_before)->preserve_biv = true;
7122       name_info (data, cand->var_after)->preserve_biv = true;
7123
7124       /* Rewrite the increment so that it uses var_before directly.  */
7125       use = find_interesting_uses_op (data, cand->var_after);
7126       group = data->vgroups[use->group_id];
7127       group->selected = cand;
7128       return;
7129     }
7130
7131   gimple_add_tmp_var (cand->var_before);
7132
7133   base = unshare_expr (cand->iv->base);
7134
7135   create_iv (base, unshare_expr (cand->iv->step),
7136              cand->var_before, data->current_loop,
7137              &incr_pos, after, &cand->var_before, &cand->var_after);
7138 }
7139
7140 /* Creates new induction variables described in SET.  */
7141
7142 static void
7143 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7144 {
7145   unsigned i;
7146   struct iv_cand *cand;
7147   bitmap_iterator bi;
7148
7149   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7150     {
7151       cand = data->vcands[i];
7152       create_new_iv (data, cand);
7153     }
7154
7155   if (dump_file && (dump_flags & TDF_DETAILS))
7156     {
7157       fprintf (dump_file, "Selected IV set for loop %d",
7158                data->current_loop->num);
7159       if (data->loop_loc != UNKNOWN_LOCATION)
7160         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7161                  LOCATION_LINE (data->loop_loc));
7162       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7163                avg_loop_niter (data->current_loop));
7164       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7165       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7166         {
7167           cand = data->vcands[i];
7168           dump_cand (dump_file, cand);
7169         }
7170       fprintf (dump_file, "\n");
7171     }
7172 }
7173
7174 /* Rewrites USE (definition of iv used in a nonlinear expression)
7175    using candidate CAND.  */
7176
7177 static void
7178 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7179                             struct iv_use *use, struct iv_cand *cand)
7180 {
7181   gassign *ass;
7182   gimple_stmt_iterator bsi;
7183   tree comp, type = get_use_type (use), tgt;
7184
7185   /* An important special case -- if we are asked to express value of
7186      the original iv by itself, just exit; there is no need to
7187      introduce a new computation (that might also need casting the
7188      variable to unsigned and back).  */
7189   if (cand->pos == IP_ORIGINAL
7190       && cand->incremented_at == use->stmt)
7191     {
7192       tree op = NULL_TREE;
7193       enum tree_code stmt_code;
7194
7195       gcc_assert (is_gimple_assign (use->stmt));
7196       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7197
7198       /* Check whether we may leave the computation unchanged.
7199          This is the case only if it does not rely on other
7200          computations in the loop -- otherwise, the computation
7201          we rely upon may be removed in remove_unused_ivs,
7202          thus leading to ICE.  */
7203       stmt_code = gimple_assign_rhs_code (use->stmt);
7204       if (stmt_code == PLUS_EXPR
7205           || stmt_code == MINUS_EXPR
7206           || stmt_code == POINTER_PLUS_EXPR)
7207         {
7208           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7209             op = gimple_assign_rhs2 (use->stmt);
7210           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7211             op = gimple_assign_rhs1 (use->stmt);
7212         }
7213
7214       if (op != NULL_TREE)
7215         {
7216           if (expr_invariant_in_loop_p (data->current_loop, op))
7217             return;
7218           if (TREE_CODE (op) == SSA_NAME)
7219             {
7220               struct iv *iv = get_iv (data, op);
7221               if (iv != NULL && integer_zerop (iv->step))
7222                 return;
7223             }
7224         }
7225     }
7226
7227   switch (gimple_code (use->stmt))
7228     {
7229     case GIMPLE_PHI:
7230       tgt = PHI_RESULT (use->stmt);
7231
7232       /* If we should keep the biv, do not replace it.  */
7233       if (name_info (data, tgt)->preserve_biv)
7234         return;
7235
7236       bsi = gsi_after_labels (gimple_bb (use->stmt));
7237       break;
7238
7239     case GIMPLE_ASSIGN:
7240       tgt = gimple_assign_lhs (use->stmt);
7241       bsi = gsi_for_stmt (use->stmt);
7242       break;
7243
7244     default:
7245       gcc_unreachable ();
7246     }
7247
7248   aff_tree aff_inv, aff_var;
7249   if (!get_computation_aff_1 (data->current_loop, use->stmt,
7250                               use, cand, &aff_inv, &aff_var))
7251     gcc_unreachable ();
7252
7253   unshare_aff_combination (&aff_inv);
7254   unshare_aff_combination (&aff_var);
7255   /* Prefer CSE opportunity than loop invariant by adding offset at last
7256      so that iv_uses have different offsets can be CSEed.  */
7257   poly_widest_int offset = aff_inv.offset;
7258   aff_inv.offset = 0;
7259
7260   gimple_seq stmt_list = NULL, seq = NULL;
7261   tree comp_op1 = aff_combination_to_tree (&aff_inv);
7262   tree comp_op2 = aff_combination_to_tree (&aff_var);
7263   gcc_assert (comp_op1 && comp_op2);
7264
7265   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7266   gimple_seq_add_seq (&stmt_list, seq);
7267   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7268   gimple_seq_add_seq (&stmt_list, seq);
7269
7270   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7271     std::swap (comp_op1, comp_op2);
7272
7273   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7274     {
7275       comp = fold_build_pointer_plus (comp_op1,
7276                                       fold_convert (sizetype, comp_op2));
7277       comp = fold_build_pointer_plus (comp,
7278                                       wide_int_to_tree (sizetype, offset));
7279     }
7280   else
7281     {
7282       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7283                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
7284       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7285                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7286     }
7287
7288   comp = fold_convert (type, comp);
7289   if (!valid_gimple_rhs_p (comp)
7290       || (gimple_code (use->stmt) != GIMPLE_PHI
7291           /* We can't allow re-allocating the stmt as it might be pointed
7292              to still.  */
7293           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7294               >= gimple_num_ops (gsi_stmt (bsi)))))
7295     {
7296       comp = force_gimple_operand (comp, &seq, true, NULL);
7297       gimple_seq_add_seq (&stmt_list, seq);
7298       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7299         {
7300           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7301           /* As this isn't a plain copy we have to reset alignment
7302              information.  */
7303           if (SSA_NAME_PTR_INFO (comp))
7304             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7305         }
7306     }
7307
7308   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7309   if (gimple_code (use->stmt) == GIMPLE_PHI)
7310     {
7311       ass = gimple_build_assign (tgt, comp);
7312       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7313
7314       bsi = gsi_for_stmt (use->stmt);
7315       remove_phi_node (&bsi, false);
7316     }
7317   else
7318     {
7319       gimple_assign_set_rhs_from_tree (&bsi, comp);
7320       use->stmt = gsi_stmt (bsi);
7321     }
7322 }
7323
7324 /* Performs a peephole optimization to reorder the iv update statement with
7325    a mem ref to enable instruction combining in later phases. The mem ref uses
7326    the iv value before the update, so the reordering transformation requires
7327    adjustment of the offset. CAND is the selected IV_CAND.
7328
7329    Example:
7330
7331    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7332    iv2 = iv1 + 1;
7333
7334    if (t < val)      (1)
7335      goto L;
7336    goto Head;
7337
7338
7339    directly propagating t over to (1) will introduce overlapping live range
7340    thus increase register pressure. This peephole transform it into:
7341
7342
7343    iv2 = iv1 + 1;
7344    t = MEM_REF (base, iv2, 8, 8);
7345    if (t < val)
7346      goto L;
7347    goto Head;
7348 */
7349
7350 static void
7351 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7352 {
7353   tree var_after;
7354   gimple *iv_update, *stmt;
7355   basic_block bb;
7356   gimple_stmt_iterator gsi, gsi_iv;
7357
7358   if (cand->pos != IP_NORMAL)
7359     return;
7360
7361   var_after = cand->var_after;
7362   iv_update = SSA_NAME_DEF_STMT (var_after);
7363
7364   bb = gimple_bb (iv_update);
7365   gsi = gsi_last_nondebug_bb (bb);
7366   stmt = gsi_stmt (gsi);
7367
7368   /* Only handle conditional statement for now.  */
7369   if (gimple_code (stmt) != GIMPLE_COND)
7370     return;
7371
7372   gsi_prev_nondebug (&gsi);
7373   stmt = gsi_stmt (gsi);
7374   if (stmt != iv_update)
7375     return;
7376
7377   gsi_prev_nondebug (&gsi);
7378   if (gsi_end_p (gsi))
7379     return;
7380
7381   stmt = gsi_stmt (gsi);
7382   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7383     return;
7384
7385   if (stmt != use->stmt)
7386     return;
7387
7388   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7389     return;
7390
7391   if (dump_file && (dump_flags & TDF_DETAILS))
7392     {
7393       fprintf (dump_file, "Reordering \n");
7394       print_gimple_stmt (dump_file, iv_update, 0);
7395       print_gimple_stmt (dump_file, use->stmt, 0);
7396       fprintf (dump_file, "\n");
7397     }
7398
7399   gsi = gsi_for_stmt (use->stmt);
7400   gsi_iv = gsi_for_stmt (iv_update);
7401   gsi_move_before (&gsi_iv, &gsi);
7402
7403   cand->pos = IP_BEFORE_USE;
7404   cand->incremented_at = use->stmt;
7405 }
7406
7407 /* Return the alias pointer type that should be used for a MEM_REF
7408    associated with USE, which has type USE_PTR_ADDRESS.  */
7409
7410 static tree
7411 get_alias_ptr_type_for_ptr_address (iv_use *use)
7412 {
7413   gcall *call = as_a <gcall *> (use->stmt);
7414   switch (gimple_call_internal_fn (call))
7415     {
7416     case IFN_MASK_LOAD:
7417     case IFN_MASK_STORE:
7418     case IFN_MASK_LOAD_LANES:
7419     case IFN_MASK_STORE_LANES:
7420     case IFN_LEN_LOAD:
7421     case IFN_LEN_STORE:
7422       /* The second argument contains the correct alias type.  */
7423       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7424       return TREE_TYPE (gimple_call_arg (call, 1));
7425
7426     default:
7427       gcc_unreachable ();
7428     }
7429 }
7430
7431
7432 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7433
7434 static void
7435 rewrite_use_address (struct ivopts_data *data,
7436                      struct iv_use *use, struct iv_cand *cand)
7437 {
7438   aff_tree aff;
7439   bool ok;
7440
7441   adjust_iv_update_pos (cand, use);
7442   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7443   gcc_assert (ok);
7444   unshare_aff_combination (&aff);
7445
7446   /* To avoid undefined overflow problems, all IV candidates use unsigned
7447      integer types.  The drawback is that this makes it impossible for
7448      create_mem_ref to distinguish an IV that is based on a memory object
7449      from one that represents simply an offset.
7450
7451      To work around this problem, we pass a hint to create_mem_ref that
7452      indicates which variable (if any) in aff is an IV based on a memory
7453      object.  Note that we only consider the candidate.  If this is not
7454      based on an object, the base of the reference is in some subexpression
7455      of the use -- but these will use pointer types, so they are recognized
7456      by the create_mem_ref heuristics anyway.  */
7457   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7458   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7459   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7460   tree type = use->mem_type;
7461   tree alias_ptr_type;
7462   if (use->type == USE_PTR_ADDRESS)
7463     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7464   else
7465     {
7466       gcc_assert (type == TREE_TYPE (*use->op_p));
7467       unsigned int align = get_object_alignment (*use->op_p);
7468       if (align != TYPE_ALIGN (type))
7469         type = build_aligned_type (type, align);
7470       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7471     }
7472   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7473                              iv, base_hint, data->speed);
7474
7475   if (use->type == USE_PTR_ADDRESS)
7476     {
7477       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7478       ref = fold_convert (get_use_type (use), ref);
7479       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7480                                       true, GSI_SAME_STMT);
7481     }
7482   else
7483     copy_ref_info (ref, *use->op_p);
7484
7485   *use->op_p = ref;
7486 }
7487
7488 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7489    candidate CAND.  */
7490
7491 static void
7492 rewrite_use_compare (struct ivopts_data *data,
7493                      struct iv_use *use, struct iv_cand *cand)
7494 {
7495   tree comp, op, bound;
7496   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7497   enum tree_code compare;
7498   struct iv_group *group = data->vgroups[use->group_id];
7499   class cost_pair *cp = get_group_iv_cost (data, group, cand);
7500
7501   bound = cp->value;
7502   if (bound)
7503     {
7504       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7505       tree var_type = TREE_TYPE (var);
7506       gimple_seq stmts;
7507
7508       if (dump_file && (dump_flags & TDF_DETAILS))
7509         {
7510           fprintf (dump_file, "Replacing exit test: ");
7511           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7512         }
7513       compare = cp->comp;
7514       bound = unshare_expr (fold_convert (var_type, bound));
7515       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7516       if (stmts)
7517         gsi_insert_seq_on_edge_immediate (
7518                 loop_preheader_edge (data->current_loop),
7519                 stmts);
7520
7521       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7522       gimple_cond_set_lhs (cond_stmt, var);
7523       gimple_cond_set_code (cond_stmt, compare);
7524       gimple_cond_set_rhs (cond_stmt, op);
7525       return;
7526     }
7527
7528   /* The induction variable elimination failed; just express the original
7529      giv.  */
7530   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7531   gcc_assert (comp != NULL_TREE);
7532   gcc_assert (use->op_p != NULL);
7533   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7534                                          SSA_NAME_VAR (*use->op_p),
7535                                          true, GSI_SAME_STMT);
7536 }
7537
7538 /* Rewrite the groups using the selected induction variables.  */
7539
7540 static void
7541 rewrite_groups (struct ivopts_data *data)
7542 {
7543   unsigned i, j;
7544
7545   for (i = 0; i < data->vgroups.length (); i++)
7546     {
7547       struct iv_group *group = data->vgroups[i];
7548       struct iv_cand *cand = group->selected;
7549
7550       gcc_assert (cand);
7551
7552       if (group->type == USE_NONLINEAR_EXPR)
7553         {
7554           for (j = 0; j < group->vuses.length (); j++)
7555             {
7556               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7557               update_stmt (group->vuses[j]->stmt);
7558             }
7559         }
7560       else if (address_p (group->type))
7561         {
7562           for (j = 0; j < group->vuses.length (); j++)
7563             {
7564               rewrite_use_address (data, group->vuses[j], cand);
7565               update_stmt (group->vuses[j]->stmt);
7566             }
7567         }
7568       else
7569         {
7570           gcc_assert (group->type == USE_COMPARE);
7571
7572           for (j = 0; j < group->vuses.length (); j++)
7573             {
7574               rewrite_use_compare (data, group->vuses[j], cand);
7575               update_stmt (group->vuses[j]->stmt);
7576             }
7577         }
7578     }
7579 }
7580
7581 /* Removes the ivs that are not used after rewriting.  */
7582
7583 static void
7584 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7585 {
7586   unsigned j;
7587   bitmap_iterator bi;
7588
7589   /* Figure out an order in which to release SSA DEFs so that we don't
7590      release something that we'd have to propagate into a debug stmt
7591      afterwards.  */
7592   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7593     {
7594       struct version_info *info;
7595
7596       info = ver_info (data, j);
7597       if (info->iv
7598           && !integer_zerop (info->iv->step)
7599           && !info->inv_id
7600           && !info->iv->nonlin_use
7601           && !info->preserve_biv)
7602         {
7603           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7604
7605           tree def = info->iv->ssa_name;
7606
7607           if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7608             {
7609               imm_use_iterator imm_iter;
7610               use_operand_p use_p;
7611               gimple *stmt;
7612               int count = 0;
7613
7614               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7615                 {
7616                   if (!gimple_debug_bind_p (stmt))
7617                     continue;
7618
7619                   /* We just want to determine whether to do nothing
7620                      (count == 0), to substitute the computed
7621                      expression into a single use of the SSA DEF by
7622                      itself (count == 1), or to use a debug temp
7623                      because the SSA DEF is used multiple times or as
7624                      part of a larger expression (count > 1). */
7625                   count++;
7626                   if (gimple_debug_bind_get_value (stmt) != def)
7627                     count++;
7628
7629                   if (count > 1)
7630                     break;
7631                 }
7632
7633               if (!count)
7634                 continue;
7635
7636               struct iv_use dummy_use;
7637               struct iv_cand *best_cand = NULL, *cand;
7638               unsigned i, best_pref = 0, cand_pref;
7639               tree comp = NULL_TREE;
7640
7641               memset (&dummy_use, 0, sizeof (dummy_use));
7642               dummy_use.iv = info->iv;
7643               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7644                 {
7645                   cand = data->vgroups[i]->selected;
7646                   if (cand == best_cand)
7647                     continue;
7648                   cand_pref = operand_equal_p (cand->iv->step,
7649                                                info->iv->step, 0)
7650                     ? 4 : 0;
7651                   cand_pref
7652                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7653                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7654                     ? 2 : 0;
7655                   cand_pref
7656                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7657                     ? 1 : 0;
7658                   if (best_cand == NULL || best_pref < cand_pref)
7659                     {
7660                       tree this_comp
7661                         = get_debug_computation_at (data->current_loop,
7662                                                     SSA_NAME_DEF_STMT (def),
7663                                                     &dummy_use, cand);
7664                       if (this_comp)
7665                         {
7666                           best_cand = cand;
7667                           best_pref = cand_pref;
7668                           comp = this_comp;
7669                         }
7670                     }
7671                 }
7672
7673               if (!best_cand)
7674                 continue;
7675
7676               comp = unshare_expr (comp);
7677               if (count > 1)
7678                 {
7679                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7680                   DECL_ARTIFICIAL (vexpr) = 1;
7681                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7682                   if (SSA_NAME_VAR (def))
7683                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7684                   else
7685                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7686                   gdebug *def_temp
7687                     = gimple_build_debug_bind (vexpr, comp, NULL);
7688                   gimple_stmt_iterator gsi;
7689
7690                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7691                     gsi = gsi_after_labels (gimple_bb
7692                                             (SSA_NAME_DEF_STMT (def)));
7693                   else
7694                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7695
7696                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7697                   comp = vexpr;
7698                 }
7699
7700               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7701                 {
7702                   if (!gimple_debug_bind_p (stmt))
7703                     continue;
7704
7705                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7706                     SET_USE (use_p, comp);
7707
7708                   update_stmt (stmt);
7709                 }
7710             }
7711         }
7712     }
7713 }
7714
7715 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7716    for hash_map::traverse.  */
7717
7718 bool
7719 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7720 {
7721   free (value);
7722   return true;
7723 }
7724
7725 /* Frees data allocated by the optimization of a single loop.  */
7726
7727 static void
7728 free_loop_data (struct ivopts_data *data)
7729 {
7730   unsigned i, j;
7731   bitmap_iterator bi;
7732   tree obj;
7733
7734   if (data->niters)
7735     {
7736       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7737       delete data->niters;
7738       data->niters = NULL;
7739     }
7740
7741   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7742     {
7743       struct version_info *info;
7744
7745       info = ver_info (data, i);
7746       info->iv = NULL;
7747       info->has_nonlin_use = false;
7748       info->preserve_biv = false;
7749       info->inv_id = 0;
7750     }
7751   bitmap_clear (data->relevant);
7752   bitmap_clear (data->important_candidates);
7753
7754   for (i = 0; i < data->vgroups.length (); i++)
7755     {
7756       struct iv_group *group = data->vgroups[i];
7757
7758       for (j = 0; j < group->vuses.length (); j++)
7759         free (group->vuses[j]);
7760       group->vuses.release ();
7761
7762       BITMAP_FREE (group->related_cands);
7763       for (j = 0; j < group->n_map_members; j++)
7764         {
7765           if (group->cost_map[j].inv_vars)
7766             BITMAP_FREE (group->cost_map[j].inv_vars);
7767           if (group->cost_map[j].inv_exprs)
7768             BITMAP_FREE (group->cost_map[j].inv_exprs);
7769         }
7770
7771       free (group->cost_map);
7772       free (group);
7773     }
7774   data->vgroups.truncate (0);
7775
7776   for (i = 0; i < data->vcands.length (); i++)
7777     {
7778       struct iv_cand *cand = data->vcands[i];
7779
7780       if (cand->inv_vars)
7781         BITMAP_FREE (cand->inv_vars);
7782       if (cand->inv_exprs)
7783         BITMAP_FREE (cand->inv_exprs);
7784       free (cand);
7785     }
7786   data->vcands.truncate (0);
7787
7788   if (data->version_info_size < num_ssa_names)
7789     {
7790       data->version_info_size = 2 * num_ssa_names;
7791       free (data->version_info);
7792       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7793     }
7794
7795   data->max_inv_var_id = 0;
7796   data->max_inv_expr_id = 0;
7797
7798   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7799     SET_DECL_RTL (obj, NULL_RTX);
7800
7801   decl_rtl_to_reset.truncate (0);
7802
7803   data->inv_expr_tab->empty ();
7804
7805   data->iv_common_cand_tab->empty ();
7806   data->iv_common_cands.truncate (0);
7807 }
7808
7809 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7810    loop tree.  */
7811
7812 static void
7813 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7814 {
7815   free_loop_data (data);
7816   free (data->version_info);
7817   BITMAP_FREE (data->relevant);
7818   BITMAP_FREE (data->important_candidates);
7819
7820   decl_rtl_to_reset.release ();
7821   data->vgroups.release ();
7822   data->vcands.release ();
7823   delete data->inv_expr_tab;
7824   data->inv_expr_tab = NULL;
7825   free_affine_expand_cache (&data->name_expansion_cache);
7826   if (data->base_object_map)
7827     delete data->base_object_map;
7828   delete data->iv_common_cand_tab;
7829   data->iv_common_cand_tab = NULL;
7830   data->iv_common_cands.release ();
7831   obstack_free (&data->iv_obstack, NULL);
7832 }
7833
7834 /* Returns true if the loop body BODY includes any function calls.  */
7835
7836 static bool
7837 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7838 {
7839   gimple_stmt_iterator gsi;
7840   unsigned i;
7841
7842   for (i = 0; i < num_nodes; i++)
7843     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7844       {
7845         gimple *stmt = gsi_stmt (gsi);
7846         if (is_gimple_call (stmt)
7847             && !gimple_call_internal_p (stmt)
7848             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7849           return true;
7850       }
7851   return false;
7852 }
7853
7854 /* Determine cost scaling factor for basic blocks in loop.  */
7855 #define COST_SCALING_FACTOR_BOUND (20)
7856
7857 static void
7858 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7859 {
7860   int lfreq = data->current_loop->header->count.to_frequency (cfun);
7861   if (!data->speed || lfreq <= 0)
7862     return;
7863
7864   int max_freq = lfreq;
7865   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7866     {
7867       body[i]->aux = (void *)(intptr_t) 1;
7868       if (max_freq < body[i]->count.to_frequency (cfun))
7869         max_freq = body[i]->count.to_frequency (cfun);
7870     }
7871   if (max_freq > lfreq)
7872     {
7873       int divisor, factor;
7874       /* Check if scaling factor itself needs to be scaled by the bound.  This
7875          is to avoid overflow when scaling cost according to profile info.  */
7876       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7877         {
7878           divisor = max_freq;
7879           factor = COST_SCALING_FACTOR_BOUND;
7880         }
7881       else
7882         {
7883           divisor = lfreq;
7884           factor = 1;
7885         }
7886       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7887         {
7888           int bfreq = body[i]->count.to_frequency (cfun);
7889           if (bfreq <= lfreq)
7890             continue;
7891
7892           body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7893         }
7894     }
7895 }
7896
7897 /* Find doloop comparison use and set its doloop_p on if found.  */
7898
7899 static bool
7900 find_doloop_use (struct ivopts_data *data)
7901 {
7902   struct loop *loop = data->current_loop;
7903
7904   for (unsigned i = 0; i < data->vgroups.length (); i++)
7905     {
7906       struct iv_group *group = data->vgroups[i];
7907       if (group->type == USE_COMPARE)
7908         {
7909           gcc_assert (group->vuses.length () == 1);
7910           struct iv_use *use = group->vuses[0];
7911           gimple *stmt = use->stmt;
7912           if (gimple_code (stmt) == GIMPLE_COND)
7913             {
7914               basic_block bb = gimple_bb (stmt);
7915               edge true_edge, false_edge;
7916               extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7917               /* This comparison is used for loop latch.  Require latch is empty
7918                  for now.  */
7919               if ((loop->latch == true_edge->dest
7920                    || loop->latch == false_edge->dest)
7921                   && empty_block_p (loop->latch))
7922                 {
7923                   group->doloop_p = true;
7924                   if (dump_file && (dump_flags & TDF_DETAILS))
7925                     {
7926                       fprintf (dump_file, "Doloop cmp iv use: ");
7927                       print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
7928                     }
7929                   return true;
7930                 }
7931             }
7932         }
7933     }
7934
7935   return false;
7936 }
7937
7938 /* For the targets which support doloop, to predict whether later RTL doloop
7939    transformation will perform on this loop, further detect the doloop use and
7940    mark the flag doloop_use_p if predicted.  */
7941
7942 void
7943 analyze_and_mark_doloop_use (struct ivopts_data *data)
7944 {
7945   data->doloop_use_p = false;
7946
7947   if (!flag_branch_on_count_reg)
7948     return;
7949
7950   if (data->current_loop->unroll == USHRT_MAX)
7951     return;
7952
7953   if (!generic_predict_doloop_p (data))
7954     return;
7955
7956   if (find_doloop_use (data))
7957     {
7958       data->doloop_use_p = true;
7959       if (dump_file && (dump_flags & TDF_DETAILS))
7960         {
7961           struct loop *loop = data->current_loop;
7962           fprintf (dump_file,
7963                    "Predict loop %d can perform"
7964                    " doloop optimization later.\n",
7965                    loop->num);
7966           flow_loop_dump (loop, dump_file, NULL, 1);
7967         }
7968     }
7969 }
7970
7971 /* Optimizes the LOOP.  Returns true if anything changed.  */
7972
7973 static bool
7974 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7975                            bitmap toremove)
7976 {
7977   bool changed = false;
7978   class iv_ca *iv_ca;
7979   edge exit = single_dom_exit (loop);
7980   basic_block *body;
7981
7982   gcc_assert (!data->niters);
7983   data->current_loop = loop;
7984   data->loop_loc = find_loop_location (loop).get_location_t ();
7985   data->speed = optimize_loop_for_speed_p (loop);
7986
7987   if (dump_file && (dump_flags & TDF_DETAILS))
7988     {
7989       fprintf (dump_file, "Processing loop %d", loop->num);
7990       if (data->loop_loc != UNKNOWN_LOCATION)
7991         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7992                  LOCATION_LINE (data->loop_loc));
7993       fprintf (dump_file, "\n");
7994
7995       if (exit)
7996         {
7997           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7998                    exit->src->index, exit->dest->index);
7999           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8000           fprintf (dump_file, "\n");
8001         }
8002
8003       fprintf (dump_file, "\n");
8004     }
8005
8006   body = get_loop_body (loop);
8007   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8008   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8009
8010   data->loop_single_exit_p
8011     = exit != NULL && loop_only_exit_p (loop, body, exit);
8012
8013   /* For each ssa name determines whether it behaves as an induction variable
8014      in some loop.  */
8015   if (!find_induction_variables (data))
8016     goto finish;
8017
8018   /* Finds interesting uses (item 1).  */
8019   find_interesting_uses (data);
8020   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8021     goto finish;
8022
8023   /* Determine cost scaling factor for basic blocks in loop.  */
8024   determine_scaling_factor (data, body);
8025
8026   /* Analyze doloop possibility and mark the doloop use if predicted.  */
8027   analyze_and_mark_doloop_use (data);
8028
8029   /* Finds candidates for the induction variables (item 2).  */
8030   find_iv_candidates (data);
8031
8032   /* Calculates the costs (item 3, part 1).  */
8033   determine_iv_costs (data);
8034   determine_group_iv_costs (data);
8035   determine_set_costs (data);
8036
8037   /* Find the optimal set of induction variables (item 3, part 2).  */
8038   iv_ca = find_optimal_iv_set (data);
8039   /* Cleanup basic block aux field.  */
8040   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8041     body[i]->aux = NULL;
8042   if (!iv_ca)
8043     goto finish;
8044   changed = true;
8045
8046   /* Create the new induction variables (item 4, part 1).  */
8047   create_new_ivs (data, iv_ca);
8048   iv_ca_free (&iv_ca);
8049
8050   /* Rewrite the uses (item 4, part 2).  */
8051   rewrite_groups (data);
8052
8053   /* Remove the ivs that are unused after rewriting.  */
8054   remove_unused_ivs (data, toremove);
8055
8056 finish:
8057   free (body);
8058   free_loop_data (data);
8059
8060   return changed;
8061 }
8062
8063 /* Main entry point.  Optimizes induction variables in loops.  */
8064
8065 void
8066 tree_ssa_iv_optimize (void)
8067 {
8068   class loop *loop;
8069   struct ivopts_data data;
8070   auto_bitmap toremove;
8071
8072   tree_ssa_iv_optimize_init (&data);
8073
8074   /* Optimize the loops starting with the innermost ones.  */
8075   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8076     {
8077       if (!dbg_cnt (ivopts_loop))
8078         continue;
8079
8080       if (dump_file && (dump_flags & TDF_DETAILS))
8081         flow_loop_dump (loop, dump_file, NULL, 1);
8082
8083       tree_ssa_iv_optimize_loop (&data, loop, toremove);
8084     }
8085
8086   /* Remove eliminated IV defs.  */
8087   release_defs_bitset (toremove);
8088
8089   /* We have changed the structure of induction variables; it might happen
8090      that definitions in the scev database refer to some of them that were
8091      eliminated.  */
8092   scev_reset_htab ();
8093   /* Likewise niter and control-IV information.  */
8094   free_numbers_of_iterations_estimates (cfun);
8095
8096   tree_ssa_iv_optimize_finalize (&data);
8097 }
8098
8099 #include "gt-tree-ssa-loop-ivopts.h"