gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56
57 /* This is a simple global reassociation pass. It is, in part, based
58 on the LLVM pass of the same name (They do some things more/less
59 than we do, in different orders, etc).
60
61 It consists of five steps:
62
63 1. Breaking up subtract operations into addition + negate, where
64 it would promote the reassociation of adds.
65
66 2. Left linearization of the expression trees, so that (A+B)+(C+D)
67 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
68 During linearization, we place the operands of the binary
69 expressions into a vector of operand_entry_*
70
71 3. Optimization of the operand lists, eliminating things like a +
72 -a, a & a, etc.
73
74 3a. Combine repeated factors with the same occurrence counts
75 into a __builtin_powi call that will later be optimized into
76 an optimal number of multiplies.
77
78 4. Rewrite the expression trees we linearized and optimized so
79 they are in proper rank order.
80
81 5. Repropagate negates, as nothing else will clean them up ATM.
82
83 A bit of theory on #4, since nobody seems to write anything down
84 about why it makes sense to do it the way they do it:
85
86 We could do this much nicer theoretically, but don't (for reasons
87 explained after how to do it theoretically nice :P).
88
89 In order to promote the most redundancy elimination, you want
90 binary expressions whose operands are the same rank (or
91 preferably, the same value) exposed to the redundancy eliminator,
92 for possible elimination.
93
94 So the way to do this if we really cared, is to build the new op
95 tree from the leaves to the roots, merging as you go, and putting the
96 new op on the end of the worklist, until you are left with one
97 thing on the worklist.
98
99 IE if you have to rewrite the following set of operands (listed with
100 rank in parentheses), with opcode PLUS_EXPR:
101
102 a (1), b (1), c (1), d (2), e (2)
103
104
105 We start with our merge worklist empty, and the ops list with all of
106 those on it.
107
108 You want to first merge all leaves of the same rank, as much as
109 possible.
110
111 So first build a binary op of
112
113 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114
115 Because there is no three operand form of PLUS_EXPR, c is not going to
116 be exposed to redundancy elimination as a rank 1 operand.
117
118 So you might as well throw it on the merge worklist (you could also
119 consider it to now be a rank two operand, and merge it with d and e,
120 but in this case, you then have evicted e from a binary op. So at
121 least in this situation, you can't win.)
122
123 Then build a binary op of d + e
124 mergetmp2 = d + e
125
126 and put mergetmp2 on the merge worklist.
127
128 so merge worklist = {mergetmp, c, mergetmp2}
129
130 Continue building binary ops of these operations until you have only
131 one operation left on the worklist.
132
133 So we have
134
135 build binary op
136 mergetmp3 = mergetmp + c
137
138 worklist = {mergetmp2, mergetmp3}
139
140 mergetmp4 = mergetmp2 + mergetmp3
141
142 worklist = {mergetmp4}
143
144 because we have one operation left, we can now just set the original
145 statement equal to the result of that operation.
146
147 This will at least expose a + b and d + e to redundancy elimination
148 as binary operations.
149
150 For extra points, you can reuse the old statements to build the
151 mergetmps, since you shouldn't run out.
152
153 So why don't we do this?
154
155 Because it's expensive, and rarely will help. Most trees we are
156 reassociating have 3 or fewer ops. If they have 2 ops, they already
157 will be written into a nice single binary op. If you have 3 ops, a
158 single simple check suffices to tell you whether the first two are of the
159 same rank. If so, you know to order it
160
161 mergetmp = op1 + op2
162 newstmt = mergetmp + op3
163
164 instead of
165 mergetmp = op2 + op3
166 newstmt = mergetmp + op1
167
168 If all three are of the same rank, you can't expose them all in a
169 single binary operator anyway, so the above is *still* the best you
170 can do.
171
172 Thus, this is what we do. When we have three ops left, we check to see
173 what order to put them in, and call it a day. As a nod to vector sum
174 reduction, we check if any of the ops are really a phi node that is a
175 destructive update for the associating op, and keep the destructive
176 update together for vector sum reduction recognition. */
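
/* Editorial illustration only (not part of GCC): a minimal, self-contained
   sketch of the leaves-to-roots merge worklist described above, using plain
   ints as stand-in ranks.  The first entry is paired with another entry of
   equal rank when one exists, otherwise with its neighbour; the merged
   entry gets rank max + 1, mimicking what a fresh binary op would get.
   The pass itself deliberately does NOT do this (see "So why don't we do
   this?" above); the sketch only clarifies the idea.  */

static int
reassoc_demo_merge_worklist (int *ranks, int n)
{
  while (n > 1)
    {
      int i = 0, j = 1, k;
      /* Prefer a partner of the same rank as entry 0.  */
      for (k = 1; k < n; k++)
	if (ranks[k] == ranks[0])
	  {
	    j = k;
	    break;
	  }
      int merged = (ranks[i] > ranks[j] ? ranks[i] : ranks[j]) + 1;
      /* Remove entry j, then replace entry i with the merged entry.  */
      ranks[j] = ranks[n - 1];
      n--;
      ranks[i] = merged;
    }
  return ranks[0];
}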
177
178 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
179 point 3a in the pass header comment. */
180 static bool reassoc_insert_powi_p;
181
182 /* Statistics */
183 static struct
184 {
185 int linearized;
186 int constants_eliminated;
187 int ops_eliminated;
188 int rewritten;
189 int pows_encountered;
190 int pows_created;
191 } reassociate_stats;
192
193
194 static object_allocator<operand_entry> operand_entry_pool
195 ("operand entry pool");
196
197 /* This is used to assign a unique ID to each struct operand_entry
198 so that qsort results are identical on different hosts. */
199 static unsigned int next_operand_entry_id;
200
201 /* Starting rank number for a given basic block, so that we can rank
202 operations using unmovable instructions in that BB based on the bb
203 depth. */
204 static int64_t *bb_rank;
205
206 /* Operand->rank hashtable. */
207 static hash_map<tree, int64_t> *operand_rank;
208
209 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
210 all basic blocks, the CFG should be adjusted - the basic block
211 split right after that SSA_NAME's definition statement and before
212 its only use, which must be a bit ior. */
213 static vec<tree> reassoc_branch_fixups;
214
215 /* Forward decls. */
216 static int64_t get_rank (tree);
217 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
218
219 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
220 possibly added by gsi_remove. */
221
222 static bool
223 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
224 {
225 gimple *stmt = gsi_stmt (*gsi);
226
227 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
228 return gsi_remove (gsi, true);
229
230 gimple_stmt_iterator prev = *gsi;
231 gsi_prev (&prev);
232 unsigned uid = gimple_uid (stmt);
233 basic_block bb = gimple_bb (stmt);
234 bool ret = gsi_remove (gsi, true);
235 if (!gsi_end_p (prev))
236 gsi_next (&prev);
237 else
238 prev = gsi_start_bb (bb);
239 gimple *end_stmt = gsi_stmt (*gsi);
240 while ((stmt = gsi_stmt (prev)) != end_stmt)
241 {
242 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
243 gimple_set_uid (stmt, uid);
244 gsi_next (&prev);
245 }
246 return ret;
247 }
248
249 /* Bias amount for loop-carried phis. We want this to be larger than
250 the depth of any reassociation tree we can see, but not larger than
251 the rank difference between two blocks. */
252 #define PHI_LOOP_BIAS (1 << 15)
253
254 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
255 an innermost loop, and the phi has only a single use which is inside
256 the loop, then the rank is the block rank of the loop latch plus an
257 extra bias for the loop-carried dependence. This causes expressions
258 calculated into an accumulator variable to be independent for each
259 iteration of the loop. If STMT is some other phi, the rank is the
260 block rank of its containing block. */
261 static int64_t
262 phi_rank (gimple *stmt)
263 {
264 basic_block bb = gimple_bb (stmt);
265 class loop *father = bb->loop_father;
266 tree res;
267 unsigned i;
268 use_operand_p use;
269 gimple *use_stmt;
270
271 /* We only care about real loops (those with a latch). */
272 if (!father->latch)
273 return bb_rank[bb->index];
274
275 /* Interesting phis must be in headers of innermost loops. */
276 if (bb != father->header
277 || father->inner)
278 return bb_rank[bb->index];
279
280 /* Ignore virtual SSA_NAMEs. */
281 res = gimple_phi_result (stmt);
282 if (virtual_operand_p (res))
283 return bb_rank[bb->index];
284
285 /* The phi definition must have a single use, and that use must be
286 within the loop. Otherwise this isn't an accumulator pattern. */
287 if (!single_imm_use (res, &use, &use_stmt)
288 || gimple_bb (use_stmt)->loop_father != father)
289 return bb_rank[bb->index];
290
291 /* Look for phi arguments from within the loop. If found, bias this phi. */
292 for (i = 0; i < gimple_phi_num_args (stmt); i++)
293 {
294 tree arg = gimple_phi_arg_def (stmt, i);
295 if (TREE_CODE (arg) == SSA_NAME
296 && !SSA_NAME_IS_DEFAULT_DEF (arg))
297 {
298 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
299 if (gimple_bb (def_stmt)->loop_father == father)
300 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
301 }
302 }
303
304 /* Must be an uninteresting phi. */
305 return bb_rank[bb->index];
306 }
307
308 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
309 loop-carried dependence of an innermost loop, return TRUE; else
310 return FALSE. */
311 static bool
312 loop_carried_phi (tree exp)
313 {
314 gimple *phi_stmt;
315 int64_t block_rank;
316
317 if (TREE_CODE (exp) != SSA_NAME
318 || SSA_NAME_IS_DEFAULT_DEF (exp))
319 return false;
320
321 phi_stmt = SSA_NAME_DEF_STMT (exp);
322
323 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
324 return false;
325
326 /* Non-loop-carried phis have block rank. Loop-carried phis have
327 an additional bias added in. If this phi doesn't have block rank,
328 it's biased and should not be propagated. */
329 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
330
331 if (phi_rank (phi_stmt) != block_rank)
332 return true;
333
334 return false;
335 }
336
337 /* Return the maximum of RANK and the rank that should be propagated
338 from expression OP. For most operands, this is just the rank of OP.
339 For loop-carried phis, the value is zero to avoid undoing the bias
340 in favor of the phi. */
341 static int64_t
342 propagate_rank (int64_t rank, tree op)
343 {
344 int64_t op_rank;
345
346 if (loop_carried_phi (op))
347 return rank;
348
349 op_rank = get_rank (op);
350
351 return MAX (rank, op_rank);
352 }
353
354 /* Look up the operand rank structure for expression E. */
355
356 static inline int64_t
357 find_operand_rank (tree e)
358 {
359 int64_t *slot = operand_rank->get (e);
360 return slot ? *slot : -1;
361 }
362
363 /* Insert {E,RANK} into the operand rank hashtable. */
364
365 static inline void
366 insert_operand_rank (tree e, int64_t rank)
367 {
368 gcc_assert (rank > 0);
369 gcc_assert (!operand_rank->put (e, rank));
370 }
371
372 /* Given an expression E, return the rank of the expression. */
373
374 static int64_t
375 get_rank (tree e)
376 {
377 /* SSA_NAMEs have the rank of the expression they are the result
378 of.
379 For globals and uninitialized values, the rank is 0.
380 For function arguments, use the pre-setup rank.
381 For PHI nodes, stores, asm statements, etc, we use the rank of
382 the BB.
383 For simple operations, the rank is the maximum rank of any of
384 its operands, or the bb_rank, whichever is less.
385 I make no claims that this is optimal, however, it gives good
386 results. */
387
388 /* We make an exception to the normal ranking system to break
389 dependences of accumulator variables in loops. Suppose we
390 have a simple one-block loop containing:
391
392 x_1 = phi(x_0, x_2)
393 b = a + x_1
394 c = b + d
395 x_2 = c + e
396
397 As shown, each iteration of the calculation into x is fully
398 dependent upon the iteration before it. We would prefer to
399 see this in the form:
400
401 x_1 = phi(x_0, x_2)
402 b = a + d
403 c = b + e
404 x_2 = c + x_1
405
406 If the loop is unrolled, the calculations of b and c from
407 different iterations can be interleaved.
408
409 To obtain this result during reassociation, we bias the rank
410 of the phi definition x_1 upward, when it is recognized as an
411 accumulator pattern. The artificial rank causes it to be
412 added last, providing the desired independence. */
413
414 if (TREE_CODE (e) == SSA_NAME)
415 {
416 ssa_op_iter iter;
417 gimple *stmt;
418 int64_t rank;
419 tree op;
420
421 /* If we already have a rank for this expression, use that. */
422 rank = find_operand_rank (e);
423 if (rank != -1)
424 return rank;
425
426 stmt = SSA_NAME_DEF_STMT (e);
427 if (gimple_code (stmt) == GIMPLE_PHI)
428 rank = phi_rank (stmt);
429
430 else if (!is_gimple_assign (stmt))
431 rank = bb_rank[gimple_bb (stmt)->index];
432
433 else
434 {
435 /* Otherwise, find the maximum rank for the operands. As an
436 exception, remove the bias from loop-carried phis when propagating
437 the rank so that dependent operations are not also biased. */
438 /* Simply walk over all SSA uses - this takes advantage of the
439 fact that non-SSA operands are is_gimple_min_invariant and
440 thus have rank 0. */
441 rank = 0;
442 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
443 rank = propagate_rank (rank, op);
444
445 rank += 1;
446 }
447
448 if (dump_file && (dump_flags & TDF_DETAILS))
449 {
450 fprintf (dump_file, "Rank for ");
451 print_generic_expr (dump_file, e);
452 fprintf (dump_file, " is %" PRId64 "\n", rank);
453 }
454
455 /* Note the rank in the hashtable so we don't recompute it. */
456 insert_operand_rank (e, rank);
457 return rank;
458 }
459
460 /* Constants, globals, etc., are rank 0 */
461 return 0;
462 }
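
/* Editorial illustration only (not part of GCC): the rank rule that the
   comments in get_rank describe, modelled on a tiny hand-rolled expression
   tree of plain structs.  Leaves carry a preset rank (0 for constants and
   globals, a pre-computed value for parameters and unmovable defs); an
   interior operation gets 1 + the maximum rank of its operands.  All names
   below are hypothetical demo names, not GCC interfaces.  */

struct reassoc_demo_expr
{
  bool is_leaf;			   /* True for constants, params, etc.  */
  int64_t leaf_rank;		   /* Rank when IS_LEAF.  */
  struct reassoc_demo_expr *op[2]; /* Operands otherwise; op[1] may be null.  */
};

static int64_t
reassoc_demo_rank (const struct reassoc_demo_expr *e)
{
  if (e->is_leaf)
    return e->leaf_rank;
  int64_t r0 = reassoc_demo_rank (e->op[0]);
  int64_t r1 = e->op[1] ? reassoc_demo_rank (e->op[1]) : 0;
  return MAX (r0, r1) + 1;
}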
463
464
465 /* We want integer ones to end up last no matter what, since they are
466 the ones we can do the most with. */
467 #define INTEGER_CONST_TYPE 1 << 4
468 #define FLOAT_ONE_CONST_TYPE 1 << 3
469 #define FLOAT_CONST_TYPE 1 << 2
470 #define OTHER_CONST_TYPE 1 << 1
471
472 /* Classify an invariant tree into integer, float, or other, so that
473 we can sort them to be near other constants of the same type. */
474 static inline int
475 constant_type (tree t)
476 {
477 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
478 return INTEGER_CONST_TYPE;
479 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
480 {
481 /* Sort -1.0 and 1.0 constants last: while in some cases
482 const_binop can't optimize some inexact operations, multiplication
483 by -1.0 or 1.0 can always be merged with other operands. */
484 if (real_onep (t) || real_minus_onep (t))
485 return FLOAT_ONE_CONST_TYPE;
486 return FLOAT_CONST_TYPE;
487 }
488 else
489 return OTHER_CONST_TYPE;
490 }
491
492 /* qsort comparison function to sort operand entries PA and PB by rank
493 so that the sorted array is ordered by rank in decreasing order. */
494 static int
495 sort_by_operand_rank (const void *pa, const void *pb)
496 {
497 const operand_entry *oea = *(const operand_entry *const *)pa;
498 const operand_entry *oeb = *(const operand_entry *const *)pb;
499
500 if (oeb->rank != oea->rank)
501 return oeb->rank > oea->rank ? 1 : -1;
502
503 /* It's nicer for optimize_expression if constants that are likely
504 to fold when added/multiplied/whatever are put next to each
505 other. Since all constants have rank 0, order them by type. */
506 if (oea->rank == 0)
507 {
508 if (constant_type (oeb->op) != constant_type (oea->op))
509 return constant_type (oea->op) - constant_type (oeb->op);
510 else
511 /* To make sorting result stable, we use unique IDs to determine
512 order. */
513 return oeb->id > oea->id ? 1 : -1;
514 }
515
516 if (TREE_CODE (oea->op) != SSA_NAME)
517 {
518 if (TREE_CODE (oeb->op) != SSA_NAME)
519 return oeb->id > oea->id ? 1 : -1;
520 else
521 return 1;
522 }
523 else if (TREE_CODE (oeb->op) != SSA_NAME)
524 return -1;
525
526 /* Lastly, make sure the versions that are the same go next to each
527 other. */
528 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
529 {
530 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
531 versions of removed SSA_NAMEs, so if possible, prefer to sort
532 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
533 See PR60418. */
534 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
535 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
536 basic_block bba = gimple_bb (stmta);
537 basic_block bbb = gimple_bb (stmtb);
538 if (bbb != bba)
539 {
540 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
541 but the other might not. */
542 if (!bba)
543 return 1;
544 if (!bbb)
545 return -1;
546 /* If neither is, compare bb_rank. */
547 if (bb_rank[bbb->index] != bb_rank[bba->index])
548 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
549 }
550
551 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
552 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
553 if (da != db)
554 return da ? 1 : -1;
555
556 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
557 }
558
559 return oeb->id > oea->id ? 1 : -1;
560 }
561
562 /* Add an operand entry to *OPS for the tree operand OP. */
563
564 static void
565 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
566 {
567 operand_entry *oe = operand_entry_pool.allocate ();
568
569 oe->op = op;
570 oe->rank = get_rank (op);
571 oe->id = next_operand_entry_id++;
572 oe->count = 1;
573 oe->stmt_to_insert = stmt_to_insert;
574 ops->safe_push (oe);
575 }
576
577 /* Add an operand entry to *OPS for the tree operand OP with repeat
578 count REPEAT. */
579
580 static void
581 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
582 HOST_WIDE_INT repeat)
583 {
584 operand_entry *oe = operand_entry_pool.allocate ();
585
586 oe->op = op;
587 oe->rank = get_rank (op);
588 oe->id = next_operand_entry_id++;
589 oe->count = repeat;
590 oe->stmt_to_insert = NULL;
591 ops->safe_push (oe);
592
593 reassociate_stats.pows_encountered++;
594 }
595
596 /* Return true if STMT is a reassociable operation containing a binary
597 operation with tree code CODE, and is inside LOOP. */
598
599 static bool
600 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
601 {
602 basic_block bb = gimple_bb (stmt);
603
604 if (gimple_bb (stmt) == NULL)
605 return false;
606
607 if (!flow_bb_inside_loop_p (loop, bb))
608 return false;
609
610 if (is_gimple_assign (stmt)
611 && gimple_assign_rhs_code (stmt) == code
612 && has_single_use (gimple_assign_lhs (stmt)))
613 {
614 tree rhs1 = gimple_assign_rhs1 (stmt);
615 tree rhs2 = gimple_assign_rhs2 (stmt);
616 if (TREE_CODE (rhs1) == SSA_NAME
617 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
618 return false;
619 if (rhs2
620 && TREE_CODE (rhs2) == SSA_NAME
621 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
622 return false;
623 return true;
624 }
625
626 return false;
627 }
628
629
630 /* Return true if STMT is a nop-conversion. */
631
632 static bool
633 gimple_nop_conversion_p (gimple *stmt)
634 {
635 if (gassign *ass = dyn_cast <gassign *> (stmt))
636 {
637 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
638 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
639 TREE_TYPE (gimple_assign_rhs1 (ass))))
640 return true;
641 }
642 return false;
643 }
644
645 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
646 operand of that operation. Otherwise, return NULL. */
647
648 static tree
649 get_unary_op (tree name, enum tree_code opcode)
650 {
651 gimple *stmt = SSA_NAME_DEF_STMT (name);
652
653 /* Look through nop conversions (sign changes). */
654 if (gimple_nop_conversion_p (stmt)
655 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
656 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
657
658 if (!is_gimple_assign (stmt))
659 return NULL_TREE;
660
661 if (gimple_assign_rhs_code (stmt) == opcode)
662 return gimple_assign_rhs1 (stmt);
663 return NULL_TREE;
664 }
665
666 /* Return true if OP1 and OP2 have the same value if casted to either type. */
667
668 static bool
669 ops_equal_values_p (tree op1, tree op2)
670 {
671 if (op1 == op2)
672 return true;
673
674 tree orig_op1 = op1;
675 if (TREE_CODE (op1) == SSA_NAME)
676 {
677 gimple *stmt = SSA_NAME_DEF_STMT (op1);
678 if (gimple_nop_conversion_p (stmt))
679 {
680 op1 = gimple_assign_rhs1 (stmt);
681 if (op1 == op2)
682 return true;
683 }
684 }
685
686 if (TREE_CODE (op2) == SSA_NAME)
687 {
688 gimple *stmt = SSA_NAME_DEF_STMT (op2);
689 if (gimple_nop_conversion_p (stmt))
690 {
691 op2 = gimple_assign_rhs1 (stmt);
692 if (op1 == op2
693 || orig_op1 == op2)
694 return true;
695 }
696 }
697
698 return false;
699 }
700
701
702 /* If CURR and LAST are a pair of ops that OPCODE allows us to
703 eliminate through equivalences, do so, remove them from OPS, and
704 return true. Otherwise, return false. */
705
706 static bool
707 eliminate_duplicate_pair (enum tree_code opcode,
708 vec<operand_entry *> *ops,
709 bool *all_done,
710 unsigned int i,
711 operand_entry *curr,
712 operand_entry *last)
713 {
714
715 /* If we have two of the same op, and the opcode is & |, min, or max,
716 we can eliminate one of them.
717 If we have two of the same op, and the opcode is ^, we can
718 eliminate both of them. */
719
720 if (last && last->op == curr->op)
721 {
722 switch (opcode)
723 {
724 case MAX_EXPR:
725 case MIN_EXPR:
726 case BIT_IOR_EXPR:
727 case BIT_AND_EXPR:
728 if (dump_file && (dump_flags & TDF_DETAILS))
729 {
730 fprintf (dump_file, "Equivalence: ");
731 print_generic_expr (dump_file, curr->op);
732 fprintf (dump_file, " [&|minmax] ");
733 print_generic_expr (dump_file, last->op);
734 fprintf (dump_file, " -> ");
735 print_generic_stmt (dump_file, last->op);
736 }
737
738 ops->ordered_remove (i);
739 reassociate_stats.ops_eliminated ++;
740
741 return true;
742
743 case BIT_XOR_EXPR:
744 if (dump_file && (dump_flags & TDF_DETAILS))
745 {
746 fprintf (dump_file, "Equivalence: ");
747 print_generic_expr (dump_file, curr->op);
748 fprintf (dump_file, " ^ ");
749 print_generic_expr (dump_file, last->op);
750 fprintf (dump_file, " -> nothing\n");
751 }
752
753 reassociate_stats.ops_eliminated += 2;
754
755 if (ops->length () == 2)
756 {
757 ops->truncate (0);
758 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
759 *all_done = true;
760 }
761 else
762 {
763 ops->ordered_remove (i-1);
764 ops->ordered_remove (i-1);
765 }
766
767 return true;
768
769 default:
770 break;
771 }
772 }
773 return false;
774 }
775
776 static vec<tree> plus_negates;
777
778 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
779 expression, look in OPS for a corresponding positive operation to cancel
780 it out. If we find one, remove the other from OPS, replace
781 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
782 return false. */
783
784 static bool
785 eliminate_plus_minus_pair (enum tree_code opcode,
786 vec<operand_entry *> *ops,
787 unsigned int currindex,
788 operand_entry *curr)
789 {
790 tree negateop;
791 tree notop;
792 unsigned int i;
793 operand_entry *oe;
794
795 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
796 return false;
797
798 negateop = get_unary_op (curr->op, NEGATE_EXPR);
799 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
800 if (negateop == NULL_TREE && notop == NULL_TREE)
801 return false;
802
803 /* Any non-negated version will have a rank that is one less than
804 the current rank. So once we hit those ranks, if we don't find
805 one, we can stop. */
806
807 for (i = currindex + 1;
808 ops->iterate (i, &oe)
809 && oe->rank >= curr->rank - 1 ;
810 i++)
811 {
812 if (negateop
813 && ops_equal_values_p (oe->op, negateop))
814 {
815 if (dump_file && (dump_flags & TDF_DETAILS))
816 {
817 fprintf (dump_file, "Equivalence: ");
818 print_generic_expr (dump_file, negateop);
819 fprintf (dump_file, " + -");
820 print_generic_expr (dump_file, oe->op);
821 fprintf (dump_file, " -> 0\n");
822 }
823
824 ops->ordered_remove (i);
825 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
826 ops->ordered_remove (currindex);
827 reassociate_stats.ops_eliminated ++;
828
829 return true;
830 }
831 else if (notop
832 && ops_equal_values_p (oe->op, notop))
833 {
834 tree op_type = TREE_TYPE (oe->op);
835
836 if (dump_file && (dump_flags & TDF_DETAILS))
837 {
838 fprintf (dump_file, "Equivalence: ");
839 print_generic_expr (dump_file, notop);
840 fprintf (dump_file, " + ~");
841 print_generic_expr (dump_file, oe->op);
842 fprintf (dump_file, " -> -1\n");
843 }
844
845 ops->ordered_remove (i);
846 add_to_ops_vec (ops, build_all_ones_cst (op_type));
847 ops->ordered_remove (currindex);
848 reassociate_stats.ops_eliminated ++;
849
850 return true;
851 }
852 }
853
854 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
855 save it for later inspection in repropagate_negates(). */
856 if (negateop != NULL_TREE
857 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
858 plus_negates.safe_push (curr->op);
859
860 return false;
861 }
862
863 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
864 bitwise not expression, look in OPS for a corresponding operand to
865 cancel it out. If we find one, remove the other from OPS, replace
866 OPS[CURRINDEX] with 0, and return true. Otherwise, return
867 false. */
868
869 static bool
870 eliminate_not_pairs (enum tree_code opcode,
871 vec<operand_entry *> *ops,
872 unsigned int currindex,
873 operand_entry *curr)
874 {
875 tree notop;
876 unsigned int i;
877 operand_entry *oe;
878
879 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
880 || TREE_CODE (curr->op) != SSA_NAME)
881 return false;
882
883 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
884 if (notop == NULL_TREE)
885 return false;
886
887 /* Any non-not version will have a rank that is one less than
888 the current rank. So once we hit those ranks, if we don't find
889 one, we can stop. */
890
891 for (i = currindex + 1;
892 ops->iterate (i, &oe)
893 && oe->rank >= curr->rank - 1;
894 i++)
895 {
896 if (oe->op == notop)
897 {
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 {
900 fprintf (dump_file, "Equivalence: ");
901 print_generic_expr (dump_file, notop);
902 if (opcode == BIT_AND_EXPR)
903 fprintf (dump_file, " & ~");
904 else if (opcode == BIT_IOR_EXPR)
905 fprintf (dump_file, " | ~");
906 print_generic_expr (dump_file, oe->op);
907 if (opcode == BIT_AND_EXPR)
908 fprintf (dump_file, " -> 0\n");
909 else if (opcode == BIT_IOR_EXPR)
910 fprintf (dump_file, " -> -1\n");
911 }
912
913 if (opcode == BIT_AND_EXPR)
914 oe->op = build_zero_cst (TREE_TYPE (oe->op));
915 else if (opcode == BIT_IOR_EXPR)
916 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
917
918 reassociate_stats.ops_eliminated += ops->length () - 1;
919 ops->truncate (0);
920 ops->quick_push (oe);
921 return true;
922 }
923 }
924
925 return false;
926 }
927
928 /* Use constant value that may be present in OPS to try to eliminate
929 operands. Note that this function is only really used when we've
930 eliminated ops for other reasons, or merged constants. Across
931 single statements, fold already does all of this, plus more. There
932 is little point in duplicating logic, so I've only included the
933 identities that I could ever construct testcases to trigger. */
934
935 static void
936 eliminate_using_constants (enum tree_code opcode,
937 vec<operand_entry *> *ops)
938 {
939 operand_entry *oelast = ops->last ();
940 tree type = TREE_TYPE (oelast->op);
941
942 if (oelast->rank == 0
943 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
944 {
945 switch (opcode)
946 {
947 case BIT_AND_EXPR:
948 if (integer_zerop (oelast->op))
949 {
950 if (ops->length () != 1)
951 {
952 if (dump_file && (dump_flags & TDF_DETAILS))
953 fprintf (dump_file, "Found & 0, removing all other ops\n");
954
955 reassociate_stats.ops_eliminated += ops->length () - 1;
956
957 ops->truncate (0);
958 ops->quick_push (oelast);
959 return;
960 }
961 }
962 else if (integer_all_onesp (oelast->op))
963 {
964 if (ops->length () != 1)
965 {
966 if (dump_file && (dump_flags & TDF_DETAILS))
967 fprintf (dump_file, "Found & -1, removing\n");
968 ops->pop ();
969 reassociate_stats.ops_eliminated++;
970 }
971 }
972 break;
973 case BIT_IOR_EXPR:
974 if (integer_all_onesp (oelast->op))
975 {
976 if (ops->length () != 1)
977 {
978 if (dump_file && (dump_flags & TDF_DETAILS))
979 fprintf (dump_file, "Found | -1, removing all other ops\n");
980
981 reassociate_stats.ops_eliminated += ops->length () - 1;
982
983 ops->truncate (0);
984 ops->quick_push (oelast);
985 return;
986 }
987 }
988 else if (integer_zerop (oelast->op))
989 {
990 if (ops->length () != 1)
991 {
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 fprintf (dump_file, "Found | 0, removing\n");
994 ops->pop ();
995 reassociate_stats.ops_eliminated++;
996 }
997 }
998 break;
999 case MULT_EXPR:
1000 if (integer_zerop (oelast->op)
1001 || (FLOAT_TYPE_P (type)
1002 && !HONOR_NANS (type)
1003 && !HONOR_SIGNED_ZEROS (type)
1004 && real_zerop (oelast->op)))
1005 {
1006 if (ops->length () != 1)
1007 {
1008 if (dump_file && (dump_flags & TDF_DETAILS))
1009 fprintf (dump_file, "Found * 0, removing all other ops\n");
1010
1011 reassociate_stats.ops_eliminated += ops->length () - 1;
1012 ops->truncate (0);
1013 ops->quick_push (oelast);
1014 return;
1015 }
1016 }
1017 else if (integer_onep (oelast->op)
1018 || (FLOAT_TYPE_P (type)
1019 && !HONOR_SNANS (type)
1020 && real_onep (oelast->op)))
1021 {
1022 if (ops->length () != 1)
1023 {
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Found * 1, removing\n");
1026 ops->pop ();
1027 reassociate_stats.ops_eliminated++;
1028 return;
1029 }
1030 }
1031 break;
1032 case BIT_XOR_EXPR:
1033 case PLUS_EXPR:
1034 case MINUS_EXPR:
1035 if (integer_zerop (oelast->op)
1036 || (FLOAT_TYPE_P (type)
1037 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1038 && fold_real_zero_addition_p (type, oelast->op,
1039 opcode == MINUS_EXPR)))
1040 {
1041 if (ops->length () != 1)
1042 {
1043 if (dump_file && (dump_flags & TDF_DETAILS))
1044 fprintf (dump_file, "Found [|^+] 0, removing\n");
1045 ops->pop ();
1046 reassociate_stats.ops_eliminated++;
1047 return;
1048 }
1049 }
1050 break;
1051 default:
1052 break;
1053 }
1054 }
1055 }
1056
1057
1058 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1059 bool, bool);
1060
1061 /* Structure for tracking and counting operands. */
1062 struct oecount {
1063 unsigned int cnt;
1064 unsigned int id;
1065 enum tree_code oecode;
1066 tree op;
1067 };
1068
1069
1070 /* The heap for the oecount hashtable and the sorted list of operands. */
1071 static vec<oecount> cvec;
1072
1073
1074 /* Oecount hashtable helpers. */
1075
1076 struct oecount_hasher : int_hash <int, 0, 1>
1077 {
1078 static inline hashval_t hash (int);
1079 static inline bool equal (int, int);
1080 };
1081
1082 /* Hash function for oecount. */
1083
1084 inline hashval_t
1085 oecount_hasher::hash (int p)
1086 {
1087 const oecount *c = &cvec[p - 42];
1088 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1089 }
1090
1091 /* Comparison function for oecount. */
1092
1093 inline bool
1094 oecount_hasher::equal (int p1, int p2)
1095 {
1096 const oecount *c1 = &cvec[p1 - 42];
1097 const oecount *c2 = &cvec[p2 - 42];
1098 return c1->oecode == c2->oecode && c1->op == c2->op;
1099 }
1100
1101 /* Comparison function for qsort sorting oecount elements by count. */
1102
1103 static int
1104 oecount_cmp (const void *p1, const void *p2)
1105 {
1106 const oecount *c1 = (const oecount *)p1;
1107 const oecount *c2 = (const oecount *)p2;
1108 if (c1->cnt != c2->cnt)
1109 return c1->cnt > c2->cnt ? 1 : -1;
1110 else
1111 /* If counts are identical, use unique IDs to stabilize qsort. */
1112 return c1->id > c2->id ? 1 : -1;
1113 }
1114
1115 /* Return TRUE iff STMT represents a builtin call that raises OP
1116 to some exponent. */
1117
1118 static bool
1119 stmt_is_power_of_op (gimple *stmt, tree op)
1120 {
1121 if (!is_gimple_call (stmt))
1122 return false;
1123
1124 switch (gimple_call_combined_fn (stmt))
1125 {
1126 CASE_CFN_POW:
1127 CASE_CFN_POWI:
1128 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1129
1130 default:
1131 return false;
1132 }
1133 }
1134
1135 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1136 in place and return the result. Assumes that stmt_is_power_of_op
1137 was previously called for STMT and returned TRUE. */
1138
1139 static HOST_WIDE_INT
1140 decrement_power (gimple *stmt)
1141 {
1142 REAL_VALUE_TYPE c, cint;
1143 HOST_WIDE_INT power;
1144 tree arg1;
1145
1146 switch (gimple_call_combined_fn (stmt))
1147 {
1148 CASE_CFN_POW:
1149 arg1 = gimple_call_arg (stmt, 1);
1150 c = TREE_REAL_CST (arg1);
1151 power = real_to_integer (&c) - 1;
1152 real_from_integer (&cint, VOIDmode, power, SIGNED);
1153 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1154 return power;
1155
1156 CASE_CFN_POWI:
1157 arg1 = gimple_call_arg (stmt, 1);
1158 power = TREE_INT_CST_LOW (arg1) - 1;
1159 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1160 return power;
1161
1162 default:
1163 gcc_unreachable ();
1164 }
1165 }
1166
1167 /* Replace the SSA name defined by STMT with a new SSA name, replace
1168 all its uses with the new name, and return the new SSA name. */
1169
1170 static tree
1171 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1172 {
1173 gimple *use_stmt;
1174 use_operand_p use;
1175 imm_use_iterator iter;
1176 tree new_lhs, new_debug_lhs = NULL_TREE;
1177 tree lhs = gimple_get_lhs (stmt);
1178
1179 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1180 gimple_set_lhs (stmt, new_lhs);
1181
1182 /* Also need to update GIMPLE_DEBUGs. */
1183 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1184 {
1185 tree repl = new_lhs;
1186 if (is_gimple_debug (use_stmt))
1187 {
1188 if (new_debug_lhs == NULL_TREE)
1189 {
1190 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1191 gdebug *def_temp
1192 = gimple_build_debug_bind (new_debug_lhs,
1193 build2 (opcode, TREE_TYPE (lhs),
1194 new_lhs, op),
1195 stmt);
1196 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1197 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1198 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1199 gimple_set_uid (def_temp, gimple_uid (stmt));
1200 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1201 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1202 }
1203 repl = new_debug_lhs;
1204 }
1205 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1206 SET_USE (use, repl);
1207 update_stmt (use_stmt);
1208 }
1209 return new_lhs;
1210 }
1211
1212 /* Replace all SSA names defined by the stmts in STMTS_TO_FIX with new
1213 SSA names and replace their uses. Also do this for the stmt that
1214 defines DEF if *DEF is not OP. */
1215
1216 static void
1217 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1218 vec<gimple *> &stmts_to_fix)
1219 {
1220 unsigned i;
1221 gimple *stmt;
1222
1223 if (*def != op
1224 && TREE_CODE (*def) == SSA_NAME
1225 && (stmt = SSA_NAME_DEF_STMT (*def))
1226 && gimple_code (stmt) != GIMPLE_NOP)
1227 *def = make_new_ssa_for_def (stmt, opcode, op);
1228
1229 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1230 make_new_ssa_for_def (stmt, opcode, op);
1231 }
1232
1233 /* Find the single immediate use of STMT's LHS, and replace it
1234 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1235 replace *DEF with OP as well. */
1236
1237 static void
1238 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1239 {
1240 tree lhs;
1241 gimple *use_stmt;
1242 use_operand_p use;
1243 gimple_stmt_iterator gsi;
1244
1245 if (is_gimple_call (stmt))
1246 lhs = gimple_call_lhs (stmt);
1247 else
1248 lhs = gimple_assign_lhs (stmt);
1249
1250 gcc_assert (has_single_use (lhs));
1251 single_imm_use (lhs, &use, &use_stmt);
1252 if (lhs == *def)
1253 *def = op;
1254 SET_USE (use, op);
1255 if (TREE_CODE (op) != SSA_NAME)
1256 update_stmt (use_stmt);
1257 gsi = gsi_for_stmt (stmt);
1258 unlink_stmt_vdef (stmt);
1259 reassoc_remove_stmt (&gsi);
1260 release_defs (stmt);
1261 }
1262
1263 /* Walks the linear chain with result *DEF searching for an operation
1264 with operand OP and code OPCODE removing that from the chain. *DEF
1265 is updated if there is only one operand but no operation left. */
1266
1267 static void
1268 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1269 {
1270 tree orig_def = *def;
1271 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1272 /* PR72835 - Record the stmt chain that has to be updated such that
1273 we don't use the same LHS when the values computed are different. */
1274 auto_vec<gimple *, 64> stmts_to_fix;
1275
1276 do
1277 {
1278 tree name;
1279
1280 if (opcode == MULT_EXPR)
1281 {
1282 if (stmt_is_power_of_op (stmt, op))
1283 {
1284 if (decrement_power (stmt) == 1)
1285 {
1286 if (stmts_to_fix.length () > 0)
1287 stmts_to_fix.pop ();
1288 propagate_op_to_single_use (op, stmt, def);
1289 }
1290 break;
1291 }
1292 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1293 {
1294 if (gimple_assign_rhs1 (stmt) == op)
1295 {
1296 tree cst = build_minus_one_cst (TREE_TYPE (op));
1297 if (stmts_to_fix.length () > 0)
1298 stmts_to_fix.pop ();
1299 propagate_op_to_single_use (cst, stmt, def);
1300 break;
1301 }
1302 else if (integer_minus_onep (op)
1303 || real_minus_onep (op))
1304 {
1305 gimple_assign_set_rhs_code
1306 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1307 break;
1308 }
1309 }
1310 }
1311
1312 name = gimple_assign_rhs1 (stmt);
1313
1314 /* If this is the operation we look for and one of the operands
1315 is ours, simply propagate the other operand into the stmt's
1316 single use. */
1317 if (gimple_assign_rhs_code (stmt) == opcode
1318 && (name == op
1319 || gimple_assign_rhs2 (stmt) == op))
1320 {
1321 if (name == op)
1322 name = gimple_assign_rhs2 (stmt);
1323 if (stmts_to_fix.length () > 0)
1324 stmts_to_fix.pop ();
1325 propagate_op_to_single_use (name, stmt, def);
1326 break;
1327 }
1328
1329 /* We might have a multiply of two __builtin_pow* calls, and
1330 the operand might be hiding in the rightmost one. Likewise
1331 this can happen for a negate. */
1332 if (opcode == MULT_EXPR
1333 && gimple_assign_rhs_code (stmt) == opcode
1334 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1335 && has_single_use (gimple_assign_rhs2 (stmt)))
1336 {
1337 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1338 if (stmt_is_power_of_op (stmt2, op))
1339 {
1340 if (decrement_power (stmt2) == 1)
1341 propagate_op_to_single_use (op, stmt2, def);
1342 else
1343 stmts_to_fix.safe_push (stmt2);
1344 break;
1345 }
1346 else if (is_gimple_assign (stmt2)
1347 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1348 {
1349 if (gimple_assign_rhs1 (stmt2) == op)
1350 {
1351 tree cst = build_minus_one_cst (TREE_TYPE (op));
1352 propagate_op_to_single_use (cst, stmt2, def);
1353 break;
1354 }
1355 else if (integer_minus_onep (op)
1356 || real_minus_onep (op))
1357 {
1358 stmts_to_fix.safe_push (stmt2);
1359 gimple_assign_set_rhs_code
1360 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1361 break;
1362 }
1363 }
1364 }
1365
1366 /* Continue walking the chain. */
1367 gcc_assert (name != op
1368 && TREE_CODE (name) == SSA_NAME);
1369 stmt = SSA_NAME_DEF_STMT (name);
1370 stmts_to_fix.safe_push (stmt);
1371 }
1372 while (1);
1373
1374 if (stmts_to_fix.length () > 0 || *def == orig_def)
1375 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1376 }
1377
1378 /* Returns true if statement S1 dominates statement S2. Like
1379 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1380
1381 static bool
1382 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1383 {
1384 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1385
1386 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1387 SSA_NAME. Assume it lives at the beginning of function and
1388 thus dominates everything. */
1389 if (!bb1 || s1 == s2)
1390 return true;
1391
1392 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1393 if (!bb2)
1394 return false;
1395
1396 if (bb1 == bb2)
1397 {
1398 /* PHIs in the same basic block are assumed to be
1399 executed all in parallel; if only one stmt is a PHI,
1400 it dominates the other stmt in the same basic block. */
1401 if (gimple_code (s1) == GIMPLE_PHI)
1402 return true;
1403
1404 if (gimple_code (s2) == GIMPLE_PHI)
1405 return false;
1406
1407 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1408
1409 if (gimple_uid (s1) < gimple_uid (s2))
1410 return true;
1411
1412 if (gimple_uid (s1) > gimple_uid (s2))
1413 return false;
1414
1415 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1416 unsigned int uid = gimple_uid (s1);
1417 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1418 {
1419 gimple *s = gsi_stmt (gsi);
1420 if (gimple_uid (s) != uid)
1421 break;
1422 if (s == s2)
1423 return true;
1424 }
1425
1426 return false;
1427 }
1428
1429 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1430 }
1431
1432 /* Insert STMT after INSERT_POINT. */
1433
1434 static void
1435 insert_stmt_after (gimple *stmt, gimple *insert_point)
1436 {
1437 gimple_stmt_iterator gsi;
1438 basic_block bb;
1439
1440 if (gimple_code (insert_point) == GIMPLE_PHI)
1441 bb = gimple_bb (insert_point);
1442 else if (!stmt_ends_bb_p (insert_point))
1443 {
1444 gsi = gsi_for_stmt (insert_point);
1445 gimple_set_uid (stmt, gimple_uid (insert_point));
1446 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1447 return;
1448 }
1449 else
1450 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1451 thus if it must end a basic block, it should be a call that can
1452 throw, or some assignment that can throw. If it throws, the LHS
1453 of it will not be initialized though, so only valid places using
1454 the SSA_NAME should be dominated by the fallthru edge. */
1455 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1456 gsi = gsi_after_labels (bb);
1457 if (gsi_end_p (gsi))
1458 {
1459 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1460 gimple_set_uid (stmt,
1461 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1462 }
1463 else
1464 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1465 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1466 }
1467
1468 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1469 the result. Places the statement after the definition of either
1470 OP1 or OP2. Returns the new statement. */
1471
1472 static gimple *
1473 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1474 {
1475 gimple *op1def = NULL, *op2def = NULL;
1476 gimple_stmt_iterator gsi;
1477 tree op;
1478 gassign *sum;
1479
1480 /* Create the addition statement. */
1481 op = make_ssa_name (type);
1482 sum = gimple_build_assign (op, opcode, op1, op2);
1483
1484 /* Find an insertion place and insert. */
1485 if (TREE_CODE (op1) == SSA_NAME)
1486 op1def = SSA_NAME_DEF_STMT (op1);
1487 if (TREE_CODE (op2) == SSA_NAME)
1488 op2def = SSA_NAME_DEF_STMT (op2);
1489 if ((!op1def || gimple_nop_p (op1def))
1490 && (!op2def || gimple_nop_p (op2def)))
1491 {
1492 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1493 if (gsi_end_p (gsi))
1494 {
1495 gimple_stmt_iterator gsi2
1496 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1497 gimple_set_uid (sum,
1498 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1499 }
1500 else
1501 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1502 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1503 }
1504 else
1505 {
1506 gimple *insert_point;
1507 if ((!op1def || gimple_nop_p (op1def))
1508 || (op2def && !gimple_nop_p (op2def)
1509 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1510 insert_point = op2def;
1511 else
1512 insert_point = op1def;
1513 insert_stmt_after (sum, insert_point);
1514 }
1515 update_stmt (sum);
1516
1517 return sum;
1518 }
1519
1520 /* Perform un-distribution of divisions and multiplications.
1521 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1522 to (A + B) / X for real X.
1523
1524 The algorithm is organized as follows.
1525
1526 - First we walk the addition chain *OPS looking for summands that
1527 are defined by a multiplication or a real division. This results
1528 in the candidates bitmap with relevant indices into *OPS.
1529
1530 - Second we build the chains of multiplications or divisions for
1531 these candidates, counting the number of occurrences of (operand, code)
1532 pairs in all of the candidates chains.
1533
1534 - Third we sort the (operand, code) pairs by number of occurrence and
1535 process them starting with the pair with the most uses.
1536
1537 * For each such pair we walk the candidates again to build a
1538 second candidate bitmap noting all multiplication/division chains
1539 that have at least one occurrence of (operand, code).
1540
1541 * We build an alternate addition chain only covering these
1542 candidates with one (operand, code) operation removed from their
1543 multiplication/division chain.
1544
1545 * The first candidate gets replaced by the alternate addition chain
1546 multiplied/divided by the operand.
1547
1548 * All candidate chains get disabled for further processing and
1549 processing of (operand, code) pairs continues.
1550
1551 The alternate addition chains built are re-processed by the main
1552 reassociation algorithm which allows optimizing a * x * y + b * y * x
1553 to (a + b ) * x * y in one invocation of the reassociation pass. */
1554
1555 static bool
1556 undistribute_ops_list (enum tree_code opcode,
1557 vec<operand_entry *> *ops, class loop *loop)
1558 {
1559 unsigned int length = ops->length ();
1560 operand_entry *oe1;
1561 unsigned i, j;
1562 unsigned nr_candidates, nr_candidates2;
1563 sbitmap_iterator sbi0;
1564 vec<operand_entry *> *subops;
1565 bool changed = false;
1566 unsigned int next_oecount_id = 0;
1567
1568 if (length <= 1
1569 || opcode != PLUS_EXPR)
1570 return false;
1571
1572 /* Build a list of candidates to process. */
1573 auto_sbitmap candidates (length);
1574 bitmap_clear (candidates);
1575 nr_candidates = 0;
1576 FOR_EACH_VEC_ELT (*ops, i, oe1)
1577 {
1578 enum tree_code dcode;
1579 gimple *oe1def;
1580
1581 if (TREE_CODE (oe1->op) != SSA_NAME)
1582 continue;
1583 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1584 if (!is_gimple_assign (oe1def))
1585 continue;
1586 dcode = gimple_assign_rhs_code (oe1def);
1587 if ((dcode != MULT_EXPR
1588 && dcode != RDIV_EXPR)
1589 || !is_reassociable_op (oe1def, dcode, loop))
1590 continue;
1591
1592 bitmap_set_bit (candidates, i);
1593 nr_candidates++;
1594 }
1595
1596 if (nr_candidates < 2)
1597 return false;
1598
1599 if (dump_file && (dump_flags & TDF_DETAILS))
1600 {
1601 fprintf (dump_file, "searching for un-distribute opportunities ");
1602 print_generic_expr (dump_file,
1603 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1604 fprintf (dump_file, " %d\n", nr_candidates);
1605 }
1606
1607 /* Build linearized sub-operand lists and the counting table. */
1608 cvec.create (0);
1609
1610 hash_table<oecount_hasher> ctable (15);
1611
1612 /* ??? Macro arguments cannot have multi-argument template types in
1613 them. This typedef is needed to work around that limitation. */
1614 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1615 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1616 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1617 {
1618 gimple *oedef;
1619 enum tree_code oecode;
1620 unsigned j;
1621
1622 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1623 oecode = gimple_assign_rhs_code (oedef);
1624 linearize_expr_tree (&subops[i], oedef,
1625 associative_tree_code (oecode), false);
1626
1627 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1628 {
1629 oecount c;
1630 int *slot;
1631 int idx;
1632 c.oecode = oecode;
1633 c.cnt = 1;
1634 c.id = next_oecount_id++;
1635 c.op = oe1->op;
1636 cvec.safe_push (c);
1637 idx = cvec.length () + 41;
1638 slot = ctable.find_slot (idx, INSERT);
1639 if (!*slot)
1640 {
1641 *slot = idx;
1642 }
1643 else
1644 {
1645 cvec.pop ();
1646 cvec[*slot - 42].cnt++;
1647 }
1648 }
1649 }
1650
1651 /* Sort the counting table. */
1652 cvec.qsort (oecount_cmp);
1653
1654 if (dump_file && (dump_flags & TDF_DETAILS))
1655 {
1656 oecount *c;
1657 fprintf (dump_file, "Candidates:\n");
1658 FOR_EACH_VEC_ELT (cvec, j, c)
1659 {
1660 fprintf (dump_file, " %u %s: ", c->cnt,
1661 c->oecode == MULT_EXPR
1662 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1663 print_generic_expr (dump_file, c->op);
1664 fprintf (dump_file, "\n");
1665 }
1666 }
1667
1668 /* Process the (operand, code) pairs in order of most occurrence. */
1669 auto_sbitmap candidates2 (length);
1670 while (!cvec.is_empty ())
1671 {
1672 oecount *c = &cvec.last ();
1673 if (c->cnt < 2)
1674 break;
1675
1676 /* Now collect the operands in the outer chain that contain
1677 the common operand in their inner chain. */
1678 bitmap_clear (candidates2);
1679 nr_candidates2 = 0;
1680 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1681 {
1682 gimple *oedef;
1683 enum tree_code oecode;
1684 unsigned j;
1685 tree op = (*ops)[i]->op;
1686
1687 /* If we undistributed in this chain already this may be
1688 a constant. */
1689 if (TREE_CODE (op) != SSA_NAME)
1690 continue;
1691
1692 oedef = SSA_NAME_DEF_STMT (op);
1693 oecode = gimple_assign_rhs_code (oedef);
1694 if (oecode != c->oecode)
1695 continue;
1696
1697 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1698 {
1699 if (oe1->op == c->op)
1700 {
1701 bitmap_set_bit (candidates2, i);
1702 ++nr_candidates2;
1703 break;
1704 }
1705 }
1706 }
1707
1708 if (nr_candidates2 >= 2)
1709 {
1710 operand_entry *oe1, *oe2;
1711 gimple *prod;
1712 int first = bitmap_first_set_bit (candidates2);
1713
1714 /* Build the new addition chain. */
1715 oe1 = (*ops)[first];
1716 if (dump_file && (dump_flags & TDF_DETAILS))
1717 {
1718 fprintf (dump_file, "Building (");
1719 print_generic_expr (dump_file, oe1->op);
1720 }
1721 zero_one_operation (&oe1->op, c->oecode, c->op);
1722 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1723 {
1724 gimple *sum;
1725 oe2 = (*ops)[i];
1726 if (dump_file && (dump_flags & TDF_DETAILS))
1727 {
1728 fprintf (dump_file, " + ");
1729 print_generic_expr (dump_file, oe2->op);
1730 }
1731 zero_one_operation (&oe2->op, c->oecode, c->op);
1732 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1733 oe1->op, oe2->op, opcode);
1734 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1735 oe2->rank = 0;
1736 oe1->op = gimple_get_lhs (sum);
1737 }
1738
1739 /* Apply the multiplication/division. */
1740 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1741 oe1->op, c->op, c->oecode);
1742 if (dump_file && (dump_flags & TDF_DETAILS))
1743 {
1744 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1745 print_generic_expr (dump_file, c->op);
1746 fprintf (dump_file, "\n");
1747 }
1748
1749 /* Record it in the addition chain and disable further
1750 undistribution with this op. */
1751 oe1->op = gimple_assign_lhs (prod);
1752 oe1->rank = get_rank (oe1->op);
1753 subops[first].release ();
1754
1755 changed = true;
1756 }
1757
1758 cvec.pop ();
1759 }
1760
1761 for (i = 0; i < ops->length (); ++i)
1762 subops[i].release ();
1763 free (subops);
1764 cvec.release ();
1765
1766 return changed;
1767 }
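
/* Editorial illustration only (not part of GCC): the occurrence-counting
   idea at the heart of undistribute_ops_list, on plain integer "operand
   ids" instead of (operand, code) pairs and the oecount hash table.  Each
   inner chain is an array of ids; the best candidate to factor out is the
   id that appears in the largest number of chains (it must appear in at
   least two for un-distribution to fire).  Demo names are hypothetical.  */

static bool
reassoc_demo_chain_contains (const int *chain, int len, int id)
{
  for (int k = 0; k < len; k++)
    if (chain[k] == id)
      return true;
  return false;
}

static int
reassoc_demo_best_common_factor (const int *chains[], const int *lens,
				 int n_chains, int *best_count)
{
  int best_id = -1;
  *best_count = 0;
  for (int c = 0; c < n_chains; c++)
    for (int k = 0; k < lens[c]; k++)
      {
	int id = chains[c][k];
	int count = 0;
	for (int c2 = 0; c2 < n_chains; c2++)
	  if (reassoc_demo_chain_contains (chains[c2], lens[c2], id))
	    count++;
	if (count > *best_count)
	  {
	    *best_count = count;
	    best_id = id;
	  }
      }
  return best_id;
}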
1768
1769 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1770 first: element index for each relevant BIT_FIELD_REF.
1771 second: the index into vec *OPS for each relevant BIT_FIELD_REF. */
1772 typedef std::pair<unsigned, unsigned> v_info_elem;
1773 struct v_info {
1774 tree vec_type;
1775 auto_vec<v_info_elem, 32> vec;
1776 };
1777 typedef v_info *v_info_ptr;
1778
1779 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1780 static int
1781 sort_by_mach_mode (const void *p_i, const void *p_j)
1782 {
1783 const tree tr1 = *((const tree *) p_i);
1784 const tree tr2 = *((const tree *) p_j);
1785 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1786 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1787 if (mode1 > mode2)
1788 return 1;
1789 else if (mode1 < mode2)
1790 return -1;
1791 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1792 return -1;
1793 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1794 return 1;
1795 return 0;
1796 }
1797
1798 /* Cleanup hash map for VECTOR information. */
1799 static void
1800 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1801 {
1802 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1803 it != info_map.end (); ++it)
1804 {
1805 v_info_ptr info = (*it).second;
1806 delete info;
1807 (*it).second = NULL;
1808 }
1809 }
1810
1811 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1812 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1813 is transformed to
1814 Vs = (V1 + V2 + ... + Vn)
1815 Vs[0] + Vs[1] + ... + Vs[k]
1816
1817 The basic steps are listed below:
1818
1819 1) Check the addition chain *OPS, looking for summands that come from
1820 a BIT_FIELD_REF on a VECTOR type. Put the information into
1821 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1822
1823 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1824 contiguous and cover the whole VECTOR perfectly without any holes.
1825 Collect the qualifying VECTORs into a list of candidates to be transformed.
1826
1827 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1828 group of candidates with the same mode, build the addition statements
1829 for them and generate BIT_FIELD_REFs accordingly.
1830
1831 TODO:
1832 The current implementation requires that each whole VECTOR be fully
1833 covered. It could be extended to support partial coverage (adjacent
1834 elements that do not fill the whole vector), but that would need some
1835 cost model to decide when it is worthwhile.
1836 */
1837 static bool
1838 undistribute_bitref_for_vector (enum tree_code opcode,
1839 vec<operand_entry *> *ops, struct loop *loop)
1840 {
1841 if (ops->length () <= 1)
1842 return false;
1843
1844 if (opcode != PLUS_EXPR
1845 && opcode != MULT_EXPR
1846 && opcode != BIT_XOR_EXPR
1847 && opcode != BIT_IOR_EXPR
1848 && opcode != BIT_AND_EXPR)
1849 return false;
1850
1851 hash_map<tree, v_info_ptr> v_info_map;
1852 operand_entry *oe1;
1853 unsigned i;
1854
1855 /* Find the summands that come from a VECTOR BIT_FIELD_REF in the addition
1856 chain and put the information into the map. */
1857 FOR_EACH_VEC_ELT (*ops, i, oe1)
1858 {
1859 enum tree_code dcode;
1860 gimple *oe1def;
1861
1862 if (TREE_CODE (oe1->op) != SSA_NAME)
1863 continue;
1864 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1865 if (!is_gimple_assign (oe1def))
1866 continue;
1867 dcode = gimple_assign_rhs_code (oe1def);
1868 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1869 continue;
1870
1871 tree rhs = gimple_assign_rhs1 (oe1def);
1872 tree vec = TREE_OPERAND (rhs, 0);
1873 tree vec_type = TREE_TYPE (vec);
1874
1875 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1876 continue;
1877
1878 /* Ignore it if target machine can't support this VECTOR type. */
1879 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1880 continue;
1881
1882 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1883 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1884 continue;
1885
1886 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1887 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1888 continue;
1889
1890 /* The type of BIT_FIELD_REF might not be equal to the element type of
1891 the vector. We want to use a vector type with element type the
1892 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1893 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1894 {
1895 machine_mode simd_mode;
1896 unsigned HOST_WIDE_INT size, nunits;
1897 unsigned HOST_WIDE_INT elem_size
1898 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1899 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1900 continue;
1901 if (size <= elem_size || (size % elem_size) != 0)
1902 continue;
1903 nunits = size / elem_size;
1904 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1905 nunits).exists (&simd_mode))
1906 continue;
1907 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1908
1909 /* Ignore it if target machine can't support this VECTOR type. */
1910 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1911 continue;
1912
1913 /* Check const vector type, constrain BIT_FIELD_REF offset and
1914 size. */
1915 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1916 continue;
1917
1918 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1919 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1920 continue;
1921 }
1922
1923 tree elem_type = TREE_TYPE (vec_type);
1924 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1925 if (maybe_ne (bit_field_size (rhs), elem_size))
1926 continue;
1927
1928 unsigned idx;
1929 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1930 continue;
1931
1932 /* Ignore it if target machine can't support this type of VECTOR
1933 operation. */
1934 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1935 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1936 continue;
1937
1938 bool existed;
1939 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1940 if (!existed)
1941 {
1942 info = new v_info;
1943 info->vec_type = vec_type;
1944 }
1945 else if (!types_compatible_p (vec_type, info->vec_type))
1946 continue;
1947 info->vec.safe_push (std::make_pair (idx, i));
1948 }
1949
1950 /* We need at least two VECTORs to combine. */
1951 if (v_info_map.elements () <= 1)
1952 {
1953 cleanup_vinfo_map (v_info_map);
1954 return false;
1955 }
1956
1957 /* Verify all VECTOR candidates by checking two conditions:
1958 1) sorted offsets are adjacent, no holes.
1959 2) can fill the whole VECTOR perfectly.
1960 And add the valid candidates to a vector for further handling. */
1961 auto_vec<tree> valid_vecs (v_info_map.elements ());
1962 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1963 it != v_info_map.end (); ++it)
1964 {
1965 tree cand_vec = (*it).first;
1966 v_info_ptr cand_info = (*it).second;
1967 unsigned int num_elems
1968 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1969 if (cand_info->vec.length () != num_elems)
1970 continue;
1971 sbitmap holes = sbitmap_alloc (num_elems);
1972 bitmap_ones (holes);
1973 bool valid = true;
1974 v_info_elem *curr;
1975 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1976 {
1977 if (!bitmap_bit_p (holes, curr->first))
1978 {
1979 valid = false;
1980 break;
1981 }
1982 else
1983 bitmap_clear_bit (holes, curr->first);
1984 }
1985 if (valid && bitmap_empty_p (holes))
1986 valid_vecs.quick_push (cand_vec);
1987 sbitmap_free (holes);
1988 }
1989
1990 /* We need at least two VECTORs to combine. */
1991 if (valid_vecs.length () <= 1)
1992 {
1993 cleanup_vinfo_map (v_info_map);
1994 return false;
1995 }
1996
1997 valid_vecs.qsort (sort_by_mach_mode);
1998 /* Go through all candidates in machine mode order; for each mode combine
1999 all candidates having that mode, skipping modes with a single candidate. */
2000 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2001 {
2002 tree tvec = valid_vecs[i];
2003 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2004
2005 /* Skip modes with only a single candidate. */
2006 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2007 continue;
2008
2009 unsigned int idx, j;
2010 gimple *sum = NULL;
2011 tree sum_vec = tvec;
2012 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2013 v_info_elem *elem;
2014 tree vec_type = info_ptr->vec_type;
2015
2016 /* Build the sum for all candidates with the same mode. */
2017 do
2018 {
2019 sum = build_and_add_sum (vec_type, sum_vec,
2020 valid_vecs[i + 1], opcode);
2021 if (!useless_type_conversion_p (vec_type,
2022 TREE_TYPE (valid_vecs[i + 1])))
2023 {
2024 /* Update the operands only after build_and_add_sum,
2025 so that we don't have to repeat the placement algorithm
2026 of build_and_add_sum. */
2027 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2028 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2029 valid_vecs[i + 1]);
2030 tree lhs = make_ssa_name (vec_type);
2031 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2032 gimple_set_uid (g, gimple_uid (sum));
2033 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2034 gimple_assign_set_rhs2 (sum, lhs);
2035 if (sum_vec == tvec)
2036 {
2037 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2038 lhs = make_ssa_name (vec_type);
2039 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2040 gimple_set_uid (g, gimple_uid (sum));
2041 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2042 gimple_assign_set_rhs1 (sum, lhs);
2043 }
2044 update_stmt (sum);
2045 }
2046 sum_vec = gimple_get_lhs (sum);
2047 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2048 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2049 /* Update the related ops of the current candidate VECTOR. */
2050 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2051 {
2052 idx = elem->second;
2053 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2054 /* Set this so that the op definition will get DCEd later. */
2055 gimple_set_visited (def, true);
2056 if (opcode == PLUS_EXPR
2057 || opcode == BIT_XOR_EXPR
2058 || opcode == BIT_IOR_EXPR)
2059 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2060 else if (opcode == MULT_EXPR)
2061 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2062 else
2063 {
2064 gcc_assert (opcode == BIT_AND_EXPR);
2065 (*ops)[idx]->op
2066 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2067 }
2068 (*ops)[idx]->rank = 0;
2069 }
2070 if (dump_file && (dump_flags & TDF_DETAILS))
2071 {
2072 fprintf (dump_file, "Generating addition -> ");
2073 print_gimple_stmt (dump_file, sum, 0);
2074 }
2075 i++;
2076 }
2077 while ((i < valid_vecs.length () - 1)
2078 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2079
2080 /* Referring to the first valid VECTOR with this mode, generate the
2081 BIT_FIELD_REF statements accordingly. */
2082 info_ptr = *(v_info_map.get (tvec));
2083 gcc_assert (sum);
2084 tree elem_type = TREE_TYPE (vec_type);
2085 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2086 {
2087 idx = elem->second;
2088 tree dst = make_ssa_name (elem_type);
2089 tree pos = bitsize_int (elem->first
2090 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2091 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2092 TYPE_SIZE (elem_type), pos);
2093 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2094 insert_stmt_after (gs, sum);
2095 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2096 /* Set this so that the op definition will get DCEd later. */
2097 gimple_set_visited (def, true);
2098 (*ops)[idx]->op = gimple_assign_lhs (gs);
2099 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2100 if (dump_file && (dump_flags & TDF_DETAILS))
2101 {
2102 fprintf (dump_file, "Generating bit_field_ref -> ");
2103 print_gimple_stmt (dump_file, gs, 0);
2104 }
2105 }
2106 }
2107
2108 if (dump_file && (dump_flags & TDF_DETAILS))
2109 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2110
2111 cleanup_vinfo_map (v_info_map);
2112
2113 return true;
2114 }
2115
2116 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2117 expression, examine the other OPS to see if any of them are comparisons
2118 of the same values, which we may be able to combine or eliminate.
2119 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
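/* Further illustrative cases (not an exhaustive list): under BIT_AND_EXPR,
   (a <= b) & (a >= b) can fold to (a == b), and a contradiction such as
   (a < b) & (a > b) folds to the constant 0, which is then handled by the
   INTEGER_CST path below.  */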
2120
2121 static bool
2122 eliminate_redundant_comparison (enum tree_code opcode,
2123 vec<operand_entry *> *ops,
2124 unsigned int currindex,
2125 operand_entry *curr)
2126 {
2127 tree op1, op2;
2128 enum tree_code lcode, rcode;
2129 gimple *def1, *def2;
2130 int i;
2131 operand_entry *oe;
2132
2133 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2134 return false;
2135
2136 /* Check that CURR is a comparison. */
2137 if (TREE_CODE (curr->op) != SSA_NAME)
2138 return false;
2139 def1 = SSA_NAME_DEF_STMT (curr->op);
2140 if (!is_gimple_assign (def1))
2141 return false;
2142 lcode = gimple_assign_rhs_code (def1);
2143 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2144 return false;
2145 op1 = gimple_assign_rhs1 (def1);
2146 op2 = gimple_assign_rhs2 (def1);
2147
2148 /* Now look for a similar comparison in the remaining OPS. */
2149 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2150 {
2151 tree t;
2152
2153 if (TREE_CODE (oe->op) != SSA_NAME)
2154 continue;
2155 def2 = SSA_NAME_DEF_STMT (oe->op);
2156 if (!is_gimple_assign (def2))
2157 continue;
2158 rcode = gimple_assign_rhs_code (def2);
2159 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2160 continue;
2161
2162 /* If we got here, we have a match. See if we can combine the
2163 two comparisons. */
2164 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2165 if (opcode == BIT_IOR_EXPR)
2166 t = maybe_fold_or_comparisons (type,
2167 lcode, op1, op2,
2168 rcode, gimple_assign_rhs1 (def2),
2169 gimple_assign_rhs2 (def2));
2170 else
2171 t = maybe_fold_and_comparisons (type,
2172 lcode, op1, op2,
2173 rcode, gimple_assign_rhs1 (def2),
2174 gimple_assign_rhs2 (def2));
2175 if (!t)
2176 continue;
2177
2178 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2179 always give us a boolean_type_node value back. If the original
2180 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2181 we need to convert. */
2182 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2183 t = fold_convert (TREE_TYPE (curr->op), t);
2184
2185 if (TREE_CODE (t) != INTEGER_CST
2186 && !operand_equal_p (t, curr->op, 0))
2187 {
2188 enum tree_code subcode;
2189 tree newop1, newop2;
2190 if (!COMPARISON_CLASS_P (t))
2191 continue;
2192 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2193 STRIP_USELESS_TYPE_CONVERSION (newop1);
2194 STRIP_USELESS_TYPE_CONVERSION (newop2);
2195 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2196 continue;
2197 }
2198
2199 if (dump_file && (dump_flags & TDF_DETAILS))
2200 {
2201 fprintf (dump_file, "Equivalence: ");
2202 print_generic_expr (dump_file, curr->op);
2203 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2204 print_generic_expr (dump_file, oe->op);
2205 fprintf (dump_file, " -> ");
2206 print_generic_expr (dump_file, t);
2207 fprintf (dump_file, "\n");
2208 }
2209
2210 /* Now we can delete oe, as it has been subsumed by the new combined
2211 expression t. */
2212 ops->ordered_remove (i);
2213 reassociate_stats.ops_eliminated ++;
2214
2215 /* If t is the same as curr->op, we're done. Otherwise we must
2216 replace curr->op with t. Special case is if we got a constant
2217 back, in which case we add it to the end instead of in place of
2218 the current entry. */
2219 if (TREE_CODE (t) == INTEGER_CST)
2220 {
2221 ops->ordered_remove (currindex);
2222 add_to_ops_vec (ops, t);
2223 }
2224 else if (!operand_equal_p (t, curr->op, 0))
2225 {
2226 gimple *sum;
2227 enum tree_code subcode;
2228 tree newop1;
2229 tree newop2;
2230 gcc_assert (COMPARISON_CLASS_P (t));
2231 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2232 STRIP_USELESS_TYPE_CONVERSION (newop1);
2233 STRIP_USELESS_TYPE_CONVERSION (newop2);
2234 gcc_checking_assert (is_gimple_val (newop1)
2235 && is_gimple_val (newop2));
2236 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2237 curr->op = gimple_get_lhs (sum);
2238 }
2239 return true;
2240 }
2241
2242 return false;
2243 }
2244
2245
2246 /* Transform repeated addition of same values into multiply with
2247 constant. */
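/* For example (an illustrative sketch only), an operand list representing
   x + x + x + y is rewritten so that the three copies of x are removed and
   replaced by a single new entry t = x * 3, leaving t + y for the later
   rewrite of the chain.  */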
2248 static bool
2249 transform_add_to_multiply (vec<operand_entry *> *ops)
2250 {
2251 operand_entry *oe;
2252 tree op = NULL_TREE;
2253 int j;
2254 int i, start = -1, end = 0, count = 0;
2255 auto_vec<std::pair <int, int> > indxs;
2256 bool changed = false;
2257
2258 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2259 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2260 || !flag_unsafe_math_optimizations))
2261 return false;
2262
2263 /* Look for repeated operands. */
2264 FOR_EACH_VEC_ELT (*ops, i, oe)
2265 {
2266 if (start == -1)
2267 {
2268 count = 1;
2269 op = oe->op;
2270 start = i;
2271 }
2272 else if (operand_equal_p (oe->op, op, 0))
2273 {
2274 count++;
2275 end = i;
2276 }
2277 else
2278 {
2279 if (count > 1)
2280 indxs.safe_push (std::make_pair (start, end));
2281 count = 1;
2282 op = oe->op;
2283 start = i;
2284 }
2285 }
2286
2287 if (count > 1)
2288 indxs.safe_push (std::make_pair (start, end));
2289
2290 for (j = indxs.length () - 1; j >= 0; --j)
2291 {
2292 /* Convert repeated operand addition to multiplication. */
2293 start = indxs[j].first;
2294 end = indxs[j].second;
2295 op = (*ops)[start]->op;
2296 count = end - start + 1;
2297 for (i = end; i >= start; --i)
2298 ops->unordered_remove (i);
2299 tree tmp = make_ssa_name (TREE_TYPE (op));
2300 tree cst = build_int_cst (integer_type_node, count);
2301 gassign *mul_stmt
2302 = gimple_build_assign (tmp, MULT_EXPR,
2303 op, fold_convert (TREE_TYPE (op), cst));
2304 gimple_set_visited (mul_stmt, true);
2305 add_to_ops_vec (ops, tmp, mul_stmt);
2306 changed = true;
2307 }
2308
2309 return changed;
2310 }
2311
2312
2313 /* Perform various identities and other optimizations on the list of
2314 operand entries, stored in OPS. The tree code for the binary
2315 operation between all the operands is OPCODE. */
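/* Illustrative examples of what this routine and its helpers can do, shown
   as source-level expressions rather than operand lists (a sketch; the
   actual transforms operate on the operand_entry vector):
     a + 3 + b + 5       -> a + b + 8   (constant merging below)
     a + b + (-a)        -> b           (eliminate_plus_minus_pair)
     x & x & y           -> x & y       (eliminate_duplicate_pair)
     (a < b) | (a == b)  -> a <= b      (eliminate_redundant_comparison)  */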
2316
2317 static void
2318 optimize_ops_list (enum tree_code opcode,
2319 vec<operand_entry *> *ops)
2320 {
2321 unsigned int length = ops->length ();
2322 unsigned int i;
2323 operand_entry *oe;
2324 operand_entry *oelast = NULL;
2325 bool iterate = false;
2326
2327 if (length == 1)
2328 return;
2329
2330 oelast = ops->last ();
2331
2332 /* If the last two are constants, pop the constants off, merge them
2333 and try the next two. */
2334 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2335 {
2336 operand_entry *oelm1 = (*ops)[length - 2];
2337
2338 if (oelm1->rank == 0
2339 && is_gimple_min_invariant (oelm1->op)
2340 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2341 TREE_TYPE (oelast->op)))
2342 {
2343 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2344 oelm1->op, oelast->op);
2345
2346 if (folded && is_gimple_min_invariant (folded))
2347 {
2348 if (dump_file && (dump_flags & TDF_DETAILS))
2349 fprintf (dump_file, "Merging constants\n");
2350
2351 ops->pop ();
2352 ops->pop ();
2353
2354 add_to_ops_vec (ops, folded);
2355 reassociate_stats.constants_eliminated++;
2356
2357 optimize_ops_list (opcode, ops);
2358 return;
2359 }
2360 }
2361 }
2362
2363 eliminate_using_constants (opcode, ops);
2364 oelast = NULL;
2365
2366 for (i = 0; ops->iterate (i, &oe);)
2367 {
2368 bool done = false;
2369
2370 if (eliminate_not_pairs (opcode, ops, i, oe))
2371 return;
2372 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2373 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2374 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2375 {
2376 if (done)
2377 return;
2378 iterate = true;
2379 oelast = NULL;
2380 continue;
2381 }
2382 oelast = oe;
2383 i++;
2384 }
2385
2386 if (iterate)
2387 optimize_ops_list (opcode, ops);
2388 }
2389
2390 /* The following functions are subroutines to optimize_range_tests and allow
2391 it to try to change a logical combination of comparisons into a range
2392 test.
2393
2394 For example, both
2395 X == 2 || X == 5 || X == 3 || X == 4
2396 and
2397 X >= 2 && X <= 5
2398 are converted to
2399 (unsigned) (X - 2) <= 3
2400
2401 For more information see comments above fold_test_range in fold-const.c,
2402 this implementation is for GIMPLE. */
2403
2404
2405
2406 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2407
2408 void
2409 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2410 {
2411 if (!skip_exp)
2412 print_generic_expr (file, r->exp);
2413 fprintf (file, " %c[", r->in_p ? '+' : '-');
2414 print_generic_expr (file, r->low);
2415 fputs (", ", file);
2416 print_generic_expr (file, r->high);
2417 fputc (']', file);
2418 }
2419
2420 /* Dump the range entry R to STDERR. */
2421
2422 DEBUG_FUNCTION void
2423 debug_range_entry (struct range_entry *r)
2424 {
2425 dump_range_entry (stderr, r, false);
2426 fputc ('\n', stderr);
2427 }
2428
2429 /* This is similar to make_range in fold-const.c, but on top of
2430 GIMPLE instead of trees. If EXP is non-NULL, it should be
2431 an SSA_NAME and STMT argument is ignored, otherwise STMT
2432 argument should be a GIMPLE_COND. */
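/* As an illustrative sketch (the exact canonicalization is done by
   make_range_step): for a definition _1 = x_2(D) <= 5 with signed integer
   x, init_range_entry (r, _1, NULL) records r->exp = x_2(D), r->in_p = true,
   r->low = NULL_TREE (no lower bound) and r->high = 5, i.e. the range
   +[-, 5]; a GIMPLE_COND testing x_2(D) > 5 yields the same bounds with
   in_p = false, i.e. -[-, 5].  */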
2433
2434 void
2435 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2436 {
2437 int in_p;
2438 tree low, high;
2439 bool is_bool, strict_overflow_p;
2440
2441 r->exp = NULL_TREE;
2442 r->in_p = false;
2443 r->strict_overflow_p = false;
2444 r->low = NULL_TREE;
2445 r->high = NULL_TREE;
2446 if (exp != NULL_TREE
2447 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2448 return;
2449
2450 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2451 and see if we can refine the range. Some of the cases below may not
2452 happen, but it doesn't seem worth worrying about this. We "continue"
2453 the outer loop when we've changed something; otherwise we "break"
2454 the switch, which will "break" the while. */
2455 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2456 high = low;
2457 in_p = 0;
2458 strict_overflow_p = false;
2459 is_bool = false;
2460 if (exp == NULL_TREE)
2461 is_bool = true;
2462 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2463 {
2464 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2465 is_bool = true;
2466 else
2467 return;
2468 }
2469 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2470 is_bool = true;
2471
2472 while (1)
2473 {
2474 enum tree_code code;
2475 tree arg0, arg1, exp_type;
2476 tree nexp;
2477 location_t loc;
2478
2479 if (exp != NULL_TREE)
2480 {
2481 if (TREE_CODE (exp) != SSA_NAME
2482 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2483 break;
2484
2485 stmt = SSA_NAME_DEF_STMT (exp);
2486 if (!is_gimple_assign (stmt))
2487 break;
2488
2489 code = gimple_assign_rhs_code (stmt);
2490 arg0 = gimple_assign_rhs1 (stmt);
2491 arg1 = gimple_assign_rhs2 (stmt);
2492 exp_type = TREE_TYPE (exp);
2493 }
2494 else
2495 {
2496 code = gimple_cond_code (stmt);
2497 arg0 = gimple_cond_lhs (stmt);
2498 arg1 = gimple_cond_rhs (stmt);
2499 exp_type = boolean_type_node;
2500 }
2501
2502 if (TREE_CODE (arg0) != SSA_NAME
2503 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2504 break;
2505 loc = gimple_location (stmt);
2506 switch (code)
2507 {
2508 case BIT_NOT_EXPR:
2509 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2510 /* Ensure the range is either +[-,0], +[0,0],
2511 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2512 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2513 or similar expression of unconditional true or
2514 false, it should not be negated. */
2515 && ((high && integer_zerop (high))
2516 || (low && integer_onep (low))))
2517 {
2518 in_p = !in_p;
2519 exp = arg0;
2520 continue;
2521 }
2522 break;
2523 case SSA_NAME:
2524 exp = arg0;
2525 continue;
2526 CASE_CONVERT:
2527 if (is_bool)
2528 {
2529 if ((TYPE_PRECISION (exp_type) == 1
2530 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2531 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2532 return;
2533 }
2534 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2535 {
2536 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2537 is_bool = true;
2538 else
2539 return;
2540 }
2541 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2542 is_bool = true;
2543 goto do_default;
2544 case EQ_EXPR:
2545 case NE_EXPR:
2546 case LT_EXPR:
2547 case LE_EXPR:
2548 case GE_EXPR:
2549 case GT_EXPR:
2550 is_bool = true;
2551 /* FALLTHRU */
2552 default:
2553 if (!is_bool)
2554 return;
2555 do_default:
2556 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2557 &low, &high, &in_p,
2558 &strict_overflow_p);
2559 if (nexp != NULL_TREE)
2560 {
2561 exp = nexp;
2562 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2563 continue;
2564 }
2565 break;
2566 }
2567 break;
2568 }
2569 if (is_bool)
2570 {
2571 r->exp = exp;
2572 r->in_p = in_p;
2573 r->low = low;
2574 r->high = high;
2575 r->strict_overflow_p = strict_overflow_p;
2576 }
2577 }
2578
2579 /* Comparison function for qsort. Sort entries
2580 without SSA_NAME exp first, then with SSA_NAMEs sorted
2581 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2582 by increasing ->low and if ->low is the same, by increasing
2583 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2584 maximum. */
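/* Illustrative ordering (a sketch, not an exhaustive specification):
   entries whose exp is NULL_TREE or not an SSA_NAME come first; among
   entries for the same SSA_NAME x_1,
     x_1 [-, 3]  <  x_1 [0, 3]  <  x_1 [0, 7]  <  x_1 [0, -]
   because a NULL low sorts before any finite low and a NULL high sorts
   after any finite high; remaining ties are broken by ascending idx.  */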
2585
2586 static int
2587 range_entry_cmp (const void *a, const void *b)
2588 {
2589 const struct range_entry *p = (const struct range_entry *) a;
2590 const struct range_entry *q = (const struct range_entry *) b;
2591
2592 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2593 {
2594 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2595 {
2596 /* Group range_entries for the same SSA_NAME together. */
2597 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2598 return -1;
2599 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2600 return 1;
2601 /* If ->low is different, NULL low goes first, then by
2602 ascending low. */
2603 if (p->low != NULL_TREE)
2604 {
2605 if (q->low != NULL_TREE)
2606 {
2607 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2608 p->low, q->low);
2609 if (tem && integer_onep (tem))
2610 return -1;
2611 tem = fold_binary (GT_EXPR, boolean_type_node,
2612 p->low, q->low);
2613 if (tem && integer_onep (tem))
2614 return 1;
2615 }
2616 else
2617 return 1;
2618 }
2619 else if (q->low != NULL_TREE)
2620 return -1;
2621 /* If ->high is different, NULL high goes last, before that by
2622 ascending high. */
2623 if (p->high != NULL_TREE)
2624 {
2625 if (q->high != NULL_TREE)
2626 {
2627 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2628 p->high, q->high);
2629 if (tem && integer_onep (tem))
2630 return -1;
2631 tem = fold_binary (GT_EXPR, boolean_type_node,
2632 p->high, q->high);
2633 if (tem && integer_onep (tem))
2634 return 1;
2635 }
2636 else
2637 return -1;
2638 }
2639 else if (q->high != NULL_TREE)
2640 return 1;
2641 /* If both ranges are the same, sort below by ascending idx. */
2642 }
2643 else
2644 return 1;
2645 }
2646 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2647 return -1;
2648
2649 if (p->idx < q->idx)
2650 return -1;
2651 else
2652 {
2653 gcc_checking_assert (p->idx > q->idx);
2654 return 1;
2655 }
2656 }
2657
2658 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2659 inserting any needed statements before or after GSI, according to BEFORE. */
2660
2661 static tree
2662 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2663 {
2664 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2665 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2666 if (TREE_CODE (ret) != SSA_NAME)
2667 {
2668 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2669 if (before)
2670 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2671 else
2672 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2673 ret = gimple_assign_lhs (g);
2674 }
2675 return ret;
2676 }
2677
2678 /* Helper routine of optimize_range_test.
2679 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2680 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2681 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2682 is NULL, OTHERRANGEP should not be NULL and then OTHERRANGEP points to
2683 an array of COUNT pointers to other ranges. Return
2684 true if the range merge has been successful.
2685 If OPCODE is ERROR_MARK, this is called from within
2686 maybe_optimize_range_tests and is performing inter-bb range optimization.
2687 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2688 oe->rank. */
2689
2690 static bool
2691 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2692 struct range_entry **otherrangep,
2693 unsigned int count, enum tree_code opcode,
2694 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2695 bool in_p, tree low, tree high, bool strict_overflow_p)
2696 {
2697 operand_entry *oe = (*ops)[range->idx];
2698 tree op = oe->op;
2699 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2700 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2701 location_t loc = gimple_location (stmt);
2702 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2703 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2704 in_p, low, high);
2705 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2706 gimple_stmt_iterator gsi;
2707 unsigned int i, uid;
2708
2709 if (tem == NULL_TREE)
2710 return false;
2711
2712 /* If op is default def SSA_NAME, there is no place to insert the
2713 new comparison. Give up, unless we can use OP itself as the
2714 range test. */
2715 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2716 {
2717 if (op == range->exp
2718 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2719 || TREE_CODE (optype) == BOOLEAN_TYPE)
2720 && (op == tem
2721 || (TREE_CODE (tem) == EQ_EXPR
2722 && TREE_OPERAND (tem, 0) == op
2723 && integer_onep (TREE_OPERAND (tem, 1))))
2724 && opcode != BIT_IOR_EXPR
2725 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2726 {
2727 stmt = NULL;
2728 tem = op;
2729 }
2730 else
2731 return false;
2732 }
2733
2734 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2735 warning_at (loc, OPT_Wstrict_overflow,
2736 "assuming signed overflow does not occur "
2737 "when simplifying range test");
2738
2739 if (dump_file && (dump_flags & TDF_DETAILS))
2740 {
2741 struct range_entry *r;
2742 fprintf (dump_file, "Optimizing range tests ");
2743 dump_range_entry (dump_file, range, false);
2744 for (i = 0; i < count; i++)
2745 {
2746 if (otherrange)
2747 r = otherrange + i;
2748 else
2749 r = otherrangep[i];
2750 if (r->exp
2751 && r->exp != range->exp
2752 && TREE_CODE (r->exp) == SSA_NAME)
2753 {
2754 fprintf (dump_file, " and ");
2755 dump_range_entry (dump_file, r, false);
2756 }
2757 else
2758 {
2759 fprintf (dump_file, " and");
2760 dump_range_entry (dump_file, r, true);
2761 }
2762 }
2763 fprintf (dump_file, "\n into ");
2764 print_generic_expr (dump_file, tem);
2765 fprintf (dump_file, "\n");
2766 }
2767
2768 if (opcode == BIT_IOR_EXPR
2769 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2770 tem = invert_truthvalue_loc (loc, tem);
2771
2772 tem = fold_convert_loc (loc, optype, tem);
2773 if (stmt)
2774 {
2775 gsi = gsi_for_stmt (stmt);
2776 uid = gimple_uid (stmt);
2777 }
2778 else
2779 {
2780 gsi = gsi_none ();
2781 uid = 0;
2782 }
2783 if (stmt == NULL)
2784 gcc_checking_assert (tem == op);
2785 /* In rare cases range->exp can be equal to lhs of stmt.
2786 In that case we have to insert after the stmt rather than before
2787 it. If stmt is a PHI, insert it at the start of the basic block. */
2788 else if (op != range->exp)
2789 {
2790 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2791 tem = force_into_ssa_name (&gsi, tem, true);
2792 gsi_prev (&gsi);
2793 }
2794 else if (gimple_code (stmt) != GIMPLE_PHI)
2795 {
2796 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2797 tem = force_into_ssa_name (&gsi, tem, false);
2798 }
2799 else
2800 {
2801 gsi = gsi_after_labels (gimple_bb (stmt));
2802 if (!gsi_end_p (gsi))
2803 uid = gimple_uid (gsi_stmt (gsi));
2804 else
2805 {
2806 gsi = gsi_start_bb (gimple_bb (stmt));
2807 uid = 1;
2808 while (!gsi_end_p (gsi))
2809 {
2810 uid = gimple_uid (gsi_stmt (gsi));
2811 gsi_next (&gsi);
2812 }
2813 }
2814 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2815 tem = force_into_ssa_name (&gsi, tem, true);
2816 if (gsi_end_p (gsi))
2817 gsi = gsi_last_bb (gimple_bb (stmt));
2818 else
2819 gsi_prev (&gsi);
2820 }
2821 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2822 if (gimple_uid (gsi_stmt (gsi)))
2823 break;
2824 else
2825 gimple_set_uid (gsi_stmt (gsi), uid);
2826
2827 oe->op = tem;
2828 range->exp = exp;
2829 range->low = low;
2830 range->high = high;
2831 range->in_p = in_p;
2832 range->strict_overflow_p = false;
2833
2834 for (i = 0; i < count; i++)
2835 {
2836 if (otherrange)
2837 range = otherrange + i;
2838 else
2839 range = otherrangep[i];
2840 oe = (*ops)[range->idx];
2841 /* Now change all the other range test immediate uses, so that
2842 those tests will be optimized away. */
2843 if (opcode == ERROR_MARK)
2844 {
2845 if (oe->op)
2846 oe->op = build_int_cst (TREE_TYPE (oe->op),
2847 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2848 else
2849 oe->op = (oe->rank == BIT_IOR_EXPR
2850 ? boolean_false_node : boolean_true_node);
2851 }
2852 else
2853 oe->op = error_mark_node;
2854 range->exp = NULL_TREE;
2855 range->low = NULL_TREE;
2856 range->high = NULL_TREE;
2857 }
2858 return true;
2859 }
2860
2861 /* Optimize X == CST1 || X == CST2
2862 if popcount (CST1 ^ CST2) == 1 into
2863 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2864 Similarly for ranges. E.g.
2865 X != 2 && X != 3 && X != 10 && X != 11
2866 will be transformed by the previous optimization into
2867 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2868 and this loop can transform that into
2869 !(((X & ~8) - 2U) <= 1U). */
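/* Worked instance of the simple case (for illustration): for
   X == 2 || X == 3 we have CST1 ^ CST2 == 1, a power of two, so the test
   becomes (X & ~1) == (2 & ~1), i.e. (X & ~1) == 2.  */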
2870
2871 static bool
2872 optimize_range_tests_xor (enum tree_code opcode, tree type,
2873 tree lowi, tree lowj, tree highi, tree highj,
2874 vec<operand_entry *> *ops,
2875 struct range_entry *rangei,
2876 struct range_entry *rangej)
2877 {
2878 tree lowxor, highxor, tem, exp;
2879 /* Check lowi ^ lowj == highi ^ highj and
2880 popcount (lowi ^ lowj) == 1. */
2881 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2882 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2883 return false;
2884 if (!integer_pow2p (lowxor))
2885 return false;
2886 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2887 if (!tree_int_cst_equal (lowxor, highxor))
2888 return false;
2889
2890 exp = rangei->exp;
2891 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2892 int prec = GET_MODE_PRECISION (mode);
2893 if (TYPE_PRECISION (type) < prec
2894 || (wi::to_wide (TYPE_MIN_VALUE (type))
2895 != wi::min_value (prec, TYPE_SIGN (type)))
2896 || (wi::to_wide (TYPE_MAX_VALUE (type))
2897 != wi::max_value (prec, TYPE_SIGN (type))))
2898 {
2899 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2900 exp = fold_convert (type, exp);
2901 lowxor = fold_convert (type, lowxor);
2902 lowi = fold_convert (type, lowi);
2903 highi = fold_convert (type, highi);
2904 }
2905 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2906 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2907 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2908 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2909 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2910 NULL, rangei->in_p, lowj, highj,
2911 rangei->strict_overflow_p
2912 || rangej->strict_overflow_p))
2913 return true;
2914 return false;
2915 }
2916
2917 /* Optimize X == CST1 || X == CST2
2918 if popcount (CST2 - CST1) == 1 into
2919 ((X - CST1) & ~(CST2 - CST1)) == 0.
2920 Similarly for ranges. E.g.
2921 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2922 || X == 75 || X == 45
2923 will be transformed by the previous optimization into
2924 (X - 43U) <= 3U || (X - 75U) <= 3U
2925 and this loop can transform that into
2926 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2927 static bool
2928 optimize_range_tests_diff (enum tree_code opcode, tree type,
2929 tree lowi, tree lowj, tree highi, tree highj,
2930 vec<operand_entry *> *ops,
2931 struct range_entry *rangei,
2932 struct range_entry *rangej)
2933 {
2934 tree tem1, tem2, mask;
2935 /* Check highi - lowi == highj - lowj. */
2936 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2937 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2938 return false;
2939 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2940 if (!tree_int_cst_equal (tem1, tem2))
2941 return false;
2942 /* Check popcount (lowj - lowi) == 1. */
2943 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2944 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2945 return false;
2946 if (!integer_pow2p (tem1))
2947 return false;
2948
2949 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2950 int prec = GET_MODE_PRECISION (mode);
2951 if (TYPE_PRECISION (type) < prec
2952 || (wi::to_wide (TYPE_MIN_VALUE (type))
2953 != wi::min_value (prec, TYPE_SIGN (type)))
2954 || (wi::to_wide (TYPE_MAX_VALUE (type))
2955 != wi::max_value (prec, TYPE_SIGN (type))))
2956 type = build_nonstandard_integer_type (prec, 1);
2957 else
2958 type = unsigned_type_for (type);
2959 tem1 = fold_convert (type, tem1);
2960 tem2 = fold_convert (type, tem2);
2961 lowi = fold_convert (type, lowi);
2962 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2963 tem1 = fold_build2 (MINUS_EXPR, type,
2964 fold_convert (type, rangei->exp), lowi);
2965 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2966 lowj = build_int_cst (type, 0);
2967 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2968 NULL, rangei->in_p, lowj, tem2,
2969 rangei->strict_overflow_p
2970 || rangej->strict_overflow_p))
2971 return true;
2972 return false;
2973 }
2974
2975 /* Perform the checks that are common to optimize_range_tests_xor and
2976 optimize_range_tests_diff.
2977 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2978 otherwise call optimize_range_tests_diff. */
2979
2980 static bool
2981 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2982 bool optimize_xor, vec<operand_entry *> *ops,
2983 struct range_entry *ranges)
2984 {
2985 int i, j;
2986 bool any_changes = false;
2987 for (i = first; i < length; i++)
2988 {
2989 tree lowi, highi, lowj, highj, type, tem;
2990
2991 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2992 continue;
2993 type = TREE_TYPE (ranges[i].exp);
2994 if (!INTEGRAL_TYPE_P (type))
2995 continue;
2996 lowi = ranges[i].low;
2997 if (lowi == NULL_TREE)
2998 lowi = TYPE_MIN_VALUE (type);
2999 highi = ranges[i].high;
3000 if (highi == NULL_TREE)
3001 continue;
3002 for (j = i + 1; j < length && j < i + 64; j++)
3003 {
3004 bool changes;
3005 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3006 continue;
3007 lowj = ranges[j].low;
3008 if (lowj == NULL_TREE)
3009 continue;
3010 highj = ranges[j].high;
3011 if (highj == NULL_TREE)
3012 highj = TYPE_MAX_VALUE (type);
3013 /* Check lowj > highi. */
3014 tem = fold_binary (GT_EXPR, boolean_type_node,
3015 lowj, highi);
3016 if (tem == NULL_TREE || !integer_onep (tem))
3017 continue;
3018 if (optimize_xor)
3019 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3020 highi, highj, ops,
3021 ranges + i, ranges + j);
3022 else
3023 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3024 highi, highj, ops,
3025 ranges + i, ranges + j);
3026 if (changes)
3027 {
3028 any_changes = true;
3029 break;
3030 }
3031 }
3032 }
3033 return any_changes;
3034 }
3035
3036 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3037 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3038 EXP on success, NULL otherwise. */
3039
3040 static tree
3041 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3042 wide_int *mask, tree *totallowp)
3043 {
3044 tree tem = int_const_binop (MINUS_EXPR, high, low);
3045 if (tem == NULL_TREE
3046 || TREE_CODE (tem) != INTEGER_CST
3047 || TREE_OVERFLOW (tem)
3048 || tree_int_cst_sgn (tem) == -1
3049 || compare_tree_int (tem, prec) != -1)
3050 return NULL_TREE;
3051
3052 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3053 *mask = wi::shifted_mask (0, max, false, prec);
3054 if (TREE_CODE (exp) == BIT_AND_EXPR
3055 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3056 {
3057 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3058 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3059 if (wi::popcount (msk) == 1
3060 && wi::ltu_p (msk, prec - max))
3061 {
3062 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3063 max += msk.to_uhwi ();
3064 exp = TREE_OPERAND (exp, 0);
3065 if (integer_zerop (low)
3066 && TREE_CODE (exp) == PLUS_EXPR
3067 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3068 {
3069 tree ret = TREE_OPERAND (exp, 0);
3070 STRIP_NOPS (ret);
3071 widest_int bias
3072 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3073 TYPE_PRECISION (TREE_TYPE (low))));
3074 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3075 if (totallowp)
3076 {
3077 *totallowp = tbias;
3078 return ret;
3079 }
3080 else if (!tree_int_cst_lt (totallow, tbias))
3081 return NULL_TREE;
3082 bias = wi::to_widest (tbias);
3083 bias -= wi::to_widest (totallow);
3084 if (bias >= 0 && bias < prec - max)
3085 {
3086 *mask = wi::lshift (*mask, bias);
3087 return ret;
3088 }
3089 }
3090 }
3091 }
3092 if (totallowp)
3093 return exp;
3094 if (!tree_int_cst_lt (totallow, low))
3095 return exp;
3096 tem = int_const_binop (MINUS_EXPR, low, totallow);
3097 if (tem == NULL_TREE
3098 || TREE_CODE (tem) != INTEGER_CST
3099 || TREE_OVERFLOW (tem)
3100 || compare_tree_int (tem, prec - max) == 1)
3101 return NULL_TREE;
3102
3103 *mask = wi::lshift (*mask, wi::to_widest (tem));
3104 return exp;
3105 }
3106
3107 /* Attempt to optimize small range tests using bit test.
3108 E.g.
3109 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3110 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3111 has been optimized by earlier optimizations into:
3112 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3113 As the whole 43 through 82 range spans fewer than 64 values,
3114 for 64-bit word targets optimize that into:
3115 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
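/* (For illustration: in the example above the tested values, taken relative
   to 43, occupy bits 0-3, 6, 32-35 and 39 of a 64-bit word, i.e.
   0xF | 0x40 | 0xF00000000 | 0x8000000000 == 0x8F0000004F, which is how the
   mask constant above arises; the leading range check keeps the shift
   amount within the word size.)  */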
3116
3117 static bool
3118 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3119 vec<operand_entry *> *ops,
3120 struct range_entry *ranges)
3121 {
3122 int i, j;
3123 bool any_changes = false;
3124 int prec = GET_MODE_BITSIZE (word_mode);
3125 auto_vec<struct range_entry *, 64> candidates;
3126
3127 for (i = first; i < length - 1; i++)
3128 {
3129 tree lowi, highi, lowj, highj, type;
3130
3131 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3132 continue;
3133 type = TREE_TYPE (ranges[i].exp);
3134 if (!INTEGRAL_TYPE_P (type))
3135 continue;
3136 lowi = ranges[i].low;
3137 if (lowi == NULL_TREE)
3138 lowi = TYPE_MIN_VALUE (type);
3139 highi = ranges[i].high;
3140 if (highi == NULL_TREE)
3141 continue;
3142 wide_int mask;
3143 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3144 highi, &mask, &lowi);
3145 if (exp == NULL_TREE)
3146 continue;
3147 bool strict_overflow_p = ranges[i].strict_overflow_p;
3148 candidates.truncate (0);
3149 int end = MIN (i + 64, length);
3150 for (j = i + 1; j < end; j++)
3151 {
3152 tree exp2;
3153 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3154 continue;
3155 if (ranges[j].exp == exp)
3156 ;
3157 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3158 {
3159 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3160 if (exp2 == exp)
3161 ;
3162 else if (TREE_CODE (exp2) == PLUS_EXPR)
3163 {
3164 exp2 = TREE_OPERAND (exp2, 0);
3165 STRIP_NOPS (exp2);
3166 if (exp2 != exp)
3167 continue;
3168 }
3169 else
3170 continue;
3171 }
3172 else
3173 continue;
3174 lowj = ranges[j].low;
3175 if (lowj == NULL_TREE)
3176 continue;
3177 highj = ranges[j].high;
3178 if (highj == NULL_TREE)
3179 highj = TYPE_MAX_VALUE (type);
3180 wide_int mask2;
3181 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3182 highj, &mask2, NULL);
3183 if (exp2 != exp)
3184 continue;
3185 mask |= mask2;
3186 strict_overflow_p |= ranges[j].strict_overflow_p;
3187 candidates.safe_push (&ranges[j]);
3188 }
3189
3190 /* If every possible relative value of the expression is a valid shift
3191 amount, then we can merge the entry test into the bit test. In this
3192 case, if we would need otherwise 2 or more comparisons, then use
3193 the bit test; in the other cases, the threshold is 3 comparisons. */
3194 wide_int min, max;
3195 bool entry_test_needed;
3196 if (TREE_CODE (exp) == SSA_NAME
3197 && get_range_info (exp, &min, &max) == VR_RANGE
3198 && wi::leu_p (max - min, prec - 1))
3199 {
3200 wide_int ilowi = wi::to_wide (lowi);
3201 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3202 {
3203 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3204 mask = wi::lshift (mask, ilowi - min);
3205 }
3206 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3207 {
3208 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3209 mask = wi::lrshift (mask, min - ilowi);
3210 }
3211 entry_test_needed = false;
3212 }
3213 else
3214 entry_test_needed = true;
3215 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3216 {
3217 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3218 wi::to_widest (lowi)
3219 + prec - 1 - wi::clz (mask));
3220 operand_entry *oe = (*ops)[ranges[i].idx];
3221 tree op = oe->op;
3222 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3223 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3224 location_t loc = gimple_location (stmt);
3225 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3226
3227 /* See if it isn't cheaper to pretend the minimum value of the
3228 range is 0, if the maximum value is small enough.
3229 We can then avoid the subtraction of the minimum value, but the
3230 mask constant could perhaps be more expensive. */
3231 if (compare_tree_int (lowi, 0) > 0
3232 && compare_tree_int (high, prec) < 0)
3233 {
3234 int cost_diff;
3235 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3236 rtx reg = gen_raw_REG (word_mode, 10000);
3237 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3238 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3239 GEN_INT (-m)),
3240 word_mode, speed_p);
3241 rtx r = immed_wide_int_const (mask, word_mode);
3242 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3243 word_mode, speed_p);
3244 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3245 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3246 word_mode, speed_p);
3247 if (cost_diff > 0)
3248 {
3249 mask = wi::lshift (mask, m);
3250 lowi = build_zero_cst (TREE_TYPE (lowi));
3251 }
3252 }
3253
3254 tree tem;
3255 if (entry_test_needed)
3256 {
3257 tem = build_range_check (loc, optype, unshare_expr (exp),
3258 false, lowi, high);
3259 if (tem == NULL_TREE || is_gimple_val (tem))
3260 continue;
3261 }
3262 else
3263 tem = NULL_TREE;
3264 tree etype = unsigned_type_for (TREE_TYPE (exp));
3265 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3266 fold_convert_loc (loc, etype, exp),
3267 fold_convert_loc (loc, etype, lowi));
3268 exp = fold_convert_loc (loc, integer_type_node, exp);
3269 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3270 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3271 build_int_cst (word_type, 1), exp);
3272 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3273 wide_int_to_tree (word_type, mask));
3274 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3275 build_zero_cst (word_type));
3276 if (is_gimple_val (exp))
3277 continue;
3278
3279 /* The shift might have undefined behavior if TEM is true,
3280 but reassociate_bb isn't prepared to have basic blocks
3281 split when it is running. So, temporarily emit a code
3282 with BIT_IOR_EXPR instead of &&, and fix it up in
3283 branch_fixup. */
3284 gimple_seq seq = NULL;
3285 if (tem)
3286 {
3287 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3288 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3289 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3290 }
3291 gimple_seq seq2;
3292 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3293 gimple_seq_add_seq_without_update (&seq, seq2);
3294 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3295 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3296 if (tem)
3297 {
3298 gimple *g = gimple_build_assign (make_ssa_name (optype),
3299 BIT_IOR_EXPR, tem, exp);
3300 gimple_set_location (g, loc);
3301 gimple_seq_add_stmt_without_update (&seq, g);
3302 exp = gimple_assign_lhs (g);
3303 }
3304 tree val = build_zero_cst (optype);
3305 if (update_range_test (&ranges[i], NULL, candidates.address (),
3306 candidates.length (), opcode, ops, exp,
3307 seq, false, val, val, strict_overflow_p))
3308 {
3309 any_changes = true;
3310 if (tem)
3311 reassoc_branch_fixups.safe_push (tem);
3312 }
3313 else
3314 gimple_seq_discard (seq);
3315 }
3316 }
3317 return any_changes;
3318 }
3319
3320 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3321 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3322 Also, handle x < C && y < C && z < C where C is power of two as
3323 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3324 as (x | y | z) < 0. */
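/* Implementation note (a summary of the bucketing below, for orientation):
   each usable range is classified as idx 0 (x == 0), idx 1 (x == -1),
   idx 2 (x <= C with C + 1 a power of two, i.e. the x < C case above) or
   idx 3 (signed x compared against 0, i.e. the x < 0 case above), and is
   chained into bucket TYPE_PRECISION * 4 + idx, so that only ranges of the
   same precision and kind get combined together.  */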
3325
3326 static bool
3327 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3328 vec<operand_entry *> *ops,
3329 struct range_entry *ranges)
3330 {
3331 int i;
3332 unsigned int b;
3333 bool any_changes = false;
3334 auto_vec<int, 128> buckets;
3335 auto_vec<int, 32> chains;
3336 auto_vec<struct range_entry *, 32> candidates;
3337
3338 for (i = first; i < length; i++)
3339 {
3340 int idx;
3341
3342 if (ranges[i].exp == NULL_TREE
3343 || TREE_CODE (ranges[i].exp) != SSA_NAME
3344 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3345 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3346 continue;
3347
3348 if (ranges[i].low != NULL_TREE
3349 && ranges[i].high != NULL_TREE
3350 && ranges[i].in_p
3351 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3352 {
3353 idx = !integer_zerop (ranges[i].low);
3354 if (idx && !integer_all_onesp (ranges[i].low))
3355 continue;
3356 }
3357 else if (ranges[i].high != NULL_TREE
3358 && TREE_CODE (ranges[i].high) == INTEGER_CST
3359 && ranges[i].in_p)
3360 {
3361 wide_int w = wi::to_wide (ranges[i].high);
3362 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3363 int l = wi::clz (w);
3364 idx = 2;
3365 if (l <= 0
3366 || l >= prec
3367 || w != wi::mask (prec - l, false, prec))
3368 continue;
3369 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3370 && ranges[i].low == NULL_TREE)
3371 || (ranges[i].low
3372 && integer_zerop (ranges[i].low))))
3373 continue;
3374 }
3375 else if (ranges[i].high == NULL_TREE
3376 && ranges[i].low != NULL_TREE
3377 /* Perform this optimization only in the last
3378 reassoc pass, as it interferes with the reassociation
3379 itself, or could also interfere with VRP etc., which might not
3380 be able to virtually undo the optimization. */
3381 && !reassoc_insert_powi_p
3382 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3383 && integer_zerop (ranges[i].low))
3384 idx = 3;
3385 else
3386 continue;
3387
3388 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3389 if (buckets.length () <= b)
3390 buckets.safe_grow_cleared (b + 1, true);
3391 if (chains.length () <= (unsigned) i)
3392 chains.safe_grow (i + 1, true);
3393 chains[i] = buckets[b];
3394 buckets[b] = i + 1;
3395 }
3396
3397 FOR_EACH_VEC_ELT (buckets, b, i)
3398 if (i && chains[i - 1])
3399 {
3400 int j, k = i;
3401 if ((b % 4) == 2)
3402 {
3403 /* When ranges[X - 1].high + 1 is a power of two,
3404 we need to process the same bucket up to
3405 precision - 1 times, each time splitting the entries
3406 with the same high bound into one chain and the
3407 rest into another one to be processed later. */
3408 int this_prev = i;
3409 int other_prev = 0;
3410 for (j = chains[i - 1]; j; j = chains[j - 1])
3411 {
3412 if (tree_int_cst_equal (ranges[i - 1].high,
3413 ranges[j - 1].high))
3414 {
3415 chains[this_prev - 1] = j;
3416 this_prev = j;
3417 }
3418 else if (other_prev == 0)
3419 {
3420 buckets[b] = j;
3421 other_prev = j;
3422 }
3423 else
3424 {
3425 chains[other_prev - 1] = j;
3426 other_prev = j;
3427 }
3428 }
3429 chains[this_prev - 1] = 0;
3430 if (other_prev)
3431 chains[other_prev - 1] = 0;
3432 if (chains[i - 1] == 0)
3433 {
3434 if (other_prev)
3435 b--;
3436 continue;
3437 }
3438 }
3439 for (j = chains[i - 1]; j; j = chains[j - 1])
3440 {
3441 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3442 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3443 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3444 k = j;
3445 }
3446 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3447 tree type2 = NULL_TREE;
3448 bool strict_overflow_p = false;
3449 candidates.truncate (0);
3450 for (j = i; j; j = chains[j - 1])
3451 {
3452 tree type = TREE_TYPE (ranges[j - 1].exp);
3453 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3454 if ((b % 4) == 3)
3455 {
3456 /* For the signed < 0 cases, the types should be
3457 really compatible (all signed with the same precision);
3458 just put the ranges that have a different in_p from
3459 k first. */
3460 if (!useless_type_conversion_p (type1, type))
3461 continue;
3462 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3463 candidates.safe_push (&ranges[j - 1]);
3464 type2 = type1;
3465 continue;
3466 }
3467 if (j == k
3468 || useless_type_conversion_p (type1, type))
3469 ;
3470 else if (type2 == NULL_TREE
3471 || useless_type_conversion_p (type2, type))
3472 {
3473 if (type2 == NULL_TREE)
3474 type2 = type;
3475 candidates.safe_push (&ranges[j - 1]);
3476 }
3477 }
3478 unsigned l = candidates.length ();
3479 for (j = i; j; j = chains[j - 1])
3480 {
3481 tree type = TREE_TYPE (ranges[j - 1].exp);
3482 if (j == k)
3483 continue;
3484 if ((b % 4) == 3)
3485 {
3486 if (!useless_type_conversion_p (type1, type))
3487 continue;
3488 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3489 candidates.safe_push (&ranges[j - 1]);
3490 continue;
3491 }
3492 if (useless_type_conversion_p (type1, type))
3493 ;
3494 else if (type2 == NULL_TREE
3495 || useless_type_conversion_p (type2, type))
3496 continue;
3497 candidates.safe_push (&ranges[j - 1]);
3498 }
3499 gimple_seq seq = NULL;
3500 tree op = NULL_TREE;
3501 unsigned int id;
3502 struct range_entry *r;
3503 candidates.safe_push (&ranges[k - 1]);
3504 FOR_EACH_VEC_ELT (candidates, id, r)
3505 {
3506 gimple *g;
3507 enum tree_code code;
3508 if (id == 0)
3509 {
3510 op = r->exp;
3511 continue;
3512 }
3513 if (id == l)
3514 {
3515 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3516 g = gimple_build_assign (make_ssa_name (type1), code, op);
3517 gimple_seq_add_stmt_without_update (&seq, g);
3518 op = gimple_assign_lhs (g);
3519 }
3520 tree type = TREE_TYPE (r->exp);
3521 tree exp = r->exp;
3522 if (id >= l && !useless_type_conversion_p (type1, type))
3523 {
3524 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3525 gimple_seq_add_stmt_without_update (&seq, g);
3526 exp = gimple_assign_lhs (g);
3527 }
3528 if ((b % 4) == 3)
3529 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3530 else
3531 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3532 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3533 code, op, exp);
3534 gimple_seq_add_stmt_without_update (&seq, g);
3535 op = gimple_assign_lhs (g);
3536 }
3537 candidates.pop ();
3538 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3539 candidates.length (), opcode, ops, op,
3540 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3541 ranges[k - 1].high, strict_overflow_p))
3542 any_changes = true;
3543 else
3544 gimple_seq_discard (seq);
3545 if ((b % 4) == 2 && buckets[b] != i)
3546 /* There is more work to do for this bucket. */
3547 b--;
3548 }
3549
3550 return any_changes;
3551 }
3552
3553 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3554 a >= 0 && a < b into (unsigned) a < (unsigned) b
3555 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
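/* (Rationale, for illustration: if b >= 0 then (unsigned) b < 2**(prec-1);
   for a >= 0 the casts do not change the comparison, while for a < 0 the
   value (unsigned) a is at least 2**(prec-1) and therefore greater than
   (unsigned) b, so the unsigned comparison is false, just like the original
   conjunction a >= 0 && a < b.)  */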
3556
3557 static bool
3558 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3559 vec<operand_entry *> *ops,
3560 struct range_entry *ranges,
3561 basic_block first_bb)
3562 {
3563 int i;
3564 bool any_changes = false;
3565 hash_map<tree, int> *map = NULL;
3566
3567 for (i = first; i < length; i++)
3568 {
3569 if (ranges[i].exp == NULL_TREE
3570 || TREE_CODE (ranges[i].exp) != SSA_NAME
3571 || !ranges[i].in_p)
3572 continue;
3573
3574 tree type = TREE_TYPE (ranges[i].exp);
3575 if (!INTEGRAL_TYPE_P (type)
3576 || TYPE_UNSIGNED (type)
3577 || ranges[i].low == NULL_TREE
3578 || !integer_zerop (ranges[i].low)
3579 || ranges[i].high != NULL_TREE)
3580 continue;
3581 /* EXP >= 0 here. */
3582 if (map == NULL)
3583 map = new hash_map <tree, int>;
3584 map->put (ranges[i].exp, i);
3585 }
3586
3587 if (map == NULL)
3588 return false;
3589
3590 for (i = 0; i < length; i++)
3591 {
3592 bool in_p = ranges[i].in_p;
3593 if (ranges[i].low == NULL_TREE
3594 || ranges[i].high == NULL_TREE)
3595 continue;
3596 if (!integer_zerop (ranges[i].low)
3597 || !integer_zerop (ranges[i].high))
3598 {
3599 if (ranges[i].exp
3600 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3601 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3602 && integer_onep (ranges[i].low)
3603 && integer_onep (ranges[i].high))
3604 in_p = !in_p;
3605 else
3606 continue;
3607 }
3608
3609 gimple *stmt;
3610 tree_code ccode;
3611 tree rhs1, rhs2;
3612 if (ranges[i].exp)
3613 {
3614 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3615 continue;
3616 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3617 if (!is_gimple_assign (stmt))
3618 continue;
3619 ccode = gimple_assign_rhs_code (stmt);
3620 rhs1 = gimple_assign_rhs1 (stmt);
3621 rhs2 = gimple_assign_rhs2 (stmt);
3622 }
3623 else
3624 {
3625 operand_entry *oe = (*ops)[ranges[i].idx];
3626 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3627 if (gimple_code (stmt) != GIMPLE_COND)
3628 continue;
3629 ccode = gimple_cond_code (stmt);
3630 rhs1 = gimple_cond_lhs (stmt);
3631 rhs2 = gimple_cond_rhs (stmt);
3632 }
3633
3634 if (TREE_CODE (rhs1) != SSA_NAME
3635 || rhs2 == NULL_TREE
3636 || TREE_CODE (rhs2) != SSA_NAME)
3637 continue;
3638
3639 switch (ccode)
3640 {
3641 case GT_EXPR:
3642 case GE_EXPR:
3643 case LT_EXPR:
3644 case LE_EXPR:
3645 break;
3646 default:
3647 continue;
3648 }
3649 if (in_p)
3650 ccode = invert_tree_comparison (ccode, false);
3651 switch (ccode)
3652 {
3653 case GT_EXPR:
3654 case GE_EXPR:
3655 std::swap (rhs1, rhs2);
3656 ccode = swap_tree_comparison (ccode);
3657 break;
3658 case LT_EXPR:
3659 case LE_EXPR:
3660 break;
3661 default:
3662 gcc_unreachable ();
3663 }
3664
3665 int *idx = map->get (rhs1);
3666 if (idx == NULL)
3667 continue;
3668
3669 /* maybe_optimize_range_tests allows statements without side-effects
3670 in the basic blocks as long as they are consumed in the same bb.
3671 Make sure rhs2's def stmt is not among them, otherwise we can't
3672 safely use get_nonzero_bits on it. E.g. in:
3673 # RANGE [-83, 1] NONZERO 173
3674 # k_32 = PHI <k_47(13), k_12(9)>
3675 ...
3676 if (k_32 >= 0)
3677 goto <bb 5>; [26.46%]
3678 else
3679 goto <bb 9>; [73.54%]
3680
3681 <bb 5> [local count: 140323371]:
3682 # RANGE [0, 1] NONZERO 1
3683 _5 = (int) k_32;
3684 # RANGE [0, 4] NONZERO 4
3685 _21 = _5 << 2;
3686 # RANGE [0, 4] NONZERO 4
3687 iftmp.0_44 = (char) _21;
3688 if (k_32 < iftmp.0_44)
3689 goto <bb 6>; [84.48%]
3690 else
3691 goto <bb 9>; [15.52%]
3692 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3693 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3694 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3695 those stmts even for negative k_32 and the value ranges would be no
3696 longer guaranteed and so the optimization would be invalid. */
3697 while (opcode == ERROR_MARK)
3698 {
3699 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3700 basic_block bb2 = gimple_bb (g);
3701 if (bb2
3702 && bb2 != first_bb
3703 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3704 {
3705 /* As an exception, handle a few common cases. */
3706 if (gimple_assign_cast_p (g)
3707 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3708 {
3709 tree op0 = gimple_assign_rhs1 (g);
3710 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3711 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3712 > TYPE_PRECISION (TREE_TYPE (op0))))
3713 /* Zero-extension is always ok. */
3714 break;
3715 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3716 == TYPE_PRECISION (TREE_TYPE (op0))
3717 && TREE_CODE (op0) == SSA_NAME)
3718 {
3719 /* Cast from signed to unsigned or vice versa. Retry
3720 with the op0 as new rhs2. */
3721 rhs2 = op0;
3722 continue;
3723 }
3724 }
3725 else if (is_gimple_assign (g)
3726 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3727 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3728 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3729 /* Masking with INTEGER_CST with MSB clear is always ok
3730 too. */
3731 break;
3732 rhs2 = NULL_TREE;
3733 }
3734 break;
3735 }
3736 if (rhs2 == NULL_TREE)
3737 continue;
3738
3739 wide_int nz = get_nonzero_bits (rhs2);
3740 if (wi::neg_p (nz))
3741 continue;
3742
3743       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3744 	 and RHS2 is known to be >= 0 as well.  */
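      /* Under those constraints the pair of tests can be performed as a
	 single unsigned comparison (unsigned) EXP < (unsigned) RHS2, which
	 is what the casts to UTYPE below arrange.  */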
3745 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3746
3747 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3748 if ((ranges[*idx].strict_overflow_p
3749 || ranges[i].strict_overflow_p)
3750 && issue_strict_overflow_warning (wc))
3751 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3752 "assuming signed overflow does not occur "
3753 "when simplifying range test");
3754
3755 if (dump_file && (dump_flags & TDF_DETAILS))
3756 {
3757 struct range_entry *r = &ranges[*idx];
3758 fprintf (dump_file, "Optimizing range test ");
3759 print_generic_expr (dump_file, r->exp);
3760 fprintf (dump_file, " +[");
3761 print_generic_expr (dump_file, r->low);
3762 fprintf (dump_file, ", ");
3763 print_generic_expr (dump_file, r->high);
3764 fprintf (dump_file, "] and comparison ");
3765 print_generic_expr (dump_file, rhs1);
3766 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3767 print_generic_expr (dump_file, rhs2);
3768 fprintf (dump_file, "\n into (");
3769 print_generic_expr (dump_file, utype);
3770 fprintf (dump_file, ") ");
3771 print_generic_expr (dump_file, rhs1);
3772 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3773 print_generic_expr (dump_file, utype);
3774 fprintf (dump_file, ") ");
3775 print_generic_expr (dump_file, rhs2);
3776 fprintf (dump_file, "\n");
3777 }
3778
3779 operand_entry *oe = (*ops)[ranges[i].idx];
3780 ranges[i].in_p = 0;
3781 if (opcode == BIT_IOR_EXPR
3782 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3783 {
3784 ranges[i].in_p = 1;
3785 ccode = invert_tree_comparison (ccode, false);
3786 }
3787
3788 unsigned int uid = gimple_uid (stmt);
3789 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3790 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3791 gimple_set_uid (g, uid);
3792 rhs1 = gimple_assign_lhs (g);
3793 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3794 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3795 {
3796 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3797 gimple_set_uid (g, uid);
3798 rhs2 = gimple_assign_lhs (g);
3799 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3800 }
3801 if (tree_swap_operands_p (rhs1, rhs2))
3802 {
3803 std::swap (rhs1, rhs2);
3804 ccode = swap_tree_comparison (ccode);
3805 }
3806 if (gimple_code (stmt) == GIMPLE_COND)
3807 {
3808 gcond *c = as_a <gcond *> (stmt);
3809 gimple_cond_set_code (c, ccode);
3810 gimple_cond_set_lhs (c, rhs1);
3811 gimple_cond_set_rhs (c, rhs2);
3812 update_stmt (stmt);
3813 }
3814 else
3815 {
3816 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3817 if (!INTEGRAL_TYPE_P (ctype)
3818 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3819 && TYPE_PRECISION (ctype) != 1))
3820 ctype = boolean_type_node;
3821 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3822 gimple_set_uid (g, uid);
3823 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3824 if (oe->op && ctype != TREE_TYPE (oe->op))
3825 {
3826 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3827 NOP_EXPR, gimple_assign_lhs (g));
3828 gimple_set_uid (g, uid);
3829 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3830 }
3831 ranges[i].exp = gimple_assign_lhs (g);
3832 oe->op = ranges[i].exp;
3833 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3834 ranges[i].high = ranges[i].low;
3835 }
3836 ranges[i].strict_overflow_p = false;
3837 oe = (*ops)[ranges[*idx].idx];
3838 /* Now change all the other range test immediate uses, so that
3839 those tests will be optimized away. */
3840 if (opcode == ERROR_MARK)
3841 {
3842 if (oe->op)
3843 oe->op = build_int_cst (TREE_TYPE (oe->op),
3844 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3845 else
3846 oe->op = (oe->rank == BIT_IOR_EXPR
3847 ? boolean_false_node : boolean_true_node);
3848 }
3849 else
3850 oe->op = error_mark_node;
3851 ranges[*idx].exp = NULL_TREE;
3852 ranges[*idx].low = NULL_TREE;
3853 ranges[*idx].high = NULL_TREE;
3854 any_changes = true;
3855 }
3856
3857 delete map;
3858 return any_changes;
3859 }
3860
3861 /* Optimize range tests, similarly to how fold_range_test optimizes
3862    them on trees.  The tree code for the binary
3863 operation between all the operands is OPCODE.
3864 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3865 maybe_optimize_range_tests for inter-bb range optimization.
3866 In that case if oe->op is NULL, oe->id is bb->index whose
3867 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3868 the actual opcode.
3869 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
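/* As an illustration of what the sub-passes below can do, a chain like
   x_1 == 1 || x_1 == 3 may be merged into the single comparison
   (x_1 & ~2) == 1, and longer chains of constants may become a bit test
   against a mask when left shifts are cheap on the target.  */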
3870
3871 static bool
3872 optimize_range_tests (enum tree_code opcode,
3873 vec<operand_entry *> *ops, basic_block first_bb)
3874 {
3875 unsigned int length = ops->length (), i, j, first;
3876 operand_entry *oe;
3877 struct range_entry *ranges;
3878 bool any_changes = false;
3879
3880 if (length == 1)
3881 return false;
3882
3883 ranges = XNEWVEC (struct range_entry, length);
3884 for (i = 0; i < length; i++)
3885 {
3886 oe = (*ops)[i];
3887 ranges[i].idx = i;
3888 init_range_entry (ranges + i, oe->op,
3889 oe->op
3890 ? NULL
3891 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3892 /* For | invert it now, we will invert it again before emitting
3893 the optimized expression. */
3894 if (opcode == BIT_IOR_EXPR
3895 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3896 ranges[i].in_p = !ranges[i].in_p;
3897 }
3898
3899 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3900 for (i = 0; i < length; i++)
3901 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3902 break;
3903
3904 /* Try to merge ranges. */
3905 for (first = i; i < length; i++)
3906 {
3907 tree low = ranges[i].low;
3908 tree high = ranges[i].high;
3909 int in_p = ranges[i].in_p;
3910 bool strict_overflow_p = ranges[i].strict_overflow_p;
3911 int update_fail_count = 0;
3912
3913 for (j = i + 1; j < length; j++)
3914 {
3915 if (ranges[i].exp != ranges[j].exp)
3916 break;
3917 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3918 ranges[j].in_p, ranges[j].low, ranges[j].high))
3919 break;
3920 strict_overflow_p |= ranges[j].strict_overflow_p;
3921 }
3922
3923 if (j == i + 1)
3924 continue;
3925
3926 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3927 opcode, ops, ranges[i].exp, NULL, in_p,
3928 low, high, strict_overflow_p))
3929 {
3930 i = j - 1;
3931 any_changes = true;
3932 }
3933 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3934 while update_range_test would fail. */
3935 else if (update_fail_count == 64)
3936 i = j - 1;
3937 else
3938 ++update_fail_count;
3939 }
3940
3941 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3942 ops, ranges);
3943
3944 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3945 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3946 ops, ranges);
3947 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3948 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3949 ops, ranges);
3950 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3951 ranges, first_bb);
3952 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3953 ops, ranges);
3954
3955 if (any_changes && opcode != ERROR_MARK)
3956 {
3957 j = 0;
3958 FOR_EACH_VEC_ELT (*ops, i, oe)
3959 {
3960 if (oe->op == error_mark_node)
3961 continue;
3962 else if (i != j)
3963 (*ops)[j] = oe;
3964 j++;
3965 }
3966 ops->truncate (j);
3967 }
3968
3969 XDELETEVEC (ranges);
3970 return any_changes;
3971 }
3972
3973 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3974 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3975 otherwise the comparison code. TYPE is a return value that is set
3976    to the type of the comparison.  */
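/* In other words, for
     _1 = a_2 < b_3;
     x_4 = VEC_COND_EXPR <_1, { -1, ... }, { 0, ... }>;
   a call on x_4 returns LT_EXPR with *LHS = a_2 and *RHS = b_3.  */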
3977
3978 static tree_code
3979 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
3980 tree *lhs, tree *rhs, gassign **vcond)
3981 {
3982 if (TREE_CODE (var) != SSA_NAME)
3983 return ERROR_MARK;
3984
3985 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3986 if (stmt == NULL)
3987 return ERROR_MARK;
3988 if (vcond)
3989 *vcond = stmt;
3990
3991 /* ??? If we start creating more COND_EXPR, we could perform
3992 this same optimization with them. For now, simplify. */
3993 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3994 return ERROR_MARK;
3995
3996 tree cond = gimple_assign_rhs1 (stmt);
3997 tree_code cmp = TREE_CODE (cond);
3998 if (cmp != SSA_NAME)
3999 return ERROR_MARK;
4000
4001 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4002 if (assign == NULL
4003 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4004 return ERROR_MARK;
4005
4006 cmp = gimple_assign_rhs_code (assign);
4007 if (lhs)
4008 *lhs = gimple_assign_rhs1 (assign);
4009 if (rhs)
4010 *rhs = gimple_assign_rhs2 (assign);
4011
4012 /* ??? For now, allow only canonical true and false result vectors.
4013 We could expand this to other constants should the need arise,
4014 but at the moment we don't create them. */
4015 tree t = gimple_assign_rhs2 (stmt);
4016 tree f = gimple_assign_rhs3 (stmt);
4017 bool inv;
4018 if (integer_all_onesp (t))
4019 inv = false;
4020 else if (integer_all_onesp (f))
4021 {
4022 cmp = invert_tree_comparison (cmp, false);
4023 inv = true;
4024 }
4025 else
4026 return ERROR_MARK;
4027 if (!integer_zerop (f))
4028 return ERROR_MARK;
4029
4030 /* Success! */
4031 if (rets)
4032 *rets = assign;
4033 if (reti)
4034 *reti = inv;
4035 if (type)
4036 *type = TREE_TYPE (cond);
4037 return cmp;
4038 }
4039
4040 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4041 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
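/* E.g. when two such VEC_COND_EXPRs compare the same operands with < and <=
   and are combined with BIT_AND_EXPR, maybe_fold_and_comparisons merges the
   two conditions into the single < comparison and the second entry is
   dropped from the ops vector.  */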
4042
4043 static bool
4044 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4045 {
4046 unsigned int length = ops->length (), i, j;
4047 bool any_changes = false;
4048
4049 if (length == 1)
4050 return false;
4051
4052 for (i = 0; i < length; ++i)
4053 {
4054 tree elt0 = (*ops)[i]->op;
4055
4056 gassign *stmt0, *vcond0;
4057 bool invert;
4058 tree type, lhs0, rhs0;
4059 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4060 &rhs0, &vcond0);
4061 if (cmp0 == ERROR_MARK)
4062 continue;
4063
4064 for (j = i + 1; j < length; ++j)
4065 {
4066 tree &elt1 = (*ops)[j]->op;
4067
4068 gassign *stmt1, *vcond1;
4069 tree lhs1, rhs1;
4070 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4071 &rhs1, &vcond1);
4072 if (cmp1 == ERROR_MARK)
4073 continue;
4074
4075 tree comb;
4076 if (opcode == BIT_AND_EXPR)
4077 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4078 cmp1, lhs1, rhs1);
4079 else if (opcode == BIT_IOR_EXPR)
4080 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4081 cmp1, lhs1, rhs1);
4082 else
4083 gcc_unreachable ();
4084 if (comb == NULL)
4085 continue;
4086
4087 /* Success! */
4088 if (dump_file && (dump_flags & TDF_DETAILS))
4089 {
4090 fprintf (dump_file, "Transforming ");
4091 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4092 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4093 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4094 fprintf (dump_file, " into ");
4095 print_generic_expr (dump_file, comb);
4096 fputc ('\n', dump_file);
4097 }
4098
4099 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4100 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4101 true, GSI_SAME_STMT);
4102 if (invert)
4103 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4104 gimple_assign_rhs3_ptr (vcond0));
4105 gimple_assign_set_rhs1 (vcond0, exp);
4106 update_stmt (vcond0);
4107
4108 elt1 = error_mark_node;
4109 any_changes = true;
4110 }
4111 }
4112
4113 if (any_changes)
4114 {
4115 operand_entry *oe;
4116 j = 0;
4117 FOR_EACH_VEC_ELT (*ops, i, oe)
4118 {
4119 if (oe->op == error_mark_node)
4120 continue;
4121 else if (i != j)
4122 (*ops)[j] = oe;
4123 j++;
4124 }
4125 ops->truncate (j);
4126 }
4127
4128 return any_changes;
4129 }
4130
4131 /* Return true if STMT is a cast like:
4132 <bb N>:
4133 ...
4134 _123 = (int) _234;
4135
4136 <bb M>:
4137 # _345 = PHI <_123(N), 1(...), 1(...)>
4138    where _234 has bool type, _123 has a single use and
4139 bb N has a single successor M. This is commonly used in
4140 the last block of a range test.
4141
4142    Also return true if STMT is a tcc_comparison like:
4143 <bb N>:
4144 ...
4145 _234 = a_2(D) == 2;
4146
4147 <bb M>:
4148 # _345 = PHI <_234(N), 1(...), 1(...)>
4149 _346 = (int) _345;
4150    where _234 has bool type, a single use and
4151 bb N has a single successor M. This is commonly used in
4152 the last block of a range test. */
4153
4154 static bool
4155 final_range_test_p (gimple *stmt)
4156 {
4157 basic_block bb, rhs_bb, lhs_bb;
4158 edge e;
4159 tree lhs, rhs;
4160 use_operand_p use_p;
4161 gimple *use_stmt;
4162
4163 if (!gimple_assign_cast_p (stmt)
4164 && (!is_gimple_assign (stmt)
4165 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4166 != tcc_comparison)))
4167 return false;
4168 bb = gimple_bb (stmt);
4169 if (!single_succ_p (bb))
4170 return false;
4171 e = single_succ_edge (bb);
4172 if (e->flags & EDGE_COMPLEX)
4173 return false;
4174
4175 lhs = gimple_assign_lhs (stmt);
4176 rhs = gimple_assign_rhs1 (stmt);
4177 if (gimple_assign_cast_p (stmt)
4178 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4179 || TREE_CODE (rhs) != SSA_NAME
4180 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4181 return false;
4182
4183 if (!gimple_assign_cast_p (stmt)
4184 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4185 return false;
4186
4187 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4188 if (!single_imm_use (lhs, &use_p, &use_stmt))
4189 return false;
4190
4191 if (gimple_code (use_stmt) != GIMPLE_PHI
4192 || gimple_bb (use_stmt) != e->dest)
4193 return false;
4194
4195 /* And that the rhs is defined in the same loop. */
4196 if (gimple_assign_cast_p (stmt))
4197 {
4198 if (TREE_CODE (rhs) != SSA_NAME
4199 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4200 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4201 return false;
4202 }
4203 else
4204 {
4205 if (TREE_CODE (lhs) != SSA_NAME
4206 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4207 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4208 return false;
4209 }
4210
4211 return true;
4212 }
4213
4214 /* Return true if BB is suitable basic block for inter-bb range test
4215 optimization. If BACKWARD is true, BB should be the only predecessor
4216 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4217 or compared with to find a common basic block to which all conditions
4218    branch if true resp. false.  If BACKWARD is false, TEST_BB should
4219 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4220 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4221 block points to an empty block that falls through into *OTHER_BB and
4222 the phi args match that path. */
4223
4224 static bool
4225 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4226 bool *test_swapped_p, bool backward)
4227 {
4228 edge_iterator ei, ei2;
4229 edge e, e2;
4230 gimple *stmt;
4231 gphi_iterator gsi;
4232 bool other_edge_seen = false;
4233 bool is_cond;
4234
4235 if (test_bb == bb)
4236 return false;
4237 /* Check last stmt first. */
4238 stmt = last_stmt (bb);
4239 if (stmt == NULL
4240 || (gimple_code (stmt) != GIMPLE_COND
4241 && (backward || !final_range_test_p (stmt)))
4242 || gimple_visited_p (stmt)
4243 || stmt_could_throw_p (cfun, stmt)
4244 || *other_bb == bb)
4245 return false;
4246 is_cond = gimple_code (stmt) == GIMPLE_COND;
4247 if (is_cond)
4248 {
4249 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4250 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4251 to *OTHER_BB (if not set yet, try to find it out). */
4252 if (EDGE_COUNT (bb->succs) != 2)
4253 return false;
4254 FOR_EACH_EDGE (e, ei, bb->succs)
4255 {
4256 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4257 return false;
4258 if (e->dest == test_bb)
4259 {
4260 if (backward)
4261 continue;
4262 else
4263 return false;
4264 }
4265 if (e->dest == bb)
4266 return false;
4267 if (*other_bb == NULL)
4268 {
4269 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4270 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4271 return false;
4272 else if (e->dest == e2->dest)
4273 *other_bb = e->dest;
4274 if (*other_bb == NULL)
4275 return false;
4276 }
4277 if (e->dest == *other_bb)
4278 other_edge_seen = true;
4279 else if (backward)
4280 return false;
4281 }
4282 if (*other_bb == NULL || !other_edge_seen)
4283 return false;
4284 }
4285 else if (single_succ (bb) != *other_bb)
4286 return false;
4287
4288 /* Now check all PHIs of *OTHER_BB. */
4289 e = find_edge (bb, *other_bb);
4290 e2 = find_edge (test_bb, *other_bb);
4291 retry:;
4292 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4293 {
4294 gphi *phi = gsi.phi ();
4295 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4296 corresponding to BB and TEST_BB predecessor must be the same. */
4297 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4298 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4299 {
4300 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4301 one of the PHIs should have the lhs of the last stmt in
4302 that block as PHI arg and that PHI should have 0 or 1
4303 corresponding to it in all other range test basic blocks
4304 considered. */
4305 if (!is_cond)
4306 {
4307 if (gimple_phi_arg_def (phi, e->dest_idx)
4308 == gimple_assign_lhs (stmt)
4309 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4310 || integer_onep (gimple_phi_arg_def (phi,
4311 e2->dest_idx))))
4312 continue;
4313 }
4314 else
4315 {
4316 gimple *test_last = last_stmt (test_bb);
4317 if (gimple_code (test_last) == GIMPLE_COND)
4318 {
4319 if (backward ? e2->src != test_bb : e->src != bb)
4320 return false;
4321
4322 /* For last_bb, handle also:
4323 if (x_3(D) == 3)
4324 goto <bb 6>; [34.00%]
4325 else
4326 goto <bb 7>; [66.00%]
4327
4328 <bb 6> [local count: 79512730]:
4329
4330 <bb 7> [local count: 1073741824]:
4331 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4332 where bb 7 is *OTHER_BB, but the PHI values from the
4333 earlier bbs match the path through the empty bb
4334 in between. */
4335 edge e3;
4336 if (backward)
4337 e3 = EDGE_SUCC (test_bb,
4338 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4339 else
4340 e3 = EDGE_SUCC (bb,
4341 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4342 if (empty_block_p (e3->dest)
4343 && single_succ_p (e3->dest)
4344 && single_succ (e3->dest) == *other_bb
4345 && single_pred_p (e3->dest)
4346 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4347 {
4348 if (backward)
4349 e2 = single_succ_edge (e3->dest);
4350 else
4351 e = single_succ_edge (e3->dest);
4352 if (test_swapped_p)
4353 *test_swapped_p = true;
4354 goto retry;
4355 }
4356 }
4357 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4358 == gimple_assign_lhs (test_last)
4359 && (integer_zerop (gimple_phi_arg_def (phi,
4360 e->dest_idx))
4361 || integer_onep (gimple_phi_arg_def (phi,
4362 e->dest_idx))))
4363 continue;
4364 }
4365
4366 return false;
4367 }
4368 }
4369 return true;
4370 }
4371
4372 /* Return true if BB doesn't have side-effects that would disallow
4373 range test optimization, all SSA_NAMEs set in the bb are consumed
4374 in the bb and there are no PHIs. */
4375
4376 bool
4377 no_side_effect_bb (basic_block bb)
4378 {
4379 gimple_stmt_iterator gsi;
4380 gimple *last;
4381
4382 if (!gimple_seq_empty_p (phi_nodes (bb)))
4383 return false;
4384 last = last_stmt (bb);
4385 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4386 {
4387 gimple *stmt = gsi_stmt (gsi);
4388 tree lhs;
4389 imm_use_iterator imm_iter;
4390 use_operand_p use_p;
4391
4392 if (is_gimple_debug (stmt))
4393 continue;
4394 if (gimple_has_side_effects (stmt))
4395 return false;
4396 if (stmt == last)
4397 return true;
4398 if (!is_gimple_assign (stmt))
4399 return false;
4400 lhs = gimple_assign_lhs (stmt);
4401 if (TREE_CODE (lhs) != SSA_NAME)
4402 return false;
4403 if (gimple_assign_rhs_could_trap_p (stmt))
4404 return false;
4405 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4406 {
4407 gimple *use_stmt = USE_STMT (use_p);
4408 if (is_gimple_debug (use_stmt))
4409 continue;
4410 if (gimple_bb (use_stmt) != bb)
4411 return false;
4412 }
4413 }
4414 return false;
4415 }
4416
4417 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4418 return true and fill in *OPS recursively. */
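/* E.g. if _2, _4 and _5 are single-use leaves and
     _3 = _4 | _5;
     _1 = _2 | _3;
   then get_ops (_1, BIT_IOR_EXPR, ...) pushes _2, _4 and _5 into *OPS.  */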
4419
4420 static bool
4421 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4422 class loop *loop)
4423 {
4424 gimple *stmt = SSA_NAME_DEF_STMT (var);
4425 tree rhs[2];
4426 int i;
4427
4428 if (!is_reassociable_op (stmt, code, loop))
4429 return false;
4430
4431 rhs[0] = gimple_assign_rhs1 (stmt);
4432 rhs[1] = gimple_assign_rhs2 (stmt);
4433 gimple_set_visited (stmt, true);
4434 for (i = 0; i < 2; i++)
4435 if (TREE_CODE (rhs[i]) == SSA_NAME
4436 && !get_ops (rhs[i], code, ops, loop)
4437 && has_single_use (rhs[i]))
4438 {
4439 operand_entry *oe = operand_entry_pool.allocate ();
4440
4441 oe->op = rhs[i];
4442 oe->rank = code;
4443 oe->id = 0;
4444 oe->count = 1;
4445 oe->stmt_to_insert = NULL;
4446 ops->safe_push (oe);
4447 }
4448 return true;
4449 }
4450
4451 /* Find the ops that were added by get_ops starting from VAR, see if
4452 they were changed during update_range_test and if yes, create new
4453 stmts. */
4454
4455 static tree
4456 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4457 unsigned int *pidx, class loop *loop)
4458 {
4459 gimple *stmt = SSA_NAME_DEF_STMT (var);
4460 tree rhs[4];
4461 int i;
4462
4463 if (!is_reassociable_op (stmt, code, loop))
4464 return NULL;
4465
4466 rhs[0] = gimple_assign_rhs1 (stmt);
4467 rhs[1] = gimple_assign_rhs2 (stmt);
4468 rhs[2] = rhs[0];
4469 rhs[3] = rhs[1];
4470 for (i = 0; i < 2; i++)
4471 if (TREE_CODE (rhs[i]) == SSA_NAME)
4472 {
4473 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4474 if (rhs[2 + i] == NULL_TREE)
4475 {
4476 if (has_single_use (rhs[i]))
4477 rhs[2 + i] = ops[(*pidx)++]->op;
4478 else
4479 rhs[2 + i] = rhs[i];
4480 }
4481 }
4482 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4483 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4484 {
4485 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4486 var = make_ssa_name (TREE_TYPE (var));
4487 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4488 rhs[2], rhs[3]);
4489 gimple_set_uid (g, gimple_uid (stmt));
4490 gimple_set_visited (g, true);
4491 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4492 }
4493 return var;
4494 }
4495
4496 /* Structure to track the initial value passed to get_ops and
4497 the range in the ops vector for each basic block. */
4498
4499 struct inter_bb_range_test_entry
4500 {
4501 tree op;
4502 unsigned int first_idx, last_idx;
4503 };
4504
4505 /* Inter-bb range test optimization.
4506
4507    Returns TRUE if a gimple conditional is optimized to a true/false
4508    constant, otherwise returns FALSE.
4509
4510 This indicates to the caller that it should run a CFG cleanup pass
4511 once reassociation is completed. */
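/* E.g. two consecutive basic blocks ending in the conditions
     a_1 < 0   and   a_1 > 16
   that branch to a common destination can be merged into the single
   unsigned comparison (unsigned) a_1 > 16, with the other GIMPLE_COND
   folded to a constant, which is why a CFG cleanup may then be needed.  */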
4512
4513 static bool
4514 maybe_optimize_range_tests (gimple *stmt)
4515 {
4516 basic_block first_bb = gimple_bb (stmt);
4517 basic_block last_bb = first_bb;
4518 basic_block other_bb = NULL;
4519 basic_block bb;
4520 edge_iterator ei;
4521 edge e;
4522 auto_vec<operand_entry *> ops;
4523 auto_vec<inter_bb_range_test_entry> bbinfo;
4524 bool any_changes = false;
4525 bool cfg_cleanup_needed = false;
4526
4527 /* Consider only basic blocks that end with GIMPLE_COND or
4528 a cast statement satisfying final_range_test_p. All
4529 but the last bb in the first_bb .. last_bb range
4530 should end with GIMPLE_COND. */
4531 if (gimple_code (stmt) == GIMPLE_COND)
4532 {
4533 if (EDGE_COUNT (first_bb->succs) != 2)
4534 return cfg_cleanup_needed;
4535 }
4536 else if (final_range_test_p (stmt))
4537 other_bb = single_succ (first_bb);
4538 else
4539 return cfg_cleanup_needed;
4540
4541 if (stmt_could_throw_p (cfun, stmt))
4542 return cfg_cleanup_needed;
4543
4544 /* As relative ordering of post-dominator sons isn't fixed,
4545 maybe_optimize_range_tests can be called first on any
4546 bb in the range we want to optimize. So, start searching
4547      backwards, while first_bb can be set to a predecessor.  */
4548 while (single_pred_p (first_bb))
4549 {
4550 basic_block pred_bb = single_pred (first_bb);
4551 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4552 break;
4553 if (!no_side_effect_bb (first_bb))
4554 break;
4555 first_bb = pred_bb;
4556 }
4557 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4558 Before starting forward search in last_bb successors, find
4559 out the other_bb. */
4560 if (first_bb == last_bb)
4561 {
4562 other_bb = NULL;
4563 /* As non-GIMPLE_COND last stmt always terminates the range,
4564 if forward search didn't discover anything, just give up. */
4565 if (gimple_code (stmt) != GIMPLE_COND)
4566 return cfg_cleanup_needed;
4567 /* Look at both successors. Either it ends with a GIMPLE_COND
4568 and satisfies suitable_cond_bb, or ends with a cast and
4569 other_bb is that cast's successor. */
4570 FOR_EACH_EDGE (e, ei, first_bb->succs)
4571 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4572 || e->dest == first_bb)
4573 return cfg_cleanup_needed;
4574 else if (single_pred_p (e->dest))
4575 {
4576 stmt = last_stmt (e->dest);
4577 if (stmt
4578 && gimple_code (stmt) == GIMPLE_COND
4579 && EDGE_COUNT (e->dest->succs) == 2)
4580 {
4581 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4582 NULL, true))
4583 break;
4584 else
4585 other_bb = NULL;
4586 }
4587 else if (stmt
4588 && final_range_test_p (stmt)
4589 && find_edge (first_bb, single_succ (e->dest)))
4590 {
4591 other_bb = single_succ (e->dest);
4592 if (other_bb == first_bb)
4593 other_bb = NULL;
4594 }
4595 }
4596 if (other_bb == NULL)
4597 return cfg_cleanup_needed;
4598 }
4599 /* Now do the forward search, moving last_bb to successor bbs
4600 that aren't other_bb. */
4601 while (EDGE_COUNT (last_bb->succs) == 2)
4602 {
4603 FOR_EACH_EDGE (e, ei, last_bb->succs)
4604 if (e->dest != other_bb)
4605 break;
4606 if (e == NULL)
4607 break;
4608 if (!single_pred_p (e->dest))
4609 break;
4610 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4611 break;
4612 if (!no_side_effect_bb (e->dest))
4613 break;
4614 last_bb = e->dest;
4615 }
4616 if (first_bb == last_bb)
4617 return cfg_cleanup_needed;
4618 /* Here basic blocks first_bb through last_bb's predecessor
4619 end with GIMPLE_COND, all of them have one of the edges to
4620 other_bb and another to another block in the range,
4621 all blocks except first_bb don't have side-effects and
4622 last_bb ends with either GIMPLE_COND, or cast satisfying
4623 final_range_test_p. */
4624 for (bb = last_bb; ; bb = single_pred (bb))
4625 {
4626 enum tree_code code;
4627 tree lhs, rhs;
4628 inter_bb_range_test_entry bb_ent;
4629
4630 bb_ent.op = NULL_TREE;
4631 bb_ent.first_idx = ops.length ();
4632 bb_ent.last_idx = bb_ent.first_idx;
4633 e = find_edge (bb, other_bb);
4634 stmt = last_stmt (bb);
4635 gimple_set_visited (stmt, true);
4636 if (gimple_code (stmt) != GIMPLE_COND)
4637 {
4638 use_operand_p use_p;
4639 gimple *phi;
4640 edge e2;
4641 unsigned int d;
4642
4643 lhs = gimple_assign_lhs (stmt);
4644 rhs = gimple_assign_rhs1 (stmt);
4645 gcc_assert (bb == last_bb);
4646
4647 /* stmt is
4648 _123 = (int) _234;
4649 OR
4650 _234 = a_2(D) == 2;
4651
4652 followed by:
4653 <bb M>:
4654 # _345 = PHI <_123(N), 1(...), 1(...)>
4655
4656 or 0 instead of 1. If it is 0, the _234
4657 range test is anded together with all the
4658 other range tests, if it is 1, it is ored with
4659 them. */
4660 single_imm_use (lhs, &use_p, &phi);
4661 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4662 e2 = find_edge (first_bb, other_bb);
4663 d = e2->dest_idx;
4664 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4665 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4666 code = BIT_AND_EXPR;
4667 else
4668 {
4669 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4670 code = BIT_IOR_EXPR;
4671 }
4672
4673 /* If _234 SSA_NAME_DEF_STMT is
4674 _234 = _567 | _789;
4675 	     (or &, corresponding to 1/0 in the phi arguments),
4676 push into ops the individual range test arguments
4677 of the bitwise or resp. and, recursively. */
4678 if (TREE_CODE (rhs) == SSA_NAME
4679 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4680 != tcc_comparison)
4681 && !get_ops (rhs, code, &ops,
4682 loop_containing_stmt (stmt))
4683 && has_single_use (rhs))
4684 {
4685 /* Otherwise, push the _234 range test itself. */
4686 operand_entry *oe = operand_entry_pool.allocate ();
4687
4688 oe->op = rhs;
4689 oe->rank = code;
4690 oe->id = 0;
4691 oe->count = 1;
4692 oe->stmt_to_insert = NULL;
4693 ops.safe_push (oe);
4694 bb_ent.last_idx++;
4695 bb_ent.op = rhs;
4696 }
4697 else if (is_gimple_assign (stmt)
4698 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4699 == tcc_comparison)
4700 && !get_ops (lhs, code, &ops,
4701 loop_containing_stmt (stmt))
4702 && has_single_use (lhs))
4703 {
4704 operand_entry *oe = operand_entry_pool.allocate ();
4705 oe->op = lhs;
4706 oe->rank = code;
4707 oe->id = 0;
4708 oe->count = 1;
4709 ops.safe_push (oe);
4710 bb_ent.last_idx++;
4711 bb_ent.op = lhs;
4712 }
4713 else
4714 {
4715 bb_ent.last_idx = ops.length ();
4716 bb_ent.op = rhs;
4717 }
4718 bbinfo.safe_push (bb_ent);
4719 continue;
4720 }
4721 else if (bb == last_bb)
4722 {
4723 /* For last_bb, handle also:
4724 if (x_3(D) == 3)
4725 goto <bb 6>; [34.00%]
4726 else
4727 goto <bb 7>; [66.00%]
4728
4729 <bb 6> [local count: 79512730]:
4730
4731 <bb 7> [local count: 1073741824]:
4732 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4733 where bb 7 is OTHER_BB, but the PHI values from the
4734 earlier bbs match the path through the empty bb
4735 in between. */
4736 bool test_swapped_p = false;
4737 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4738 &other_bb, &test_swapped_p, true);
4739 gcc_assert (ok);
4740 if (test_swapped_p)
4741 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4742 }
4743 /* Otherwise stmt is GIMPLE_COND. */
4744 code = gimple_cond_code (stmt);
4745 lhs = gimple_cond_lhs (stmt);
4746 rhs = gimple_cond_rhs (stmt);
4747 if (TREE_CODE (lhs) == SSA_NAME
4748 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4749 && ((code != EQ_EXPR && code != NE_EXPR)
4750 || rhs != boolean_false_node
4751 /* Either push into ops the individual bitwise
4752 or resp. and operands, depending on which
4753 edge is other_bb. */
4754 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4755 ^ (code == EQ_EXPR))
4756 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4757 loop_containing_stmt (stmt))))
4758 {
4759 /* Or push the GIMPLE_COND stmt itself. */
4760 operand_entry *oe = operand_entry_pool.allocate ();
4761
4762 oe->op = NULL;
4763 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4764 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4765 	  /* oe->op = NULL signals that there is no SSA_NAME
4766 	     for the range test, and oe->id instead is the
4767 	     index of the basic block at whose end the GIMPLE_COND
4768 	     is.  */
4769 oe->id = bb->index;
4770 oe->count = 1;
4771 oe->stmt_to_insert = NULL;
4772 ops.safe_push (oe);
4773 bb_ent.op = NULL;
4774 bb_ent.last_idx++;
4775 }
4776 else if (ops.length () > bb_ent.first_idx)
4777 {
4778 bb_ent.op = lhs;
4779 bb_ent.last_idx = ops.length ();
4780 }
4781 bbinfo.safe_push (bb_ent);
4782 if (bb == first_bb)
4783 break;
4784 }
4785 if (ops.length () > 1)
4786 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4787 if (any_changes)
4788 {
4789 unsigned int idx, max_idx = 0;
4790       /* update_ops relies on the has_single_use predicates returning the
4791 	 same values as they did during get_ops earlier.  Additionally it
4792 	 never removes statements, only adds new ones, and it should walk
4793 	 from the single imm use and check the predicate already before
4794 	 making those changes.
4795 	 On the other hand, the handling of GIMPLE_COND directly can turn
4796 	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4797 	 it needs to be done in a separate loop afterwards.  */
4798 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4799 {
4800 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4801 && bbinfo[idx].op != NULL_TREE)
4802 {
4803 tree new_op;
4804
4805 max_idx = idx;
4806 stmt = last_stmt (bb);
4807 new_op = update_ops (bbinfo[idx].op,
4808 (enum tree_code)
4809 ops[bbinfo[idx].first_idx]->rank,
4810 ops, &bbinfo[idx].first_idx,
4811 loop_containing_stmt (stmt));
4812 if (new_op == NULL_TREE)
4813 {
4814 gcc_assert (bb == last_bb);
4815 new_op = ops[bbinfo[idx].first_idx++]->op;
4816 }
4817 if (bbinfo[idx].op != new_op)
4818 {
4819 imm_use_iterator iter;
4820 use_operand_p use_p;
4821 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4822
4823 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4824 if (is_gimple_debug (use_stmt))
4825 continue;
4826 else if (gimple_code (use_stmt) == GIMPLE_COND
4827 || gimple_code (use_stmt) == GIMPLE_PHI)
4828 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4829 SET_USE (use_p, new_op);
4830 else if ((is_gimple_assign (use_stmt)
4831 && (TREE_CODE_CLASS
4832 (gimple_assign_rhs_code (use_stmt))
4833 == tcc_comparison)))
4834 cast_or_tcc_cmp_stmt = use_stmt;
4835 else if (gimple_assign_cast_p (use_stmt))
4836 cast_or_tcc_cmp_stmt = use_stmt;
4837 else
4838 gcc_unreachable ();
4839
4840 if (cast_or_tcc_cmp_stmt)
4841 {
4842 gcc_assert (bb == last_bb);
4843 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4844 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4845 enum tree_code rhs_code
4846 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4847 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4848 : CONVERT_EXPR;
4849 gassign *g;
4850 if (is_gimple_min_invariant (new_op))
4851 {
4852 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4853 g = gimple_build_assign (new_lhs, new_op);
4854 }
4855 else
4856 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4857 gimple_stmt_iterator gsi
4858 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4859 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4860 gimple_set_visited (g, true);
4861 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4862 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4863 if (is_gimple_debug (use_stmt))
4864 continue;
4865 else if (gimple_code (use_stmt) == GIMPLE_COND
4866 || gimple_code (use_stmt) == GIMPLE_PHI)
4867 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4868 SET_USE (use_p, new_lhs);
4869 else
4870 gcc_unreachable ();
4871 }
4872 }
4873 }
4874 if (bb == first_bb)
4875 break;
4876 }
4877 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4878 {
4879 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4880 && bbinfo[idx].op == NULL_TREE
4881 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4882 {
4883 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4884
4885 if (idx > max_idx)
4886 max_idx = idx;
4887
4888 /* If we collapse the conditional to a true/false
4889 condition, then bubble that knowledge up to our caller. */
4890 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4891 {
4892 gimple_cond_make_false (cond_stmt);
4893 cfg_cleanup_needed = true;
4894 }
4895 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4896 {
4897 gimple_cond_make_true (cond_stmt);
4898 cfg_cleanup_needed = true;
4899 }
4900 else
4901 {
4902 gimple_cond_set_code (cond_stmt, NE_EXPR);
4903 gimple_cond_set_lhs (cond_stmt,
4904 ops[bbinfo[idx].first_idx]->op);
4905 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4906 }
4907 update_stmt (cond_stmt);
4908 }
4909 if (bb == first_bb)
4910 break;
4911 }
4912
4913 /* The above changes could result in basic blocks after the first
4914 	 modified one, up to and including last_bb, being executed even if
4915 they would not be in the original program. If the value ranges of
4916 assignment lhs' in those bbs were dependent on the conditions
4917 guarding those basic blocks which now can change, the VRs might
4918 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4919 	 are only used within the same bb, it should not be a big deal if
4920 we just reset all the VRs in those bbs. See PR68671. */
4921 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4922 reset_flow_sensitive_info_in_bb (bb);
4923 }
4924 return cfg_cleanup_needed;
4925 }
4926
4927 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4928    of STMT in its operands.  This is also known as a "destructive
4929 update" operation. */
4930
4931 static bool
4932 is_phi_for_stmt (gimple *stmt, tree operand)
4933 {
4934 gimple *def_stmt;
4935 gphi *def_phi;
4936 tree lhs;
4937 use_operand_p arg_p;
4938 ssa_op_iter i;
4939
4940 if (TREE_CODE (operand) != SSA_NAME)
4941 return false;
4942
4943 lhs = gimple_assign_lhs (stmt);
4944
4945 def_stmt = SSA_NAME_DEF_STMT (operand);
4946 def_phi = dyn_cast <gphi *> (def_stmt);
4947 if (!def_phi)
4948 return false;
4949
4950 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4951 if (lhs == USE_FROM_PTR (arg_p))
4952 return true;
4953 return false;
4954 }
4955
4956 /* Remove the def stmt of VAR if VAR has zero uses, and recurse
4957    on its rhs1 operand if so.  */
4958
4959 static void
4960 remove_visited_stmt_chain (tree var)
4961 {
4962 gimple *stmt;
4963 gimple_stmt_iterator gsi;
4964
4965 while (1)
4966 {
4967 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4968 return;
4969 stmt = SSA_NAME_DEF_STMT (var);
4970 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4971 {
4972 var = gimple_assign_rhs1 (stmt);
4973 gsi = gsi_for_stmt (stmt);
4974 reassoc_remove_stmt (&gsi);
4975 release_defs (stmt);
4976 }
4977 else
4978 return;
4979 }
4980 }
4981
4982 /* This function checks three consecutive operands in the
4983    passed operands vector OPS starting from OPINDEX and
4984    swaps two operands if it is profitable for the binary operation
4985    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4986
4987 We pair ops with the same rank if possible.
4988
4989 The alternative we try is to see if STMT is a destructive
4990 update style statement, which is like:
4991 b = phi (a, ...)
4992 a = c + b;
4993 In that case, we want to use the destructive update form to
4994 expose the possible vectorizer sum reduction opportunity.
4995 In that case, the third operand will be the phi node. This
4996 check is not performed if STMT is null.
4997
4998 We could, of course, try to be better as noted above, and do a
4999 lot of work to try to find these opportunities in >3 operand
5000 cases, but it is unlikely to be worth it. */
5001
5002 static void
5003 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
5004 unsigned int opindex, gimple *stmt)
5005 {
5006 operand_entry *oe1, *oe2, *oe3;
5007
5008 oe1 = ops[opindex];
5009 oe2 = ops[opindex + 1];
5010 oe3 = ops[opindex + 2];
5011
5012 if ((oe1->rank == oe2->rank
5013 && oe2->rank != oe3->rank)
5014 || (stmt && is_phi_for_stmt (stmt, oe3->op)
5015 && !is_phi_for_stmt (stmt, oe1->op)
5016 && !is_phi_for_stmt (stmt, oe2->op)))
5017 std::swap (*oe1, *oe3);
5018 else if ((oe1->rank == oe3->rank
5019 && oe2->rank != oe3->rank)
5020 || (stmt && is_phi_for_stmt (stmt, oe2->op)
5021 && !is_phi_for_stmt (stmt, oe1->op)
5022 && !is_phi_for_stmt (stmt, oe3->op)))
5023 std::swap (*oe1, *oe2);
5024 }
5025
5026 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5027 two definitions, otherwise return STMT. */
5028
5029 static inline gimple *
5030 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
5031 {
5032 if (TREE_CODE (rhs1) == SSA_NAME
5033 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5034 stmt = SSA_NAME_DEF_STMT (rhs1);
5035 if (TREE_CODE (rhs2) == SSA_NAME
5036 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5037 stmt = SSA_NAME_DEF_STMT (rhs2);
5038 return stmt;
5039 }
5040
5041 /* If the stmt that defines operand has to be inserted, insert it
5042 before the use. */
5043 static void
5044 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5045 {
5046 gcc_assert (is_gimple_assign (stmt_to_insert));
5047 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5048 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5049 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
5050 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5051 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5052
5053 /* If the insert point is not stmt, then insert_point would be
5054 the point where operand rhs1 or rhs2 is defined. In this case,
5055 stmt_to_insert has to be inserted afterwards. This would
5056 only happen when the stmt insertion point is flexible. */
5057 if (stmt == insert_point)
5058 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5059 else
5060 insert_stmt_after (stmt_to_insert, insert_point);
5061 }
5062
5063
5064 /* Recursively rewrite our linearized statements so that the operators
5065 match those in OPS[OPINDEX], putting the computation in rank
5066 order. Return new lhs.
5067 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5068 the current stmt and during recursive invocations.
5069 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5070 recursive invocations. */
5071
5072 static tree
5073 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5074 vec<operand_entry *> ops, bool changed, bool next_changed)
5075 {
5076 tree rhs1 = gimple_assign_rhs1 (stmt);
5077 tree rhs2 = gimple_assign_rhs2 (stmt);
5078 tree lhs = gimple_assign_lhs (stmt);
5079 operand_entry *oe;
5080
5081 /* The final recursion case for this function is that you have
5082 exactly two operations left.
5083 If we had exactly one op in the entire list to start with, we
5084 would have never called this function, and the tail recursion
5085 rewrites them one at a time. */
5086 if (opindex + 2 == ops.length ())
5087 {
5088 operand_entry *oe1, *oe2;
5089
5090 oe1 = ops[opindex];
5091 oe2 = ops[opindex + 1];
5092
5093 if (rhs1 != oe1->op || rhs2 != oe2->op)
5094 {
5095 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5096 unsigned int uid = gimple_uid (stmt);
5097
5098 if (dump_file && (dump_flags & TDF_DETAILS))
5099 {
5100 fprintf (dump_file, "Transforming ");
5101 print_gimple_stmt (dump_file, stmt, 0);
5102 }
5103
5104 /* If the stmt that defines operand has to be inserted, insert it
5105 before the use. */
5106 if (oe1->stmt_to_insert)
5107 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5108 if (oe2->stmt_to_insert)
5109 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5110 /* Even when changed is false, reassociation could have e.g. removed
5111 some redundant operations, so unless we are just swapping the
5112 arguments or unless there is no change at all (then we just
5113 return lhs), force creation of a new SSA_NAME. */
5114 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5115 {
5116 gimple *insert_point
5117 = find_insert_point (stmt, oe1->op, oe2->op);
5118 lhs = make_ssa_name (TREE_TYPE (lhs));
5119 stmt
5120 = gimple_build_assign (lhs, rhs_code,
5121 oe1->op, oe2->op);
5122 gimple_set_uid (stmt, uid);
5123 gimple_set_visited (stmt, true);
5124 if (insert_point == gsi_stmt (gsi))
5125 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5126 else
5127 insert_stmt_after (stmt, insert_point);
5128 }
5129 else
5130 {
5131 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5132 == stmt);
5133 gimple_assign_set_rhs1 (stmt, oe1->op);
5134 gimple_assign_set_rhs2 (stmt, oe2->op);
5135 update_stmt (stmt);
5136 }
5137
5138 if (rhs1 != oe1->op && rhs1 != oe2->op)
5139 remove_visited_stmt_chain (rhs1);
5140
5141 if (dump_file && (dump_flags & TDF_DETAILS))
5142 {
5143 fprintf (dump_file, " into ");
5144 print_gimple_stmt (dump_file, stmt, 0);
5145 }
5146 }
5147 return lhs;
5148 }
5149
5150 /* If we hit here, we should have 3 or more ops left. */
5151 gcc_assert (opindex + 2 < ops.length ());
5152
5153 /* Rewrite the next operator. */
5154 oe = ops[opindex];
5155
5156 /* If the stmt that defines operand has to be inserted, insert it
5157 before the use. */
5158 if (oe->stmt_to_insert)
5159 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5160
5161 /* Recurse on the LHS of the binary operator, which is guaranteed to
5162 be the non-leaf side. */
5163 tree new_rhs1
5164 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5165 changed || oe->op != rhs2 || next_changed,
5166 false);
5167
5168 if (oe->op != rhs2 || new_rhs1 != rhs1)
5169 {
5170 if (dump_file && (dump_flags & TDF_DETAILS))
5171 {
5172 fprintf (dump_file, "Transforming ");
5173 print_gimple_stmt (dump_file, stmt, 0);
5174 }
5175
5176 /* If changed is false, this is either opindex == 0
5177 or all outer rhs2's were equal to corresponding oe->op,
5178 and powi_result is NULL.
5179 That means lhs is equivalent before and after reassociation.
5180 Otherwise ensure the old lhs SSA_NAME is not reused and
5181 create a new stmt as well, so that any debug stmts will be
5182 properly adjusted. */
5183 if (changed)
5184 {
5185 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5186 unsigned int uid = gimple_uid (stmt);
5187 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5188
5189 lhs = make_ssa_name (TREE_TYPE (lhs));
5190 stmt = gimple_build_assign (lhs, rhs_code,
5191 new_rhs1, oe->op);
5192 gimple_set_uid (stmt, uid);
5193 gimple_set_visited (stmt, true);
5194 if (insert_point == gsi_stmt (gsi))
5195 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5196 else
5197 insert_stmt_after (stmt, insert_point);
5198 }
5199 else
5200 {
5201 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5202 == stmt);
5203 gimple_assign_set_rhs1 (stmt, new_rhs1);
5204 gimple_assign_set_rhs2 (stmt, oe->op);
5205 update_stmt (stmt);
5206 }
5207
5208 if (dump_file && (dump_flags & TDF_DETAILS))
5209 {
5210 fprintf (dump_file, " into ");
5211 print_gimple_stmt (dump_file, stmt, 0);
5212 }
5213 }
5214 return lhs;
5215 }
5216
5217 /* Find out how many cycles we need to compute the statements chain.
5218    OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
5219    maximum number of independent statements we may execute per cycle.  */
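/* E.g. for OPS_NUM = 8 and CPU_WIDTH = 2: 8 / (2 * 2) = 2 full-width cycles
   leave 8 - 2 * 2 = 4 operands, which take exact_log2 (4) = 2 further
   halving cycles, i.e. 4 cycles in total.  */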
5220
5221 static int
5222 get_required_cycles (int ops_num, int cpu_width)
5223 {
5224 int res;
5225 int elog;
5226 unsigned int rest;
5227
5228 /* While we have more than 2 * cpu_width operands
5229      we may reduce the number of operands by cpu_width
5230 per cycle. */
5231 res = ops_num / (2 * cpu_width);
5232
5233   /* The remaining operand count may be halved each cycle
5234      until we have only one operand.  */
5235 rest = (unsigned)(ops_num - res * cpu_width);
5236 elog = exact_log2 (rest);
5237 if (elog >= 0)
5238 res += elog;
5239 else
5240 res += floor_log2 (rest) + 1;
5241
5242 return res;
5243 }
5244
5245 /* Returns an optimal number of registers to use for computation of
5246 given statements. */
5247
5248 static int
5249 get_reassociation_width (int ops_num, enum tree_code opc,
5250 machine_mode mode)
5251 {
5252 int param_width = param_tree_reassoc_width;
5253 int width;
5254 int width_min;
5255 int cycles_best;
5256
5257 if (param_width > 0)
5258 width = param_width;
5259 else
5260 width = targetm.sched.reassociation_width (opc, mode);
5261
5262 if (width == 1)
5263 return width;
5264
5265 /* Get the minimal time required for sequence computation. */
5266 cycles_best = get_required_cycles (ops_num, width);
5267
5268   /* Check if we may use less width and still compute the sequence in
5269      the same time.  It will allow us to reduce register usage.
5270      get_required_cycles is monotonically increasing as the width decreases,
5271      so we can perform a binary search for the minimal width that still
5272      results in the optimal cycle count.  */
5273 width_min = 1;
5274 while (width > width_min)
5275 {
5276 int width_mid = (width + width_min) / 2;
5277
5278 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5279 width = width_mid;
5280 else if (width_min < width_mid)
5281 width_min = width_mid;
5282 else
5283 break;
5284 }
5285
5286 return width;
5287 }
5288
5289 /* Recursively rewrite our linearized statements so that the operators
5290 match those in OPS[OPINDEX], putting the computation in rank
5291 order and trying to allow operations to be executed in
5292 parallel. */
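/* For instance, with WIDTH 2 a four-operand sum is emitted roughly as
     _1 = c + d;
     _2 = a + b;
     lhs = _1 + _2;
   so the two partial sums can execute in parallel, instead of the serial
   chain ((a + b) + c) + d.  */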
5293
5294 static void
5295 rewrite_expr_tree_parallel (gassign *stmt, int width,
5296 vec<operand_entry *> ops)
5297 {
5298 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5299 int op_num = ops.length ();
5300 gcc_assert (op_num > 0);
5301 int stmt_num = op_num - 1;
5302 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5303 int op_index = op_num - 1;
5304 int stmt_index = 0;
5305 int ready_stmts_end = 0;
5306 int i = 0;
5307 gimple *stmt1 = NULL, *stmt2 = NULL;
5308 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5309
5310 /* We start expression rewriting from the top statements.
5311 So, in this loop we create a full list of statements
5312 we will work with. */
5313 stmts[stmt_num - 1] = stmt;
5314 for (i = stmt_num - 2; i >= 0; i--)
5315 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5316
5317 for (i = 0; i < stmt_num; i++)
5318 {
5319 tree op1, op2;
5320
5321 /* Determine whether we should use results of
5322 already handled statements or not. */
5323 if (ready_stmts_end == 0
5324 && (i - stmt_index >= width || op_index < 1))
5325 ready_stmts_end = i;
5326
5327       /* Now we choose operands for the next statement.  A nonzero
5328 	 value in ready_stmts_end here means that we should use
5329 	 the result of already generated statements as the new operand.  */
5330 if (ready_stmts_end > 0)
5331 {
5332 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5333 if (ready_stmts_end > stmt_index)
5334 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5335 else if (op_index >= 0)
5336 {
5337 operand_entry *oe = ops[op_index--];
5338 stmt2 = oe->stmt_to_insert;
5339 op2 = oe->op;
5340 }
5341 else
5342 {
5343 gcc_assert (stmt_index < i);
5344 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5345 }
5346
5347 if (stmt_index >= ready_stmts_end)
5348 ready_stmts_end = 0;
5349 }
5350 else
5351 {
5352 if (op_index > 1)
5353 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5354 operand_entry *oe2 = ops[op_index--];
5355 operand_entry *oe1 = ops[op_index--];
5356 op2 = oe2->op;
5357 stmt2 = oe2->stmt_to_insert;
5358 op1 = oe1->op;
5359 stmt1 = oe1->stmt_to_insert;
5360 }
5361
5362 /* If we emit the last statement then we should put
5363 operands into the last statement. It will also
5364 break the loop. */
5365 if (op_index < 0 && stmt_index == i)
5366 i = stmt_num - 1;
5367
5368 if (dump_file && (dump_flags & TDF_DETAILS))
5369 {
5370 fprintf (dump_file, "Transforming ");
5371 print_gimple_stmt (dump_file, stmts[i], 0);
5372 }
5373
5374 /* If the stmt that defines operand has to be inserted, insert it
5375 before the use. */
5376 if (stmt1)
5377 insert_stmt_before_use (stmts[i], stmt1);
5378 if (stmt2)
5379 insert_stmt_before_use (stmts[i], stmt2);
5380 stmt1 = stmt2 = NULL;
5381
5382       /* We keep the original statement only for the last one.  All
5383 others are recreated. */
5384 if (i == stmt_num - 1)
5385 {
5386 gimple_assign_set_rhs1 (stmts[i], op1);
5387 gimple_assign_set_rhs2 (stmts[i], op2);
5388 update_stmt (stmts[i]);
5389 }
5390 else
5391 {
5392 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5393 gimple_set_visited (stmts[i], true);
5394 }
5395 if (dump_file && (dump_flags & TDF_DETAILS))
5396 {
5397 fprintf (dump_file, " into ");
5398 print_gimple_stmt (dump_file, stmts[i], 0);
5399 }
5400 }
5401
5402 remove_visited_stmt_chain (last_rhs1);
5403 }
5404
5405 /* Transform STMT, which is really (A + B) + (C + D), into the left
5406    linear form, ((A + B) + C) + D.
5407 Recurse on D if necessary. */
5408
5409 static void
5410 linearize_expr (gimple *stmt)
5411 {
5412 gimple_stmt_iterator gsi;
5413 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5414 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5415 gimple *oldbinrhs = binrhs;
5416 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5417 gimple *newbinrhs = NULL;
5418 class loop *loop = loop_containing_stmt (stmt);
5419 tree lhs = gimple_assign_lhs (stmt);
5420
5421 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5422 && is_reassociable_op (binrhs, rhscode, loop));
5423
5424 gsi = gsi_for_stmt (stmt);
5425
5426 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5427 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5428 gimple_assign_rhs_code (binrhs),
5429 gimple_assign_lhs (binlhs),
5430 gimple_assign_rhs2 (binrhs));
5431 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5432 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5433 gimple_set_uid (binrhs, gimple_uid (stmt));
5434
5435 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5436 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5437
5438 if (dump_file && (dump_flags & TDF_DETAILS))
5439 {
5440 fprintf (dump_file, "Linearized: ");
5441 print_gimple_stmt (dump_file, stmt, 0);
5442 }
5443
5444 reassociate_stats.linearized++;
5445 update_stmt (stmt);
5446
5447 gsi = gsi_for_stmt (oldbinrhs);
5448 reassoc_remove_stmt (&gsi);
5449 release_defs (oldbinrhs);
5450
5451 gimple_set_visited (stmt, true);
5452 gimple_set_visited (binlhs, true);
5453 gimple_set_visited (binrhs, true);
5454
5455 /* Tail recurse on the new rhs if it still needs reassociation. */
5456 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5457 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5458 want to change the algorithm while converting to tuples. */
5459 linearize_expr (stmt);
5460 }
5461
5462 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5463 it. Otherwise, return NULL. */
5464
5465 static gimple *
5466 get_single_immediate_use (tree lhs)
5467 {
5468 use_operand_p immuse;
5469 gimple *immusestmt;
5470
5471 if (TREE_CODE (lhs) == SSA_NAME
5472 && single_imm_use (lhs, &immuse, &immusestmt)
5473 && is_gimple_assign (immusestmt))
5474 return immusestmt;
5475
5476 return NULL;
5477 }
5478
5479 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5480 representing the negated value. Insertions of any necessary
5481 instructions go before GSI.
5482 This function is recursive in that, if you hand it "a_5" as the
5483 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5484 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5485
5486 static tree
5487 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5488 {
5489 gimple *negatedefstmt = NULL;
5490 tree resultofnegate;
5491 gimple_stmt_iterator gsi;
5492 unsigned int uid;
5493
5494 /* If we are trying to negate a name, defined by an add, negate the
5495 add operands instead. */
5496 if (TREE_CODE (tonegate) == SSA_NAME)
5497 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5498 if (TREE_CODE (tonegate) == SSA_NAME
5499 && is_gimple_assign (negatedefstmt)
5500 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5501 && has_single_use (gimple_assign_lhs (negatedefstmt))
5502 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5503 {
5504 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5505 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5506 tree lhs = gimple_assign_lhs (negatedefstmt);
5507 gimple *g;
5508
5509 gsi = gsi_for_stmt (negatedefstmt);
5510 rhs1 = negate_value (rhs1, &gsi);
5511
5512 gsi = gsi_for_stmt (negatedefstmt);
5513 rhs2 = negate_value (rhs2, &gsi);
5514
5515 gsi = gsi_for_stmt (negatedefstmt);
5516 lhs = make_ssa_name (TREE_TYPE (lhs));
5517 gimple_set_visited (negatedefstmt, true);
5518 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5519 gimple_set_uid (g, gimple_uid (negatedefstmt));
5520 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5521 return lhs;
5522 }
5523
5524 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5525 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5526 NULL_TREE, true, GSI_SAME_STMT);
5527 gsi = *gsip;
5528 uid = gimple_uid (gsi_stmt (gsi));
5529 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5530 {
5531 gimple *stmt = gsi_stmt (gsi);
5532 if (gimple_uid (stmt) != 0)
5533 break;
5534 gimple_set_uid (stmt, uid);
5535 }
5536 return resultofnegate;
5537 }
5538
5539 /* Return true if we should break up the subtract in STMT into an add
5540    with negate.  This is true when the subtract operands are really
5541 adds, or the subtract itself is used in an add expression. In
5542 either case, breaking up the subtract into an add with negate
5543 exposes the adds to reassociation. */
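/* For example (hypothetical statements, not from a real dump), given

     x_4 = a_1 - b_2;
     y_5 = x_4 + c_3;

   this returns true for the subtract, because its only immediate use is
   an addition; breaking it up exposes a_1, -b_2 and c_3 to the adds
   reassociation machinery.  */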
5544
5545 static bool
5546 should_break_up_subtract (gimple *stmt)
5547 {
5548 tree lhs = gimple_assign_lhs (stmt);
5549 tree binlhs = gimple_assign_rhs1 (stmt);
5550 tree binrhs = gimple_assign_rhs2 (stmt);
5551 gimple *immusestmt;
5552 class loop *loop = loop_containing_stmt (stmt);
5553
5554 if (TREE_CODE (binlhs) == SSA_NAME
5555 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5556 return true;
5557
5558 if (TREE_CODE (binrhs) == SSA_NAME
5559 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5560 return true;
5561
5562 if (TREE_CODE (lhs) == SSA_NAME
5563 && (immusestmt = get_single_immediate_use (lhs))
5564 && is_gimple_assign (immusestmt)
5565 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5566 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5567 && gimple_assign_rhs1 (immusestmt) == lhs)
5568 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5569 return true;
5570 return false;
5571 }
5572
5573 /* Transform STMT from A - B into A + -B. */
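/* A sketch of the rewrite, with invented SSA names:

     x_4 = a_1 - b_2;

   becomes

     t_7 = -b_2;
     x_4 = a_1 + t_7;

   where the negate is emitted by negate_value just before the statement.  */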
5574
5575 static void
5576 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5577 {
5578 tree rhs1 = gimple_assign_rhs1 (stmt);
5579 tree rhs2 = gimple_assign_rhs2 (stmt);
5580
5581 if (dump_file && (dump_flags & TDF_DETAILS))
5582 {
5583 fprintf (dump_file, "Breaking up subtract ");
5584 print_gimple_stmt (dump_file, stmt, 0);
5585 }
5586
5587 rhs2 = negate_value (rhs2, gsip);
5588 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5589 update_stmt (stmt);
5590 }
5591
5592 /* Determine whether STMT is a builtin call that raises an SSA name
5593 to an integer power and has only one use. If so, and this is early
5594 reassociation and unsafe math optimizations are permitted, place
5595 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5596 If any of these conditions does not hold, return FALSE. */
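/* Some illustrative classifications (statements invented for this
   comment; the pow cases additionally assume -fno-math-errno):

     t_5 = pow (x_3, 4.0);           -> true,  *BASE = x_3, *EXPONENT = 4
     t_6 = __builtin_powi (x_3, 5);  -> true,  *BASE = x_3, *EXPONENT = 5
     t_7 = pow (x_3, 2.5);           -> false, non-integral exponent
     t_8 = pow (x_3, 1.0);           -> false, exponent below 2  */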
5597
5598 static bool
5599 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5600 {
5601 tree arg1;
5602 REAL_VALUE_TYPE c, cint;
5603
5604 switch (gimple_call_combined_fn (stmt))
5605 {
5606 CASE_CFN_POW:
5607 if (flag_errno_math)
5608 return false;
5609
5610 *base = gimple_call_arg (stmt, 0);
5611 arg1 = gimple_call_arg (stmt, 1);
5612
5613 if (TREE_CODE (arg1) != REAL_CST)
5614 return false;
5615
5616 c = TREE_REAL_CST (arg1);
5617
5618 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5619 return false;
5620
5621 *exponent = real_to_integer (&c);
5622 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5623 if (!real_identical (&c, &cint))
5624 return false;
5625
5626 break;
5627
5628 CASE_CFN_POWI:
5629 *base = gimple_call_arg (stmt, 0);
5630 arg1 = gimple_call_arg (stmt, 1);
5631
5632 if (!tree_fits_shwi_p (arg1))
5633 return false;
5634
5635 *exponent = tree_to_shwi (arg1);
5636 break;
5637
5638 default:
5639 return false;
5640 }
5641
5642 /* Expanding negative exponents is generally unproductive, so we don't
5643 complicate matters with those. Exponents of zero and one should
5644 have been handled by expression folding. */
5645 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5646 return false;
5647
5648 return true;
5649 }
5650
5651 /* Try to derive and add operand entry for OP to *OPS. Return false if
5652 unsuccessful. */
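/* Two illustrative cases for a MULT_EXPR chain (invented names; each
   assumes the corresponding guards checked below are satisfied):

     OP defined by  op_7 = pow (x_3, 3.0)
       -> x_3 is recorded as a repeated factor with count 3 instead of
          adding op_7 itself;

     OP defined by  op_8 = -y_4
       -> y_4 and -1 are added as ordinary operands, so the negation can
          be reassociated like any other factor.  */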
5653
5654 static bool
5655 try_special_add_to_ops (vec<operand_entry *> *ops,
5656 enum tree_code code,
5657 tree op, gimple* def_stmt)
5658 {
5659 tree base = NULL_TREE;
5660 HOST_WIDE_INT exponent = 0;
5661
5662 if (TREE_CODE (op) != SSA_NAME
5663 || ! has_single_use (op))
5664 return false;
5665
5666 if (code == MULT_EXPR
5667 && reassoc_insert_powi_p
5668 && flag_unsafe_math_optimizations
5669 && is_gimple_call (def_stmt)
5670 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5671 {
5672 add_repeat_to_ops_vec (ops, base, exponent);
5673 gimple_set_visited (def_stmt, true);
5674 return true;
5675 }
5676 else if (code == MULT_EXPR
5677 && is_gimple_assign (def_stmt)
5678 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5679 && !HONOR_SNANS (TREE_TYPE (op))
5680 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5681 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5682 {
5683 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5684 tree cst = build_minus_one_cst (TREE_TYPE (op));
5685 add_to_ops_vec (ops, rhs1);
5686 add_to_ops_vec (ops, cst);
5687 gimple_set_visited (def_stmt, true);
5688 return true;
5689 }
5690
5691 return false;
5692 }
5693
5694 /* Recursively linearize a binary expression that is the RHS of STMT.
5695 Place the operands of the expression tree in the vector named OPS. */
5696
5697 static void
5698 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5699 bool is_associative, bool set_visited)
5700 {
5701 tree binlhs = gimple_assign_rhs1 (stmt);
5702 tree binrhs = gimple_assign_rhs2 (stmt);
5703 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5704 bool binlhsisreassoc = false;
5705 bool binrhsisreassoc = false;
5706 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5707 class loop *loop = loop_containing_stmt (stmt);
5708
5709 if (set_visited)
5710 gimple_set_visited (stmt, true);
5711
5712 if (TREE_CODE (binlhs) == SSA_NAME)
5713 {
5714 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5715 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5716 && !stmt_could_throw_p (cfun, binlhsdef));
5717 }
5718
5719 if (TREE_CODE (binrhs) == SSA_NAME)
5720 {
5721 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5722 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5723 && !stmt_could_throw_p (cfun, binrhsdef));
5724 }
5725
5726 /* If the LHS is not reassociable, but the RHS is, we need to swap
5727 them. If neither is reassociable, there is nothing we can do, so
5728 just put them in the ops vector. If the LHS is reassociable,
5729 linearize it. If both are reassociable, then linearize the RHS
5730 and the LHS. */
5731
5732 if (!binlhsisreassoc)
5733 {
5734 /* If this is not an associative operation, such as division, give up. */
5735 if (!is_associative)
5736 {
5737 add_to_ops_vec (ops, binrhs);
5738 return;
5739 }
5740
5741 if (!binrhsisreassoc)
5742 {
5743 bool swap = false;
5744 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5745 /* If we add ops for the rhs, we expect to be able to recurse
5746 to it via the lhs during expression rewrite, so swap the
5747 operands. */
5748 swap = true;
5749 else
5750 add_to_ops_vec (ops, binrhs);
5751
5752 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5753 add_to_ops_vec (ops, binlhs);
5754
5755 if (!swap)
5756 return;
5757 }
5758
5759 if (dump_file && (dump_flags & TDF_DETAILS))
5760 {
5761 fprintf (dump_file, "swapping operands of ");
5762 print_gimple_stmt (dump_file, stmt, 0);
5763 }
5764
5765 swap_ssa_operands (stmt,
5766 gimple_assign_rhs1_ptr (stmt),
5767 gimple_assign_rhs2_ptr (stmt));
5768 update_stmt (stmt);
5769
5770 if (dump_file && (dump_flags & TDF_DETAILS))
5771 {
5772 fprintf (dump_file, " is now ");
5773 print_gimple_stmt (dump_file, stmt, 0);
5774 }
5775 if (!binrhsisreassoc)
5776 return;
5777
5778 /* We want to make it so the lhs is always the reassociative op,
5779 so swap. */
5780 std::swap (binlhs, binrhs);
5781 }
5782 else if (binrhsisreassoc)
5783 {
5784 linearize_expr (stmt);
5785 binlhs = gimple_assign_rhs1 (stmt);
5786 binrhs = gimple_assign_rhs2 (stmt);
5787 }
5788
5789 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5790 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5791 rhscode, loop));
5792 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5793 is_associative, set_visited);
5794
5795 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5796 add_to_ops_vec (ops, binrhs);
5797 }
5798
5799 /* Repropagate the negates back into subtracts, since no other pass
5800 currently does it. */
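/* The simplest case handled below, with invented SSA names:

     x_3 = -a_1;
     y_4 = b_2 + x_3;

   becomes

     y_4 = b_2 - a_1;

   leaving the now unused negate for DCE to remove.  */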
5801
5802 static void
5803 repropagate_negates (void)
5804 {
5805 unsigned int i = 0;
5806 tree negate;
5807
5808 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5809 {
5810 gimple *user = get_single_immediate_use (negate);
5811
5812 if (!user || !is_gimple_assign (user))
5813 continue;
5814
5815 /* The negate operand can be either operand of a PLUS_EXPR
5816 (it can be the LHS if the RHS is a constant for example).
5817
5818 Force the negate operand to the RHS of the PLUS_EXPR, then
5819 transform the PLUS_EXPR into a MINUS_EXPR. */
5820 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5821 {
5822 /* If the negated operand appears on the LHS of the
5823 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5824 to force the negated operand to the RHS of the PLUS_EXPR. */
5825 if (gimple_assign_rhs1 (user) == negate)
5826 {
5827 swap_ssa_operands (user,
5828 gimple_assign_rhs1_ptr (user),
5829 gimple_assign_rhs2_ptr (user));
5830 }
5831
5832 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5833 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5834 if (gimple_assign_rhs2 (user) == negate)
5835 {
5836 tree rhs1 = gimple_assign_rhs1 (user);
5837 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5838 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5839 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5840 update_stmt (user);
5841 }
5842 }
5843 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5844 {
5845 if (gimple_assign_rhs1 (user) == negate)
5846 {
5847 /* We have
5848 x = -a
5849 y = x - b
5850 which we transform into
5851 x = a + b
5852 y = -x .
5853 This pushes down the negate which we possibly can merge
5854 into some other operation, hence insert it into the
5855 plus_negates vector. */
5856 gimple *feed = SSA_NAME_DEF_STMT (negate);
5857 tree a = gimple_assign_rhs1 (feed);
5858 tree b = gimple_assign_rhs2 (user);
5859 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5860 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5861 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5862 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5863 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5864 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5865 user = gsi_stmt (gsi2);
5866 update_stmt (user);
5867 reassoc_remove_stmt (&gsi);
5868 release_defs (feed);
5869 plus_negates.safe_push (gimple_assign_lhs (user));
5870 }
5871 else
5872 {
5873 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5874 rid of one operation. */
5875 gimple *feed = SSA_NAME_DEF_STMT (negate);
5876 tree a = gimple_assign_rhs1 (feed);
5877 tree rhs1 = gimple_assign_rhs1 (user);
5878 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5879 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5880 update_stmt (gsi_stmt (gsi));
5881 }
5882 }
5883 }
5884 }
5885
5886 /* Returns true if OP is of a type for which we can do reassociation.
5887 That is for integral or non-saturating fixed-point types, and for
5888 floating-point types when associative-math is enabled. */
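/* For instance, an unsigned int operand qualifies (its overflow wraps),
   a signed int qualifies only when its overflow wraps too (e.g. with
   -fwrapv), and a double qualifies only under -fassociative-math.  */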
5889
5890 static bool
5891 can_reassociate_p (tree op)
5892 {
5893 tree type = TREE_TYPE (op);
5894 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5895 return false;
5896 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5897 || NON_SAT_FIXED_POINT_TYPE_P (type)
5898 || (flag_associative_math && FLOAT_TYPE_P (type)))
5899 return true;
5900 return false;
5901 }
5902
5903 /* Break up subtract operations in block BB.
5904
5905 We do this top down because we don't know whether the subtract is
5906 part of a possible chain of reassociation except at the top.
5907
5908 IE given
5909 d = f + g
5910 c = a + e
5911 b = c - d
5912 q = b - r
5913 k = t - q
5914
5915 we want to break up k = t - q, but we won't until we've transformed q
5916 = b - r, which won't be broken up until we transform b = c - d.
5917
5918 En passant, clear the GIMPLE visited flag on every statement
5919 and set UIDs within each basic block. */
5920
5921 static void
5922 break_up_subtract_bb (basic_block bb)
5923 {
5924 gimple_stmt_iterator gsi;
5925 basic_block son;
5926 unsigned int uid = 1;
5927
5928 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5929 {
5930 gimple *stmt = gsi_stmt (gsi);
5931 gimple_set_visited (stmt, false);
5932 gimple_set_uid (stmt, uid++);
5933
5934 if (!is_gimple_assign (stmt)
5935 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5936 continue;
5937
5938 /* Look for simple gimple subtract operations. */
5939 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5940 {
5941 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5942 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5943 continue;
5944
5945 /* Check for a subtract used only in an addition. If this
5946 is the case, transform it into add of a negate for better
5947 reassociation. IE transform C = A-B into C = A + -B if C
5948 is only used in an addition. */
5949 if (should_break_up_subtract (stmt))
5950 break_up_subtract (stmt, &gsi);
5951 }
5952 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5953 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5954 plus_negates.safe_push (gimple_assign_lhs (stmt));
5955 }
5956 for (son = first_dom_son (CDI_DOMINATORS, bb);
5957 son;
5958 son = next_dom_son (CDI_DOMINATORS, son))
5959 break_up_subtract_bb (son);
5960 }
5961
5962 /* Used for repeated factor analysis. */
5963 struct repeat_factor
5964 {
5965 /* An SSA name that occurs in a multiply chain. */
5966 tree factor;
5967
5968 /* Cached rank of the factor. */
5969 unsigned rank;
5970
5971 /* Number of occurrences of the factor in the chain. */
5972 HOST_WIDE_INT count;
5973
5974 /* An SSA name representing the product of this factor and
5975 all factors appearing later in the repeated factor vector. */
5976 tree repr;
5977 };
5978
5979
5980 static vec<repeat_factor> repeat_factor_vec;
5981
5982 /* Used for sorting the repeat factor vector. Sort primarily by
5983 ascending occurrence count, secondarily by descending rank. */
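/* E.g. factors with (count, rank) of x:(3, 5), y:(2, 9) and z:(3, 7)
   sort as y, z, x: y has the lowest count, and the tie between x and z
   is broken in favor of the higher rank.  (Values invented.)  */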
5984
5985 static int
5986 compare_repeat_factors (const void *x1, const void *x2)
5987 {
5988 const repeat_factor *rf1 = (const repeat_factor *) x1;
5989 const repeat_factor *rf2 = (const repeat_factor *) x2;
5990
5991 if (rf1->count != rf2->count)
5992 return rf1->count - rf2->count;
5993
5994 return rf2->rank - rf1->rank;
5995 }
5996
5997 /* Look for repeated operands in OPS in the multiply tree rooted at
5998 STMT. Replace them with an optimal sequence of multiplies and powi
5999 builtin calls, and remove the used operands from OPS. Return an
6000 SSA name representing the value of the replacement sequence. */
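/* A small end-to-end sketch (invented names, a floating-point type with
   __builtin_powi available): for x * x * x * y * y the repeated factors
   are x (count 3) and y (count 2), and the code below emits roughly

     t_1 = y * x;
     t_2 = __builtin_powi (t_1, 2);
     t_3 = t_2 * x;

   computing x*x*x*y*y in three operations instead of four multiplies.  */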
6001
6002 static tree
6003 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6004 {
6005 unsigned i, j, vec_len;
6006 int ii;
6007 operand_entry *oe;
6008 repeat_factor *rf1, *rf2;
6009 repeat_factor rfnew;
6010 tree result = NULL_TREE;
6011 tree target_ssa, iter_result;
6012 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6013 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6014 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6015 gimple *mul_stmt, *pow_stmt;
6016
6017 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6018 target, unless type is integral. */
6019 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6020 return NULL_TREE;
6021
6022 /* Allocate the repeated factor vector. */
6023 repeat_factor_vec.create (10);
6024
6025 /* Scan the OPS vector for all SSA names in the product and build
6026 up a vector of occurrence counts for each factor. */
6027 FOR_EACH_VEC_ELT (*ops, i, oe)
6028 {
6029 if (TREE_CODE (oe->op) == SSA_NAME)
6030 {
6031 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6032 {
6033 if (rf1->factor == oe->op)
6034 {
6035 rf1->count += oe->count;
6036 break;
6037 }
6038 }
6039
6040 if (j >= repeat_factor_vec.length ())
6041 {
6042 rfnew.factor = oe->op;
6043 rfnew.rank = oe->rank;
6044 rfnew.count = oe->count;
6045 rfnew.repr = NULL_TREE;
6046 repeat_factor_vec.safe_push (rfnew);
6047 }
6048 }
6049 }
6050
6051 /* Sort the repeated factor vector by (a) increasing occurrence count,
6052 and (b) decreasing rank. */
6053 repeat_factor_vec.qsort (compare_repeat_factors);
6054
6055 /* It is generally best to combine as many base factors as possible
6056 into a product before applying __builtin_powi to the result.
6057 However, the sort order chosen for the repeated factor vector
6058 allows us to cache partial results for the product of the base
6059 factors for subsequent use. When we already have a cached partial
6060 result from a previous iteration, it is best to make use of it
6061 before looking for another __builtin_powi opportunity.
6062
6063 As an example, consider x * x * y * y * y * z * z * z * z.
6064 We want to first compose the product x * y * z, raise it to the
6065 second power, then multiply this by y * z, and finally multiply
6066 by z. This can be done in 5 multiplies provided we cache y * z
6067 for use in both expressions:
6068
6069 t1 = y * z
6070 t2 = t1 * x
6071 t3 = t2 * t2
6072 t4 = t1 * t3
6073 result = t4 * z
6074
6075 If we instead ignored the cached y * z and first multiplied by
6076 the __builtin_powi opportunity z * z, we would get the inferior:
6077
6078 t1 = y * z
6079 t2 = t1 * x
6080 t3 = t2 * t2
6081 t4 = z * z
6082 t5 = t3 * t4
6083 result = t5 * y */
6084
6085 vec_len = repeat_factor_vec.length ();
6086
6087 /* Repeatedly look for opportunities to create a builtin_powi call. */
6088 while (true)
6089 {
6090 HOST_WIDE_INT power;
6091
6092 /* First look for the largest cached product of factors from
6093 preceding iterations. If found, create a builtin_powi for
6094 it if the minimum occurrence count for its factors is at
6095 least 2, or just use this cached product as our next
6096 multiplicand if the minimum occurrence count is 1. */
6097 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6098 {
6099 if (rf1->repr && rf1->count > 0)
6100 break;
6101 }
6102
6103 if (j < vec_len)
6104 {
6105 power = rf1->count;
6106
6107 if (power == 1)
6108 {
6109 iter_result = rf1->repr;
6110
6111 if (dump_file && (dump_flags & TDF_DETAILS))
6112 {
6113 unsigned elt;
6114 repeat_factor *rf;
6115 fputs ("Multiplying by cached product ", dump_file);
6116 for (elt = j; elt < vec_len; elt++)
6117 {
6118 rf = &repeat_factor_vec[elt];
6119 print_generic_expr (dump_file, rf->factor);
6120 if (elt < vec_len - 1)
6121 fputs (" * ", dump_file);
6122 }
6123 fputs ("\n", dump_file);
6124 }
6125 }
6126 else
6127 {
6128 if (INTEGRAL_TYPE_P (type))
6129 {
6130 gcc_assert (power > 1);
6131 gimple_stmt_iterator gsip = gsi;
6132 gsi_prev (&gsip);
6133 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6134 rf1->repr, power);
6135 gimple_stmt_iterator gsic = gsi;
6136 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6137 {
6138 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6139 gimple_set_visited (gsi_stmt (gsic), true);
6140 gsi_prev (&gsic);
6141 }
6142 }
6143 else
6144 {
6145 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6146 pow_stmt
6147 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6148 build_int_cst (integer_type_node,
6149 power));
6150 gimple_call_set_lhs (pow_stmt, iter_result);
6151 gimple_set_location (pow_stmt, gimple_location (stmt));
6152 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6153 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6154 }
6155
6156 if (dump_file && (dump_flags & TDF_DETAILS))
6157 {
6158 unsigned elt;
6159 repeat_factor *rf;
6160 fputs ("Building __builtin_pow call for cached product (",
6161 dump_file);
6162 for (elt = j; elt < vec_len; elt++)
6163 {
6164 rf = &repeat_factor_vec[elt];
6165 print_generic_expr (dump_file, rf->factor);
6166 if (elt < vec_len - 1)
6167 fputs (" * ", dump_file);
6168 }
6169 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6170 power);
6171 }
6172 }
6173 }
6174 else
6175 {
6176 /* Otherwise, find the first factor in the repeated factor
6177 vector whose occurrence count is at least 2. If no such
6178 factor exists, there are no builtin_powi opportunities
6179 remaining. */
6180 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6181 {
6182 if (rf1->count >= 2)
6183 break;
6184 }
6185
6186 if (j >= vec_len)
6187 break;
6188
6189 power = rf1->count;
6190
6191 if (dump_file && (dump_flags & TDF_DETAILS))
6192 {
6193 unsigned elt;
6194 repeat_factor *rf;
6195 fputs ("Building __builtin_pow call for (", dump_file);
6196 for (elt = j; elt < vec_len; elt++)
6197 {
6198 rf = &repeat_factor_vec[elt];
6199 print_generic_expr (dump_file, rf->factor);
6200 if (elt < vec_len - 1)
6201 fputs (" * ", dump_file);
6202 }
6203 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6204 }
6205
6206 reassociate_stats.pows_created++;
6207
6208 /* Visit each element of the vector in reverse order (so that
6209 high-occurrence elements are visited first, and within the
6210 same occurrence count, lower-ranked elements are visited
6211 first). Form a linear product of all elements in this order
6212 whose occurrence count is at least that of element J.
6213 Record the SSA name representing the product of each element
6214 with all subsequent elements in the vector. */
6215 if (j == vec_len - 1)
6216 rf1->repr = rf1->factor;
6217 else
6218 {
6219 for (ii = vec_len - 2; ii >= (int)j; ii--)
6220 {
6221 tree op1, op2;
6222
6223 rf1 = &repeat_factor_vec[ii];
6224 rf2 = &repeat_factor_vec[ii + 1];
6225
6226 /* Init the last factor's representative to be itself. */
6227 if (!rf2->repr)
6228 rf2->repr = rf2->factor;
6229
6230 op1 = rf1->factor;
6231 op2 = rf2->repr;
6232
6233 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6234 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6235 op1, op2);
6236 gimple_set_location (mul_stmt, gimple_location (stmt));
6237 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6238 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6239 rf1->repr = target_ssa;
6240
6241 /* Don't reprocess the multiply we just introduced. */
6242 gimple_set_visited (mul_stmt, true);
6243 }
6244 }
6245
6246 /* Form a call to __builtin_powi for the maximum product
6247 just formed, raised to the power obtained earlier. */
6248 rf1 = &repeat_factor_vec[j];
6249 if (INTEGRAL_TYPE_P (type))
6250 {
6251 gcc_assert (power > 1);
6252 gimple_stmt_iterator gsip = gsi;
6253 gsi_prev (&gsip);
6254 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6255 rf1->repr, power);
6256 gimple_stmt_iterator gsic = gsi;
6257 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6258 {
6259 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6260 gimple_set_visited (gsi_stmt (gsic), true);
6261 gsi_prev (&gsic);
6262 }
6263 }
6264 else
6265 {
6266 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6267 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6268 build_int_cst (integer_type_node,
6269 power));
6270 gimple_call_set_lhs (pow_stmt, iter_result);
6271 gimple_set_location (pow_stmt, gimple_location (stmt));
6272 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6273 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6274 }
6275 }
6276
6277 /* If we previously formed at least one other builtin_powi call,
6278 form the product of this one and those others. */
6279 if (result)
6280 {
6281 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6282 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6283 result, iter_result);
6284 gimple_set_location (mul_stmt, gimple_location (stmt));
6285 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6286 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6287 gimple_set_visited (mul_stmt, true);
6288 result = new_result;
6289 }
6290 else
6291 result = iter_result;
6292
6293 /* Decrement the occurrence count of each element in the product
6294 by the count found above, and remove this many copies of each
6295 factor from OPS. */
6296 for (i = j; i < vec_len; i++)
6297 {
6298 unsigned k = power;
6299 unsigned n;
6300
6301 rf1 = &repeat_factor_vec[i];
6302 rf1->count -= power;
6303
6304 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6305 {
6306 if (oe->op == rf1->factor)
6307 {
6308 if (oe->count <= k)
6309 {
6310 ops->ordered_remove (n);
6311 k -= oe->count;
6312
6313 if (k == 0)
6314 break;
6315 }
6316 else
6317 {
6318 oe->count -= k;
6319 break;
6320 }
6321 }
6322 }
6323 }
6324 }
6325
6326 /* At this point all elements in the repeated factor vector have a
6327 remaining occurrence count of 0 or 1, and those with a count of 1
6328 don't have cached representatives. Re-sort the ops vector and
6329 clean up. */
6330 ops->qsort (sort_by_operand_rank);
6331 repeat_factor_vec.release ();
6332
6333 /* Return the final product computed herein. Note that there may
6334 still be some elements with single occurrence count left in OPS;
6335 those will be handled by the normal reassociation logic. */
6336 return result;
6337 }
6338
6339 /* Attempt to optimize
6340 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6341 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
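/* E.g. (with invented operands) a multiply chain containing
   copysign (3.0, y_2) and the constant 2.0 is rewritten to use
   copysign (6.0, y_2), and the constant is dropped from OPS; had the
   constant been -2.0, the new copysign result would additionally be
   negated.  */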
6342
6343 static void
6344 attempt_builtin_copysign (vec<operand_entry *> *ops)
6345 {
6346 operand_entry *oe;
6347 unsigned int i;
6348 unsigned int length = ops->length ();
6349 tree cst = ops->last ()->op;
6350
6351 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6352 return;
6353
6354 FOR_EACH_VEC_ELT (*ops, i, oe)
6355 {
6356 if (TREE_CODE (oe->op) == SSA_NAME
6357 && has_single_use (oe->op))
6358 {
6359 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6360 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6361 {
6362 tree arg0, arg1;
6363 switch (gimple_call_combined_fn (old_call))
6364 {
6365 CASE_CFN_COPYSIGN:
6366 CASE_CFN_COPYSIGN_FN:
6367 arg0 = gimple_call_arg (old_call, 0);
6368 arg1 = gimple_call_arg (old_call, 1);
6369 /* The first argument of copysign must be a constant,
6370 otherwise there's nothing to do. */
6371 if (TREE_CODE (arg0) == REAL_CST)
6372 {
6373 tree type = TREE_TYPE (arg0);
6374 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6375 /* If we couldn't fold to a single constant, skip it.
6376 That happens e.g. for inexact multiplication when
6377 -frounding-math. */
6378 if (mul == NULL_TREE)
6379 break;
6380 /* Instead of adjusting OLD_CALL, let's build a new
6381 call to not leak the LHS and prevent keeping bogus
6382 debug statements. DCE will clean up the old call. */
6383 gcall *new_call;
6384 if (gimple_call_internal_p (old_call))
6385 new_call = gimple_build_call_internal
6386 (IFN_COPYSIGN, 2, mul, arg1);
6387 else
6388 new_call = gimple_build_call
6389 (gimple_call_fndecl (old_call), 2, mul, arg1);
6390 tree lhs = make_ssa_name (type);
6391 gimple_call_set_lhs (new_call, lhs);
6392 gimple_set_location (new_call,
6393 gimple_location (old_call));
6394 insert_stmt_after (new_call, old_call);
6395 /* We've used the constant, get rid of it. */
6396 ops->pop ();
6397 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6398 /* Handle the CST1 < 0 case by negating the result. */
6399 if (cst1_neg)
6400 {
6401 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6402 gimple *negate_stmt
6403 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6404 insert_stmt_after (negate_stmt, new_call);
6405 oe->op = negrhs;
6406 }
6407 else
6408 oe->op = lhs;
6409 if (dump_file && (dump_flags & TDF_DETAILS))
6410 {
6411 fprintf (dump_file, "Optimizing copysign: ");
6412 print_generic_expr (dump_file, cst);
6413 fprintf (dump_file, " * COPYSIGN (");
6414 print_generic_expr (dump_file, arg0);
6415 fprintf (dump_file, ", ");
6416 print_generic_expr (dump_file, arg1);
6417 fprintf (dump_file, ") into %sCOPYSIGN (",
6418 cst1_neg ? "-" : "");
6419 print_generic_expr (dump_file, mul);
6420 fprintf (dump_file, ", ");
6421 print_generic_expr (dump_file, arg1);
6422 fprintf (dump_file, ")\n");
6423 }
6424 return;
6425 }
6426 break;
6427 default:
6428 break;
6429 }
6430 }
6431 }
6432 }
6433 }
6434
6435 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6436
6437 static void
6438 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6439 {
6440 tree rhs1;
6441
6442 if (dump_file && (dump_flags & TDF_DETAILS))
6443 {
6444 fprintf (dump_file, "Transforming ");
6445 print_gimple_stmt (dump_file, stmt, 0);
6446 }
6447
6448 rhs1 = gimple_assign_rhs1 (stmt);
6449 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6450 update_stmt (stmt);
6451 remove_visited_stmt_chain (rhs1);
6452
6453 if (dump_file && (dump_flags & TDF_DETAILS))
6454 {
6455 fprintf (dump_file, " into ");
6456 print_gimple_stmt (dump_file, stmt, 0);
6457 }
6458 }
6459
6460 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6461
6462 static void
6463 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6464 tree rhs1, tree rhs2)
6465 {
6466 if (dump_file && (dump_flags & TDF_DETAILS))
6467 {
6468 fprintf (dump_file, "Transforming ");
6469 print_gimple_stmt (dump_file, stmt, 0);
6470 }
6471
6472 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6473 update_stmt (gsi_stmt (*gsi));
6474 remove_visited_stmt_chain (rhs1);
6475
6476 if (dump_file && (dump_flags & TDF_DETAILS))
6477 {
6478 fprintf (dump_file, " into ");
6479 print_gimple_stmt (dump_file, stmt, 0);
6480 }
6481 }
6482
6483 /* Reassociate expressions in basic block BB and its children in the
6484 post-dominator tree.
6485
6486 Bubble up return status from maybe_optimize_range_tests. */
6487
6488 static bool
6489 reassociate_bb (basic_block bb)
6490 {
6491 gimple_stmt_iterator gsi;
6492 basic_block son;
6493 gimple *stmt = last_stmt (bb);
6494 bool cfg_cleanup_needed = false;
6495
6496 if (stmt && !gimple_visited_p (stmt))
6497 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6498
6499 bool do_prev = false;
6500 for (gsi = gsi_last_bb (bb);
6501 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6502 {
6503 do_prev = true;
6504 stmt = gsi_stmt (gsi);
6505
6506 if (is_gimple_assign (stmt)
6507 && !stmt_could_throw_p (cfun, stmt))
6508 {
6509 tree lhs, rhs1, rhs2;
6510 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6511
6512 /* If this was part of an already processed statement,
6513 we don't need to touch it again. */
6514 if (gimple_visited_p (stmt))
6515 {
6516 /* This statement might have become dead because of previous
6517 reassociations. */
6518 if (has_zero_uses (gimple_get_lhs (stmt)))
6519 {
6520 reassoc_remove_stmt (&gsi);
6521 release_defs (stmt);
6522 /* We might end up removing the last stmt above which
6523 places the iterator to the end of the sequence.
6524 Reset it to the last stmt in this case and make sure
6525 we don't do gsi_prev in that case. */
6526 if (gsi_end_p (gsi))
6527 {
6528 gsi = gsi_last_bb (bb);
6529 do_prev = false;
6530 }
6531 }
6532 continue;
6533 }
6534
6535 /* If this is not a gimple binary expression, there is
6536 nothing for us to do with it. */
6537 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6538 continue;
6539
6540 lhs = gimple_assign_lhs (stmt);
6541 rhs1 = gimple_assign_rhs1 (stmt);
6542 rhs2 = gimple_assign_rhs2 (stmt);
6543
6544 /* For operations other than bitwise and min/max we can't
6545 reassociate all types. Verify that here. */
6546 if (rhs_code != BIT_IOR_EXPR
6547 && rhs_code != BIT_AND_EXPR
6548 && rhs_code != BIT_XOR_EXPR
6549 && rhs_code != MIN_EXPR
6550 && rhs_code != MAX_EXPR
6551 && (!can_reassociate_p (lhs)
6552 || !can_reassociate_p (rhs1)
6553 || !can_reassociate_p (rhs2)))
6554 continue;
6555
6556 if (associative_tree_code (rhs_code))
6557 {
6558 auto_vec<operand_entry *> ops;
6559 tree powi_result = NULL_TREE;
6560 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6561
6562 /* There may be no immediate uses left by the time we
6563 get here because we may have eliminated them all. */
6564 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6565 continue;
6566
6567 gimple_set_visited (stmt, true);
6568 linearize_expr_tree (&ops, stmt, true, true);
6569 ops.qsort (sort_by_operand_rank);
6570 int orig_len = ops.length ();
6571 optimize_ops_list (rhs_code, &ops);
6572 if (undistribute_ops_list (rhs_code, &ops,
6573 loop_containing_stmt (stmt)))
6574 {
6575 ops.qsort (sort_by_operand_rank);
6576 optimize_ops_list (rhs_code, &ops);
6577 }
6578 if (undistribute_bitref_for_vector (rhs_code, &ops,
6579 loop_containing_stmt (stmt)))
6580 {
6581 ops.qsort (sort_by_operand_rank);
6582 optimize_ops_list (rhs_code, &ops);
6583 }
6584 if (rhs_code == PLUS_EXPR
6585 && transform_add_to_multiply (&ops))
6586 ops.qsort (sort_by_operand_rank);
6587
6588 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6589 {
6590 if (is_vector)
6591 optimize_vec_cond_expr (rhs_code, &ops);
6592 else
6593 optimize_range_tests (rhs_code, &ops, NULL);
6594 }
6595
6596 if (rhs_code == MULT_EXPR && !is_vector)
6597 {
6598 attempt_builtin_copysign (&ops);
6599
6600 if (reassoc_insert_powi_p
6601 && (flag_unsafe_math_optimizations
6602 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6603 powi_result = attempt_builtin_powi (stmt, &ops);
6604 }
6605
6606 operand_entry *last;
6607 bool negate_result = false;
6608 if (ops.length () > 1
6609 && rhs_code == MULT_EXPR)
6610 {
6611 last = ops.last ();
6612 if ((integer_minus_onep (last->op)
6613 || real_minus_onep (last->op))
6614 && !HONOR_SNANS (TREE_TYPE (lhs))
6615 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6616 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6617 {
6618 ops.pop ();
6619 negate_result = true;
6620 }
6621 }
6622
6623 tree new_lhs = lhs;
6624 /* If the operand vector is now empty, all operands were
6625 consumed by the __builtin_powi optimization. */
6626 if (ops.length () == 0)
6627 transform_stmt_to_copy (&gsi, stmt, powi_result);
6628 else if (ops.length () == 1)
6629 {
6630 tree last_op = ops.last ()->op;
6631
6632 /* If the stmt that defines the operand has to be inserted, insert it
6633 before the use. */
6634 if (ops.last ()->stmt_to_insert)
6635 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6636 if (powi_result)
6637 transform_stmt_to_multiply (&gsi, stmt, last_op,
6638 powi_result);
6639 else
6640 transform_stmt_to_copy (&gsi, stmt, last_op);
6641 }
6642 else
6643 {
6644 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6645 int ops_num = ops.length ();
6646 int width;
6647
6648 /* For binary bit operations, if there are at least 3
6649 operands and the last operand in OPS is a constant,
6650 move it to the front. This helps ensure that we generate
6651 (X & Y) & C rather than (X & C) & Y. The former will
6652 often match a canonical bit test when we get to RTL. */
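/* For instance, if the operand list is { x_1, y_2, 0xff } at this
   point, it becomes { 0xff, y_2, x_1 } (names invented).  */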
6653 if (ops.length () > 2
6654 && (rhs_code == BIT_AND_EXPR
6655 || rhs_code == BIT_IOR_EXPR
6656 || rhs_code == BIT_XOR_EXPR)
6657 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6658 std::swap (*ops[0], *ops[ops_num - 1]);
6659
6660 /* Only rewrite the expression tree into a parallel form in the
6661 last reassoc pass, to avoid useless back-and-forth work with the
6662 initial linearization. */
6663 if (!reassoc_insert_powi_p
6664 && ops.length () > 3
6665 && (width = get_reassociation_width (ops_num, rhs_code,
6666 mode)) > 1)
6667 {
6668 if (dump_file && (dump_flags & TDF_DETAILS))
6669 fprintf (dump_file,
6670 "Width = %d was chosen for reassociation\n",
6671 width);
6672 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6673 width, ops);
6674 }
6675 else
6676 {
6677 /* When there are three operands left, we want
6678 to make sure the ones that get the double
6679 binary op are chosen wisely. */
6680 int len = ops.length ();
6681 if (len >= 3)
6682 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6683
6684 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6685 powi_result != NULL
6686 || negate_result,
6687 len != orig_len);
6688 }
6689
6690 /* If we combined some repeated factors into a
6691 __builtin_powi call, multiply that result by the
6692 reassociated operands. */
6693 if (powi_result)
6694 {
6695 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6696 tree type = TREE_TYPE (lhs);
6697 tree target_ssa = make_temp_ssa_name (type, NULL,
6698 "reassocpow");
6699 gimple_set_lhs (lhs_stmt, target_ssa);
6700 update_stmt (lhs_stmt);
6701 if (lhs != new_lhs)
6702 {
6703 target_ssa = new_lhs;
6704 new_lhs = lhs;
6705 }
6706 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6707 powi_result, target_ssa);
6708 gimple_set_location (mul_stmt, gimple_location (stmt));
6709 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6710 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6711 }
6712 }
6713
6714 if (negate_result)
6715 {
6716 stmt = SSA_NAME_DEF_STMT (lhs);
6717 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6718 gimple_set_lhs (stmt, tmp);
6719 if (lhs != new_lhs)
6720 tmp = new_lhs;
6721 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6722 tmp);
6723 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6724 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6725 update_stmt (stmt);
6726 }
6727 }
6728 }
6729 }
6730 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6731 son;
6732 son = next_dom_son (CDI_POST_DOMINATORS, son))
6733 cfg_cleanup_needed |= reassociate_bb (son);
6734
6735 return cfg_cleanup_needed;
6736 }
6737
6738 /* Add jumps around shifts for range tests turned into bit tests.
6739 For each SSA_NAME VAR we have code like:
6740 VAR = ...; // final stmt of range comparison
6741 // bit test here...;
6742 OTHERVAR = ...; // final stmt of the bit test sequence
6743 RES = VAR | OTHERVAR;
6744 Turn the above into:
6745 VAR = ...;
6746 if (VAR != 0)
6747 goto <l3>;
6748 else
6749 goto <l2>;
6750 <l2>:
6751 // bit test here...;
6752 OTHERVAR = ...;
6753 <l3>:
6754 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6755
6756 static void
6757 branch_fixup (void)
6758 {
6759 tree var;
6760 unsigned int i;
6761
6762 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6763 {
6764 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6765 gimple *use_stmt;
6766 use_operand_p use;
6767 bool ok = single_imm_use (var, &use, &use_stmt);
6768 gcc_assert (ok
6769 && is_gimple_assign (use_stmt)
6770 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6771 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6772
6773 basic_block cond_bb = gimple_bb (def_stmt);
6774 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6775 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6776
6777 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6778 gimple *g = gimple_build_cond (NE_EXPR, var,
6779 build_zero_cst (TREE_TYPE (var)),
6780 NULL_TREE, NULL_TREE);
6781 location_t loc = gimple_location (use_stmt);
6782 gimple_set_location (g, loc);
6783 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6784
6785 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6786 etrue->probability = profile_probability::even ();
6787 edge efalse = find_edge (cond_bb, then_bb);
6788 efalse->flags = EDGE_FALSE_VALUE;
6789 efalse->probability -= etrue->probability;
6790 then_bb->count -= etrue->count ();
6791
6792 tree othervar = NULL_TREE;
6793 if (gimple_assign_rhs1 (use_stmt) == var)
6794 othervar = gimple_assign_rhs2 (use_stmt);
6795 else if (gimple_assign_rhs2 (use_stmt) == var)
6796 othervar = gimple_assign_rhs1 (use_stmt);
6797 else
6798 gcc_unreachable ();
6799 tree lhs = gimple_assign_lhs (use_stmt);
6800 gphi *phi = create_phi_node (lhs, merge_bb);
6801 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6802 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6803 gsi = gsi_for_stmt (use_stmt);
6804 gsi_remove (&gsi, true);
6805
6806 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6807 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6808 }
6809 reassoc_branch_fixups.release ();
6810 }
6811
6812 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6813 void debug_ops_vector (vec<operand_entry *> ops);
6814
6815 /* Dump the operand entry vector OPS to FILE. */
6816
6817 void
6818 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6819 {
6820 operand_entry *oe;
6821 unsigned int i;
6822
6823 FOR_EACH_VEC_ELT (ops, i, oe)
6824 {
6825 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6826 print_generic_expr (file, oe->op);
6827 fprintf (file, "\n");
6828 }
6829 }
6830
6831 /* Dump the operand entry vector OPS to STDERR. */
6832
6833 DEBUG_FUNCTION void
6834 debug_ops_vector (vec<operand_entry *> ops)
6835 {
6836 dump_ops_vector (stderr, ops);
6837 }
6838
6839 /* Bubble up return status from reassociate_bb. */
6840
6841 static bool
6842 do_reassoc (void)
6843 {
6844 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6845 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6846 }
6847
6848 /* Initialize the reassociation pass. */
6849
6850 static void
6851 init_reassoc (void)
6852 {
6853 int i;
6854 int64_t rank = 2;
6855 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6856
6857 /* Find the loops, so that we can prevent moving calculations in
6858 them. */
6859 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6860
6861 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6862
6863 next_operand_entry_id = 0;
6864
6865 /* A reverse post-order (RPO) walk of the CFG will give us something
6866 where deeper loops come later. */
6867 pre_and_rev_post_order_compute (NULL, bbs, false);
6868 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6869 operand_rank = new hash_map<tree, int64_t>;
6870
6871 /* Give each default definition a distinct rank. This includes
6872 parameters and the static chain. Walk backwards over all
6873 SSA names so that we get proper rank ordering according
6874 to tree_swap_operands_p. */
6875 for (i = num_ssa_names - 1; i > 0; --i)
6876 {
6877 tree name = ssa_name (i);
6878 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6879 insert_operand_rank (name, ++rank);
6880 }
6881
6882 /* Set up rank for each BB. */
6883 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6884 bb_rank[bbs[i]] = ++rank << 16;
6885
6886 free (bbs);
6887 calculate_dominance_info (CDI_POST_DOMINATORS);
6888 plus_negates = vNULL;
6889 }
6890
6891 /* Cleanup after the reassociation pass, and print stats if
6892 requested. */
6893
6894 static void
6895 fini_reassoc (void)
6896 {
6897 statistics_counter_event (cfun, "Linearized",
6898 reassociate_stats.linearized);
6899 statistics_counter_event (cfun, "Constants eliminated",
6900 reassociate_stats.constants_eliminated);
6901 statistics_counter_event (cfun, "Ops eliminated",
6902 reassociate_stats.ops_eliminated);
6903 statistics_counter_event (cfun, "Statements rewritten",
6904 reassociate_stats.rewritten);
6905 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6906 reassociate_stats.pows_encountered);
6907 statistics_counter_event (cfun, "Built-in powi calls created",
6908 reassociate_stats.pows_created);
6909
6910 delete operand_rank;
6911 operand_entry_pool.release ();
6912 free (bb_rank);
6913 plus_negates.release ();
6914 free_dominance_info (CDI_POST_DOMINATORS);
6915 loop_optimizer_finalize ();
6916 }
6917
6918 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6919 insertion of __builtin_powi calls.
6920
6921 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6922 optimization of a gimple conditional. Otherwise returns zero. */
6923
6924 static unsigned int
6925 execute_reassoc (bool insert_powi_p)
6926 {
6927 reassoc_insert_powi_p = insert_powi_p;
6928
6929 init_reassoc ();
6930
6931 bool cfg_cleanup_needed;
6932 cfg_cleanup_needed = do_reassoc ();
6933 repropagate_negates ();
6934 branch_fixup ();
6935
6936 fini_reassoc ();
6937 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6938 }
6939
6940 namespace {
6941
6942 const pass_data pass_data_reassoc =
6943 {
6944 GIMPLE_PASS, /* type */
6945 "reassoc", /* name */
6946 OPTGROUP_NONE, /* optinfo_flags */
6947 TV_TREE_REASSOC, /* tv_id */
6948 ( PROP_cfg | PROP_ssa ), /* properties_required */
6949 0, /* properties_provided */
6950 0, /* properties_destroyed */
6951 0, /* todo_flags_start */
6952 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6953 };
6954
6955 class pass_reassoc : public gimple_opt_pass
6956 {
6957 public:
6958 pass_reassoc (gcc::context *ctxt)
6959 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6960 {}
6961
6962 /* opt_pass methods: */
6963 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6964 void set_pass_param (unsigned int n, bool param)
6965 {
6966 gcc_assert (n == 0);
6967 insert_powi_p = param;
6968 }
6969 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6970 virtual unsigned int execute (function *)
6971 { return execute_reassoc (insert_powi_p); }
6972
6973 private:
6974 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6975 point 3a in the pass header comment. */
6976 bool insert_powi_p;
6977 }; // class pass_reassoc
6978
6979 } // anon namespace
6980
6981 gimple_opt_pass *
6982 make_pass_reassoc (gcc::context *ctxt)
6983 {
6984 return new pass_reassoc (ctxt);
6985 }