1 /* Reassociation for trees.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "gimple-fold.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
46 #include "tree-ssa-loop.h"
49 #include "langhooks.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
57 /* This is a simple global reassociation pass. It is, in part, based
58 on the LLVM pass of the same name (They do some things more/less
59 than we do, in different orders, etc).
61 It consists of five steps:
63 1. Breaking up subtract operations into addition + negate, where
64 it would promote the reassociation of adds.
66 2. Left linearization of the expression trees, so that (A+B)+(C+D)
67 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
68 During linearization, we place the operands of the binary
69 expressions into a vector of operand_entry_*
71 3. Optimization of the operand lists, eliminating things like a +
-a, a & a, etc.
74 3a. Combine repeated factors with the same occurrence counts
75 into a __builtin_powi call that will later be optimized into
76 an optimal number of multiplies.
78 4. Rewrite the expression trees we linearized and optimized so
79 they are in proper rank order.
81 5. Repropagate negates, as nothing else will clean it up ATM.
83 A bit of theory on #4, since nobody seems to write anything down
84 about why it makes sense to do it the way they do it:
86 We could do this much nicer theoretically, but don't (for reasons
87 explained after how to do it theoretically nice :P).
89 In order to promote the most redundancy elimination, you want
90 binary expressions whose operands are the same rank (or
91 preferably, the same value) exposed to the redundancy eliminator,
92 for possible elimination.
94 So the way to do this if we really cared, is to build the new op
95 tree from the leaves to the roots, merging as you go, and putting the
96 new op on the end of the worklist, until you are left with one
97 thing on the worklist.
99 IE if you have to rewrite the following set of operands (listed with
100 rank in parentheses), with opcode PLUS_EXPR:
102 a (1), b (1), c (1), d (2), e (2)
105 We start with our merge worklist empty, and the ops list with all of
108 You want to first merge all leaves of the same rank, as much as
111 So first build a binary op of
113 mergetmp = a + b, and put "mergetmp" on the merge worklist.
115 Because there is no three operand form of PLUS_EXPR, c is not going to
116 be exposed to redundancy elimination as a rank 1 operand.
118 So you might as well throw it on the merge worklist (you could also
119 consider it to now be a rank two operand, and merge it with d and e,
120 but in this case, you then have evicted e from a binary op. So at
121 least in this situation, you can't win.)
123 Then build a binary op of d + e
126 and put mergetmp2 on the merge worklist.
128 so merge worklist = {mergetmp, c, mergetmp2}
130 Continue building binary ops of these operations until you have only
131 one operation left on the worklist.
136 mergetmp3 = mergetmp + c
138 worklist = {mergetmp2, mergetmp3}
140 mergetmp4 = mergetmp2 + mergetmp3
142 worklist = {mergetmp4}
144 because we have one operation left, we can now just set the original
145 statement equal to the result of that operation.
147 This will at least expose a + b and d + e to redundancy elimination
148 as binary operations.
150 For extra points, you can reuse the old statements to build the
151 mergetmps, since you shouldn't run out.
153 So why don't we do this?
155 Because it's expensive, and rarely will help. Most trees we are
156 reassociating have 3 or less ops. If they have 2 ops, they already
157 will be written into a nice single binary op. If you have 3 ops, a
158 single simple check suffices to tell you whether the first two are of the
159 same rank. If so, you know to order it
162 newstmt = mergetmp + op3
166 newstmt = mergetmp + op1
168 If all three are of the same rank, you can't expose them all in a
169 single binary operator anyway, so the above is *still* the best you
172 Thus, this is what we do. When we have three ops left, we check to see
173 what order to put them in, and call it a day. As a nod to vector sum
174 reduction, we check if any of the ops are really a phi node that is a
175 destructive update for the associating op, and keep the destructive
176 update together for vector sum reduction recognition. */
178 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
179 point 3a in the pass header comment. */
180 static bool reassoc_insert_powi_p
;
186 int constants_eliminated
;
189 int pows_encountered
;
194 static object_allocator
<operand_entry
> operand_entry_pool
195 ("operand entry pool");
197 /* This is used to assign a unique ID to each struct operand_entry
198 so that qsort results are identical on different hosts. */
199 static unsigned int next_operand_entry_id
;
201 /* Starting rank number for a given basic block, so that we can rank
202 operations using unmovable instructions in that BB based on the bb
204 static int64_t *bb_rank
;
206 /* Operand->rank hashtable. */
207 static hash_map
<tree
, int64_t> *operand_rank
;
209 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
210 all basic blocks the CFG should be adjusted - basic blocks
211 split right after that SSA_NAME's definition statement and before
212 the only use, which must be a bit ior. */
213 static vec
<tree
> reassoc_branch_fixups
;
216 static int64_t get_rank (tree
);
217 static bool reassoc_stmt_dominates_stmt_p (gimple
*, gimple
*);
219 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
220 possibly added by gsi_remove. */
223 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
225 gimple
*stmt
= gsi_stmt (*gsi
);
227 if (!MAY_HAVE_DEBUG_BIND_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
228 return gsi_remove (gsi
, true);
230 gimple_stmt_iterator prev
= *gsi
;
232 unsigned uid
= gimple_uid (stmt
);
233 basic_block bb
= gimple_bb (stmt
);
234 bool ret
= gsi_remove (gsi
, true);
235 if (!gsi_end_p (prev
))
238 prev
= gsi_start_bb (bb
);
239 gimple
*end_stmt
= gsi_stmt (*gsi
);
240 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
242 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
243 gimple_set_uid (stmt
, uid
);
249 /* Bias amount for loop-carried phis. We want this to be larger than
250 the depth of any reassociation tree we can see, but not larger than
251 the rank difference between two blocks. */
252 #define PHI_LOOP_BIAS (1 << 15)
254 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
255 an innermost loop, and the phi has only a single use which is inside
256 the loop, then the rank is the block rank of the loop latch plus an
257 extra bias for the loop-carried dependence. This causes expressions
258 calculated into an accumulator variable to be independent for each
259 iteration of the loop. If STMT is some other phi, the rank is the
260 block rank of its containing block. */
262 phi_rank (gimple
*stmt
)
264 basic_block bb
= gimple_bb (stmt
);
265 class loop
*father
= bb
->loop_father
;
271 /* We only care about real loops (those with a latch). */
273 return bb_rank
[bb
->index
];
275 /* Interesting phis must be in headers of innermost loops. */
276 if (bb
!= father
->header
278 return bb_rank
[bb
->index
];
280 /* Ignore virtual SSA_NAMEs. */
281 res
= gimple_phi_result (stmt
);
282 if (virtual_operand_p (res
))
283 return bb_rank
[bb
->index
];
285 /* The phi definition must have a single use, and that use must be
286 within the loop. Otherwise this isn't an accumulator pattern. */
287 if (!single_imm_use (res
, &use
, &use_stmt
)
288 || gimple_bb (use_stmt
)->loop_father
!= father
)
289 return bb_rank
[bb
->index
];
291 /* Look for phi arguments from within the loop. If found, bias this phi. */
292 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
294 tree arg
= gimple_phi_arg_def (stmt
, i
);
295 if (TREE_CODE (arg
) == SSA_NAME
296 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
298 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
299 if (gimple_bb (def_stmt
)->loop_father
== father
)
300 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
304 /* Must be an uninteresting phi. */
305 return bb_rank
[bb
->index
];
308 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
309 loop-carried dependence of an innermost loop, return TRUE; else
312 loop_carried_phi (tree exp
)
317 if (TREE_CODE (exp
) != SSA_NAME
318 || SSA_NAME_IS_DEFAULT_DEF (exp
))
321 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
323 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
326 /* Non-loop-carried phis have block rank. Loop-carried phis have
327 an additional bias added in. If this phi doesn't have block rank,
328 it's biased and should not be propagated. */
329 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
331 if (phi_rank (phi_stmt
) != block_rank
)
337 /* Return the maximum of RANK and the rank that should be propagated
338 from expression OP. For most operands, this is just the rank of OP.
339 For loop-carried phis, the value is zero to avoid undoing the bias
340 in favor of the phi. */
342 propagate_rank (int64_t rank
, tree op
)
346 if (loop_carried_phi (op
))
349 op_rank
= get_rank (op
);
351 return MAX (rank
, op_rank
);
354 /* Look up the operand rank structure for expression E. */
356 static inline int64_t
357 find_operand_rank (tree e
)
359 int64_t *slot
= operand_rank
->get (e
);
360 return slot
? *slot
: -1;
363 /* Insert {E,RANK} into the operand rank hashtable. */
366 insert_operand_rank (tree e
, int64_t rank
)
368 gcc_assert (rank
> 0);
369 gcc_assert (!operand_rank
->put (e
, rank
));
372 /* Given an expression E, return the rank of the expression. */
377 /* SSA_NAME's have the rank of the expression they are the result
379 For globals and uninitialized values, the rank is 0.
380 For function arguments, use the pre-setup rank.
381 For PHI nodes, stores, asm statements, etc, we use the rank of
383 For simple operations, the rank is the maximum rank of any of
384 its operands, or the bb_rank, whichever is less.
385 I make no claims that this is optimal, however, it gives good
388 /* We make an exception to the normal ranking system to break
389 dependences of accumulator variables in loops. Suppose we
390 have a simple one-block loop containing:
397 As shown, each iteration of the calculation into x is fully
398 dependent upon the iteration before it. We would prefer to
399 see this in the form:
406 If the loop is unrolled, the calculations of b and c from
407 different iterations can be interleaved.
409 To obtain this result during reassociation, we bias the rank
410 of the phi definition x_1 upward, when it is recognized as an
411 accumulator pattern. The artificial rank causes it to be
412 added last, providing the desired independence. */
414 if (TREE_CODE (e
) == SSA_NAME
)
421 /* If we already have a rank for this expression, use that. */
422 rank
= find_operand_rank (e
);
426 stmt
= SSA_NAME_DEF_STMT (e
);
427 if (gimple_code (stmt
) == GIMPLE_PHI
)
428 rank
= phi_rank (stmt
);
430 else if (!is_gimple_assign (stmt
))
431 rank
= bb_rank
[gimple_bb (stmt
)->index
];
435 /* Otherwise, find the maximum rank for the operands. As an
436 exception, remove the bias from loop-carried phis when propagating
437 the rank so that dependent operations are not also biased. */
438 /* Simply walk over all SSA uses - this takes advatage of the
439 fact that non-SSA operands are is_gimple_min_invariant and
442 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
443 rank
= propagate_rank (rank
, op
);
448 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
450 fprintf (dump_file
, "Rank for ");
451 print_generic_expr (dump_file
, e
);
452 fprintf (dump_file
, " is %" PRId64
"\n", rank
);
455 /* Note the rank in the hashtable so we don't recompute it. */
456 insert_operand_rank (e
, rank
);
460 /* Constants, globals, etc., are rank 0 */
465 /* We want integer ones to end up last no matter what, since they are
466 the ones we can do the most with. */
467 #define INTEGER_CONST_TYPE 1 << 4
468 #define FLOAT_ONE_CONST_TYPE 1 << 3
469 #define FLOAT_CONST_TYPE 1 << 2
470 #define OTHER_CONST_TYPE 1 << 1
472 /* Classify an invariant tree into integer, float, or other, so that
473 we can sort them to be near other constants of the same type. */
475 constant_type (tree t
)
477 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
478 return INTEGER_CONST_TYPE
;
479 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
481 /* Sort -1.0 and 1.0 constants last, while in some cases
482 const_binop can't optimize some inexact operations, multiplication
483 by -1.0 or 1.0 can be always merged with others. */
484 if (real_onep (t
) || real_minus_onep (t
))
485 return FLOAT_ONE_CONST_TYPE
;
486 return FLOAT_CONST_TYPE
;
489 return OTHER_CONST_TYPE
;
492 /* qsort comparison function to sort operand entries PA and PB by rank
493 so that the sorted array is ordered by rank in decreasing order. */
495 sort_by_operand_rank (const void *pa
, const void *pb
)
497 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
498 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
500 if (oeb
->rank
!= oea
->rank
)
501 return oeb
->rank
> oea
->rank
? 1 : -1;
503 /* It's nicer for optimize_expression if constants that are likely
504 to fold when added/multiplied/whatever are put next to each
505 other. Since all constants have rank 0, order them by type. */
508 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
509 return constant_type (oea
->op
) - constant_type (oeb
->op
);
511 /* To make sorting result stable, we use unique IDs to determine
513 return oeb
->id
> oea
->id
? 1 : -1;
516 if (TREE_CODE (oea
->op
) != SSA_NAME
)
518 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
519 return oeb
->id
> oea
->id
? 1 : -1;
523 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
526 /* Lastly, make sure the versions that are the same go next to each
528 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
530 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
531 versions of removed SSA_NAMEs, so if possible, prefer to sort
532 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
534 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
535 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
536 basic_block bba
= gimple_bb (stmta
);
537 basic_block bbb
= gimple_bb (stmtb
);
540 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
541 but the other might not. */
546 /* If neither is, compare bb_rank. */
547 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
548 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
551 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
552 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
556 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
559 return oeb
->id
> oea
->id
? 1 : -1;
562 /* Add an operand entry to *OPS for the tree operand OP. */
565 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
567 operand_entry
*oe
= operand_entry_pool
.allocate ();
570 oe
->rank
= get_rank (op
);
571 oe
->id
= next_operand_entry_id
++;
573 oe
->stmt_to_insert
= stmt_to_insert
;
577 /* Add an operand entry to *OPS for the tree operand OP with repeat
581 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
582 HOST_WIDE_INT repeat
)
584 operand_entry
*oe
= operand_entry_pool
.allocate ();
587 oe
->rank
= get_rank (op
);
588 oe
->id
= next_operand_entry_id
++;
590 oe
->stmt_to_insert
= NULL
;
593 reassociate_stats
.pows_encountered
++;
596 /* Return true if STMT is reassociable operation containing a binary
597 operation with tree code CODE, and is inside LOOP. */
600 is_reassociable_op (gimple
*stmt
, enum tree_code code
, class loop
*loop
)
602 basic_block bb
= gimple_bb (stmt
);
604 if (gimple_bb (stmt
) == NULL
)
607 if (!flow_bb_inside_loop_p (loop
, bb
))
610 if (is_gimple_assign (stmt
)
611 && gimple_assign_rhs_code (stmt
) == code
612 && has_single_use (gimple_assign_lhs (stmt
)))
614 tree rhs1
= gimple_assign_rhs1 (stmt
);
615 tree rhs2
= gimple_assign_rhs2 (stmt
);
616 if (TREE_CODE (rhs1
) == SSA_NAME
617 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1
))
620 && TREE_CODE (rhs2
) == SSA_NAME
621 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2
))
630 /* Return true if STMT is a nop-conversion. */
633 gimple_nop_conversion_p (gimple
*stmt
)
635 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
637 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
638 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
639 TREE_TYPE (gimple_assign_rhs1 (ass
))))
645 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
646 operand of the negate operation. Otherwise, return NULL. */
649 get_unary_op (tree name
, enum tree_code opcode
)
651 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
653 /* Look through nop conversions (sign changes). */
654 if (gimple_nop_conversion_p (stmt
)
655 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
656 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
658 if (!is_gimple_assign (stmt
))
661 if (gimple_assign_rhs_code (stmt
) == opcode
)
662 return gimple_assign_rhs1 (stmt
);
666 /* Return true if OP1 and OP2 have the same value if casted to either type. */
669 ops_equal_values_p (tree op1
, tree op2
)
675 if (TREE_CODE (op1
) == SSA_NAME
)
677 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
678 if (gimple_nop_conversion_p (stmt
))
680 op1
= gimple_assign_rhs1 (stmt
);
686 if (TREE_CODE (op2
) == SSA_NAME
)
688 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
689 if (gimple_nop_conversion_p (stmt
))
691 op2
= gimple_assign_rhs1 (stmt
);
702 /* If CURR and LAST are a pair of ops that OPCODE allows us to
703 eliminate through equivalences, do so, remove them from OPS, and
704 return true. Otherwise, return false. */
707 eliminate_duplicate_pair (enum tree_code opcode
,
708 vec
<operand_entry
*> *ops
,
715 /* If we have two of the same op, and the opcode is & |, min, or max,
716 we can eliminate one of them.
717 If we have two of the same op, and the opcode is ^, we can
718 eliminate both of them. */
720 if (last
&& last
->op
== curr
->op
)
728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
730 fprintf (dump_file
, "Equivalence: ");
731 print_generic_expr (dump_file
, curr
->op
);
732 fprintf (dump_file
, " [&|minmax] ");
733 print_generic_expr (dump_file
, last
->op
);
734 fprintf (dump_file
, " -> ");
735 print_generic_stmt (dump_file
, last
->op
);
738 ops
->ordered_remove (i
);
739 reassociate_stats
.ops_eliminated
++;
744 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
746 fprintf (dump_file
, "Equivalence: ");
747 print_generic_expr (dump_file
, curr
->op
);
748 fprintf (dump_file
, " ^ ");
749 print_generic_expr (dump_file
, last
->op
);
750 fprintf (dump_file
, " -> nothing\n");
753 reassociate_stats
.ops_eliminated
+= 2;
755 if (ops
->length () == 2)
758 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
763 ops
->ordered_remove (i
-1);
764 ops
->ordered_remove (i
-1);
776 static vec
<tree
> plus_negates
;
778 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
779 expression, look in OPS for a corresponding positive operation to cancel
780 it out. If we find one, remove the other from OPS, replace
781 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
785 eliminate_plus_minus_pair (enum tree_code opcode
,
786 vec
<operand_entry
*> *ops
,
787 unsigned int currindex
,
795 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
798 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
799 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
800 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
803 /* Any non-negated version will have a rank that is one less than
804 the current rank. So once we hit those ranks, if we don't find
807 for (i
= currindex
+ 1;
808 ops
->iterate (i
, &oe
)
809 && oe
->rank
>= curr
->rank
- 1 ;
813 && ops_equal_values_p (oe
->op
, negateop
))
815 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
817 fprintf (dump_file
, "Equivalence: ");
818 print_generic_expr (dump_file
, negateop
);
819 fprintf (dump_file
, " + -");
820 print_generic_expr (dump_file
, oe
->op
);
821 fprintf (dump_file
, " -> 0\n");
824 ops
->ordered_remove (i
);
825 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
826 ops
->ordered_remove (currindex
);
827 reassociate_stats
.ops_eliminated
++;
832 && ops_equal_values_p (oe
->op
, notop
))
834 tree op_type
= TREE_TYPE (oe
->op
);
836 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
838 fprintf (dump_file
, "Equivalence: ");
839 print_generic_expr (dump_file
, notop
);
840 fprintf (dump_file
, " + ~");
841 print_generic_expr (dump_file
, oe
->op
);
842 fprintf (dump_file
, " -> -1\n");
845 ops
->ordered_remove (i
);
846 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
847 ops
->ordered_remove (currindex
);
848 reassociate_stats
.ops_eliminated
++;
854 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
855 save it for later inspection in repropagate_negates(). */
856 if (negateop
!= NULL_TREE
857 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
858 plus_negates
.safe_push (curr
->op
);
863 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
864 bitwise not expression, look in OPS for a corresponding operand to
865 cancel it out. If we find one, remove the other from OPS, replace
866 OPS[CURRINDEX] with 0, and return true. Otherwise, return
870 eliminate_not_pairs (enum tree_code opcode
,
871 vec
<operand_entry
*> *ops
,
872 unsigned int currindex
,
879 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
880 || TREE_CODE (curr
->op
) != SSA_NAME
)
883 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
884 if (notop
== NULL_TREE
)
887 /* Any non-not version will have a rank that is one less than
888 the current rank. So once we hit those ranks, if we don't find
891 for (i
= currindex
+ 1;
892 ops
->iterate (i
, &oe
)
893 && oe
->rank
>= curr
->rank
- 1;
898 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
900 fprintf (dump_file
, "Equivalence: ");
901 print_generic_expr (dump_file
, notop
);
902 if (opcode
== BIT_AND_EXPR
)
903 fprintf (dump_file
, " & ~");
904 else if (opcode
== BIT_IOR_EXPR
)
905 fprintf (dump_file
, " | ~");
906 print_generic_expr (dump_file
, oe
->op
);
907 if (opcode
== BIT_AND_EXPR
)
908 fprintf (dump_file
, " -> 0\n");
909 else if (opcode
== BIT_IOR_EXPR
)
910 fprintf (dump_file
, " -> -1\n");
913 if (opcode
== BIT_AND_EXPR
)
914 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
915 else if (opcode
== BIT_IOR_EXPR
)
916 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
918 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
920 ops
->quick_push (oe
);
928 /* Use constant value that may be present in OPS to try to eliminate
929 operands. Note that this function is only really used when we've
930 eliminated ops for other reasons, or merged constants. Across
931 single statements, fold already does all of this, plus more. There
932 is little point in duplicating logic, so I've only included the
933 identities that I could ever construct testcases to trigger. */
936 eliminate_using_constants (enum tree_code opcode
,
937 vec
<operand_entry
*> *ops
)
939 operand_entry
*oelast
= ops
->last ();
940 tree type
= TREE_TYPE (oelast
->op
);
942 if (oelast
->rank
== 0
943 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
948 if (integer_zerop (oelast
->op
))
950 if (ops
->length () != 1)
952 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
953 fprintf (dump_file
, "Found & 0, removing all other ops\n");
955 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
958 ops
->quick_push (oelast
);
962 else if (integer_all_onesp (oelast
->op
))
964 if (ops
->length () != 1)
966 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
967 fprintf (dump_file
, "Found & -1, removing\n");
969 reassociate_stats
.ops_eliminated
++;
974 if (integer_all_onesp (oelast
->op
))
976 if (ops
->length () != 1)
978 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
979 fprintf (dump_file
, "Found | -1, removing all other ops\n");
981 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
984 ops
->quick_push (oelast
);
988 else if (integer_zerop (oelast
->op
))
990 if (ops
->length () != 1)
992 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
993 fprintf (dump_file
, "Found | 0, removing\n");
995 reassociate_stats
.ops_eliminated
++;
1000 if (integer_zerop (oelast
->op
)
1001 || (FLOAT_TYPE_P (type
)
1002 && !HONOR_NANS (type
)
1003 && !HONOR_SIGNED_ZEROS (type
)
1004 && real_zerop (oelast
->op
)))
1006 if (ops
->length () != 1)
1008 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1009 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1011 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1013 ops
->quick_push (oelast
);
1017 else if (integer_onep (oelast
->op
)
1018 || (FLOAT_TYPE_P (type
)
1019 && !HONOR_SNANS (type
)
1020 && real_onep (oelast
->op
)))
1022 if (ops
->length () != 1)
1024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1025 fprintf (dump_file
, "Found * 1, removing\n");
1027 reassociate_stats
.ops_eliminated
++;
1035 if (integer_zerop (oelast
->op
)
1036 || (FLOAT_TYPE_P (type
)
1037 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1038 && fold_real_zero_addition_p (type
, oelast
->op
,
1039 opcode
== MINUS_EXPR
)))
1041 if (ops
->length () != 1)
1043 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1044 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1046 reassociate_stats
.ops_eliminated
++;
1058 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1061 /* Structure for tracking and counting operands. */
1065 enum tree_code oecode
;
1070 /* The heap for the oecount hashtable and the sorted list of operands. */
1071 static vec
<oecount
> cvec
;
1074 /* Oecount hashtable helpers. */
1076 struct oecount_hasher
: int_hash
<int, 0, 1>
1078 static inline hashval_t
hash (int);
1079 static inline bool equal (int, int);
1082 /* Hash function for oecount. */
1085 oecount_hasher::hash (int p
)
1087 const oecount
*c
= &cvec
[p
- 42];
1088 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1091 /* Comparison function for oecount. */
1094 oecount_hasher::equal (int p1
, int p2
)
1096 const oecount
*c1
= &cvec
[p1
- 42];
1097 const oecount
*c2
= &cvec
[p2
- 42];
1098 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1101 /* Comparison function for qsort sorting oecount elements by count. */
1104 oecount_cmp (const void *p1
, const void *p2
)
1106 const oecount
*c1
= (const oecount
*)p1
;
1107 const oecount
*c2
= (const oecount
*)p2
;
1108 if (c1
->cnt
!= c2
->cnt
)
1109 return c1
->cnt
> c2
->cnt
? 1 : -1;
1111 /* If counts are identical, use unique IDs to stabilize qsort. */
1112 return c1
->id
> c2
->id
? 1 : -1;
1115 /* Return TRUE iff STMT represents a builtin call that raises OP
1116 to some exponent. */
1119 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1121 if (!is_gimple_call (stmt
))
1124 switch (gimple_call_combined_fn (stmt
))
1128 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1135 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1136 in place and return the result. Assumes that stmt_is_power_of_op
1137 was previously called for STMT and returned TRUE. */
1139 static HOST_WIDE_INT
1140 decrement_power (gimple
*stmt
)
1142 REAL_VALUE_TYPE c
, cint
;
1143 HOST_WIDE_INT power
;
1146 switch (gimple_call_combined_fn (stmt
))
1149 arg1
= gimple_call_arg (stmt
, 1);
1150 c
= TREE_REAL_CST (arg1
);
1151 power
= real_to_integer (&c
) - 1;
1152 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1153 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1157 arg1
= gimple_call_arg (stmt
, 1);
1158 power
= TREE_INT_CST_LOW (arg1
) - 1;
1159 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1167 /* Replace SSA defined by STMT and replace all its uses with new
1168 SSA. Also return the new SSA. */
1171 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1175 imm_use_iterator iter
;
1176 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1177 tree lhs
= gimple_get_lhs (stmt
);
1179 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1180 gimple_set_lhs (stmt
, new_lhs
);
1182 /* Also need to update GIMPLE_DEBUGs. */
1183 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1185 tree repl
= new_lhs
;
1186 if (is_gimple_debug (use_stmt
))
1188 if (new_debug_lhs
== NULL_TREE
)
1190 new_debug_lhs
= make_node (DEBUG_EXPR_DECL
);
1192 = gimple_build_debug_bind (new_debug_lhs
,
1193 build2 (opcode
, TREE_TYPE (lhs
),
1196 DECL_ARTIFICIAL (new_debug_lhs
) = 1;
1197 TREE_TYPE (new_debug_lhs
) = TREE_TYPE (lhs
);
1198 SET_DECL_MODE (new_debug_lhs
, TYPE_MODE (TREE_TYPE (lhs
)));
1199 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1200 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1201 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1203 repl
= new_debug_lhs
;
1205 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1206 SET_USE (use
, repl
);
1207 update_stmt (use_stmt
);
1212 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1213 uses with new SSAs. Also do this for the stmt that defines DEF
1214 if *DEF is not OP. */
1217 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1218 vec
<gimple
*> &stmts_to_fix
)
1224 && TREE_CODE (*def
) == SSA_NAME
1225 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1226 && gimple_code (stmt
) != GIMPLE_NOP
)
1227 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1229 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1230 make_new_ssa_for_def (stmt
, opcode
, op
);
1233 /* Find the single immediate use of STMT's LHS, and replace it
1234 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1235 replace *DEF with OP as well. */
1238 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1243 gimple_stmt_iterator gsi
;
1245 if (is_gimple_call (stmt
))
1246 lhs
= gimple_call_lhs (stmt
);
1248 lhs
= gimple_assign_lhs (stmt
);
1250 gcc_assert (has_single_use (lhs
));
1251 single_imm_use (lhs
, &use
, &use_stmt
);
1255 if (TREE_CODE (op
) != SSA_NAME
)
1256 update_stmt (use_stmt
);
1257 gsi
= gsi_for_stmt (stmt
);
1258 unlink_stmt_vdef (stmt
);
1259 reassoc_remove_stmt (&gsi
);
1260 release_defs (stmt
);
1263 /* Walks the linear chain with result *DEF searching for an operation
1264 with operand OP and code OPCODE removing that from the chain. *DEF
1265 is updated if there is only one operand but no operation left. */
1268 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1270 tree orig_def
= *def
;
1271 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1272 /* PR72835 - Record the stmt chain that has to be updated such that
1273 we dont use the same LHS when the values computed are different. */
1274 auto_vec
<gimple
*, 64> stmts_to_fix
;
1280 if (opcode
== MULT_EXPR
)
1282 if (stmt_is_power_of_op (stmt
, op
))
1284 if (decrement_power (stmt
) == 1)
1286 if (stmts_to_fix
.length () > 0)
1287 stmts_to_fix
.pop ();
1288 propagate_op_to_single_use (op
, stmt
, def
);
1292 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1294 if (gimple_assign_rhs1 (stmt
) == op
)
1296 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1297 if (stmts_to_fix
.length () > 0)
1298 stmts_to_fix
.pop ();
1299 propagate_op_to_single_use (cst
, stmt
, def
);
1302 else if (integer_minus_onep (op
)
1303 || real_minus_onep (op
))
1305 gimple_assign_set_rhs_code
1306 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1312 name
= gimple_assign_rhs1 (stmt
);
1314 /* If this is the operation we look for and one of the operands
1315 is ours simply propagate the other operand into the stmts
1317 if (gimple_assign_rhs_code (stmt
) == opcode
1319 || gimple_assign_rhs2 (stmt
) == op
))
1322 name
= gimple_assign_rhs2 (stmt
);
1323 if (stmts_to_fix
.length () > 0)
1324 stmts_to_fix
.pop ();
1325 propagate_op_to_single_use (name
, stmt
, def
);
1329 /* We might have a multiply of two __builtin_pow* calls, and
1330 the operand might be hiding in the rightmost one. Likewise
1331 this can happen for a negate. */
1332 if (opcode
== MULT_EXPR
1333 && gimple_assign_rhs_code (stmt
) == opcode
1334 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1335 && has_single_use (gimple_assign_rhs2 (stmt
)))
1337 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1338 if (stmt_is_power_of_op (stmt2
, op
))
1340 if (decrement_power (stmt2
) == 1)
1341 propagate_op_to_single_use (op
, stmt2
, def
);
1343 stmts_to_fix
.safe_push (stmt2
);
1346 else if (is_gimple_assign (stmt2
)
1347 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1349 if (gimple_assign_rhs1 (stmt2
) == op
)
1351 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1352 propagate_op_to_single_use (cst
, stmt2
, def
);
1355 else if (integer_minus_onep (op
)
1356 || real_minus_onep (op
))
1358 stmts_to_fix
.safe_push (stmt2
);
1359 gimple_assign_set_rhs_code
1360 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1366 /* Continue walking the chain. */
1367 gcc_assert (name
!= op
1368 && TREE_CODE (name
) == SSA_NAME
);
1369 stmt
= SSA_NAME_DEF_STMT (name
);
1370 stmts_to_fix
.safe_push (stmt
);
1374 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1375 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
1378 /* Returns true if statement S1 dominates statement S2. Like
1379 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1382 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1384 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1386 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1387 SSA_NAME. Assume it lives at the beginning of function and
1388 thus dominates everything. */
1389 if (!bb1
|| s1
== s2
)
1392 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1398 /* PHIs in the same basic block are assumed to be
1399 executed all in parallel, if only one stmt is a PHI,
1400 it dominates the other stmt in the same basic block. */
1401 if (gimple_code (s1
) == GIMPLE_PHI
)
1404 if (gimple_code (s2
) == GIMPLE_PHI
)
1407 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1409 if (gimple_uid (s1
) < gimple_uid (s2
))
1412 if (gimple_uid (s1
) > gimple_uid (s2
))
1415 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1416 unsigned int uid
= gimple_uid (s1
);
1417 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1419 gimple
*s
= gsi_stmt (gsi
);
1420 if (gimple_uid (s
) != uid
)
1429 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1432 /* Insert STMT after INSERT_POINT. */
1435 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1437 gimple_stmt_iterator gsi
;
1440 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1441 bb
= gimple_bb (insert_point
);
1442 else if (!stmt_ends_bb_p (insert_point
))
1444 gsi
= gsi_for_stmt (insert_point
);
1445 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1446 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1450 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1451 thus if it must end a basic block, it should be a call that can
1452 throw, or some assignment that can throw. If it throws, the LHS
1453 of it will not be initialized though, so only valid places using
1454 the SSA_NAME should be dominated by the fallthru edge. */
1455 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1456 gsi
= gsi_after_labels (bb
);
1457 if (gsi_end_p (gsi
))
1459 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1460 gimple_set_uid (stmt
,
1461 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1464 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1465 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1468 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1469 the result. Places the statement after the definition of either
1470 OP1 or OP2. Returns the new statement. */
1473 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1475 gimple
*op1def
= NULL
, *op2def
= NULL
;
1476 gimple_stmt_iterator gsi
;
1480 /* Create the addition statement. */
1481 op
= make_ssa_name (type
);
1482 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1484 /* Find an insertion place and insert. */
1485 if (TREE_CODE (op1
) == SSA_NAME
)
1486 op1def
= SSA_NAME_DEF_STMT (op1
);
1487 if (TREE_CODE (op2
) == SSA_NAME
)
1488 op2def
= SSA_NAME_DEF_STMT (op2
);
1489 if ((!op1def
|| gimple_nop_p (op1def
))
1490 && (!op2def
|| gimple_nop_p (op2def
)))
1492 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1493 if (gsi_end_p (gsi
))
1495 gimple_stmt_iterator gsi2
1496 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1497 gimple_set_uid (sum
,
1498 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1501 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1502 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1506 gimple
*insert_point
;
1507 if ((!op1def
|| gimple_nop_p (op1def
))
1508 || (op2def
&& !gimple_nop_p (op2def
)
1509 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1510 insert_point
= op2def
;
1512 insert_point
= op1def
;
1513 insert_stmt_after (sum
, insert_point
);
1520 /* Perform un-distribution of divisions and multiplications.
1521 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1522 to (A + B) / X for real X.
1524 The algorithm is organized as follows.
1526 - First we walk the addition chain *OPS looking for summands that
1527 are defined by a multiplication or a real division. This results
1528 in the candidates bitmap with relevant indices into *OPS.
1530 - Second we build the chains of multiplications or divisions for
1531 these candidates, counting the number of occurrences of (operand, code)
1532 pairs in all of the candidates chains.
1534 - Third we sort the (operand, code) pairs by number of occurrence and
1535 process them starting with the pair with the most uses.
1537 * For each such pair we walk the candidates again to build a
1538 second candidate bitmap noting all multiplication/division chains
1539 that have at least one occurrence of (operand, code).
1541 * We build an alternate addition chain only covering these
1542 candidates with one (operand, code) operation removed from their
1543 multiplication/division chain.
1545 * The first candidate gets replaced by the alternate addition chain
1546 multiplied/divided by the operand.
1548 * All candidate chains get disabled for further processing and
1549 processing of (operand, code) pairs continues.
1551 The alternate addition chains built are re-processed by the main
1552 reassociation algorithm which allows optimizing a * x * y + b * y * x
1553 to (a + b ) * x * y in one invocation of the reassociation pass. */
1556 undistribute_ops_list (enum tree_code opcode
,
1557 vec
<operand_entry
*> *ops
, class loop
*loop
)
1559 unsigned int length
= ops
->length ();
1562 unsigned nr_candidates
, nr_candidates2
;
1563 sbitmap_iterator sbi0
;
1564 vec
<operand_entry
*> *subops
;
1565 bool changed
= false;
1566 unsigned int next_oecount_id
= 0;
1569 || opcode
!= PLUS_EXPR
)
1572 /* Build a list of candidates to process. */
1573 auto_sbitmap
candidates (length
);
1574 bitmap_clear (candidates
);
1576 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1578 enum tree_code dcode
;
1581 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1583 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1584 if (!is_gimple_assign (oe1def
))
1586 dcode
= gimple_assign_rhs_code (oe1def
);
1587 if ((dcode
!= MULT_EXPR
1588 && dcode
!= RDIV_EXPR
)
1589 || !is_reassociable_op (oe1def
, dcode
, loop
))
1592 bitmap_set_bit (candidates
, i
);
1596 if (nr_candidates
< 2)
1599 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1601 fprintf (dump_file
, "searching for un-distribute opportunities ");
1602 print_generic_expr (dump_file
,
1603 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, TDF_NONE
);
1604 fprintf (dump_file
, " %d\n", nr_candidates
);
1607 /* Build linearized sub-operand lists and the counting table. */
1610 hash_table
<oecount_hasher
> ctable (15);
1612 /* ??? Macro arguments cannot have multi-argument template types in
1613 them. This typedef is needed to workaround that limitation. */
1614 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1615 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1616 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1619 enum tree_code oecode
;
1622 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1623 oecode
= gimple_assign_rhs_code (oedef
);
1624 linearize_expr_tree (&subops
[i
], oedef
,
1625 associative_tree_code (oecode
), false);
1627 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1634 c
.id
= next_oecount_id
++;
1637 idx
= cvec
.length () + 41;
1638 slot
= ctable
.find_slot (idx
, INSERT
);
1646 cvec
[*slot
- 42].cnt
++;
1651 /* Sort the counting table. */
1652 cvec
.qsort (oecount_cmp
);
1654 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1657 fprintf (dump_file
, "Candidates:\n");
1658 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1660 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1661 c
->oecode
== MULT_EXPR
1662 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1663 print_generic_expr (dump_file
, c
->op
);
1664 fprintf (dump_file
, "\n");
1668 /* Process the (operand, code) pairs in order of most occurrence. */
1669 auto_sbitmap
candidates2 (length
);
1670 while (!cvec
.is_empty ())
1672 oecount
*c
= &cvec
.last ();
1676 /* Now collect the operands in the outer chain that contain
1677 the common operand in their inner chain. */
1678 bitmap_clear (candidates2
);
1680 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1683 enum tree_code oecode
;
1685 tree op
= (*ops
)[i
]->op
;
1687 /* If we undistributed in this chain already this may be
1689 if (TREE_CODE (op
) != SSA_NAME
)
1692 oedef
= SSA_NAME_DEF_STMT (op
);
1693 oecode
= gimple_assign_rhs_code (oedef
);
1694 if (oecode
!= c
->oecode
)
1697 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1699 if (oe1
->op
== c
->op
)
1701 bitmap_set_bit (candidates2
, i
);
1708 if (nr_candidates2
>= 2)
1710 operand_entry
*oe1
, *oe2
;
1712 int first
= bitmap_first_set_bit (candidates2
);
1714 /* Build the new addition chain. */
1715 oe1
= (*ops
)[first
];
1716 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1718 fprintf (dump_file
, "Building (");
1719 print_generic_expr (dump_file
, oe1
->op
);
1721 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1722 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1726 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1728 fprintf (dump_file
, " + ");
1729 print_generic_expr (dump_file
, oe2
->op
);
1731 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1732 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1733 oe1
->op
, oe2
->op
, opcode
);
1734 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1736 oe1
->op
= gimple_get_lhs (sum
);
1739 /* Apply the multiplication/division. */
1740 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1741 oe1
->op
, c
->op
, c
->oecode
);
1742 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1744 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1745 print_generic_expr (dump_file
, c
->op
);
1746 fprintf (dump_file
, "\n");
1749 /* Record it in the addition chain and disable further
1750 undistribution with this op. */
1751 oe1
->op
= gimple_assign_lhs (prod
);
1752 oe1
->rank
= get_rank (oe1
->op
);
1753 subops
[first
].release ();
1761 for (i
= 0; i
< ops
->length (); ++i
)
1762 subops
[i
].release ();
1769 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1770 first: element index for each relevant BIT_FIELD_REF.
1771 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1772 typedef std::pair
<unsigned, unsigned> v_info_elem
;
1775 auto_vec
<v_info_elem
, 32> vec
;
1777 typedef v_info
*v_info_ptr
;
1779 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1781 sort_by_mach_mode (const void *p_i
, const void *p_j
)
1783 const tree tr1
= *((const tree
*) p_i
);
1784 const tree tr2
= *((const tree
*) p_j
);
1785 unsigned int mode1
= TYPE_MODE (TREE_TYPE (tr1
));
1786 unsigned int mode2
= TYPE_MODE (TREE_TYPE (tr2
));
1789 else if (mode1
< mode2
)
1791 if (SSA_NAME_VERSION (tr1
) < SSA_NAME_VERSION (tr2
))
1793 else if (SSA_NAME_VERSION (tr1
) > SSA_NAME_VERSION (tr2
))
1798 /* Cleanup hash map for VECTOR information. */
1800 cleanup_vinfo_map (hash_map
<tree
, v_info_ptr
> &info_map
)
1802 for (hash_map
<tree
, v_info_ptr
>::iterator it
= info_map
.begin ();
1803 it
!= info_map
.end (); ++it
)
1805 v_info_ptr info
= (*it
).second
;
1807 (*it
).second
= NULL
;
1811 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1812 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1814 Vs = (V1 + V2 + ... + Vn)
1815 Vs[0] + Vs[1] + ... + Vs[k]
1817 The basic steps are listed below:
1819 1) Check the addition chain *OPS by looking those summands coming from
1820 VECTOR bit_field_ref on VECTOR type. Put the information into
1821 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1823 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1824 continuous, they can cover the whole VECTOR perfectly without any holes.
1825 Obtain one VECTOR list which contain candidates to be transformed.
1827 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1828 candidates with same mode, build the addition statements for them and
1829 generate BIT_FIELD_REFs accordingly.
1832 The current implementation requires the whole VECTORs should be fully
1833 covered, but it can be extended to support partial, checking adjacent
1834 but not fill the whole, it may need some cost model to define the
1835 boundary to do or not.
1838 undistribute_bitref_for_vector (enum tree_code opcode
,
1839 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1841 if (ops
->length () <= 1)
1844 if (opcode
!= PLUS_EXPR
1845 && opcode
!= MULT_EXPR
1846 && opcode
!= BIT_XOR_EXPR
1847 && opcode
!= BIT_IOR_EXPR
1848 && opcode
!= BIT_AND_EXPR
)
1851 hash_map
<tree
, v_info_ptr
> v_info_map
;
1855 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1856 information into map. */
1857 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1859 enum tree_code dcode
;
1862 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1864 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1865 if (!is_gimple_assign (oe1def
))
1867 dcode
= gimple_assign_rhs_code (oe1def
);
1868 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1871 tree rhs
= gimple_assign_rhs1 (oe1def
);
1872 tree vec
= TREE_OPERAND (rhs
, 0);
1873 tree vec_type
= TREE_TYPE (vec
);
1875 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1878 /* Ignore it if target machine can't support this VECTOR type. */
1879 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1882 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1883 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1886 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1887 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1890 /* The type of BIT_FIELD_REF might not be equal to the element type of
1891 the vector. We want to use a vector type with element type the
1892 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1893 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1895 machine_mode simd_mode
;
1896 unsigned HOST_WIDE_INT size
, nunits
;
1897 unsigned HOST_WIDE_INT elem_size
1898 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1899 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1901 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1903 nunits
= size
/ elem_size
;
1904 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1905 nunits
).exists (&simd_mode
))
1907 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1909 /* Ignore it if target machine can't support this VECTOR type. */
1910 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1913 /* Check const vector type, constrain BIT_FIELD_REF offset and
1915 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1918 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
1919 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
1923 tree elem_type
= TREE_TYPE (vec_type
);
1924 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
1925 if (maybe_ne (bit_field_size (rhs
), elem_size
))
1929 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
1932 /* Ignore it if target machine can't support this type of VECTOR
1934 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
1935 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
1939 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
1943 info
->vec_type
= vec_type
;
1945 else if (!types_compatible_p (vec_type
, info
->vec_type
))
1947 info
->vec
.safe_push (std::make_pair (idx
, i
));
1950 /* At least two VECTOR to combine. */
1951 if (v_info_map
.elements () <= 1)
1953 cleanup_vinfo_map (v_info_map
);
1957 /* Verify all VECTOR candidates by checking two conditions:
1958 1) sorted offsets are adjacent, no holes.
1959 2) can fill the whole VECTOR perfectly.
1960 And add the valid candidates to a vector for further handling. */
1961 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
1962 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
1963 it
!= v_info_map
.end (); ++it
)
1965 tree cand_vec
= (*it
).first
;
1966 v_info_ptr cand_info
= (*it
).second
;
1967 unsigned int num_elems
1968 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
1969 if (cand_info
->vec
.length () != num_elems
)
1971 sbitmap holes
= sbitmap_alloc (num_elems
);
1972 bitmap_ones (holes
);
1975 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr
)
1977 if (!bitmap_bit_p (holes
, curr
->first
))
1983 bitmap_clear_bit (holes
, curr
->first
);
1985 if (valid
&& bitmap_empty_p (holes
))
1986 valid_vecs
.quick_push (cand_vec
);
1987 sbitmap_free (holes
);
1990 /* At least two VECTOR to combine. */
1991 if (valid_vecs
.length () <= 1)
1993 cleanup_vinfo_map (v_info_map
);
1997 valid_vecs
.qsort (sort_by_mach_mode
);
1998 /* Go through all candidates by machine mode order, query the mode_to_total
1999 to get the total number for each mode and skip the single one. */
2000 for (unsigned i
= 0; i
< valid_vecs
.length () - 1; ++i
)
2002 tree tvec
= valid_vecs
[i
];
2003 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (tvec
));
2005 /* Skip modes with only a single candidate. */
2006 if (TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) != mode
)
2009 unsigned int idx
, j
;
2011 tree sum_vec
= tvec
;
2012 v_info_ptr info_ptr
= *(v_info_map
.get (tvec
));
2014 tree vec_type
= info_ptr
->vec_type
;
2016 /* Build the sum for all candidates with same mode. */
2019 sum
= build_and_add_sum (vec_type
, sum_vec
,
2020 valid_vecs
[i
+ 1], opcode
);
2021 if (!useless_type_conversion_p (vec_type
,
2022 TREE_TYPE (valid_vecs
[i
+ 1])))
2024 /* Update the operands only after build_and_add_sum,
2025 so that we don't have to repeat the placement algorithm
2026 of build_and_add_sum. */
2027 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2028 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
,
2030 tree lhs
= make_ssa_name (vec_type
);
2031 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2032 gimple_set_uid (g
, gimple_uid (sum
));
2033 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2034 gimple_assign_set_rhs2 (sum
, lhs
);
2035 if (sum_vec
== tvec
)
2037 vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
, sum_vec
);
2038 lhs
= make_ssa_name (vec_type
);
2039 g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2040 gimple_set_uid (g
, gimple_uid (sum
));
2041 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2042 gimple_assign_set_rhs1 (sum
, lhs
);
2046 sum_vec
= gimple_get_lhs (sum
);
2047 info_ptr
= *(v_info_map
.get (valid_vecs
[i
+ 1]));
2048 gcc_assert (types_compatible_p (vec_type
, info_ptr
->vec_type
));
2049 /* Update those related ops of current candidate VECTOR. */
2050 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2053 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2054 /* Set this then op definition will get DCEd later. */
2055 gimple_set_visited (def
, true);
2056 if (opcode
== PLUS_EXPR
2057 || opcode
== BIT_XOR_EXPR
2058 || opcode
== BIT_IOR_EXPR
)
2059 (*ops
)[idx
]->op
= build_zero_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2060 else if (opcode
== MULT_EXPR
)
2061 (*ops
)[idx
]->op
= build_one_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2064 gcc_assert (opcode
== BIT_AND_EXPR
);
2066 = build_all_ones_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2068 (*ops
)[idx
]->rank
= 0;
2070 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2072 fprintf (dump_file
, "Generating addition -> ");
2073 print_gimple_stmt (dump_file
, sum
, 0);
2077 while ((i
< valid_vecs
.length () - 1)
2078 && TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) == mode
);
2080 /* Referring to first valid VECTOR with this mode, generate the
2081 BIT_FIELD_REF statements accordingly. */
2082 info_ptr
= *(v_info_map
.get (tvec
));
2084 tree elem_type
= TREE_TYPE (vec_type
);
2085 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2088 tree dst
= make_ssa_name (elem_type
);
2089 tree pos
= bitsize_int (elem
->first
2090 * tree_to_uhwi (TYPE_SIZE (elem_type
)));
2091 tree bfr
= build3 (BIT_FIELD_REF
, elem_type
, sum_vec
,
2092 TYPE_SIZE (elem_type
), pos
);
2093 gimple
*gs
= gimple_build_assign (dst
, BIT_FIELD_REF
, bfr
);
2094 insert_stmt_after (gs
, sum
);
2095 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2096 /* Set this then op definition will get DCEd later. */
2097 gimple_set_visited (def
, true);
2098 (*ops
)[idx
]->op
= gimple_assign_lhs (gs
);
2099 (*ops
)[idx
]->rank
= get_rank ((*ops
)[idx
]->op
);
2100 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2102 fprintf (dump_file
, "Generating bit_field_ref -> ");
2103 print_gimple_stmt (dump_file
, gs
, 0);
2108 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2109 fprintf (dump_file
, "undistributiong bit_field_ref for vector done.\n");
2111 cleanup_vinfo_map (v_info_map
);
2116 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2117 expression, examine the other OPS to see if any of them are comparisons
2118 of the same values, which we may be able to combine or eliminate.
2119 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2122 eliminate_redundant_comparison (enum tree_code opcode
,
2123 vec
<operand_entry
*> *ops
,
2124 unsigned int currindex
,
2125 operand_entry
*curr
)
2128 enum tree_code lcode
, rcode
;
2129 gimple
*def1
, *def2
;
2133 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
2136 /* Check that CURR is a comparison. */
2137 if (TREE_CODE (curr
->op
) != SSA_NAME
)
2139 def1
= SSA_NAME_DEF_STMT (curr
->op
);
2140 if (!is_gimple_assign (def1
))
2142 lcode
= gimple_assign_rhs_code (def1
);
2143 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
2145 op1
= gimple_assign_rhs1 (def1
);
2146 op2
= gimple_assign_rhs2 (def1
);
2148 /* Now look for a similar comparison in the remaining OPS. */
2149 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
2153 if (TREE_CODE (oe
->op
) != SSA_NAME
)
2155 def2
= SSA_NAME_DEF_STMT (oe
->op
);
2156 if (!is_gimple_assign (def2
))
2158 rcode
= gimple_assign_rhs_code (def2
);
2159 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
2162 /* If we got here, we have a match. See if we can combine the
2164 tree type
= TREE_TYPE (gimple_assign_lhs (def1
));
2165 if (opcode
== BIT_IOR_EXPR
)
2166 t
= maybe_fold_or_comparisons (type
,
2168 rcode
, gimple_assign_rhs1 (def2
),
2169 gimple_assign_rhs2 (def2
));
2171 t
= maybe_fold_and_comparisons (type
,
2173 rcode
, gimple_assign_rhs1 (def2
),
2174 gimple_assign_rhs2 (def2
));
2178 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2179 always give us a boolean_type_node value back. If the original
2180 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2181 we need to convert. */
2182 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
2183 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
2185 if (TREE_CODE (t
) != INTEGER_CST
2186 && !operand_equal_p (t
, curr
->op
, 0))
2188 enum tree_code subcode
;
2189 tree newop1
, newop2
;
2190 if (!COMPARISON_CLASS_P (t
))
2192 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2193 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2194 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2195 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
2199 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2201 fprintf (dump_file
, "Equivalence: ");
2202 print_generic_expr (dump_file
, curr
->op
);
2203 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
2204 print_generic_expr (dump_file
, oe
->op
);
2205 fprintf (dump_file
, " -> ");
2206 print_generic_expr (dump_file
, t
);
2207 fprintf (dump_file
, "\n");
2210 /* Now we can delete oe, as it has been subsumed by the new combined
2212 ops
->ordered_remove (i
);
2213 reassociate_stats
.ops_eliminated
++;
2215 /* If t is the same as curr->op, we're done. Otherwise we must
2216 replace curr->op with t. Special case is if we got a constant
2217 back, in which case we add it to the end instead of in place of
2218 the current entry. */
2219 if (TREE_CODE (t
) == INTEGER_CST
)
2221 ops
->ordered_remove (currindex
);
2222 add_to_ops_vec (ops
, t
);
2224 else if (!operand_equal_p (t
, curr
->op
, 0))
2227 enum tree_code subcode
;
2230 gcc_assert (COMPARISON_CLASS_P (t
));
2231 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2232 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2233 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2234 gcc_checking_assert (is_gimple_val (newop1
)
2235 && is_gimple_val (newop2
));
2236 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
2237 curr
->op
= gimple_get_lhs (sum
);
2246 /* Transform repeated addition of same values into multiply with
2249 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
2252 tree op
= NULL_TREE
;
2254 int i
, start
= -1, end
= 0, count
= 0;
2255 auto_vec
<std::pair
<int, int> > indxs
;
2256 bool changed
= false;
2258 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2259 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2260 || !flag_unsafe_math_optimizations
))
2263 /* Look for repeated operands. */
2264 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2272 else if (operand_equal_p (oe
->op
, op
, 0))
2280 indxs
.safe_push (std::make_pair (start
, end
));
2288 indxs
.safe_push (std::make_pair (start
, end
));
2290 for (j
= indxs
.length () - 1; j
>= 0; --j
)
2292 /* Convert repeated operand addition to multiplication. */
2293 start
= indxs
[j
].first
;
2294 end
= indxs
[j
].second
;
2295 op
= (*ops
)[start
]->op
;
2296 count
= end
- start
+ 1;
2297 for (i
= end
; i
>= start
; --i
)
2298 ops
->unordered_remove (i
);
2299 tree tmp
= make_ssa_name (TREE_TYPE (op
));
2300 tree cst
= build_int_cst (integer_type_node
, count
);
2302 = gimple_build_assign (tmp
, MULT_EXPR
,
2303 op
, fold_convert (TREE_TYPE (op
), cst
));
2304 gimple_set_visited (mul_stmt
, true);
2305 add_to_ops_vec (ops
, tmp
, mul_stmt
);
2313 /* Perform various identities and other optimizations on the list of
2314 operand entries, stored in OPS. The tree code for the binary
2315 operation between all the operands is OPCODE. */
2318 optimize_ops_list (enum tree_code opcode
,
2319 vec
<operand_entry
*> *ops
)
2321 unsigned int length
= ops
->length ();
2324 operand_entry
*oelast
= NULL
;
2325 bool iterate
= false;
2330 oelast
= ops
->last ();
2332 /* If the last two are constants, pop the constants off, merge them
2333 and try the next two. */
2334 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
2336 operand_entry
*oelm1
= (*ops
)[length
- 2];
2338 if (oelm1
->rank
== 0
2339 && is_gimple_min_invariant (oelm1
->op
)
2340 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
2341 TREE_TYPE (oelast
->op
)))
2343 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2344 oelm1
->op
, oelast
->op
);
2346 if (folded
&& is_gimple_min_invariant (folded
))
2348 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2349 fprintf (dump_file
, "Merging constants\n");
2354 add_to_ops_vec (ops
, folded
);
2355 reassociate_stats
.constants_eliminated
++;
2357 optimize_ops_list (opcode
, ops
);
2363 eliminate_using_constants (opcode
, ops
);
2366 for (i
= 0; ops
->iterate (i
, &oe
);)
2370 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2372 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2373 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2374 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2387 optimize_ops_list (opcode
, ops
);
2390 /* The following functions are subroutines to optimize_range_tests and allow
2391 it to try to change a logical combination of comparisons into a range
2395 X == 2 || X == 5 || X == 3 || X == 4
2399 (unsigned) (X - 2) <= 3
2401 For more information see comments above fold_test_range in fold-const.c,
2402 this implementation is for GIMPLE. */
2406 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2409 dump_range_entry (FILE *file
, struct range_entry
*r
, bool skip_exp
)
2412 print_generic_expr (file
, r
->exp
);
2413 fprintf (file
, " %c[", r
->in_p
? '+' : '-');
2414 print_generic_expr (file
, r
->low
);
2416 print_generic_expr (file
, r
->high
);
2420 /* Dump the range entry R to STDERR. */
2423 debug_range_entry (struct range_entry
*r
)
2425 dump_range_entry (stderr
, r
, false);
2426 fputc ('\n', stderr
);
2429 /* This is similar to make_range in fold-const.c, but on top of
2430 GIMPLE instead of trees. If EXP is non-NULL, it should be
2431 an SSA_NAME and STMT argument is ignored, otherwise STMT
2432 argument should be a GIMPLE_COND. */
2435 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2439 bool is_bool
, strict_overflow_p
;
2443 r
->strict_overflow_p
= false;
2445 r
->high
= NULL_TREE
;
2446 if (exp
!= NULL_TREE
2447 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2450 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2451 and see if we can refine the range. Some of the cases below may not
2452 happen, but it doesn't seem worth worrying about this. We "continue"
2453 the outer loop when we've changed something; otherwise we "break"
2454 the switch, which will "break" the while. */
2455 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2458 strict_overflow_p
= false;
2460 if (exp
== NULL_TREE
)
2462 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2464 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2469 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2474 enum tree_code code
;
2475 tree arg0
, arg1
, exp_type
;
2479 if (exp
!= NULL_TREE
)
2481 if (TREE_CODE (exp
) != SSA_NAME
2482 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2485 stmt
= SSA_NAME_DEF_STMT (exp
);
2486 if (!is_gimple_assign (stmt
))
2489 code
= gimple_assign_rhs_code (stmt
);
2490 arg0
= gimple_assign_rhs1 (stmt
);
2491 arg1
= gimple_assign_rhs2 (stmt
);
2492 exp_type
= TREE_TYPE (exp
);
2496 code
= gimple_cond_code (stmt
);
2497 arg0
= gimple_cond_lhs (stmt
);
2498 arg1
= gimple_cond_rhs (stmt
);
2499 exp_type
= boolean_type_node
;
2502 if (TREE_CODE (arg0
) != SSA_NAME
2503 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
))
2505 loc
= gimple_location (stmt
);
2509 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2510 /* Ensure the range is either +[-,0], +[0,0],
2511 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2512 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2513 or similar expression of unconditional true or
2514 false, it should not be negated. */
2515 && ((high
&& integer_zerop (high
))
2516 || (low
&& integer_onep (low
))))
2529 if ((TYPE_PRECISION (exp_type
) == 1
2530 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2531 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2534 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2536 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2541 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2556 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2558 &strict_overflow_p
);
2559 if (nexp
!= NULL_TREE
)
2562 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2575 r
->strict_overflow_p
= strict_overflow_p
;
2579 /* Comparison function for qsort. Sort entries
2580 without SSA_NAME exp first, then with SSA_NAMEs sorted
2581 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2582 by increasing ->low and if ->low is the same, by increasing
2583 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2587 range_entry_cmp (const void *a
, const void *b
)
2589 const struct range_entry
*p
= (const struct range_entry
*) a
;
2590 const struct range_entry
*q
= (const struct range_entry
*) b
;
2592 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2594 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2596 /* Group range_entries for the same SSA_NAME together. */
2597 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2599 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2601 /* If ->low is different, NULL low goes first, then by
2603 if (p
->low
!= NULL_TREE
)
2605 if (q
->low
!= NULL_TREE
)
2607 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2609 if (tem
&& integer_onep (tem
))
2611 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2613 if (tem
&& integer_onep (tem
))
2619 else if (q
->low
!= NULL_TREE
)
2621 /* If ->high is different, NULL high goes last, before that by
2623 if (p
->high
!= NULL_TREE
)
2625 if (q
->high
!= NULL_TREE
)
2627 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2629 if (tem
&& integer_onep (tem
))
2631 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2633 if (tem
&& integer_onep (tem
))
2639 else if (q
->high
!= NULL_TREE
)
2641 /* If both ranges are the same, sort below by ascending idx. */
2646 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2649 if (p
->idx
< q
->idx
)
2653 gcc_checking_assert (p
->idx
> q
->idx
);
2658 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2659 insert needed statements BEFORE or after GSI. */
2662 force_into_ssa_name (gimple_stmt_iterator
*gsi
, tree expr
, bool before
)
2664 enum gsi_iterator_update m
= before
? GSI_SAME_STMT
: GSI_CONTINUE_LINKING
;
2665 tree ret
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
, before
, m
);
2666 if (TREE_CODE (ret
) != SSA_NAME
)
2668 gimple
*g
= gimple_build_assign (make_ssa_name (TREE_TYPE (ret
)), ret
);
2670 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2672 gsi_insert_after (gsi
, g
, GSI_CONTINUE_LINKING
);
2673 ret
= gimple_assign_lhs (g
);
2678 /* Helper routine of optimize_range_test.
2679 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2680 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2681 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2682 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2683 an array of COUNT pointers to other ranges. Return
2684 true if the range merge has been successful.
2685 If OPCODE is ERROR_MARK, this is called from within
2686 maybe_optimize_range_tests and is performing inter-bb range optimization.
2687 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2691 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2692 struct range_entry
**otherrangep
,
2693 unsigned int count
, enum tree_code opcode
,
2694 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2695 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2697 operand_entry
*oe
= (*ops
)[range
->idx
];
2699 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2700 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2701 location_t loc
= gimple_location (stmt
);
2702 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2703 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2705 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2706 gimple_stmt_iterator gsi
;
2707 unsigned int i
, uid
;
2709 if (tem
== NULL_TREE
)
2712 /* If op is default def SSA_NAME, there is no place to insert the
2713 new comparison. Give up, unless we can use OP itself as the
2715 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2717 if (op
== range
->exp
2718 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2719 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2721 || (TREE_CODE (tem
) == EQ_EXPR
2722 && TREE_OPERAND (tem
, 0) == op
2723 && integer_onep (TREE_OPERAND (tem
, 1))))
2724 && opcode
!= BIT_IOR_EXPR
2725 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2734 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2735 warning_at (loc
, OPT_Wstrict_overflow
,
2736 "assuming signed overflow does not occur "
2737 "when simplifying range test");
2739 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2741 struct range_entry
*r
;
2742 fprintf (dump_file
, "Optimizing range tests ");
2743 dump_range_entry (dump_file
, range
, false);
2744 for (i
= 0; i
< count
; i
++)
2751 && r
->exp
!= range
->exp
2752 && TREE_CODE (r
->exp
) == SSA_NAME
)
2754 fprintf (dump_file
, " and ");
2755 dump_range_entry (dump_file
, r
, false);
2759 fprintf (dump_file
, " and");
2760 dump_range_entry (dump_file
, r
, true);
2763 fprintf (dump_file
, "\n into ");
2764 print_generic_expr (dump_file
, tem
);
2765 fprintf (dump_file
, "\n");
2768 if (opcode
== BIT_IOR_EXPR
2769 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2770 tem
= invert_truthvalue_loc (loc
, tem
);
2772 tem
= fold_convert_loc (loc
, optype
, tem
);
2775 gsi
= gsi_for_stmt (stmt
);
2776 uid
= gimple_uid (stmt
);
2784 gcc_checking_assert (tem
== op
);
2785 /* In rare cases range->exp can be equal to lhs of stmt.
2786 In that case we have to insert after the stmt rather then before
2787 it. If stmt is a PHI, insert it at the start of the basic block. */
2788 else if (op
!= range
->exp
)
2790 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2791 tem
= force_into_ssa_name (&gsi
, tem
, true);
2794 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2796 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2797 tem
= force_into_ssa_name (&gsi
, tem
, false);
2801 gsi
= gsi_after_labels (gimple_bb (stmt
));
2802 if (!gsi_end_p (gsi
))
2803 uid
= gimple_uid (gsi_stmt (gsi
));
2806 gsi
= gsi_start_bb (gimple_bb (stmt
));
2808 while (!gsi_end_p (gsi
))
2810 uid
= gimple_uid (gsi_stmt (gsi
));
2814 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2815 tem
= force_into_ssa_name (&gsi
, tem
, true);
2816 if (gsi_end_p (gsi
))
2817 gsi
= gsi_last_bb (gimple_bb (stmt
));
2821 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2822 if (gimple_uid (gsi_stmt (gsi
)))
2825 gimple_set_uid (gsi_stmt (gsi
), uid
);
2832 range
->strict_overflow_p
= false;
2834 for (i
= 0; i
< count
; i
++)
2837 range
= otherrange
+ i
;
2839 range
= otherrangep
[i
];
2840 oe
= (*ops
)[range
->idx
];
2841 /* Now change all the other range test immediate uses, so that
2842 those tests will be optimized away. */
2843 if (opcode
== ERROR_MARK
)
2846 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2847 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2849 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2850 ? boolean_false_node
: boolean_true_node
);
2853 oe
->op
= error_mark_node
;
2854 range
->exp
= NULL_TREE
;
2855 range
->low
= NULL_TREE
;
2856 range
->high
= NULL_TREE
;
2861 /* Optimize X == CST1 || X == CST2
2862 if popcount (CST1 ^ CST2) == 1 into
2863 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2864 Similarly for ranges. E.g.
2865 X != 2 && X != 3 && X != 10 && X != 11
2866 will be transformed by the previous optimization into
2867 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2868 and this loop can transform that into
2869 !(((X & ~8) - 2U) <= 1U). */
2872 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2873 tree lowi
, tree lowj
, tree highi
, tree highj
,
2874 vec
<operand_entry
*> *ops
,
2875 struct range_entry
*rangei
,
2876 struct range_entry
*rangej
)
2878 tree lowxor
, highxor
, tem
, exp
;
2879 /* Check lowi ^ lowj == highi ^ highj and
2880 popcount (lowi ^ lowj) == 1. */
2881 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2882 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2884 if (!integer_pow2p (lowxor
))
2886 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2887 if (!tree_int_cst_equal (lowxor
, highxor
))
2891 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
2892 int prec
= GET_MODE_PRECISION (mode
);
2893 if (TYPE_PRECISION (type
) < prec
2894 || (wi::to_wide (TYPE_MIN_VALUE (type
))
2895 != wi::min_value (prec
, TYPE_SIGN (type
)))
2896 || (wi::to_wide (TYPE_MAX_VALUE (type
))
2897 != wi::max_value (prec
, TYPE_SIGN (type
))))
2899 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
2900 exp
= fold_convert (type
, exp
);
2901 lowxor
= fold_convert (type
, lowxor
);
2902 lowi
= fold_convert (type
, lowi
);
2903 highi
= fold_convert (type
, highi
);
2905 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2906 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
2907 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2908 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2909 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2910 NULL
, rangei
->in_p
, lowj
, highj
,
2911 rangei
->strict_overflow_p
2912 || rangej
->strict_overflow_p
))
2917 /* Optimize X == CST1 || X == CST2
2918 if popcount (CST2 - CST1) == 1 into
2919 ((X - CST1) & ~(CST2 - CST1)) == 0.
2920 Similarly for ranges. E.g.
2921 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2922 || X == 75 || X == 45
2923 will be transformed by the previous optimization into
2924 (X - 43U) <= 3U || (X - 75U) <= 3U
2925 and this loop can transform that into
2926 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2928 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2929 tree lowi
, tree lowj
, tree highi
, tree highj
,
2930 vec
<operand_entry
*> *ops
,
2931 struct range_entry
*rangei
,
2932 struct range_entry
*rangej
)
2934 tree tem1
, tem2
, mask
;
2935 /* Check highi - lowi == highj - lowj. */
2936 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2937 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2939 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2940 if (!tree_int_cst_equal (tem1
, tem2
))
2942 /* Check popcount (lowj - lowi) == 1. */
2943 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2944 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2946 if (!integer_pow2p (tem1
))
2949 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
2950 int prec
= GET_MODE_PRECISION (mode
);
2951 if (TYPE_PRECISION (type
) < prec
2952 || (wi::to_wide (TYPE_MIN_VALUE (type
))
2953 != wi::min_value (prec
, TYPE_SIGN (type
)))
2954 || (wi::to_wide (TYPE_MAX_VALUE (type
))
2955 != wi::max_value (prec
, TYPE_SIGN (type
))))
2956 type
= build_nonstandard_integer_type (prec
, 1);
2958 type
= unsigned_type_for (type
);
2959 tem1
= fold_convert (type
, tem1
);
2960 tem2
= fold_convert (type
, tem2
);
2961 lowi
= fold_convert (type
, lowi
);
2962 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2963 tem1
= fold_build2 (MINUS_EXPR
, type
,
2964 fold_convert (type
, rangei
->exp
), lowi
);
2965 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2966 lowj
= build_int_cst (type
, 0);
2967 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2968 NULL
, rangei
->in_p
, lowj
, tem2
,
2969 rangei
->strict_overflow_p
2970 || rangej
->strict_overflow_p
))
2975 /* It does some common checks for function optimize_range_tests_xor and
2976 optimize_range_tests_diff.
2977 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2978 Else it calls optimize_range_tests_diff. */
2981 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2982 bool optimize_xor
, vec
<operand_entry
*> *ops
,
2983 struct range_entry
*ranges
)
2986 bool any_changes
= false;
2987 for (i
= first
; i
< length
; i
++)
2989 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2991 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2993 type
= TREE_TYPE (ranges
[i
].exp
);
2994 if (!INTEGRAL_TYPE_P (type
))
2996 lowi
= ranges
[i
].low
;
2997 if (lowi
== NULL_TREE
)
2998 lowi
= TYPE_MIN_VALUE (type
);
2999 highi
= ranges
[i
].high
;
3000 if (highi
== NULL_TREE
)
3002 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
3005 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
3007 lowj
= ranges
[j
].low
;
3008 if (lowj
== NULL_TREE
)
3010 highj
= ranges
[j
].high
;
3011 if (highj
== NULL_TREE
)
3012 highj
= TYPE_MAX_VALUE (type
);
3013 /* Check lowj > highi. */
3014 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
3016 if (tem
== NULL_TREE
|| !integer_onep (tem
))
3019 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
3021 ranges
+ i
, ranges
+ j
);
3023 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
3025 ranges
+ i
, ranges
+ j
);
3036 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3037 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3038 EXP on success, NULL otherwise. */
3041 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
3042 wide_int
*mask
, tree
*totallowp
)
3044 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
3045 if (tem
== NULL_TREE
3046 || TREE_CODE (tem
) != INTEGER_CST
3047 || TREE_OVERFLOW (tem
)
3048 || tree_int_cst_sgn (tem
) == -1
3049 || compare_tree_int (tem
, prec
) != -1)
3052 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
3053 *mask
= wi::shifted_mask (0, max
, false, prec
);
3054 if (TREE_CODE (exp
) == BIT_AND_EXPR
3055 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3057 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
3058 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
3059 if (wi::popcount (msk
) == 1
3060 && wi::ltu_p (msk
, prec
- max
))
3062 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
3063 max
+= msk
.to_uhwi ();
3064 exp
= TREE_OPERAND (exp
, 0);
3065 if (integer_zerop (low
)
3066 && TREE_CODE (exp
) == PLUS_EXPR
3067 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3069 tree ret
= TREE_OPERAND (exp
, 0);
3072 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
3073 TYPE_PRECISION (TREE_TYPE (low
))));
3074 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
3080 else if (!tree_int_cst_lt (totallow
, tbias
))
3082 bias
= wi::to_widest (tbias
);
3083 bias
-= wi::to_widest (totallow
);
3084 if (bias
>= 0 && bias
< prec
- max
)
3086 *mask
= wi::lshift (*mask
, bias
);
3094 if (!tree_int_cst_lt (totallow
, low
))
3096 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3097 if (tem
== NULL_TREE
3098 || TREE_CODE (tem
) != INTEGER_CST
3099 || TREE_OVERFLOW (tem
)
3100 || compare_tree_int (tem
, prec
- max
) == 1)
3103 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
3107 /* Attempt to optimize small range tests using bit test.
3109 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3110 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3111 has been by earlier optimizations optimized into:
3112 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3113 As all the 43 through 82 range is less than 64 numbers,
3114 for 64-bit word targets optimize that into:
3115 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3118 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3119 vec
<operand_entry
*> *ops
,
3120 struct range_entry
*ranges
)
3123 bool any_changes
= false;
3124 int prec
= GET_MODE_BITSIZE (word_mode
);
3125 auto_vec
<struct range_entry
*, 64> candidates
;
3127 for (i
= first
; i
< length
- 1; i
++)
3129 tree lowi
, highi
, lowj
, highj
, type
;
3131 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3133 type
= TREE_TYPE (ranges
[i
].exp
);
3134 if (!INTEGRAL_TYPE_P (type
))
3136 lowi
= ranges
[i
].low
;
3137 if (lowi
== NULL_TREE
)
3138 lowi
= TYPE_MIN_VALUE (type
);
3139 highi
= ranges
[i
].high
;
3140 if (highi
== NULL_TREE
)
3143 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3144 highi
, &mask
, &lowi
);
3145 if (exp
== NULL_TREE
)
3147 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3148 candidates
.truncate (0);
3149 int end
= MIN (i
+ 64, length
);
3150 for (j
= i
+ 1; j
< end
; j
++)
3153 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3155 if (ranges
[j
].exp
== exp
)
3157 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3159 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3162 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3164 exp2
= TREE_OPERAND (exp2
, 0);
3174 lowj
= ranges
[j
].low
;
3175 if (lowj
== NULL_TREE
)
3177 highj
= ranges
[j
].high
;
3178 if (highj
== NULL_TREE
)
3179 highj
= TYPE_MAX_VALUE (type
);
3181 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3182 highj
, &mask2
, NULL
);
3186 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3187 candidates
.safe_push (&ranges
[j
]);
3190 /* If every possible relative value of the expression is a valid shift
3191 amount, then we can merge the entry test in the bit test. In this
3192 case, if we would need otherwise 2 or more comparisons, then use
3193 the bit test; in the other cases, the threshold is 3 comparisons. */
3195 bool entry_test_needed
;
3196 if (TREE_CODE (exp
) == SSA_NAME
3197 && get_range_info (exp
, &min
, &max
) == VR_RANGE
3198 && wi::leu_p (max
- min
, prec
- 1))
3200 wide_int ilowi
= wi::to_wide (lowi
);
3201 if (wi::lt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3203 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3204 mask
= wi::lshift (mask
, ilowi
- min
);
3206 else if (wi::gt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3208 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3209 mask
= wi::lrshift (mask
, min
- ilowi
);
3211 entry_test_needed
= false;
3214 entry_test_needed
= true;
3215 if (candidates
.length () >= (entry_test_needed
? 2 : 1))
3217 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3218 wi::to_widest (lowi
)
3219 + prec
- 1 - wi::clz (mask
));
3220 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3222 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3223 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3224 location_t loc
= gimple_location (stmt
);
3225 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
3227 /* See if it isn't cheaper to pretend the minimum value of the
3228 range is 0, if maximum value is small enough.
3229 We can avoid then subtraction of the minimum value, but the
3230 mask constant could be perhaps more expensive. */
3231 if (compare_tree_int (lowi
, 0) > 0
3232 && compare_tree_int (high
, prec
) < 0)
3235 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3236 rtx reg
= gen_raw_REG (word_mode
, 10000);
3237 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3238 cost_diff
= set_src_cost (gen_rtx_PLUS (word_mode
, reg
,
3240 word_mode
, speed_p
);
3241 rtx r
= immed_wide_int_const (mask
, word_mode
);
3242 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3243 word_mode
, speed_p
);
3244 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3245 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3246 word_mode
, speed_p
);
3249 mask
= wi::lshift (mask
, m
);
3250 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3255 if (entry_test_needed
)
3257 tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3259 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3264 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3265 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3266 fold_convert_loc (loc
, etype
, exp
),
3267 fold_convert_loc (loc
, etype
, lowi
));
3268 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3269 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3270 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3271 build_int_cst (word_type
, 1), exp
);
3272 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3273 wide_int_to_tree (word_type
, mask
));
3274 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3275 build_zero_cst (word_type
));
3276 if (is_gimple_val (exp
))
3279 /* The shift might have undefined behavior if TEM is true,
3280 but reassociate_bb isn't prepared to have basic blocks
3281 split when it is running. So, temporarily emit a code
3282 with BIT_IOR_EXPR instead of &&, and fix it up in
3284 gimple_seq seq
= NULL
;
3287 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3288 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3289 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3292 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3293 gimple_seq_add_seq_without_update (&seq
, seq2
);
3294 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3295 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3298 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3299 BIT_IOR_EXPR
, tem
, exp
);
3300 gimple_set_location (g
, loc
);
3301 gimple_seq_add_stmt_without_update (&seq
, g
);
3302 exp
= gimple_assign_lhs (g
);
3304 tree val
= build_zero_cst (optype
);
3305 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3306 candidates
.length (), opcode
, ops
, exp
,
3307 seq
, false, val
, val
, strict_overflow_p
))
3311 reassoc_branch_fixups
.safe_push (tem
);
3314 gimple_seq_discard (seq
);
3320 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3321 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
3322 Also, handle x < C && y < C && z < C where C is power of two as
3323 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3324 as (x | y | z) < 0. */
3327 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3328 vec
<operand_entry
*> *ops
,
3329 struct range_entry
*ranges
)
3333 bool any_changes
= false;
3334 auto_vec
<int, 128> buckets
;
3335 auto_vec
<int, 32> chains
;
3336 auto_vec
<struct range_entry
*, 32> candidates
;
3338 for (i
= first
; i
< length
; i
++)
3342 if (ranges
[i
].exp
== NULL_TREE
3343 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3344 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3345 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
)
3348 if (ranges
[i
].low
!= NULL_TREE
3349 && ranges
[i
].high
!= NULL_TREE
3351 && tree_int_cst_equal (ranges
[i
].low
, ranges
[i
].high
))
3353 idx
= !integer_zerop (ranges
[i
].low
);
3354 if (idx
&& !integer_all_onesp (ranges
[i
].low
))
3357 else if (ranges
[i
].high
!= NULL_TREE
3358 && TREE_CODE (ranges
[i
].high
) == INTEGER_CST
3361 wide_int w
= wi::to_wide (ranges
[i
].high
);
3362 int prec
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
));
3363 int l
= wi::clz (w
);
3367 || w
!= wi::mask (prec
- l
, false, prec
))
3369 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3370 && ranges
[i
].low
== NULL_TREE
)
3372 && integer_zerop (ranges
[i
].low
))))
3375 else if (ranges
[i
].high
== NULL_TREE
3376 && ranges
[i
].low
!= NULL_TREE
3377 /* Perform this optimization only in the last
3378 reassoc pass, as it interferes with the reassociation
3379 itself or could also with VRP etc. which might not
3380 be able to virtually undo the optimization. */
3381 && !reassoc_insert_powi_p
3382 && !TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3383 && integer_zerop (ranges
[i
].low
))
3388 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 4 + idx
;
3389 if (buckets
.length () <= b
)
3390 buckets
.safe_grow_cleared (b
+ 1, true);
3391 if (chains
.length () <= (unsigned) i
)
3392 chains
.safe_grow (i
+ 1, true);
3393 chains
[i
] = buckets
[b
];
3397 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3398 if (i
&& chains
[i
- 1])
3403 /* When ranges[X - 1].high + 1 is a power of two,
3404 we need to process the same bucket up to
3405 precision - 1 times, each time split the entries
3406 with the same high bound into one chain and the
3407 rest into another one to be processed later. */
3410 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3412 if (tree_int_cst_equal (ranges
[i
- 1].high
,
3413 ranges
[j
- 1].high
))
3415 chains
[this_prev
- 1] = j
;
3418 else if (other_prev
== 0)
3425 chains
[other_prev
- 1] = j
;
3429 chains
[this_prev
- 1] = 0;
3431 chains
[other_prev
- 1] = 0;
3432 if (chains
[i
- 1] == 0)
3439 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3441 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3442 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3443 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3446 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3447 tree type2
= NULL_TREE
;
3448 bool strict_overflow_p
= false;
3449 candidates
.truncate (0);
3450 for (j
= i
; j
; j
= chains
[j
- 1])
3452 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3453 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3456 /* For the signed < 0 cases, the types should be
3457 really compatible (all signed with the same precision,
3458 instead put ranges that have different in_p from
3460 if (!useless_type_conversion_p (type1
, type
))
3462 if (ranges
[j
- 1].in_p
!= ranges
[k
- 1].in_p
)
3463 candidates
.safe_push (&ranges
[j
- 1]);
3468 || useless_type_conversion_p (type1
, type
))
3470 else if (type2
== NULL_TREE
3471 || useless_type_conversion_p (type2
, type
))
3473 if (type2
== NULL_TREE
)
3475 candidates
.safe_push (&ranges
[j
- 1]);
3478 unsigned l
= candidates
.length ();
3479 for (j
= i
; j
; j
= chains
[j
- 1])
3481 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3486 if (!useless_type_conversion_p (type1
, type
))
3488 if (ranges
[j
- 1].in_p
== ranges
[k
- 1].in_p
)
3489 candidates
.safe_push (&ranges
[j
- 1]);
3492 if (useless_type_conversion_p (type1
, type
))
3494 else if (type2
== NULL_TREE
3495 || useless_type_conversion_p (type2
, type
))
3497 candidates
.safe_push (&ranges
[j
- 1]);
3499 gimple_seq seq
= NULL
;
3500 tree op
= NULL_TREE
;
3502 struct range_entry
*r
;
3503 candidates
.safe_push (&ranges
[k
- 1]);
3504 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3507 enum tree_code code
;
3515 code
= (b
% 4) == 3 ? BIT_NOT_EXPR
: NOP_EXPR
;
3516 g
= gimple_build_assign (make_ssa_name (type1
), code
, op
);
3517 gimple_seq_add_stmt_without_update (&seq
, g
);
3518 op
= gimple_assign_lhs (g
);
3520 tree type
= TREE_TYPE (r
->exp
);
3522 if (id
>= l
&& !useless_type_conversion_p (type1
, type
))
3524 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, exp
);
3525 gimple_seq_add_stmt_without_update (&seq
, g
);
3526 exp
= gimple_assign_lhs (g
);
3529 code
= r
->in_p
? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3531 code
= (b
% 4) == 1 ? BIT_AND_EXPR
: BIT_IOR_EXPR
;
3532 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3534 gimple_seq_add_stmt_without_update (&seq
, g
);
3535 op
= gimple_assign_lhs (g
);
3538 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3539 candidates
.length (), opcode
, ops
, op
,
3540 seq
, ranges
[k
- 1].in_p
, ranges
[k
- 1].low
,
3541 ranges
[k
- 1].high
, strict_overflow_p
))
3544 gimple_seq_discard (seq
);
3545 if ((b
% 4) == 2 && buckets
[b
] != i
)
3546 /* There is more work to do for this bucket. */
3553 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3554 a >= 0 && a < b into (unsigned) a < (unsigned) b
3555 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3558 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3559 vec
<operand_entry
*> *ops
,
3560 struct range_entry
*ranges
,
3561 basic_block first_bb
)
3564 bool any_changes
= false;
3565 hash_map
<tree
, int> *map
= NULL
;
3567 for (i
= first
; i
< length
; i
++)
3569 if (ranges
[i
].exp
== NULL_TREE
3570 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3574 tree type
= TREE_TYPE (ranges
[i
].exp
);
3575 if (!INTEGRAL_TYPE_P (type
)
3576 || TYPE_UNSIGNED (type
)
3577 || ranges
[i
].low
== NULL_TREE
3578 || !integer_zerop (ranges
[i
].low
)
3579 || ranges
[i
].high
!= NULL_TREE
)
3581 /* EXP >= 0 here. */
3583 map
= new hash_map
<tree
, int>;
3584 map
->put (ranges
[i
].exp
, i
);
3590 for (i
= 0; i
< length
; i
++)
3592 bool in_p
= ranges
[i
].in_p
;
3593 if (ranges
[i
].low
== NULL_TREE
3594 || ranges
[i
].high
== NULL_TREE
)
3596 if (!integer_zerop (ranges
[i
].low
)
3597 || !integer_zerop (ranges
[i
].high
))
3600 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3601 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3602 && integer_onep (ranges
[i
].low
)
3603 && integer_onep (ranges
[i
].high
))
3614 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3616 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3617 if (!is_gimple_assign (stmt
))
3619 ccode
= gimple_assign_rhs_code (stmt
);
3620 rhs1
= gimple_assign_rhs1 (stmt
);
3621 rhs2
= gimple_assign_rhs2 (stmt
);
3625 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3626 stmt
= last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3627 if (gimple_code (stmt
) != GIMPLE_COND
)
3629 ccode
= gimple_cond_code (stmt
);
3630 rhs1
= gimple_cond_lhs (stmt
);
3631 rhs2
= gimple_cond_rhs (stmt
);
3634 if (TREE_CODE (rhs1
) != SSA_NAME
3635 || rhs2
== NULL_TREE
3636 || TREE_CODE (rhs2
) != SSA_NAME
)
3650 ccode
= invert_tree_comparison (ccode
, false);
3655 std::swap (rhs1
, rhs2
);
3656 ccode
= swap_tree_comparison (ccode
);
3665 int *idx
= map
->get (rhs1
);
3669 /* maybe_optimize_range_tests allows statements without side-effects
3670 in the basic blocks as long as they are consumed in the same bb.
3671 Make sure rhs2's def stmt is not among them, otherwise we can't
3672 use safely get_nonzero_bits on it. E.g. in:
3673 # RANGE [-83, 1] NONZERO 173
3674 # k_32 = PHI <k_47(13), k_12(9)>
3677 goto <bb 5>; [26.46%]
3679 goto <bb 9>; [73.54%]
3681 <bb 5> [local count: 140323371]:
3682 # RANGE [0, 1] NONZERO 1
3684 # RANGE [0, 4] NONZERO 4
3686 # RANGE [0, 4] NONZERO 4
3687 iftmp.0_44 = (char) _21;
3688 if (k_32 < iftmp.0_44)
3689 goto <bb 6>; [84.48%]
3691 goto <bb 9>; [15.52%]
3692 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3693 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3694 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3695 those stmts even for negative k_32 and the value ranges would be no
3696 longer guaranteed and so the optimization would be invalid. */
3697 while (opcode
== ERROR_MARK
)
3699 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3700 basic_block bb2
= gimple_bb (g
);
3703 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
3705 /* As an exception, handle a few common cases. */
3706 if (gimple_assign_cast_p (g
)
3707 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3709 tree op0
= gimple_assign_rhs1 (g
);
3710 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3711 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3712 > TYPE_PRECISION (TREE_TYPE (op0
))))
3713 /* Zero-extension is always ok. */
3715 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3716 == TYPE_PRECISION (TREE_TYPE (op0
))
3717 && TREE_CODE (op0
) == SSA_NAME
)
3719 /* Cast from signed to unsigned or vice versa. Retry
3720 with the op0 as new rhs2. */
3725 else if (is_gimple_assign (g
)
3726 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3727 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3728 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
3729 /* Masking with INTEGER_CST with MSB clear is always ok
3736 if (rhs2
== NULL_TREE
)
3739 wide_int nz
= get_nonzero_bits (rhs2
);
3743 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3744 and RHS2 is known to be RHS2 >= 0. */
3745 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3747 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3748 if ((ranges
[*idx
].strict_overflow_p
3749 || ranges
[i
].strict_overflow_p
)
3750 && issue_strict_overflow_warning (wc
))
3751 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3752 "assuming signed overflow does not occur "
3753 "when simplifying range test");
3755 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3757 struct range_entry
*r
= &ranges
[*idx
];
3758 fprintf (dump_file
, "Optimizing range test ");
3759 print_generic_expr (dump_file
, r
->exp
);
3760 fprintf (dump_file
, " +[");
3761 print_generic_expr (dump_file
, r
->low
);
3762 fprintf (dump_file
, ", ");
3763 print_generic_expr (dump_file
, r
->high
);
3764 fprintf (dump_file
, "] and comparison ");
3765 print_generic_expr (dump_file
, rhs1
);
3766 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3767 print_generic_expr (dump_file
, rhs2
);
3768 fprintf (dump_file
, "\n into (");
3769 print_generic_expr (dump_file
, utype
);
3770 fprintf (dump_file
, ") ");
3771 print_generic_expr (dump_file
, rhs1
);
3772 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3773 print_generic_expr (dump_file
, utype
);
3774 fprintf (dump_file
, ") ");
3775 print_generic_expr (dump_file
, rhs2
);
3776 fprintf (dump_file
, "\n");
3779 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3781 if (opcode
== BIT_IOR_EXPR
3782 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3785 ccode
= invert_tree_comparison (ccode
, false);
3788 unsigned int uid
= gimple_uid (stmt
);
3789 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3790 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
3791 gimple_set_uid (g
, uid
);
3792 rhs1
= gimple_assign_lhs (g
);
3793 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3794 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
3796 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
3797 gimple_set_uid (g
, uid
);
3798 rhs2
= gimple_assign_lhs (g
);
3799 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3801 if (tree_swap_operands_p (rhs1
, rhs2
))
3803 std::swap (rhs1
, rhs2
);
3804 ccode
= swap_tree_comparison (ccode
);
3806 if (gimple_code (stmt
) == GIMPLE_COND
)
3808 gcond
*c
= as_a
<gcond
*> (stmt
);
3809 gimple_cond_set_code (c
, ccode
);
3810 gimple_cond_set_lhs (c
, rhs1
);
3811 gimple_cond_set_rhs (c
, rhs2
);
3816 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
3817 if (!INTEGRAL_TYPE_P (ctype
)
3818 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
3819 && TYPE_PRECISION (ctype
) != 1))
3820 ctype
= boolean_type_node
;
3821 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
3822 gimple_set_uid (g
, uid
);
3823 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3824 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
3826 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
3827 NOP_EXPR
, gimple_assign_lhs (g
));
3828 gimple_set_uid (g
, uid
);
3829 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3831 ranges
[i
].exp
= gimple_assign_lhs (g
);
3832 oe
->op
= ranges
[i
].exp
;
3833 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
3834 ranges
[i
].high
= ranges
[i
].low
;
3836 ranges
[i
].strict_overflow_p
= false;
3837 oe
= (*ops
)[ranges
[*idx
].idx
];
3838 /* Now change all the other range test immediate uses, so that
3839 those tests will be optimized away. */
3840 if (opcode
== ERROR_MARK
)
3843 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3844 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3846 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3847 ? boolean_false_node
: boolean_true_node
);
3850 oe
->op
= error_mark_node
;
3851 ranges
[*idx
].exp
= NULL_TREE
;
3852 ranges
[*idx
].low
= NULL_TREE
;
3853 ranges
[*idx
].high
= NULL_TREE
;
3861 /* Optimize range tests, similarly how fold_range_test optimizes
3862 it on trees. The tree code for the binary
3863 operation between all the operands is OPCODE.
3864 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3865 maybe_optimize_range_tests for inter-bb range optimization.
3866 In that case if oe->op is NULL, oe->id is bb->index whose
3867 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3869 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3872 optimize_range_tests (enum tree_code opcode
,
3873 vec
<operand_entry
*> *ops
, basic_block first_bb
)
3875 unsigned int length
= ops
->length (), i
, j
, first
;
3877 struct range_entry
*ranges
;
3878 bool any_changes
= false;
3883 ranges
= XNEWVEC (struct range_entry
, length
);
3884 for (i
= 0; i
< length
; i
++)
3888 init_range_entry (ranges
+ i
, oe
->op
,
3891 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
3892 /* For | invert it now, we will invert it again before emitting
3893 the optimized expression. */
3894 if (opcode
== BIT_IOR_EXPR
3895 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3896 ranges
[i
].in_p
= !ranges
[i
].in_p
;
3899 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
3900 for (i
= 0; i
< length
; i
++)
3901 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
3904 /* Try to merge ranges. */
3905 for (first
= i
; i
< length
; i
++)
3907 tree low
= ranges
[i
].low
;
3908 tree high
= ranges
[i
].high
;
3909 int in_p
= ranges
[i
].in_p
;
3910 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3911 int update_fail_count
= 0;
3913 for (j
= i
+ 1; j
< length
; j
++)
3915 if (ranges
[i
].exp
!= ranges
[j
].exp
)
3917 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
3918 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
3920 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3926 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
3927 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
3928 low
, high
, strict_overflow_p
))
3933 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3934 while update_range_test would fail. */
3935 else if (update_fail_count
== 64)
3938 ++update_fail_count
;
3941 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
3944 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
3945 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
3947 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
3948 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
3950 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
3952 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
3955 if (any_changes
&& opcode
!= ERROR_MARK
)
3958 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3960 if (oe
->op
== error_mark_node
)
3969 XDELETEVEC (ranges
);
3973 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3974 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3975 otherwise the comparison code. TYPE is a return value that is set
3976 to type of comparison. */
3979 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
, tree
*type
,
3980 tree
*lhs
, tree
*rhs
, gassign
**vcond
)
3982 if (TREE_CODE (var
) != SSA_NAME
)
3985 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
3991 /* ??? If we start creating more COND_EXPR, we could perform
3992 this same optimization with them. For now, simplify. */
3993 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
3996 tree cond
= gimple_assign_rhs1 (stmt
);
3997 tree_code cmp
= TREE_CODE (cond
);
3998 if (cmp
!= SSA_NAME
)
4001 gassign
*assign
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (cond
));
4003 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign
)) != tcc_comparison
)
4006 cmp
= gimple_assign_rhs_code (assign
);
4008 *lhs
= gimple_assign_rhs1 (assign
);
4010 *rhs
= gimple_assign_rhs2 (assign
);
4012 /* ??? For now, allow only canonical true and false result vectors.
4013 We could expand this to other constants should the need arise,
4014 but at the moment we don't create them. */
4015 tree t
= gimple_assign_rhs2 (stmt
);
4016 tree f
= gimple_assign_rhs3 (stmt
);
4018 if (integer_all_onesp (t
))
4020 else if (integer_all_onesp (f
))
4022 cmp
= invert_tree_comparison (cmp
, false);
4027 if (!integer_zerop (f
))
4036 *type
= TREE_TYPE (cond
);
4040 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4041 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4044 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
4046 unsigned int length
= ops
->length (), i
, j
;
4047 bool any_changes
= false;
4052 for (i
= 0; i
< length
; ++i
)
4054 tree elt0
= (*ops
)[i
]->op
;
4056 gassign
*stmt0
, *vcond0
;
4058 tree type
, lhs0
, rhs0
;
4059 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
, &type
, &lhs0
,
4061 if (cmp0
== ERROR_MARK
)
4064 for (j
= i
+ 1; j
< length
; ++j
)
4066 tree
&elt1
= (*ops
)[j
]->op
;
4068 gassign
*stmt1
, *vcond1
;
4070 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
, NULL
, &lhs1
,
4072 if (cmp1
== ERROR_MARK
)
4076 if (opcode
== BIT_AND_EXPR
)
4077 comb
= maybe_fold_and_comparisons (type
, cmp0
, lhs0
, rhs0
,
4079 else if (opcode
== BIT_IOR_EXPR
)
4080 comb
= maybe_fold_or_comparisons (type
, cmp0
, lhs0
, rhs0
,
4088 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4090 fprintf (dump_file
, "Transforming ");
4091 print_generic_expr (dump_file
, gimple_assign_lhs (stmt0
));
4092 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
4093 print_generic_expr (dump_file
, gimple_assign_lhs (stmt1
));
4094 fprintf (dump_file
, " into ");
4095 print_generic_expr (dump_file
, comb
);
4096 fputc ('\n', dump_file
);
4099 gimple_stmt_iterator gsi
= gsi_for_stmt (vcond0
);
4100 tree exp
= force_gimple_operand_gsi (&gsi
, comb
, true, NULL_TREE
,
4101 true, GSI_SAME_STMT
);
4103 swap_ssa_operands (vcond0
, gimple_assign_rhs2_ptr (vcond0
),
4104 gimple_assign_rhs3_ptr (vcond0
));
4105 gimple_assign_set_rhs1 (vcond0
, exp
);
4106 update_stmt (vcond0
);
4108 elt1
= error_mark_node
;
4117 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4119 if (oe
->op
== error_mark_node
)
4131 /* Return true if STMT is a cast like:
4137 # _345 = PHI <_123(N), 1(...), 1(...)>
4138 where _234 has bool type, _123 has single use and
4139 bb N has a single successor M. This is commonly used in
4140 the last block of a range test.
4142 Also Return true if STMT is tcc_compare like:
4148 # _345 = PHI <_234(N), 1(...), 1(...)>
4150 where _234 has booltype, single use and
4151 bb N has a single successor M. This is commonly used in
4152 the last block of a range test. */
4155 final_range_test_p (gimple
*stmt
)
4157 basic_block bb
, rhs_bb
, lhs_bb
;
4160 use_operand_p use_p
;
4163 if (!gimple_assign_cast_p (stmt
)
4164 && (!is_gimple_assign (stmt
)
4165 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4166 != tcc_comparison
)))
4168 bb
= gimple_bb (stmt
);
4169 if (!single_succ_p (bb
))
4171 e
= single_succ_edge (bb
);
4172 if (e
->flags
& EDGE_COMPLEX
)
4175 lhs
= gimple_assign_lhs (stmt
);
4176 rhs
= gimple_assign_rhs1 (stmt
);
4177 if (gimple_assign_cast_p (stmt
)
4178 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4179 || TREE_CODE (rhs
) != SSA_NAME
4180 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
4183 if (!gimple_assign_cast_p (stmt
)
4184 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
4187 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4188 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
4191 if (gimple_code (use_stmt
) != GIMPLE_PHI
4192 || gimple_bb (use_stmt
) != e
->dest
)
4195 /* And that the rhs is defined in the same loop. */
4196 if (gimple_assign_cast_p (stmt
))
4198 if (TREE_CODE (rhs
) != SSA_NAME
4199 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
4200 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
4205 if (TREE_CODE (lhs
) != SSA_NAME
4206 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
4207 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
4214 /* Return true if BB is suitable basic block for inter-bb range test
4215 optimization. If BACKWARD is true, BB should be the only predecessor
4216 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4217 or compared with to find a common basic block to which all conditions
4218 branch to if true resp. false. If BACKWARD is false, TEST_BB should
4219 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4220 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4221 block points to an empty block that falls through into *OTHER_BB and
4222 the phi args match that path. */
4225 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
4226 bool *test_swapped_p
, bool backward
)
4228 edge_iterator ei
, ei2
;
4232 bool other_edge_seen
= false;
4237 /* Check last stmt first. */
4238 stmt
= last_stmt (bb
);
4240 || (gimple_code (stmt
) != GIMPLE_COND
4241 && (backward
|| !final_range_test_p (stmt
)))
4242 || gimple_visited_p (stmt
)
4243 || stmt_could_throw_p (cfun
, stmt
)
4246 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
4249 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4250 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4251 to *OTHER_BB (if not set yet, try to find it out). */
4252 if (EDGE_COUNT (bb
->succs
) != 2)
4254 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
4256 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4258 if (e
->dest
== test_bb
)
4267 if (*other_bb
== NULL
)
4269 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
4270 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4272 else if (e
->dest
== e2
->dest
)
4273 *other_bb
= e
->dest
;
4274 if (*other_bb
== NULL
)
4277 if (e
->dest
== *other_bb
)
4278 other_edge_seen
= true;
4282 if (*other_bb
== NULL
|| !other_edge_seen
)
4285 else if (single_succ (bb
) != *other_bb
)
4288 /* Now check all PHIs of *OTHER_BB. */
4289 e
= find_edge (bb
, *other_bb
);
4290 e2
= find_edge (test_bb
, *other_bb
);
4292 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4294 gphi
*phi
= gsi
.phi ();
4295 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4296 corresponding to BB and TEST_BB predecessor must be the same. */
4297 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
4298 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
4300 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4301 one of the PHIs should have the lhs of the last stmt in
4302 that block as PHI arg and that PHI should have 0 or 1
4303 corresponding to it in all other range test basic blocks
4307 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
4308 == gimple_assign_lhs (stmt
)
4309 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
4310 || integer_onep (gimple_phi_arg_def (phi
,
4316 gimple
*test_last
= last_stmt (test_bb
);
4317 if (gimple_code (test_last
) == GIMPLE_COND
)
4319 if (backward
? e2
->src
!= test_bb
: e
->src
!= bb
)
4322 /* For last_bb, handle also:
4324 goto <bb 6>; [34.00%]
4326 goto <bb 7>; [66.00%]
4328 <bb 6> [local count: 79512730]:
4330 <bb 7> [local count: 1073741824]:
4331 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4332 where bb 7 is *OTHER_BB, but the PHI values from the
4333 earlier bbs match the path through the empty bb
4337 e3
= EDGE_SUCC (test_bb
,
4338 e2
== EDGE_SUCC (test_bb
, 0) ? 1 : 0);
4341 e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4342 if (empty_block_p (e3
->dest
)
4343 && single_succ_p (e3
->dest
)
4344 && single_succ (e3
->dest
) == *other_bb
4345 && single_pred_p (e3
->dest
)
4346 && single_succ_edge (e3
->dest
)->flags
== EDGE_FALLTHRU
)
4349 e2
= single_succ_edge (e3
->dest
);
4351 e
= single_succ_edge (e3
->dest
);
4353 *test_swapped_p
= true;
4357 else if (gimple_phi_arg_def (phi
, e2
->dest_idx
)
4358 == gimple_assign_lhs (test_last
)
4359 && (integer_zerop (gimple_phi_arg_def (phi
,
4361 || integer_onep (gimple_phi_arg_def (phi
,
4372 /* Return true if BB doesn't have side-effects that would disallow
4373 range test optimization, all SSA_NAMEs set in the bb are consumed
4374 in the bb and there are no PHIs. */
4377 no_side_effect_bb (basic_block bb
)
4379 gimple_stmt_iterator gsi
;
4382 if (!gimple_seq_empty_p (phi_nodes (bb
)))
4384 last
= last_stmt (bb
);
4385 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4387 gimple
*stmt
= gsi_stmt (gsi
);
4389 imm_use_iterator imm_iter
;
4390 use_operand_p use_p
;
4392 if (is_gimple_debug (stmt
))
4394 if (gimple_has_side_effects (stmt
))
4398 if (!is_gimple_assign (stmt
))
4400 lhs
= gimple_assign_lhs (stmt
);
4401 if (TREE_CODE (lhs
) != SSA_NAME
)
4403 if (gimple_assign_rhs_could_trap_p (stmt
))
4405 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
4407 gimple
*use_stmt
= USE_STMT (use_p
);
4408 if (is_gimple_debug (use_stmt
))
4410 if (gimple_bb (use_stmt
) != bb
)
4417 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4418 return true and fill in *OPS recursively. */
4421 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
4424 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4428 if (!is_reassociable_op (stmt
, code
, loop
))
4431 rhs
[0] = gimple_assign_rhs1 (stmt
);
4432 rhs
[1] = gimple_assign_rhs2 (stmt
);
4433 gimple_set_visited (stmt
, true);
4434 for (i
= 0; i
< 2; i
++)
4435 if (TREE_CODE (rhs
[i
]) == SSA_NAME
4436 && !get_ops (rhs
[i
], code
, ops
, loop
)
4437 && has_single_use (rhs
[i
]))
4439 operand_entry
*oe
= operand_entry_pool
.allocate ();
4445 oe
->stmt_to_insert
= NULL
;
4446 ops
->safe_push (oe
);
4451 /* Find the ops that were added by get_ops starting from VAR, see if
4452 they were changed during update_range_test and if yes, create new
4456 update_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> ops
,
4457 unsigned int *pidx
, class loop
*loop
)
4459 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4463 if (!is_reassociable_op (stmt
, code
, loop
))
4466 rhs
[0] = gimple_assign_rhs1 (stmt
);
4467 rhs
[1] = gimple_assign_rhs2 (stmt
);
4470 for (i
= 0; i
< 2; i
++)
4471 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
4473 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
4474 if (rhs
[2 + i
] == NULL_TREE
)
4476 if (has_single_use (rhs
[i
]))
4477 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
4479 rhs
[2 + i
] = rhs
[i
];
4482 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
4483 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
4485 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4486 var
= make_ssa_name (TREE_TYPE (var
));
4487 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
4489 gimple_set_uid (g
, gimple_uid (stmt
));
4490 gimple_set_visited (g
, true);
4491 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4496 /* Structure to track the initial value passed to get_ops and
4497 the range in the ops vector for each basic block. */
4499 struct inter_bb_range_test_entry
4502 unsigned int first_idx
, last_idx
;
4505 /* Inter-bb range test optimization.
4507 Returns TRUE if a gimple conditional is optimized to a true/false,
4508 otherwise return FALSE.
4510 This indicates to the caller that it should run a CFG cleanup pass
4511 once reassociation is completed. */
4514 maybe_optimize_range_tests (gimple
*stmt
)
4516 basic_block first_bb
= gimple_bb (stmt
);
4517 basic_block last_bb
= first_bb
;
4518 basic_block other_bb
= NULL
;
4522 auto_vec
<operand_entry
*> ops
;
4523 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
4524 bool any_changes
= false;
4525 bool cfg_cleanup_needed
= false;
4527 /* Consider only basic blocks that end with GIMPLE_COND or
4528 a cast statement satisfying final_range_test_p. All
4529 but the last bb in the first_bb .. last_bb range
4530 should end with GIMPLE_COND. */
4531 if (gimple_code (stmt
) == GIMPLE_COND
)
4533 if (EDGE_COUNT (first_bb
->succs
) != 2)
4534 return cfg_cleanup_needed
;
4536 else if (final_range_test_p (stmt
))
4537 other_bb
= single_succ (first_bb
);
4539 return cfg_cleanup_needed
;
4541 if (stmt_could_throw_p (cfun
, stmt
))
4542 return cfg_cleanup_needed
;
4544 /* As relative ordering of post-dominator sons isn't fixed,
4545 maybe_optimize_range_tests can be called first on any
4546 bb in the range we want to optimize. So, start searching
4547 backwards, if first_bb can be set to a predecessor. */
4548 while (single_pred_p (first_bb
))
4550 basic_block pred_bb
= single_pred (first_bb
);
4551 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, NULL
, true))
4553 if (!no_side_effect_bb (first_bb
))
4557 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4558 Before starting forward search in last_bb successors, find
4559 out the other_bb. */
4560 if (first_bb
== last_bb
)
4563 /* As non-GIMPLE_COND last stmt always terminates the range,
4564 if forward search didn't discover anything, just give up. */
4565 if (gimple_code (stmt
) != GIMPLE_COND
)
4566 return cfg_cleanup_needed
;
4567 /* Look at both successors. Either it ends with a GIMPLE_COND
4568 and satisfies suitable_cond_bb, or ends with a cast and
4569 other_bb is that cast's successor. */
4570 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
4571 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
4572 || e
->dest
== first_bb
)
4573 return cfg_cleanup_needed
;
4574 else if (single_pred_p (e
->dest
))
4576 stmt
= last_stmt (e
->dest
);
4578 && gimple_code (stmt
) == GIMPLE_COND
4579 && EDGE_COUNT (e
->dest
->succs
) == 2)
4581 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
,
4588 && final_range_test_p (stmt
)
4589 && find_edge (first_bb
, single_succ (e
->dest
)))
4591 other_bb
= single_succ (e
->dest
);
4592 if (other_bb
== first_bb
)
4596 if (other_bb
== NULL
)
4597 return cfg_cleanup_needed
;
4599 /* Now do the forward search, moving last_bb to successor bbs
4600 that aren't other_bb. */
4601 while (EDGE_COUNT (last_bb
->succs
) == 2)
4603 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
4604 if (e
->dest
!= other_bb
)
4608 if (!single_pred_p (e
->dest
))
4610 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, NULL
, false))
4612 if (!no_side_effect_bb (e
->dest
))
4616 if (first_bb
== last_bb
)
4617 return cfg_cleanup_needed
;
4618 /* Here basic blocks first_bb through last_bb's predecessor
4619 end with GIMPLE_COND, all of them have one of the edges to
4620 other_bb and another to another block in the range,
4621 all blocks except first_bb don't have side-effects and
4622 last_bb ends with either GIMPLE_COND, or cast satisfying
4623 final_range_test_p. */
4624 for (bb
= last_bb
; ; bb
= single_pred (bb
))
4626 enum tree_code code
;
4628 inter_bb_range_test_entry bb_ent
;
4630 bb_ent
.op
= NULL_TREE
;
4631 bb_ent
.first_idx
= ops
.length ();
4632 bb_ent
.last_idx
= bb_ent
.first_idx
;
4633 e
= find_edge (bb
, other_bb
);
4634 stmt
= last_stmt (bb
);
4635 gimple_set_visited (stmt
, true);
4636 if (gimple_code (stmt
) != GIMPLE_COND
)
4638 use_operand_p use_p
;
4643 lhs
= gimple_assign_lhs (stmt
);
4644 rhs
= gimple_assign_rhs1 (stmt
);
4645 gcc_assert (bb
== last_bb
);
4654 # _345 = PHI <_123(N), 1(...), 1(...)>
4656 or 0 instead of 1. If it is 0, the _234
4657 range test is anded together with all the
4658 other range tests, if it is 1, it is ored with
4660 single_imm_use (lhs
, &use_p
, &phi
);
4661 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4662 e2
= find_edge (first_bb
, other_bb
);
4664 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4665 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4666 code
= BIT_AND_EXPR
;
4669 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4670 code
= BIT_IOR_EXPR
;
4673 /* If _234 SSA_NAME_DEF_STMT is
4675 (or &, corresponding to 1/0 in the phi arguments,
4676 push into ops the individual range test arguments
4677 of the bitwise or resp. and, recursively. */
4678 if (TREE_CODE (rhs
) == SSA_NAME
4679 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4681 && !get_ops (rhs
, code
, &ops
,
4682 loop_containing_stmt (stmt
))
4683 && has_single_use (rhs
))
4685 /* Otherwise, push the _234 range test itself. */
4686 operand_entry
*oe
= operand_entry_pool
.allocate ();
4692 oe
->stmt_to_insert
= NULL
;
4697 else if (is_gimple_assign (stmt
)
4698 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4700 && !get_ops (lhs
, code
, &ops
,
4701 loop_containing_stmt (stmt
))
4702 && has_single_use (lhs
))
4704 operand_entry
*oe
= operand_entry_pool
.allocate ();
4715 bb_ent
.last_idx
= ops
.length ();
4718 bbinfo
.safe_push (bb_ent
);
4721 else if (bb
== last_bb
)
4723 /* For last_bb, handle also:
4725 goto <bb 6>; [34.00%]
4727 goto <bb 7>; [66.00%]
4729 <bb 6> [local count: 79512730]:
4731 <bb 7> [local count: 1073741824]:
4732 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4733 where bb 7 is OTHER_BB, but the PHI values from the
4734 earlier bbs match the path through the empty bb
4736 bool test_swapped_p
= false;
4737 bool ok
= suitable_cond_bb (single_pred (last_bb
), last_bb
,
4738 &other_bb
, &test_swapped_p
, true);
4741 e
= EDGE_SUCC (bb
, e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4743 /* Otherwise stmt is GIMPLE_COND. */
4744 code
= gimple_cond_code (stmt
);
4745 lhs
= gimple_cond_lhs (stmt
);
4746 rhs
= gimple_cond_rhs (stmt
);
4747 if (TREE_CODE (lhs
) == SSA_NAME
4748 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4749 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4750 || rhs
!= boolean_false_node
4751 /* Either push into ops the individual bitwise
4752 or resp. and operands, depending on which
4753 edge is other_bb. */
4754 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4755 ^ (code
== EQ_EXPR
))
4756 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4757 loop_containing_stmt (stmt
))))
4759 /* Or push the GIMPLE_COND stmt itself. */
4760 operand_entry
*oe
= operand_entry_pool
.allocate ();
4763 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4764 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4765 /* oe->op = NULL signs that there is no SSA_NAME
4766 for the range test, and oe->id instead is the
4767 basic block number, at which's end the GIMPLE_COND
4771 oe
->stmt_to_insert
= NULL
;
4776 else if (ops
.length () > bb_ent
.first_idx
)
4779 bb_ent
.last_idx
= ops
.length ();
4781 bbinfo
.safe_push (bb_ent
);
4785 if (ops
.length () > 1)
4786 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
, first_bb
);
4789 unsigned int idx
, max_idx
= 0;
4790 /* update_ops relies on has_single_use predicates returning the
4791 same values as it did during get_ops earlier. Additionally it
4792 never removes statements, only adds new ones and it should walk
4793 from the single imm use and check the predicate already before
4794 making those changes.
4795 On the other side, the handling of GIMPLE_COND directly can turn
4796 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4797 it needs to be done in a separate loop afterwards. */
4798 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4800 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4801 && bbinfo
[idx
].op
!= NULL_TREE
)
4806 stmt
= last_stmt (bb
);
4807 new_op
= update_ops (bbinfo
[idx
].op
,
4809 ops
[bbinfo
[idx
].first_idx
]->rank
,
4810 ops
, &bbinfo
[idx
].first_idx
,
4811 loop_containing_stmt (stmt
));
4812 if (new_op
== NULL_TREE
)
4814 gcc_assert (bb
== last_bb
);
4815 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
4817 if (bbinfo
[idx
].op
!= new_op
)
4819 imm_use_iterator iter
;
4820 use_operand_p use_p
;
4821 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
4823 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
4824 if (is_gimple_debug (use_stmt
))
4826 else if (gimple_code (use_stmt
) == GIMPLE_COND
4827 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4828 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4829 SET_USE (use_p
, new_op
);
4830 else if ((is_gimple_assign (use_stmt
)
4832 (gimple_assign_rhs_code (use_stmt
))
4833 == tcc_comparison
)))
4834 cast_or_tcc_cmp_stmt
= use_stmt
;
4835 else if (gimple_assign_cast_p (use_stmt
))
4836 cast_or_tcc_cmp_stmt
= use_stmt
;
4840 if (cast_or_tcc_cmp_stmt
)
4842 gcc_assert (bb
== last_bb
);
4843 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
4844 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
4845 enum tree_code rhs_code
4846 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
4847 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
4850 if (is_gimple_min_invariant (new_op
))
4852 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
4853 g
= gimple_build_assign (new_lhs
, new_op
);
4856 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
4857 gimple_stmt_iterator gsi
4858 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
4859 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
4860 gimple_set_visited (g
, true);
4861 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4862 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
4863 if (is_gimple_debug (use_stmt
))
4865 else if (gimple_code (use_stmt
) == GIMPLE_COND
4866 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4867 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4868 SET_USE (use_p
, new_lhs
);
4877 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4879 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4880 && bbinfo
[idx
].op
== NULL_TREE
4881 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
4883 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
4888 /* If we collapse the conditional to a true/false
4889 condition, then bubble that knowledge up to our caller. */
4890 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
4892 gimple_cond_make_false (cond_stmt
);
4893 cfg_cleanup_needed
= true;
4895 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
4897 gimple_cond_make_true (cond_stmt
);
4898 cfg_cleanup_needed
= true;
4902 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
4903 gimple_cond_set_lhs (cond_stmt
,
4904 ops
[bbinfo
[idx
].first_idx
]->op
);
4905 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
4907 update_stmt (cond_stmt
);
4913 /* The above changes could result in basic blocks after the first
4914 modified one, up to and including last_bb, to be executed even if
4915 they would not be in the original program. If the value ranges of
4916 assignment lhs' in those bbs were dependent on the conditions
4917 guarding those basic blocks which now can change, the VRs might
4918 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4919 are only used within the same bb, it should be not a big deal if
4920 we just reset all the VRs in those bbs. See PR68671. */
4921 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
4922 reset_flow_sensitive_info_in_bb (bb
);
4924 return cfg_cleanup_needed
;
4927 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4928 of STMT in it's operands. This is also known as a "destructive
4929 update" operation. */
4932 is_phi_for_stmt (gimple
*stmt
, tree operand
)
4937 use_operand_p arg_p
;
4940 if (TREE_CODE (operand
) != SSA_NAME
)
4943 lhs
= gimple_assign_lhs (stmt
);
4945 def_stmt
= SSA_NAME_DEF_STMT (operand
);
4946 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
4950 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
4951 if (lhs
== USE_FROM_PTR (arg_p
))
4956 /* Remove def stmt of VAR if VAR has zero uses and recurse
4957 on rhs1 operand if so. */
4960 remove_visited_stmt_chain (tree var
)
4963 gimple_stmt_iterator gsi
;
4967 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
4969 stmt
= SSA_NAME_DEF_STMT (var
);
4970 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
4972 var
= gimple_assign_rhs1 (stmt
);
4973 gsi
= gsi_for_stmt (stmt
);
4974 reassoc_remove_stmt (&gsi
);
4975 release_defs (stmt
);
4982 /* This function checks three consequtive operands in
4983 passed operands vector OPS starting from OPINDEX and
4984 swaps two operands if it is profitable for binary operation
4985 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4987 We pair ops with the same rank if possible.
4989 The alternative we try is to see if STMT is a destructive
4990 update style statement, which is like:
4993 In that case, we want to use the destructive update form to
4994 expose the possible vectorizer sum reduction opportunity.
4995 In that case, the third operand will be the phi node. This
4996 check is not performed if STMT is null.
4998 We could, of course, try to be better as noted above, and do a
4999 lot of work to try to find these opportunities in >3 operand
5000 cases, but it is unlikely to be worth it. */
5003 swap_ops_for_binary_stmt (vec
<operand_entry
*> ops
,
5004 unsigned int opindex
, gimple
*stmt
)
5006 operand_entry
*oe1
, *oe2
, *oe3
;
5009 oe2
= ops
[opindex
+ 1];
5010 oe3
= ops
[opindex
+ 2];
5012 if ((oe1
->rank
== oe2
->rank
5013 && oe2
->rank
!= oe3
->rank
)
5014 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
5015 && !is_phi_for_stmt (stmt
, oe1
->op
)
5016 && !is_phi_for_stmt (stmt
, oe2
->op
)))
5017 std::swap (*oe1
, *oe3
);
5018 else if ((oe1
->rank
== oe3
->rank
5019 && oe2
->rank
!= oe3
->rank
)
5020 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
5021 && !is_phi_for_stmt (stmt
, oe1
->op
)
5022 && !is_phi_for_stmt (stmt
, oe3
->op
)))
5023 std::swap (*oe1
, *oe2
);
5026 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5027 two definitions, otherwise return STMT. */
5029 static inline gimple
*
5030 find_insert_point (gimple
*stmt
, tree rhs1
, tree rhs2
)
5032 if (TREE_CODE (rhs1
) == SSA_NAME
5033 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
5034 stmt
= SSA_NAME_DEF_STMT (rhs1
);
5035 if (TREE_CODE (rhs2
) == SSA_NAME
5036 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
5037 stmt
= SSA_NAME_DEF_STMT (rhs2
);
5041 /* If the stmt that defines operand has to be inserted, insert it
5044 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
5046 gcc_assert (is_gimple_assign (stmt_to_insert
));
5047 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
5048 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
5049 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
);
5050 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
5051 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
5053 /* If the insert point is not stmt, then insert_point would be
5054 the point where operand rhs1 or rhs2 is defined. In this case,
5055 stmt_to_insert has to be inserted afterwards. This would
5056 only happen when the stmt insertion point is flexible. */
5057 if (stmt
== insert_point
)
5058 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
5060 insert_stmt_after (stmt_to_insert
, insert_point
);
5064 /* Recursively rewrite our linearized statements so that the operators
5065 match those in OPS[OPINDEX], putting the computation in rank
5066 order. Return new lhs.
5067 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5068 the current stmt and during recursive invocations.
5069 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5070 recursive invocations. */
5073 rewrite_expr_tree (gimple
*stmt
, enum tree_code rhs_code
, unsigned int opindex
,
5074 vec
<operand_entry
*> ops
, bool changed
, bool next_changed
)
5076 tree rhs1
= gimple_assign_rhs1 (stmt
);
5077 tree rhs2
= gimple_assign_rhs2 (stmt
);
5078 tree lhs
= gimple_assign_lhs (stmt
);
5081 /* The final recursion case for this function is that you have
5082 exactly two operations left.
5083 If we had exactly one op in the entire list to start with, we
5084 would have never called this function, and the tail recursion
5085 rewrites them one at a time. */
5086 if (opindex
+ 2 == ops
.length ())
5088 operand_entry
*oe1
, *oe2
;
5091 oe2
= ops
[opindex
+ 1];
5093 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
5095 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5096 unsigned int uid
= gimple_uid (stmt
);
5098 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5100 fprintf (dump_file
, "Transforming ");
5101 print_gimple_stmt (dump_file
, stmt
, 0);
5104 /* If the stmt that defines operand has to be inserted, insert it
5106 if (oe1
->stmt_to_insert
)
5107 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
5108 if (oe2
->stmt_to_insert
)
5109 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
5110 /* Even when changed is false, reassociation could have e.g. removed
5111 some redundant operations, so unless we are just swapping the
5112 arguments or unless there is no change at all (then we just
5113 return lhs), force creation of a new SSA_NAME. */
5114 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
5116 gimple
*insert_point
5117 = find_insert_point (stmt
, oe1
->op
, oe2
->op
);
5118 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5120 = gimple_build_assign (lhs
, rhs_code
,
5122 gimple_set_uid (stmt
, uid
);
5123 gimple_set_visited (stmt
, true);
5124 if (insert_point
== gsi_stmt (gsi
))
5125 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5127 insert_stmt_after (stmt
, insert_point
);
5131 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
5133 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
5134 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
5138 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
5139 remove_visited_stmt_chain (rhs1
);
5141 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5143 fprintf (dump_file
, " into ");
5144 print_gimple_stmt (dump_file
, stmt
, 0);
5150 /* If we hit here, we should have 3 or more ops left. */
5151 gcc_assert (opindex
+ 2 < ops
.length ());
5153 /* Rewrite the next operator. */
5156 /* If the stmt that defines operand has to be inserted, insert it
5158 if (oe
->stmt_to_insert
)
5159 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
5161 /* Recurse on the LHS of the binary operator, which is guaranteed to
5162 be the non-leaf side. */
5164 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), rhs_code
, opindex
+ 1, ops
,
5165 changed
|| oe
->op
!= rhs2
|| next_changed
,
5168 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
5170 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5172 fprintf (dump_file
, "Transforming ");
5173 print_gimple_stmt (dump_file
, stmt
, 0);
5176 /* If changed is false, this is either opindex == 0
5177 or all outer rhs2's were equal to corresponding oe->op,
5178 and powi_result is NULL.
5179 That means lhs is equivalent before and after reassociation.
5180 Otherwise ensure the old lhs SSA_NAME is not reused and
5181 create a new stmt as well, so that any debug stmts will be
5182 properly adjusted. */
5185 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5186 unsigned int uid
= gimple_uid (stmt
);
5187 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
5189 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5190 stmt
= gimple_build_assign (lhs
, rhs_code
,
5192 gimple_set_uid (stmt
, uid
);
5193 gimple_set_visited (stmt
, true);
5194 if (insert_point
== gsi_stmt (gsi
))
5195 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5197 insert_stmt_after (stmt
, insert_point
);
5201 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
5203 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
5204 gimple_assign_set_rhs2 (stmt
, oe
->op
);
5208 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5210 fprintf (dump_file
, " into ");
5211 print_gimple_stmt (dump_file
, stmt
, 0);
5217 /* Find out how many cycles we need to compute statements chain.
5218 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
5219 maximum number of independent statements we may execute per cycle. */
5222 get_required_cycles (int ops_num
, int cpu_width
)
5228 /* While we have more than 2 * cpu_width operands
5229 we may reduce number of operands by cpu_width
5231 res
= ops_num
/ (2 * cpu_width
);
5233 /* Remained operands count may be reduced twice per cycle
5234 until we have only one operand. */
5235 rest
= (unsigned)(ops_num
- res
* cpu_width
);
5236 elog
= exact_log2 (rest
);
5240 res
+= floor_log2 (rest
) + 1;
5245 /* Returns an optimal number of registers to use for computation of
5246 given statements. */
5249 get_reassociation_width (int ops_num
, enum tree_code opc
,
5252 int param_width
= param_tree_reassoc_width
;
5257 if (param_width
> 0)
5258 width
= param_width
;
5260 width
= targetm
.sched
.reassociation_width (opc
, mode
);
5265 /* Get the minimal time required for sequence computation. */
5266 cycles_best
= get_required_cycles (ops_num
, width
);
5268 /* Check if we may use less width and still compute sequence for
5269 the same time. It will allow us to reduce registers usage.
5270 get_required_cycles is monotonically increasing with lower width
5271 so we can perform a binary search for the minimal width that still
5272 results in the optimal cycle count. */
5274 while (width
> width_min
)
5276 int width_mid
= (width
+ width_min
) / 2;
5278 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
5280 else if (width_min
< width_mid
)
5281 width_min
= width_mid
;
5289 /* Recursively rewrite our linearized statements so that the operators
5290 match those in OPS[OPINDEX], putting the computation in rank
5291 order and trying to allow operations to be executed in
5295 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
5296 vec
<operand_entry
*> ops
)
5298 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
5299 int op_num
= ops
.length ();
5300 gcc_assert (op_num
> 0);
5301 int stmt_num
= op_num
- 1;
5302 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
5303 int op_index
= op_num
- 1;
5305 int ready_stmts_end
= 0;
5307 gimple
*stmt1
= NULL
, *stmt2
= NULL
;
5308 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
5310 /* We start expression rewriting from the top statements.
5311 So, in this loop we create a full list of statements
5312 we will work with. */
5313 stmts
[stmt_num
- 1] = stmt
;
5314 for (i
= stmt_num
- 2; i
>= 0; i
--)
5315 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
5317 for (i
= 0; i
< stmt_num
; i
++)
5321 /* Determine whether we should use results of
5322 already handled statements or not. */
5323 if (ready_stmts_end
== 0
5324 && (i
- stmt_index
>= width
|| op_index
< 1))
5325 ready_stmts_end
= i
;
5327 /* Now we choose operands for the next statement. Non zero
5328 value in ready_stmts_end means here that we should use
5329 the result of already generated statements as new operand. */
5330 if (ready_stmts_end
> 0)
5332 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
5333 if (ready_stmts_end
> stmt_index
)
5334 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5335 else if (op_index
>= 0)
5337 operand_entry
*oe
= ops
[op_index
--];
5338 stmt2
= oe
->stmt_to_insert
;
5343 gcc_assert (stmt_index
< i
);
5344 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5347 if (stmt_index
>= ready_stmts_end
)
5348 ready_stmts_end
= 0;
5353 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
5354 operand_entry
*oe2
= ops
[op_index
--];
5355 operand_entry
*oe1
= ops
[op_index
--];
5357 stmt2
= oe2
->stmt_to_insert
;
5359 stmt1
= oe1
->stmt_to_insert
;
5362 /* If we emit the last statement then we should put
5363 operands into the last statement. It will also
5365 if (op_index
< 0 && stmt_index
== i
)
5368 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5370 fprintf (dump_file
, "Transforming ");
5371 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5374 /* If the stmt that defines operand has to be inserted, insert it
5377 insert_stmt_before_use (stmts
[i
], stmt1
);
5379 insert_stmt_before_use (stmts
[i
], stmt2
);
5380 stmt1
= stmt2
= NULL
;
5382 /* We keep original statement only for the last one. All
5383 others are recreated. */
5384 if (i
== stmt_num
- 1)
5386 gimple_assign_set_rhs1 (stmts
[i
], op1
);
5387 gimple_assign_set_rhs2 (stmts
[i
], op2
);
5388 update_stmt (stmts
[i
]);
5392 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
5393 gimple_set_visited (stmts
[i
], true);
5395 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5397 fprintf (dump_file
, " into ");
5398 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5402 remove_visited_stmt_chain (last_rhs1
);
5405 /* Transform STMT, which is really (A +B) + (C + D) into the left
5406 linear form, ((A+B)+C)+D.
5407 Recurse on D if necessary. */
5410 linearize_expr (gimple
*stmt
)
5412 gimple_stmt_iterator gsi
;
5413 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
5414 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5415 gimple
*oldbinrhs
= binrhs
;
5416 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5417 gimple
*newbinrhs
= NULL
;
5418 class loop
*loop
= loop_containing_stmt (stmt
);
5419 tree lhs
= gimple_assign_lhs (stmt
);
5421 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
5422 && is_reassociable_op (binrhs
, rhscode
, loop
));
5424 gsi
= gsi_for_stmt (stmt
);
5426 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
5427 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
5428 gimple_assign_rhs_code (binrhs
),
5429 gimple_assign_lhs (binlhs
),
5430 gimple_assign_rhs2 (binrhs
));
5431 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
5432 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
5433 gimple_set_uid (binrhs
, gimple_uid (stmt
));
5435 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
5436 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5438 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5440 fprintf (dump_file
, "Linearized: ");
5441 print_gimple_stmt (dump_file
, stmt
, 0);
5444 reassociate_stats
.linearized
++;
5447 gsi
= gsi_for_stmt (oldbinrhs
);
5448 reassoc_remove_stmt (&gsi
);
5449 release_defs (oldbinrhs
);
5451 gimple_set_visited (stmt
, true);
5452 gimple_set_visited (binlhs
, true);
5453 gimple_set_visited (binrhs
, true);
5455 /* Tail recurse on the new rhs if it still needs reassociation. */
5456 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
5457 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5458 want to change the algorithm while converting to tuples. */
5459 linearize_expr (stmt
);
5462 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5463 it. Otherwise, return NULL. */
5466 get_single_immediate_use (tree lhs
)
5468 use_operand_p immuse
;
5471 if (TREE_CODE (lhs
) == SSA_NAME
5472 && single_imm_use (lhs
, &immuse
, &immusestmt
)
5473 && is_gimple_assign (immusestmt
))
5479 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5480 representing the negated value. Insertions of any necessary
5481 instructions go before GSI.
5482 This function is recursive in that, if you hand it "a_5" as the
5483 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5484 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5487 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
5489 gimple
*negatedefstmt
= NULL
;
5490 tree resultofnegate
;
5491 gimple_stmt_iterator gsi
;
5494 /* If we are trying to negate a name, defined by an add, negate the
5495 add operands instead. */
5496 if (TREE_CODE (tonegate
) == SSA_NAME
)
5497 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
5498 if (TREE_CODE (tonegate
) == SSA_NAME
5499 && is_gimple_assign (negatedefstmt
)
5500 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
5501 && has_single_use (gimple_assign_lhs (negatedefstmt
))
5502 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
5504 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
5505 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
5506 tree lhs
= gimple_assign_lhs (negatedefstmt
);
5509 gsi
= gsi_for_stmt (negatedefstmt
);
5510 rhs1
= negate_value (rhs1
, &gsi
);
5512 gsi
= gsi_for_stmt (negatedefstmt
);
5513 rhs2
= negate_value (rhs2
, &gsi
);
5515 gsi
= gsi_for_stmt (negatedefstmt
);
5516 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5517 gimple_set_visited (negatedefstmt
, true);
5518 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
5519 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
5520 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5524 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
5525 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
5526 NULL_TREE
, true, GSI_SAME_STMT
);
5528 uid
= gimple_uid (gsi_stmt (gsi
));
5529 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5531 gimple
*stmt
= gsi_stmt (gsi
);
5532 if (gimple_uid (stmt
) != 0)
5534 gimple_set_uid (stmt
, uid
);
5536 return resultofnegate
;
5539 /* Return true if we should break up the subtract in STMT into an add
5540 with negate. This is true when we the subtract operands are really
5541 adds, or the subtract itself is used in an add expression. In
5542 either case, breaking up the subtract into an add with negate
5543 exposes the adds to reassociation. */
5546 should_break_up_subtract (gimple
*stmt
)
5548 tree lhs
= gimple_assign_lhs (stmt
);
5549 tree binlhs
= gimple_assign_rhs1 (stmt
);
5550 tree binrhs
= gimple_assign_rhs2 (stmt
);
5552 class loop
*loop
= loop_containing_stmt (stmt
);
5554 if (TREE_CODE (binlhs
) == SSA_NAME
5555 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
5558 if (TREE_CODE (binrhs
) == SSA_NAME
5559 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
5562 if (TREE_CODE (lhs
) == SSA_NAME
5563 && (immusestmt
= get_single_immediate_use (lhs
))
5564 && is_gimple_assign (immusestmt
)
5565 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
5566 || (gimple_assign_rhs_code (immusestmt
) == MINUS_EXPR
5567 && gimple_assign_rhs1 (immusestmt
) == lhs
)
5568 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
5573 /* Transform STMT from A - B into A + -B. */
5576 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
5578 tree rhs1
= gimple_assign_rhs1 (stmt
);
5579 tree rhs2
= gimple_assign_rhs2 (stmt
);
5581 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5583 fprintf (dump_file
, "Breaking up subtract ");
5584 print_gimple_stmt (dump_file
, stmt
, 0);
5587 rhs2
= negate_value (rhs2
, gsip
);
5588 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
5592 /* Determine whether STMT is a builtin call that raises an SSA name
5593 to an integer power and has only one use. If so, and this is early
5594 reassociation and unsafe math optimizations are permitted, place
5595 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5596 If any of these conditions does not hold, return FALSE. */
5599 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
5602 REAL_VALUE_TYPE c
, cint
;
5604 switch (gimple_call_combined_fn (stmt
))
5607 if (flag_errno_math
)
5610 *base
= gimple_call_arg (stmt
, 0);
5611 arg1
= gimple_call_arg (stmt
, 1);
5613 if (TREE_CODE (arg1
) != REAL_CST
)
5616 c
= TREE_REAL_CST (arg1
);
5618 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
5621 *exponent
= real_to_integer (&c
);
5622 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
5623 if (!real_identical (&c
, &cint
))
5629 *base
= gimple_call_arg (stmt
, 0);
5630 arg1
= gimple_call_arg (stmt
, 1);
5632 if (!tree_fits_shwi_p (arg1
))
5635 *exponent
= tree_to_shwi (arg1
);
5642 /* Expanding negative exponents is generally unproductive, so we don't
5643 complicate matters with those. Exponents of zero and one should
5644 have been handled by expression folding. */
5645 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
5651 /* Try to derive and add operand entry for OP to *OPS. Return false if
5655 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
5656 enum tree_code code
,
5657 tree op
, gimple
* def_stmt
)
5659 tree base
= NULL_TREE
;
5660 HOST_WIDE_INT exponent
= 0;
5662 if (TREE_CODE (op
) != SSA_NAME
5663 || ! has_single_use (op
))
5666 if (code
== MULT_EXPR
5667 && reassoc_insert_powi_p
5668 && flag_unsafe_math_optimizations
5669 && is_gimple_call (def_stmt
)
5670 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
5672 add_repeat_to_ops_vec (ops
, base
, exponent
);
5673 gimple_set_visited (def_stmt
, true);
5676 else if (code
== MULT_EXPR
5677 && is_gimple_assign (def_stmt
)
5678 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
5679 && !HONOR_SNANS (TREE_TYPE (op
))
5680 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
5681 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
))))
5683 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5684 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
5685 add_to_ops_vec (ops
, rhs1
);
5686 add_to_ops_vec (ops
, cst
);
5687 gimple_set_visited (def_stmt
, true);
5694 /* Recursively linearize a binary expression that is the RHS of STMT.
5695 Place the operands of the expression tree in the vector named OPS. */
5698 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
5699 bool is_associative
, bool set_visited
)
5701 tree binlhs
= gimple_assign_rhs1 (stmt
);
5702 tree binrhs
= gimple_assign_rhs2 (stmt
);
5703 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
5704 bool binlhsisreassoc
= false;
5705 bool binrhsisreassoc
= false;
5706 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5707 class loop
*loop
= loop_containing_stmt (stmt
);
5710 gimple_set_visited (stmt
, true);
5712 if (TREE_CODE (binlhs
) == SSA_NAME
)
5714 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
5715 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
5716 && !stmt_could_throw_p (cfun
, binlhsdef
));
5719 if (TREE_CODE (binrhs
) == SSA_NAME
)
5721 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
5722 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
5723 && !stmt_could_throw_p (cfun
, binrhsdef
));
5726 /* If the LHS is not reassociable, but the RHS is, we need to swap
5727 them. If neither is reassociable, there is nothing we can do, so
5728 just put them in the ops vector. If the LHS is reassociable,
5729 linearize it. If both are reassociable, then linearize the RHS
5732 if (!binlhsisreassoc
)
5734 /* If this is not a associative operation like division, give up. */
5735 if (!is_associative
)
5737 add_to_ops_vec (ops
, binrhs
);
5741 if (!binrhsisreassoc
)
5744 if (try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5745 /* If we add ops for the rhs we expect to be able to recurse
5746 to it via the lhs during expression rewrite so swap
5750 add_to_ops_vec (ops
, binrhs
);
5752 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
5753 add_to_ops_vec (ops
, binlhs
);
5759 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5761 fprintf (dump_file
, "swapping operands of ");
5762 print_gimple_stmt (dump_file
, stmt
, 0);
5765 swap_ssa_operands (stmt
,
5766 gimple_assign_rhs1_ptr (stmt
),
5767 gimple_assign_rhs2_ptr (stmt
));
5770 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5772 fprintf (dump_file
, " is now ");
5773 print_gimple_stmt (dump_file
, stmt
, 0);
5775 if (!binrhsisreassoc
)
5778 /* We want to make it so the lhs is always the reassociative op,
5780 std::swap (binlhs
, binrhs
);
5782 else if (binrhsisreassoc
)
5784 linearize_expr (stmt
);
5785 binlhs
= gimple_assign_rhs1 (stmt
);
5786 binrhs
= gimple_assign_rhs2 (stmt
);
5789 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
5790 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
5792 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
5793 is_associative
, set_visited
);
5795 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5796 add_to_ops_vec (ops
, binrhs
);
5799 /* Repropagate the negates back into subtracts, since no other pass
5800 currently does it. */
5803 repropagate_negates (void)
5808 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
5810 gimple
*user
= get_single_immediate_use (negate
);
5812 if (!user
|| !is_gimple_assign (user
))
5815 /* The negate operand can be either operand of a PLUS_EXPR
5816 (it can be the LHS if the RHS is a constant for example).
5818 Force the negate operand to the RHS of the PLUS_EXPR, then
5819 transform the PLUS_EXPR into a MINUS_EXPR. */
5820 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
5822 /* If the negated operand appears on the LHS of the
5823 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5824 to force the negated operand to the RHS of the PLUS_EXPR. */
5825 if (gimple_assign_rhs1 (user
) == negate
)
5827 swap_ssa_operands (user
,
5828 gimple_assign_rhs1_ptr (user
),
5829 gimple_assign_rhs2_ptr (user
));
5832 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5833 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5834 if (gimple_assign_rhs2 (user
) == negate
)
5836 tree rhs1
= gimple_assign_rhs1 (user
);
5837 tree rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
5838 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5839 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
5843 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
5845 if (gimple_assign_rhs1 (user
) == negate
)
5850 which we transform into
5853 This pushes down the negate which we possibly can merge
5854 into some other operation, hence insert it into the
5855 plus_negates vector. */
5856 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5857 tree a
= gimple_assign_rhs1 (feed
);
5858 tree b
= gimple_assign_rhs2 (user
);
5859 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
5860 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
5861 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
5862 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
5863 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5864 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
5865 user
= gsi_stmt (gsi2
);
5867 reassoc_remove_stmt (&gsi
);
5868 release_defs (feed
);
5869 plus_negates
.safe_push (gimple_assign_lhs (user
));
5873 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5874 rid of one operation. */
5875 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5876 tree a
= gimple_assign_rhs1 (feed
);
5877 tree rhs1
= gimple_assign_rhs1 (user
);
5878 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5879 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
5880 update_stmt (gsi_stmt (gsi
));
5886 /* Returns true if OP is of a type for which we can do reassociation.
5887 That is for integral or non-saturating fixed-point types, and for
5888 floating point type when associative-math is enabled. */
5891 can_reassociate_p (tree op
)
5893 tree type
= TREE_TYPE (op
);
5894 if (TREE_CODE (op
) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
))
5896 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
5897 || NON_SAT_FIXED_POINT_TYPE_P (type
)
5898 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
5903 /* Break up subtract operations in block BB.
5905 We do this top down because we don't know whether the subtract is
5906 part of a possible chain of reassociation except at the top.
5915 we want to break up k = t - q, but we won't until we've transformed q
5916 = b - r, which won't be broken up until we transform b = c - d.
5918 En passant, clear the GIMPLE visited flag on every statement
5919 and set UIDs within each basic block. */
5922 break_up_subtract_bb (basic_block bb
)
5924 gimple_stmt_iterator gsi
;
5926 unsigned int uid
= 1;
5928 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5930 gimple
*stmt
= gsi_stmt (gsi
);
5931 gimple_set_visited (stmt
, false);
5932 gimple_set_uid (stmt
, uid
++);
5934 if (!is_gimple_assign (stmt
)
5935 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
5938 /* Look for simple gimple subtract operations. */
5939 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
5941 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
5942 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
5945 /* Check for a subtract used only in an addition. If this
5946 is the case, transform it into add of a negate for better
5947 reassociation. IE transform C = A-B into C = A + -B if C
5948 is only used in an addition. */
5949 if (should_break_up_subtract (stmt
))
5950 break_up_subtract (stmt
, &gsi
);
5952 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
5953 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
5954 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
5956 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
5958 son
= next_dom_son (CDI_DOMINATORS
, son
))
5959 break_up_subtract_bb (son
);
5962 /* Used for repeated factor analysis. */
5963 struct repeat_factor
5965 /* An SSA name that occurs in a multiply chain. */
5968 /* Cached rank of the factor. */
5971 /* Number of occurrences of the factor in the chain. */
5972 HOST_WIDE_INT count
;
5974 /* An SSA name representing the product of this factor and
5975 all factors appearing later in the repeated factor vector. */
5980 static vec
<repeat_factor
> repeat_factor_vec
;
5982 /* Used for sorting the repeat factor vector. Sort primarily by
5983 ascending occurrence count, secondarily by descending rank. */
5986 compare_repeat_factors (const void *x1
, const void *x2
)
5988 const repeat_factor
*rf1
= (const repeat_factor
*) x1
;
5989 const repeat_factor
*rf2
= (const repeat_factor
*) x2
;
5991 if (rf1
->count
!= rf2
->count
)
5992 return rf1
->count
- rf2
->count
;
5994 return rf2
->rank
- rf1
->rank
;
5997 /* Look for repeated operands in OPS in the multiply tree rooted at
5998 STMT. Replace them with an optimal sequence of multiplies and powi
5999 builtin calls, and remove the used operands from OPS. Return an
6000 SSA name representing the value of the replacement sequence. */
6003 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
6005 unsigned i
, j
, vec_len
;
6008 repeat_factor
*rf1
, *rf2
;
6009 repeat_factor rfnew
;
6010 tree result
= NULL_TREE
;
6011 tree target_ssa
, iter_result
;
6012 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
6013 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
6014 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
6015 gimple
*mul_stmt
, *pow_stmt
;
6017 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6018 target, unless type is integral. */
6019 if (!powi_fndecl
&& !INTEGRAL_TYPE_P (type
))
6022 /* Allocate the repeated factor vector. */
6023 repeat_factor_vec
.create (10);
6025 /* Scan the OPS vector for all SSA names in the product and build
6026 up a vector of occurrence counts for each factor. */
6027 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6029 if (TREE_CODE (oe
->op
) == SSA_NAME
)
6031 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6033 if (rf1
->factor
== oe
->op
)
6035 rf1
->count
+= oe
->count
;
6040 if (j
>= repeat_factor_vec
.length ())
6042 rfnew
.factor
= oe
->op
;
6043 rfnew
.rank
= oe
->rank
;
6044 rfnew
.count
= oe
->count
;
6045 rfnew
.repr
= NULL_TREE
;
6046 repeat_factor_vec
.safe_push (rfnew
);
6051 /* Sort the repeated factor vector by (a) increasing occurrence count,
6052 and (b) decreasing rank. */
6053 repeat_factor_vec
.qsort (compare_repeat_factors
);
6055 /* It is generally best to combine as many base factors as possible
6056 into a product before applying __builtin_powi to the result.
6057 However, the sort order chosen for the repeated factor vector
6058 allows us to cache partial results for the product of the base
6059 factors for subsequent use. When we already have a cached partial
6060 result from a previous iteration, it is best to make use of it
6061 before looking for another __builtin_pow opportunity.
6063 As an example, consider x * x * y * y * y * z * z * z * z.
6064 We want to first compose the product x * y * z, raise it to the
6065 second power, then multiply this by y * z, and finally multiply
6066 by z. This can be done in 5 multiplies provided we cache y * z
6067 for use in both expressions:
6075 If we instead ignored the cached y * z and first multiplied by
6076 the __builtin_pow opportunity z * z, we would get the inferior:
6085 vec_len
= repeat_factor_vec
.length ();
6087 /* Repeatedly look for opportunities to create a builtin_powi call. */
6090 HOST_WIDE_INT power
;
6092 /* First look for the largest cached product of factors from
6093 preceding iterations. If found, create a builtin_powi for
6094 it if the minimum occurrence count for its factors is at
6095 least 2, or just use this cached product as our next
6096 multiplicand if the minimum occurrence count is 1. */
6097 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6099 if (rf1
->repr
&& rf1
->count
> 0)
6109 iter_result
= rf1
->repr
;
6111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6115 fputs ("Multiplying by cached product ", dump_file
);
6116 for (elt
= j
; elt
< vec_len
; elt
++)
6118 rf
= &repeat_factor_vec
[elt
];
6119 print_generic_expr (dump_file
, rf
->factor
);
6120 if (elt
< vec_len
- 1)
6121 fputs (" * ", dump_file
);
6123 fputs ("\n", dump_file
);
6128 if (INTEGRAL_TYPE_P (type
))
6130 gcc_assert (power
> 1);
6131 gimple_stmt_iterator gsip
= gsi
;
6133 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6135 gimple_stmt_iterator gsic
= gsi
;
6136 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6138 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6139 gimple_set_visited (gsi_stmt (gsic
), true);
6145 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6147 = gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6148 build_int_cst (integer_type_node
,
6150 gimple_call_set_lhs (pow_stmt
, iter_result
);
6151 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6152 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6153 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6156 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6160 fputs ("Building __builtin_pow call for cached product (",
6162 for (elt
= j
; elt
< vec_len
; elt
++)
6164 rf
= &repeat_factor_vec
[elt
];
6165 print_generic_expr (dump_file
, rf
->factor
);
6166 if (elt
< vec_len
- 1)
6167 fputs (" * ", dump_file
);
6169 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
6176 /* Otherwise, find the first factor in the repeated factor
6177 vector whose occurrence count is at least 2. If no such
6178 factor exists, there are no builtin_powi opportunities
6180 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6182 if (rf1
->count
>= 2)
6191 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6195 fputs ("Building __builtin_pow call for (", dump_file
);
6196 for (elt
= j
; elt
< vec_len
; elt
++)
6198 rf
= &repeat_factor_vec
[elt
];
6199 print_generic_expr (dump_file
, rf
->factor
);
6200 if (elt
< vec_len
- 1)
6201 fputs (" * ", dump_file
);
6203 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
6206 reassociate_stats
.pows_created
++;
6208 /* Visit each element of the vector in reverse order (so that
6209 high-occurrence elements are visited first, and within the
6210 same occurrence count, lower-ranked elements are visited
6211 first). Form a linear product of all elements in this order
6212 	 whose occurrence count is at least that of element J.
6213 Record the SSA name representing the product of each element
6214 with all subsequent elements in the vector. */
6215 if (j
== vec_len
- 1)
6216 rf1
->repr
= rf1
->factor
;
6219 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
6223 rf1
= &repeat_factor_vec
[ii
];
6224 rf2
= &repeat_factor_vec
[ii
+ 1];
6226 /* Init the last factor's representative to be itself. */
6228 rf2
->repr
= rf2
->factor
;
6233 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6234 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
6236 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6237 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6238 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6239 rf1
->repr
= target_ssa
;
6241 /* Don't reprocess the multiply we just introduced. */
6242 gimple_set_visited (mul_stmt
, true);
6246 /* Form a call to __builtin_powi for the maximum product
6247 just formed, raised to the power obtained earlier. */
6248 rf1
= &repeat_factor_vec
[j
];
6249 if (INTEGRAL_TYPE_P (type
))
6251 gcc_assert (power
> 1);
6252 gimple_stmt_iterator gsip
= gsi
;
6254 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6256 gimple_stmt_iterator gsic
= gsi
;
6257 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6259 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6260 gimple_set_visited (gsi_stmt (gsic
), true);
6266 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6267 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6268 build_int_cst (integer_type_node
,
6270 gimple_call_set_lhs (pow_stmt
, iter_result
);
6271 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6272 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6273 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6277 /* If we previously formed at least one other builtin_powi call,
6278 form the product of this one and those others. */
6281 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6282 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
6283 result
, iter_result
);
6284 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6285 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6286 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6287 gimple_set_visited (mul_stmt
, true);
6288 result
= new_result
;
6291 result
= iter_result
;
6293 /* Decrement the occurrence count of each element in the product
6294 by the count found above, and remove this many copies of each
6296 for (i
= j
; i
< vec_len
; i
++)
6301 rf1
= &repeat_factor_vec
[i
];
6302 rf1
->count
-= power
;
6304 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
6306 if (oe
->op
== rf1
->factor
)
6310 ops
->ordered_remove (n
);
6326 /* At this point all elements in the repeated factor vector have a
6327 remaining occurrence count of 0 or 1, and those with a count of 1
6328 don't have cached representatives. Re-sort the ops vector and
6330 ops
->qsort (sort_by_operand_rank
);
6331 repeat_factor_vec
.release ();
6333 /* Return the final product computed herein. Note that there may
6334 still be some elements with single occurrence count left in OPS;
6335 those will be handled by the normal reassociation logic. */
6339 /* Attempt to optimize
6340 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6341 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6344 attempt_builtin_copysign (vec
<operand_entry
*> *ops
)
6348 unsigned int length
= ops
->length ();
6349 tree cst
= ops
->last ()->op
;
6351 if (length
== 1 || TREE_CODE (cst
) != REAL_CST
)
6354 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6356 if (TREE_CODE (oe
->op
) == SSA_NAME
6357 && has_single_use (oe
->op
))
6359 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
6360 if (gcall
*old_call
= dyn_cast
<gcall
*> (def_stmt
))
6363 switch (gimple_call_combined_fn (old_call
))
6366 CASE_CFN_COPYSIGN_FN
:
6367 arg0
= gimple_call_arg (old_call
, 0);
6368 arg1
= gimple_call_arg (old_call
, 1);
6369 /* The first argument of copysign must be a constant,
6370 otherwise there's nothing to do. */
6371 if (TREE_CODE (arg0
) == REAL_CST
)
6373 tree type
= TREE_TYPE (arg0
);
6374 tree mul
= const_binop (MULT_EXPR
, type
, cst
, arg0
);
6375 /* If we couldn't fold to a single constant, skip it.
6376 That happens e.g. for inexact multiplication when
6378 if (mul
== NULL_TREE
)
6380 /* Instead of adjusting OLD_CALL, let's build a new
6381 call to not leak the LHS and prevent keeping bogus
6382 debug statements. DCE will clean up the old call. */
6384 if (gimple_call_internal_p (old_call
))
6385 new_call
= gimple_build_call_internal
6386 (IFN_COPYSIGN
, 2, mul
, arg1
);
6388 new_call
= gimple_build_call
6389 (gimple_call_fndecl (old_call
), 2, mul
, arg1
);
6390 tree lhs
= make_ssa_name (type
);
6391 gimple_call_set_lhs (new_call
, lhs
);
6392 gimple_set_location (new_call
,
6393 gimple_location (old_call
));
6394 insert_stmt_after (new_call
, old_call
);
6395 /* We've used the constant, get rid of it. */
6397 bool cst1_neg
= real_isneg (TREE_REAL_CST_PTR (cst
));
6398 /* Handle the CST1 < 0 case by negating the result. */
6401 tree negrhs
= make_ssa_name (TREE_TYPE (lhs
));
6403 = gimple_build_assign (negrhs
, NEGATE_EXPR
, lhs
);
6404 insert_stmt_after (negate_stmt
, new_call
);
6409 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6411 fprintf (dump_file
, "Optimizing copysign: ");
6412 print_generic_expr (dump_file
, cst
);
6413 fprintf (dump_file
, " * COPYSIGN (");
6414 print_generic_expr (dump_file
, arg0
);
6415 fprintf (dump_file
, ", ");
6416 print_generic_expr (dump_file
, arg1
);
6417 fprintf (dump_file
, ") into %sCOPYSIGN (",
6418 cst1_neg
? "-" : "");
6419 print_generic_expr (dump_file
, mul
);
6420 fprintf (dump_file
, ", ");
6421 print_generic_expr (dump_file
, arg1
);
6422 fprintf (dump_file
, "\n");
6435 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6438 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree new_rhs
)
6442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6444 fprintf (dump_file
, "Transforming ");
6445 print_gimple_stmt (dump_file
, stmt
, 0);
6448 rhs1
= gimple_assign_rhs1 (stmt
);
6449 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
6451 remove_visited_stmt_chain (rhs1
);
6453 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6455 fprintf (dump_file
, " into ");
6456 print_gimple_stmt (dump_file
, stmt
, 0);
6460 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6463 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
6464 tree rhs1
, tree rhs2
)
6466 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6468 fprintf (dump_file
, "Transforming ");
6469 print_gimple_stmt (dump_file
, stmt
, 0);
6472 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
6473 update_stmt (gsi_stmt (*gsi
));
6474 remove_visited_stmt_chain (rhs1
);
6476 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6478 fprintf (dump_file
, " into ");
6479 print_gimple_stmt (dump_file
, stmt
, 0);
6483 /* Reassociate expressions in basic block BB and its post-dominator as
6486 Bubble up return status from maybe_optimize_range_tests. */
6489 reassociate_bb (basic_block bb
)
6491 gimple_stmt_iterator gsi
;
6493 gimple
*stmt
= last_stmt (bb
);
6494 bool cfg_cleanup_needed
= false;
6496 if (stmt
&& !gimple_visited_p (stmt
))
6497 cfg_cleanup_needed
|= maybe_optimize_range_tests (stmt
);
6499 bool do_prev
= false;
6500 for (gsi
= gsi_last_bb (bb
);
6501 !gsi_end_p (gsi
); do_prev
? gsi_prev (&gsi
) : (void) 0)
6504 stmt
= gsi_stmt (gsi
);
6506 if (is_gimple_assign (stmt
)
6507 && !stmt_could_throw_p (cfun
, stmt
))
6509 tree lhs
, rhs1
, rhs2
;
6510 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
6512 /* If this was part of an already processed statement,
6513 we don't need to touch it again. */
6514 if (gimple_visited_p (stmt
))
6516 /* This statement might have become dead because of previous
6518 if (has_zero_uses (gimple_get_lhs (stmt
)))
6520 reassoc_remove_stmt (&gsi
);
6521 release_defs (stmt
);
6522 /* We might end up removing the last stmt above which
6523 places the iterator to the end of the sequence.
6524 Reset it to the last stmt in this case and make sure
6525 we don't do gsi_prev in that case. */
6526 if (gsi_end_p (gsi
))
6528 gsi
= gsi_last_bb (bb
);
6535 /* If this is not a gimple binary expression, there is
6536 nothing for us to do with it. */
6537 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
6540 lhs
= gimple_assign_lhs (stmt
);
6541 rhs1
= gimple_assign_rhs1 (stmt
);
6542 rhs2
= gimple_assign_rhs2 (stmt
);
6544 /* For non-bit or min/max operations we can't associate
6545 all types. Verify that here. */
6546 if (rhs_code
!= BIT_IOR_EXPR
6547 && rhs_code
!= BIT_AND_EXPR
6548 && rhs_code
!= BIT_XOR_EXPR
6549 && rhs_code
!= MIN_EXPR
6550 && rhs_code
!= MAX_EXPR
6551 && (!can_reassociate_p (lhs
)
6552 || !can_reassociate_p (rhs1
)
6553 || !can_reassociate_p (rhs2
)))
6556 if (associative_tree_code (rhs_code
))
6558 auto_vec
<operand_entry
*> ops
;
6559 tree powi_result
= NULL_TREE
;
6560 bool is_vector
= VECTOR_TYPE_P (TREE_TYPE (lhs
));
6562 /* There may be no immediate uses left by the time we
6563 get here because we may have eliminated them all. */
6564 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
6567 gimple_set_visited (stmt
, true);
6568 linearize_expr_tree (&ops
, stmt
, true, true);
6569 ops
.qsort (sort_by_operand_rank
);
6570 int orig_len
= ops
.length ();
6571 optimize_ops_list (rhs_code
, &ops
);
6572 if (undistribute_ops_list (rhs_code
, &ops
,
6573 loop_containing_stmt (stmt
)))
6575 ops
.qsort (sort_by_operand_rank
);
6576 optimize_ops_list (rhs_code
, &ops
);
6578 if (undistribute_bitref_for_vector (rhs_code
, &ops
,
6579 loop_containing_stmt (stmt
)))
6581 ops
.qsort (sort_by_operand_rank
);
6582 optimize_ops_list (rhs_code
, &ops
);
6584 if (rhs_code
== PLUS_EXPR
6585 && transform_add_to_multiply (&ops
))
6586 ops
.qsort (sort_by_operand_rank
);
6588 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
6591 optimize_vec_cond_expr (rhs_code
, &ops
);
6593 optimize_range_tests (rhs_code
, &ops
, NULL
);
6596 if (rhs_code
== MULT_EXPR
&& !is_vector
)
6598 attempt_builtin_copysign (&ops
);
6600 if (reassoc_insert_powi_p
6601 && (flag_unsafe_math_optimizations
6602 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))))
6603 powi_result
= attempt_builtin_powi (stmt
, &ops
);
6606 operand_entry
*last
;
6607 bool negate_result
= false;
6608 if (ops
.length () > 1
6609 && rhs_code
== MULT_EXPR
)
6612 if ((integer_minus_onep (last
->op
)
6613 || real_minus_onep (last
->op
))
6614 && !HONOR_SNANS (TREE_TYPE (lhs
))
6615 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs
))
6616 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs
))))
6619 negate_result
= true;
6624 /* If the operand vector is now empty, all operands were
6625 consumed by the __builtin_powi optimization. */
6626 if (ops
.length () == 0)
6627 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
6628 else if (ops
.length () == 1)
6630 tree last_op
= ops
.last ()->op
;
6632 /* If the stmt that defines operand has to be inserted, insert it
6634 if (ops
.last ()->stmt_to_insert
)
6635 insert_stmt_before_use (stmt
, ops
.last ()->stmt_to_insert
);
6637 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
6640 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
6644 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
6645 int ops_num
= ops
.length ();
6648 /* For binary bit operations, if there are at least 3
6649 operands and the last operand in OPS is a constant,
6650 move it to the front. This helps ensure that we generate
6651 (X & Y) & C rather than (X & C) & Y. The former will
6652 often match a canonical bit test when we get to RTL. */
6653 if (ops
.length () > 2
6654 && (rhs_code
== BIT_AND_EXPR
6655 || rhs_code
== BIT_IOR_EXPR
6656 || rhs_code
== BIT_XOR_EXPR
)
6657 && TREE_CODE (ops
.last ()->op
) == INTEGER_CST
)
6658 std::swap (*ops
[0], *ops
[ops_num
- 1]);
6660 /* Only rewrite the expression tree to parallel in the
6661 last reassoc pass to avoid useless work back-and-forth
6662 with initial linearization. */
6663 if (!reassoc_insert_powi_p
6664 && ops
.length () > 3
6665 && (width
= get_reassociation_width (ops_num
, rhs_code
,
6668 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6670 "Width = %d was chosen for reassociation\n",
6672 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
6677 /* When there are three operands left, we want
6678 to make sure the ones that get the double
6679 binary op are chosen wisely. */
6680 int len
= ops
.length ();
6682 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
6684 new_lhs
= rewrite_expr_tree (stmt
, rhs_code
, 0, ops
,
6690 /* If we combined some repeated factors into a
6691 __builtin_powi call, multiply that result by the
6692 reassociated operands. */
6695 gimple
*mul_stmt
, *lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
6696 tree type
= TREE_TYPE (lhs
);
6697 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
6699 gimple_set_lhs (lhs_stmt
, target_ssa
);
6700 update_stmt (lhs_stmt
);
6703 target_ssa
= new_lhs
;
6706 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
6707 powi_result
, target_ssa
);
6708 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6709 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6710 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
6716 stmt
= SSA_NAME_DEF_STMT (lhs
);
6717 tree tmp
= make_ssa_name (TREE_TYPE (lhs
));
6718 gimple_set_lhs (stmt
, tmp
);
6721 gassign
*neg_stmt
= gimple_build_assign (lhs
, NEGATE_EXPR
,
6723 gimple_set_uid (neg_stmt
, gimple_uid (stmt
));
6724 gsi_insert_after (&gsi
, neg_stmt
, GSI_NEW_STMT
);
6730 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
6732 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
6733 cfg_cleanup_needed
|= reassociate_bb (son
);
6735 return cfg_cleanup_needed
;
6738 /* Add jumps around shifts for range tests turned into bit tests.
6739 For each SSA_NAME VAR we have code like:
6740 VAR = ...; // final stmt of range comparison
6741 // bit test here...;
6742 OTHERVAR = ...; // final stmt of the bit test sequence
6743 RES = VAR | OTHERVAR;
6744 Turn the above into:
6751 // bit test here...;
6754 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6762 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
6764 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
6767 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
6769 && is_gimple_assign (use_stmt
)
6770 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
6771 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
6773 basic_block cond_bb
= gimple_bb (def_stmt
);
6774 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
6775 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
6777 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6778 gimple
*g
= gimple_build_cond (NE_EXPR
, var
,
6779 build_zero_cst (TREE_TYPE (var
)),
6780 NULL_TREE
, NULL_TREE
);
6781 location_t loc
= gimple_location (use_stmt
);
6782 gimple_set_location (g
, loc
);
6783 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
6785 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
6786 etrue
->probability
= profile_probability::even ();
6787 edge efalse
= find_edge (cond_bb
, then_bb
);
6788 efalse
->flags
= EDGE_FALSE_VALUE
;
6789 efalse
->probability
-= etrue
->probability
;
6790 then_bb
->count
-= etrue
->count ();
6792 tree othervar
= NULL_TREE
;
6793 if (gimple_assign_rhs1 (use_stmt
) == var
)
6794 othervar
= gimple_assign_rhs2 (use_stmt
);
6795 else if (gimple_assign_rhs2 (use_stmt
) == var
)
6796 othervar
= gimple_assign_rhs1 (use_stmt
);
6799 tree lhs
= gimple_assign_lhs (use_stmt
);
6800 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
6801 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
6802 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
6803 gsi
= gsi_for_stmt (use_stmt
);
6804 gsi_remove (&gsi
, true);
6806 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
6807 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
6809 reassoc_branch_fixups
.release ();
6812 void dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
);
6813 void debug_ops_vector (vec
<operand_entry
*> ops
);
6815 /* Dump the operand entry vector OPS to FILE. */
6818 dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
)
6823 FOR_EACH_VEC_ELT (ops
, i
, oe
)
6825 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
6826 print_generic_expr (file
, oe
->op
);
6827 fprintf (file
, "\n");
6831 /* Dump the operand entry vector OPS to STDERR. */
6834 debug_ops_vector (vec
<operand_entry
*> ops
)
6836 dump_ops_vector (stderr
, ops
);
6839 /* Bubble up return status from reassociate_bb. */
6844 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
6845 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
6848 /* Initialize the reassociation pass. */
6855 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
6857 /* Find the loops, so that we can prevent moving calculations in
6859 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
6861 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
6863 next_operand_entry_id
= 0;
6865 /* Reverse RPO (Reverse Post Order) will give us something where
6866 deeper loops come later. */
6867 pre_and_rev_post_order_compute (NULL
, bbs
, false);
6868 bb_rank
= XCNEWVEC (int64_t, last_basic_block_for_fn (cfun
));
6869 operand_rank
= new hash_map
<tree
, int64_t>;
6871 /* Give each default definition a distinct rank. This includes
6872 parameters and the static chain. Walk backwards over all
6873 SSA names so that we get proper rank ordering according
6874 to tree_swap_operands_p. */
6875 for (i
= num_ssa_names
- 1; i
> 0; --i
)
6877 tree name
= ssa_name (i
);
6878 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
6879 insert_operand_rank (name
, ++rank
);
6882 /* Set up rank for each BB */
6883 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
6884 bb_rank
[bbs
[i
]] = ++rank
<< 16;
6887 calculate_dominance_info (CDI_POST_DOMINATORS
);
6888 plus_negates
= vNULL
;
6891 /* Cleanup after the reassociation pass, and print stats if
6897 statistics_counter_event (cfun
, "Linearized",
6898 reassociate_stats
.linearized
);
6899 statistics_counter_event (cfun
, "Constants eliminated",
6900 reassociate_stats
.constants_eliminated
);
6901 statistics_counter_event (cfun
, "Ops eliminated",
6902 reassociate_stats
.ops_eliminated
);
6903 statistics_counter_event (cfun
, "Statements rewritten",
6904 reassociate_stats
.rewritten
);
6905 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
6906 reassociate_stats
.pows_encountered
);
6907 statistics_counter_event (cfun
, "Built-in powi calls created",
6908 reassociate_stats
.pows_created
);
6910 delete operand_rank
;
6911 operand_entry_pool
.release ();
6913 plus_negates
.release ();
6914 free_dominance_info (CDI_POST_DOMINATORS
);
6915 loop_optimizer_finalize ();
6918 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6919 insertion of __builtin_powi calls.
6921 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6922 optimization of a gimple conditional. Otherwise returns zero. */
6925 execute_reassoc (bool insert_powi_p
)
6927 reassoc_insert_powi_p
= insert_powi_p
;
6931 bool cfg_cleanup_needed
;
6932 cfg_cleanup_needed
= do_reassoc ();
6933 repropagate_negates ();
6937 return cfg_cleanup_needed
? TODO_cleanup_cfg
: 0;
6942 const pass_data pass_data_reassoc
=
6944 GIMPLE_PASS
, /* type */
6945 "reassoc", /* name */
6946 OPTGROUP_NONE
, /* optinfo_flags */
6947 TV_TREE_REASSOC
, /* tv_id */
6948 ( PROP_cfg
| PROP_ssa
), /* properties_required */
6949 0, /* properties_provided */
6950 0, /* properties_destroyed */
6951 0, /* todo_flags_start */
6952 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
6955 class pass_reassoc
: public gimple_opt_pass
6958 pass_reassoc (gcc::context
*ctxt
)
6959 : gimple_opt_pass (pass_data_reassoc
, ctxt
), insert_powi_p (false)
6962 /* opt_pass methods: */
6963 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
6964 void set_pass_param (unsigned int n
, bool param
)
6966 gcc_assert (n
== 0);
6967 insert_powi_p
= param
;
6969 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
6970 virtual unsigned int execute (function
*)
6971 { return execute_reassoc (insert_powi_p
); }
6974 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6975 point 3a in the pass header comment. */
6977 }; // class pass_reassoc
6982 make_pass_reassoc (gcc::context
*ctxt
)
6984 return new pass_reassoc (ctxt
);