Daily bump.
[gcc.git] / gcc / fwprop.c
1 /* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define INCLUDE_ALGORITHM
22 #define INCLUDE_FUNCTIONAL
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "rtl-ssa.h"
30
31 #include "sparseset.h"
32 #include "predict.h"
33 #include "cfgrtl.h"
34 #include "cfgcleanup.h"
35 #include "cfgloop.h"
36 #include "tree-pass.h"
37 #include "rtl-iter.h"
38 #include "target.h"
39
40 /* This pass does simple forward propagation and simplification when an
41 operand of an insn can only come from a single def. This pass uses
42 RTL SSA, so it is global. However, we only do limited analysis of
43 available expressions.
44
45 1) The pass tries to propagate the source of the def into the use,
46 and checks if the result is independent of the substituted value.
47 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
48 zero, independent of the source register.
49
50 In particular, we propagate constants into the use site. Sometimes
51 RTL expansion did not put the constant in the same insn on purpose,
52 to satisfy a predicate, and the result will fail to be recognized;
53 but this happens rarely and in this case we can still create a
54 REG_EQUAL note. For multi-word operations, this
55
56 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
57 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
58 (set (subreg:SI (reg:DI 122) 0)
59 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
60 (set (subreg:SI (reg:DI 122) 4)
61 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
62
63 can be simplified to the much simpler
64
65 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
66 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
67
68 This particular propagation is also effective at putting together
69 complex addressing modes. We are more aggressive inside MEMs, in
70 that all definitions are propagated if the use is in a MEM; if the
71 result is a valid memory address we check address_cost to decide
72 whether the substitution is worthwhile.
73
74 2) The pass propagates register copies. This is not as effective as
75 the copy propagation done by CSE's canon_reg, which works by walking
76 the instruction chain, it can help the other transformations.
77
78 We should consider removing this optimization, and instead reorder the
79 RTL passes, because GCSE does this transformation too. With some luck,
80 the CSE pass at the end of rest_of_handle_gcse could also go away.
81
82 3) The pass looks for paradoxical subregs that are actually unnecessary.
83 Things like this:
84
85 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
86 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
87 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
88 (subreg:SI (reg:QI 121) 0)))
89
90 are very common on machines that can only do word-sized operations.
91 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
92 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
93 we can replace the paradoxical subreg with simply (reg:WIDE M). The
94 above will simplify this to
95
96 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
97 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
98 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
99
100 where the first two insns are now dead. */
101
102 using namespace rtl_ssa;
103
104 static int num_changes;
105
106 /* Do not try to replace constant addresses or addresses of local and
107 argument slots. These MEM expressions are made only once and inserted
108 in many instructions, as well as being used to control symbol table
109 output. It is not safe to clobber them.
110
111 There are some uncommon cases where the address is already in a register
112 for some reason, but we cannot take advantage of that because we have
113 no easy way to unshare the MEM. In addition, looking up all stack
114 addresses is costly. */
115
116 static bool
117 can_simplify_addr (rtx addr)
118 {
119 rtx reg;
120
121 if (CONSTANT_ADDRESS_P (addr))
122 return false;
123
124 if (GET_CODE (addr) == PLUS)
125 reg = XEXP (addr, 0);
126 else
127 reg = addr;
128
129 return (!REG_P (reg)
130 || (REGNO (reg) != FRAME_POINTER_REGNUM
131 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
132 && REGNO (reg) != ARG_POINTER_REGNUM));
133 }
134
135 /* MEM is the result of an address simplification, and temporarily
136 undoing changes OLD_NUM_CHANGES onwards restores the original address.
137 Return whether it is good to use the new address instead of the
138 old one. INSN is the containing instruction. */
139
140 static bool
141 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
142 {
143 int gain;
144
145 /* Prefer the new address if it is less expensive. */
146 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
147 temporarily_undo_changes (old_num_changes);
148 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
149 MEM_ADDR_SPACE (mem), speed);
150 redo_changes (old_num_changes);
151 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
152 MEM_ADDR_SPACE (mem), speed);
153
154 /* If the addresses have equivalent cost, prefer the new address
155 if it has the highest `set_src_cost'. That has the potential of
156 eliminating the most insns without additional costs, and it
157 is the same that cse.c used to do. */
158 if (gain == 0)
159 {
160 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
161 temporarily_undo_changes (old_num_changes);
162 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
163 redo_changes (old_num_changes);
164 }
165
166 return (gain > 0);
167 }
168
169
170 namespace
171 {
172 class fwprop_propagation : public insn_propagation
173 {
174 public:
175 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
176 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
177 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
178
179 fwprop_propagation (insn_info *, insn_info *, rtx, rtx);
180
181 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
182 bool folded_to_constants_p () const;
183 bool profitable_p () const;
184
185 bool check_mem (int, rtx) final override;
186 void note_simplification (int, uint16_t, rtx, rtx) final override;
187 uint16_t classify_result (rtx, rtx);
188
189 private:
190 const bool single_use_p;
191 const bool single_ebb_p;
192 };
193 }
194
195 /* Prepare to replace FROM with TO in INSN. */
196
197 fwprop_propagation::fwprop_propagation (insn_info *use_insn,
198 insn_info *def_insn, rtx from, rtx to)
199 : insn_propagation (use_insn->rtl (), from, to),
200 single_use_p (def_insn->num_uses () == 1),
201 single_ebb_p (use_insn->ebb () == def_insn->ebb ())
202 {
203 should_check_mems = true;
204 should_note_simplifications = true;
205 }
206
207 /* MEM is the result of an address simplification, and temporarily
208 undoing changes OLD_NUM_CHANGES onwards restores the original address.
209 Return true if the propagation should continue, false if it has failed. */
210
211 bool
212 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
213 {
214 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
215 MEM_ADDR_SPACE (mem)))
216 {
217 failure_reason = "would create an invalid MEM";
218 return false;
219 }
220
221 temporarily_undo_changes (old_num_changes);
222 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
223 redo_changes (old_num_changes);
224 if (!can_simplify)
225 {
226 failure_reason = "would replace a frame address";
227 return false;
228 }
229
230 /* Copy propagations are always ok. Otherwise check the costs. */
231 if (!(REG_P (from) && REG_P (to))
232 && !should_replace_address (old_num_changes, mem, insn))
233 {
234 failure_reason = "would increase the cost of a MEM";
235 return false;
236 }
237
238 result_flags |= CHANGED_MEM;
239 return true;
240 }
241
242 /* OLDX has been simplified to NEWX. Describe the change in terms of
243 result_flags. */
244
245 uint16_t
246 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
247 {
248 if (CONSTANT_P (new_rtx))
249 {
250 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
251 and NEW_RTX is likely not a legitimate address. We want it to
252 disappear if it is invalid.
253
254 ??? Using the mode of the LO_SUM as the mode of the address
255 seems odd, but it was what the pre-SSA code did. */
256 if (GET_CODE (old_rtx) == LO_SUM
257 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
258 return CONSTANT;
259 return CONSTANT | PROFITABLE;
260 }
261
262 /* Allow replacements that simplify operations on a vector or complex
263 value to a component. The most prominent case is
264 (subreg ([vec_]concat ...)). */
265 if (REG_P (new_rtx)
266 && !HARD_REGISTER_P (new_rtx)
267 && (VECTOR_MODE_P (GET_MODE (from))
268 || COMPLEX_MODE_P (GET_MODE (from)))
269 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
270 return PROFITABLE;
271
272 /* Allow (subreg (mem)) -> (mem) simplifications with the following
273 exceptions:
274 1) Propagating (mem)s into multiple uses is not profitable.
275 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
276 runs less frequently.
277 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
278 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
279 ones. */
280 if (single_use_p
281 && single_ebb_p
282 && SUBREG_P (old_rtx)
283 && !paradoxical_subreg_p (old_rtx)
284 && MEM_P (new_rtx)
285 && !MEM_VOLATILE_P (new_rtx))
286 return PROFITABLE;
287
288 return 0;
289 }
290
291 /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
292 is the number of unrelated changes that had been made before processing
293 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
294 had at that point. */
295
296 void
297 fwprop_propagation::note_simplification (int old_num_changes,
298 uint16_t old_result_flags,
299 rtx old_rtx, rtx new_rtx)
300 {
301 result_flags &= ~(CONSTANT | PROFITABLE);
302 uint16_t new_flags = classify_result (old_rtx, new_rtx);
303 if (old_num_changes)
304 new_flags &= old_result_flags;
305 result_flags |= new_flags;
306 }
307
308 /* Return true if all substitutions eventually folded to constants. */
309
310 bool
311 fwprop_propagation::folded_to_constants_p () const
312 {
313 /* If we're propagating a HIGH, require it to be folded with a
314 partnering LO_SUM. For example, a REG_EQUAL note with a register
315 replaced by an unfolded HIGH is not useful. */
316 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
317 return true;
318 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
319 }
320
321
322 /* Return true if it is worth keeping the result of the propagation,
323 false if it would increase the complexity of the pattern too much. */
324
325 bool
326 fwprop_propagation::profitable_p () const
327 {
328 if (changed_mem_p ())
329 return true;
330
331 if (!(result_flags & UNSIMPLIFIED)
332 && (result_flags & PROFITABLE))
333 return true;
334
335 if (REG_P (to))
336 return true;
337
338 if (GET_CODE (to) == SUBREG
339 && REG_P (SUBREG_REG (to))
340 && !paradoxical_subreg_p (to))
341 return true;
342
343 if (CONSTANT_P (to))
344 return true;
345
346 return false;
347 }
348
349 /* Check that X has a single def. */
350
351 static bool
352 reg_single_def_p (rtx x)
353 {
354 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
355 }
356
357 /* Return true if X contains a paradoxical subreg. */
358
359 static bool
360 contains_paradoxical_subreg_p (rtx x)
361 {
362 subrtx_var_iterator::array_type array;
363 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
364 {
365 x = *iter;
366 if (SUBREG_P (x) && paradoxical_subreg_p (x))
367 return true;
368 }
369 return false;
370 }
371
372 /* Try to substitute (set DEST SRC) from DEF_INSN into note NOTE of USE_INSN.
373 Return the number of substitutions on success, otherwise return -1 and
374 leave USE_INSN unchanged.
375
376 If REQUIRE_CONSTANT is true, require all substituted occurences of SRC
377 to fold to a constant, so that the note does not use any more registers
378 than it did previously. If REQUIRE_CONSTANT is false, also allow the
379 substitution if it's something we'd normally allow for the main
380 instruction pattern. */
381
382 static int
383 try_fwprop_subst_note (insn_info *use_insn, insn_info *def_insn,
384 rtx note, rtx dest, rtx src, bool require_constant)
385 {
386 rtx_insn *use_rtl = use_insn->rtl ();
387
388 insn_change_watermark watermark;
389 fwprop_propagation prop (use_insn, def_insn, dest, src);
390 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
391 {
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (dump_file, "cannot propagate from insn %d into"
394 " notes of insn %d: %s\n", def_insn->uid (),
395 use_insn->uid (), prop.failure_reason);
396 return -1;
397 }
398
399 if (prop.num_replacements == 0)
400 return 0;
401
402 if (require_constant)
403 {
404 if (!prop.folded_to_constants_p ())
405 {
406 if (dump_file && (dump_flags & TDF_DETAILS))
407 fprintf (dump_file, "cannot propagate from insn %d into"
408 " notes of insn %d: %s\n", def_insn->uid (),
409 use_insn->uid (), "wouldn't fold to constants");
410 return -1;
411 }
412 }
413 else
414 {
415 if (!prop.folded_to_constants_p () && !prop.profitable_p ())
416 {
417 if (dump_file && (dump_flags & TDF_DETAILS))
418 fprintf (dump_file, "cannot propagate from insn %d into"
419 " notes of insn %d: %s\n", def_insn->uid (),
420 use_insn->uid (), "would increase complexity of node");
421 return -1;
422 }
423 }
424
425 if (dump_file && (dump_flags & TDF_DETAILS))
426 {
427 fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
428 INSN_UID (use_rtl));
429 temporarily_undo_changes (0);
430 print_inline_rtx (dump_file, note, 2);
431 redo_changes (0);
432 fprintf (dump_file, "\n with:\n ");
433 print_inline_rtx (dump_file, note, 2);
434 fprintf (dump_file, "\n");
435 }
436 watermark.keep ();
437 return prop.num_replacements;
438 }
439
440 /* Try to substitute (set DEST SRC) from DEF_INSN into location LOC of
441 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
442 unchanged. */
443
444 static bool
445 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
446 insn_info *def_insn, rtx *loc, rtx dest, rtx src)
447 {
448 insn_info *use_insn = use_change.insn ();
449 rtx_insn *use_rtl = use_insn->rtl ();
450
451 insn_change_watermark watermark;
452 fwprop_propagation prop (use_insn, def_insn, dest, src);
453 if (!prop.apply_to_pattern (loc))
454 {
455 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "cannot propagate from insn %d into"
457 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
458 prop.failure_reason);
459 return false;
460 }
461
462 if (prop.num_replacements == 0)
463 return false;
464
465 if (!prop.profitable_p ())
466 {
467 if (dump_file && (dump_flags & TDF_DETAILS))
468 fprintf (dump_file, "cannot propagate from insn %d into"
469 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
470 "would increase complexity of pattern");
471 return false;
472 }
473
474 if (dump_file && (dump_flags & TDF_DETAILS))
475 {
476 fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
477 def_insn->uid (), use_insn->uid ());
478 temporarily_undo_changes (0);
479 print_rtl_single (dump_file, PATTERN (use_rtl));
480 redo_changes (0);
481 }
482
483 /* ??? In theory, it should be better to use insn costs rather than
484 set_src_costs here. That would involve replacing this code with
485 change_is_worthwhile. */
486 bool ok = recog (attempt, use_change);
487 if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
488 if (rtx use_set = single_set (use_rtl))
489 {
490 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
491 temporarily_undo_changes (0);
492 auto old_cost = set_src_cost (SET_SRC (use_set),
493 GET_MODE (SET_DEST (use_set)), speed);
494 redo_changes (0);
495 auto new_cost = set_src_cost (SET_SRC (use_set),
496 GET_MODE (SET_DEST (use_set)), speed);
497 if (new_cost > old_cost)
498 {
499 if (dump_file)
500 fprintf (dump_file, "change not profitable"
501 " (cost %d -> cost %d)\n", old_cost, new_cost);
502 ok = false;
503 }
504 }
505
506 if (!ok)
507 {
508 /* The pattern didn't match, but if all uses of SRC folded to
509 constants, we can add a REG_EQUAL note for the result, if there
510 isn't one already. */
511 if (!prop.folded_to_constants_p ())
512 return false;
513
514 /* Test this first to avoid creating an unnecessary copy of SRC. */
515 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
516 return false;
517
518 rtx set = set_for_reg_notes (use_rtl);
519 if (!set || !REG_P (SET_DEST (set)))
520 return false;
521
522 rtx value = copy_rtx (SET_SRC (set));
523 cancel_changes (0);
524
525 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
526 because the bits in there can be anything and so might not
527 match the REG_EQUAL note content. See PR70574. */
528 if (contains_paradoxical_subreg_p (SET_SRC (set)))
529 return false;
530
531 if (dump_file && (dump_flags & TDF_DETAILS))
532 fprintf (dump_file, " Setting REG_EQUAL note\n");
533
534 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
535 }
536
537 rtx *note_ptr = &REG_NOTES (use_rtl);
538 while (rtx note = *note_ptr)
539 {
540 if ((REG_NOTE_KIND (note) == REG_EQUAL
541 || REG_NOTE_KIND (note) == REG_EQUIV)
542 && try_fwprop_subst_note (use_insn, def_insn, note,
543 dest, src, false) < 0)
544 {
545 *note_ptr = XEXP (note, 1);
546 free_EXPR_LIST_node (note);
547 }
548 else
549 note_ptr = &XEXP (note, 1);
550 }
551
552 confirm_change_group ();
553 crtl->ssa->change_insn (use_change);
554 num_changes++;
555 return true;
556 }
557
558 /* Try to substitute (set DEST SRC) from DEF_INSN into USE_INSN's notes,
559 given that it was not possible to do this for USE_INSN's main pattern.
560 Return true on success, otherwise leave USE_INSN unchanged. */
561
562 static bool
563 try_fwprop_subst_notes (insn_info *use_insn, insn_info *def_insn,
564 rtx dest, rtx src)
565 {
566 rtx_insn *use_rtl = use_insn->rtl ();
567 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
568 if ((REG_NOTE_KIND (note) == REG_EQUAL
569 || REG_NOTE_KIND (note) == REG_EQUIV)
570 && try_fwprop_subst_note (use_insn, def_insn, note,
571 dest, src, true) > 0)
572 {
573 confirm_change_group ();
574 return true;
575 }
576
577 return false;
578 }
579
580 /* Check whether we could validly substitute (set DEST SRC) from DEF_INSN
581 into USE. If so, first try performing the substitution in location LOC
582 of USE->insn ()'s pattern. If that fails, try instead to substitute
583 into the notes.
584
585 Return true on success, otherwise leave USE_INSN unchanged. */
586
587 static bool
588 try_fwprop_subst (use_info *use, insn_info *def_insn,
589 rtx *loc, rtx dest, rtx src)
590 {
591 insn_info *use_insn = use->insn ();
592
593 auto attempt = crtl->ssa->new_change_attempt ();
594 use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
595
596 /* ??? Not really a meaningful test: it means we can propagate arithmetic
597 involving hard registers but not bare references to them. A better
598 test would be to iterate over src_uses looking for hard registers
599 that are not fixed. */
600 if (REG_P (src) && HARD_REGISTER_P (src))
601 return false;
602
603 /* ??? It would be better to make this EBB-based instead. That would
604 involve checking for equal EBBs rather than equal BBs and trying
605 to make the uses available at use_insn->ebb ()->first_bb (). */
606 if (def_insn->bb () != use_insn->bb ())
607 {
608 src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
609 use_insn->bb ());
610 if (!src_uses.is_valid ())
611 return false;
612 }
613
614 insn_change use_change (use_insn);
615 use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
616 src_uses);
617 if (!use_change.new_uses.is_valid ())
618 return false;
619
620 /* ??? We could allow movement within the EBB by adding:
621
622 use_change.move_range = use_insn->ebb ()->insn_range (); */
623 if (!restrict_movement (use_change))
624 return false;
625
626 return (try_fwprop_subst_pattern (attempt, use_change, def_insn,
627 loc, dest, src)
628 || try_fwprop_subst_notes (use_insn, def_insn, dest, src));
629 }
630
631 /* For the given single_set INSN, containing SRC known to be a
632 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
633 is redundant due to the register being set by a LOAD_EXTEND_OP
634 load from memory. */
635
636 static bool
637 free_load_extend (rtx src, insn_info *insn)
638 {
639 rtx reg = XEXP (src, 0);
640 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
641 return false;
642
643 def_info *def = nullptr;
644 for (use_info *use : insn->uses ())
645 if (use->regno () == REGNO (reg))
646 {
647 def = use->def ();
648 break;
649 }
650
651 if (!def)
652 return false;
653
654 insn_info *def_insn = def->insn ();
655 if (def_insn->is_artificial ())
656 return false;
657
658 rtx_insn *def_rtl = def_insn->rtl ();
659 if (NONJUMP_INSN_P (def_rtl))
660 {
661 rtx patt = PATTERN (def_rtl);
662
663 if (GET_CODE (patt) == SET
664 && GET_CODE (SET_SRC (patt)) == MEM
665 && rtx_equal_p (SET_DEST (patt), reg))
666 return true;
667 }
668 return false;
669 }
670
671 /* Subroutine of forward_propagate_subreg that handles a use of DEST
672 in REF. The other parameters are the same. */
673
674 static bool
675 forward_propagate_subreg (use_info *use, insn_info *def_insn,
676 rtx dest, rtx src, df_ref ref)
677 {
678 scalar_int_mode int_use_mode, src_mode;
679
680 /* Only consider subregs... */
681 rtx use_reg = DF_REF_REG (ref);
682 machine_mode use_mode = GET_MODE (use_reg);
683 if (GET_CODE (use_reg) != SUBREG
684 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
685 return false;
686
687 /* ??? Replacing throughout the pattern would help for match_dups. */
688 rtx *loc = DF_REF_LOC (ref);
689 if (paradoxical_subreg_p (use_reg))
690 {
691 /* If this is a paradoxical SUBREG, we have no idea what value the
692 extra bits would have. However, if the operand is equivalent to
693 a SUBREG whose operand is the same as our mode, and all the modes
694 are within a word, we can just use the inner operand because
695 these SUBREGs just say how to treat the register. */
696 if (GET_CODE (src) == SUBREG
697 && REG_P (SUBREG_REG (src))
698 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
699 && GET_MODE (SUBREG_REG (src)) == use_mode
700 && subreg_lowpart_p (src))
701 return try_fwprop_subst (use, def_insn, loc,
702 use_reg, SUBREG_REG (src));
703 }
704
705 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
706 is the low part of the reg being extended then just use the inner
707 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
708 be removed due to it matching a LOAD_EXTEND_OP load from memory,
709 or due to the operation being a no-op when applied to registers.
710 For example, if we have:
711
712 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
713 B: (... (subreg:SI (reg:DI X)) ...)
714
715 and mode_rep_extended says that Y is already sign-extended,
716 the backend will typically allow A to be combined with the
717 definition of Y or, failing that, allow A to be deleted after
718 reload through register tying. Introducing more uses of Y
719 prevents both optimisations. */
720 else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
721 && subreg_lowpart_p (use_reg))
722 {
723 if ((GET_CODE (src) == ZERO_EXTEND
724 || GET_CODE (src) == SIGN_EXTEND)
725 && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
726 && REG_P (XEXP (src, 0))
727 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
728 && GET_MODE (XEXP (src, 0)) == use_mode
729 && !free_load_extend (src, def_insn)
730 && (targetm.mode_rep_extended (int_use_mode, src_mode)
731 != (int) GET_CODE (src)))
732 return try_fwprop_subst (use, def_insn, loc, use_reg, XEXP (src, 0));
733 }
734
735 return false;
736 }
737
738 /* Try to substitute (set DEST SRC) from DEF_INSN into USE and simplify
739 the result, handling cases where DEST is used in a subreg and where
740 applying that subreg to SRC results in a useful simplification. */
741
742 static bool
743 forward_propagate_subreg (use_info *use, insn_info *def_insn,
744 rtx dest, rtx src)
745 {
746 if (!use->includes_subregs () || !REG_P (dest))
747 return false;
748
749 if (GET_CODE (src) != SUBREG
750 && GET_CODE (src) != ZERO_EXTEND
751 && GET_CODE (src) != SIGN_EXTEND)
752 return false;
753
754 rtx_insn *use_rtl = use->insn ()->rtl ();
755 df_ref ref;
756
757 FOR_EACH_INSN_USE (ref, use_rtl)
758 if (DF_REF_REGNO (ref) == use->regno ()
759 && forward_propagate_subreg (use, def_insn, dest, src, ref))
760 return true;
761
762 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
763 if (DF_REF_REGNO (ref) == use->regno ()
764 && forward_propagate_subreg (use, def_insn, dest, src, ref))
765 return true;
766
767 return false;
768 }
769
770 /* Try to substitute (set DEST SRC) from DEF_INSN into USE and
771 simplify the result. */
772
773 static bool
774 forward_propagate_and_simplify (use_info *use, insn_info *def_insn,
775 rtx dest, rtx src)
776 {
777 insn_info *use_insn = use->insn ();
778 rtx_insn *use_rtl = use_insn->rtl ();
779
780 /* ??? This check seems unnecessary. We should be able to propagate
781 into any kind of instruction, regardless of whether it's a single set.
782 It seems odd to be more permissive with asms than normal instructions. */
783 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
784 rtx use_set = single_set (use_rtl);
785 if (need_single_set && !use_set)
786 return false;
787
788 /* Do not propagate into PC, CC0, etc.
789
790 ??? This too seems unnecessary. The current code should work correctly
791 without it, including cases where jumps become unconditional. */
792 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
793 return false;
794
795 /* In __asm don't replace if src might need more registers than
796 reg, as that could increase register pressure on the __asm. */
797 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
798 return false;
799
800 /* Check if the def is loading something from the constant pool; in this
801 case we would undo optimization such as compress_float_constant.
802 Still, we can set a REG_EQUAL note. */
803 if (MEM_P (src) && MEM_READONLY_P (src))
804 {
805 rtx x = avoid_constant_pool_reference (src);
806 rtx note_set;
807 if (x != src
808 && (note_set = set_for_reg_notes (use_rtl))
809 && REG_P (SET_DEST (note_set))
810 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
811 {
812 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
813 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
814 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
815 if (old_rtx != new_rtx)
816 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
817 }
818 return false;
819 }
820
821 /* ??? Unconditionally propagating into PATTERN would work better
822 for instructions that have match_dups. */
823 rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
824 return try_fwprop_subst (use, def_insn, loc, dest, src);
825 }
826
827 /* Given a use USE of an insn, if it has a single reaching
828 definition, try to forward propagate it into that insn.
829 Return true if something changed.
830
831 REG_PROP_ONLY is true if we should only propagate register copies. */
832
833 static bool
834 forward_propagate_into (use_info *use, bool reg_prop_only = false)
835 {
836 if (use->includes_read_writes ())
837 return false;
838
839 /* Disregard uninitialized uses. */
840 def_info *def = use->def ();
841 if (!def)
842 return false;
843
844 /* Only consider single-register definitions. This could be relaxed,
845 but it should rarely be needed before RA. */
846 def = look_through_degenerate_phi (def);
847 if (def->includes_multiregs ())
848 return false;
849
850 /* Only consider uses whose definition comes from a real instruction. */
851 insn_info *def_insn = def->insn ();
852 if (def_insn->is_artificial ())
853 return false;
854
855 rtx_insn *def_rtl = def_insn->rtl ();
856 if (!NONJUMP_INSN_P (def_rtl))
857 return false;
858 /* ??? This seems an unnecessary restriction. We can easily tell
859 which set the definition comes from. */
860 if (multiple_sets (def_rtl))
861 return false;
862 rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
863 if (!def_set)
864 return false;
865
866 rtx dest = SET_DEST (def_set);
867 rtx src = SET_SRC (def_set);
868
869 /* Allow propagations into a loop only for reg-to-reg copies, since
870 replacing one register by another shouldn't increase the cost. */
871 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
872 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
873 if ((reg_prop_only || def_loop != use_loop)
874 && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
875 return false;
876
877 /* Don't substitute into a non-local goto, this confuses CFG. */
878 insn_info *use_insn = use->insn ();
879 rtx_insn *use_rtl = use_insn->rtl ();
880 if (JUMP_P (use_rtl)
881 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
882 return false;
883
884 if (forward_propagate_and_simplify (use, def_insn, dest, src)
885 || forward_propagate_subreg (use, def_insn, dest, src))
886 return true;
887
888 return false;
889 }
890 \f
891 static void
892 fwprop_init (void)
893 {
894 num_changes = 0;
895 calculate_dominance_info (CDI_DOMINATORS);
896
897 /* We do not always want to propagate into loops, so we have to find
898 loops and be careful about them. Avoid CFG modifications so that
899 we don't have to update dominance information afterwards for
900 build_single_def_use_links. */
901 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
902
903 df_analyze ();
904 crtl->ssa = new rtl_ssa::function_info (cfun);
905 }
906
907 static void
908 fwprop_done (void)
909 {
910 loop_optimizer_finalize ();
911
912 crtl->ssa->perform_pending_updates ();
913 free_dominance_info (CDI_DOMINATORS);
914 cleanup_cfg (0);
915
916 delete crtl->ssa;
917 crtl->ssa = nullptr;
918
919 delete_trivially_dead_insns (get_insns (), max_reg_num ());
920
921 if (dump_file)
922 fprintf (dump_file,
923 "\nNumber of successful forward propagations: %d\n\n",
924 num_changes);
925 }
926
927 /* Try to optimize INSN, returning true if something changes.
928 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
929 the full fwprop. */
930
931 static bool
932 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
933 {
934 for (use_info *use : insn->uses ())
935 {
936 if (use->is_mem ())
937 continue;
938 /* ??? The choices here follow those in the pre-SSA code. */
939 if (!use->includes_address_uses ())
940 {
941 if (forward_propagate_into (use, fwprop_addr_p))
942 return true;
943 }
944 else
945 {
946 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
947 /* The outermost loop is not really a loop. */
948 if (loop == NULL || loop_outer (loop) == NULL)
949 {
950 if (forward_propagate_into (use, fwprop_addr_p))
951 return true;
952 }
953 else if (fwprop_addr_p)
954 {
955 if (forward_propagate_into (use, false))
956 return true;
957 }
958 }
959 }
960 return false;
961 }
962
963 /* Main entry point. */
964
965 static bool
966 gate_fwprop (void)
967 {
968 return optimize > 0 && flag_forward_propagate;
969 }
970
971 static unsigned int
972 fwprop (bool fwprop_addr_p)
973 {
974 fwprop_init ();
975
976 /* Go through all the instructions (including debug instructions) looking
977 for uses that we could propagate into.
978
979 Do not forward propagate addresses into loops until after unrolling.
980 CSE did so because it was able to fix its own mess, but we are not. */
981
982 insn_info *next;
983
984 /* ??? This code uses a worklist in order to preserve the behavior
985 of the pre-SSA implementation. It would be better to instead
986 iterate on each instruction until no more propagations are
987 possible, then move on to the next. */
988 auto_vec<insn_info *> worklist;
989 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
990 {
991 next = insn->next_any_insn ();
992 if (insn->can_be_optimized () || insn->is_debug_insn ())
993 if (fwprop_insn (insn, fwprop_addr_p))
994 worklist.safe_push (insn);
995 }
996 for (unsigned int i = 0; i < worklist.length (); ++i)
997 {
998 insn_info *insn = worklist[i];
999 if (fwprop_insn (insn, fwprop_addr_p))
1000 worklist.safe_push (insn);
1001 }
1002
1003 fwprop_done ();
1004 return 0;
1005 }
1006
1007 namespace {
1008
1009 const pass_data pass_data_rtl_fwprop =
1010 {
1011 RTL_PASS, /* type */
1012 "fwprop1", /* name */
1013 OPTGROUP_NONE, /* optinfo_flags */
1014 TV_FWPROP, /* tv_id */
1015 0, /* properties_required */
1016 0, /* properties_provided */
1017 0, /* properties_destroyed */
1018 0, /* todo_flags_start */
1019 TODO_df_finish, /* todo_flags_finish */
1020 };
1021
1022 class pass_rtl_fwprop : public rtl_opt_pass
1023 {
1024 public:
1025 pass_rtl_fwprop (gcc::context *ctxt)
1026 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1027 {}
1028
1029 /* opt_pass methods: */
1030 virtual bool gate (function *) { return gate_fwprop (); }
1031 virtual unsigned int execute (function *) { return fwprop (false); }
1032
1033 }; // class pass_rtl_fwprop
1034
1035 } // anon namespace
1036
1037 rtl_opt_pass *
1038 make_pass_rtl_fwprop (gcc::context *ctxt)
1039 {
1040 return new pass_rtl_fwprop (ctxt);
1041 }
1042
1043 namespace {
1044
1045 const pass_data pass_data_rtl_fwprop_addr =
1046 {
1047 RTL_PASS, /* type */
1048 "fwprop2", /* name */
1049 OPTGROUP_NONE, /* optinfo_flags */
1050 TV_FWPROP, /* tv_id */
1051 0, /* properties_required */
1052 0, /* properties_provided */
1053 0, /* properties_destroyed */
1054 0, /* todo_flags_start */
1055 TODO_df_finish, /* todo_flags_finish */
1056 };
1057
1058 class pass_rtl_fwprop_addr : public rtl_opt_pass
1059 {
1060 public:
1061 pass_rtl_fwprop_addr (gcc::context *ctxt)
1062 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1063 {}
1064
1065 /* opt_pass methods: */
1066 virtual bool gate (function *) { return gate_fwprop (); }
1067 virtual unsigned int execute (function *) { return fwprop (true); }
1068
1069 }; // class pass_rtl_fwprop_addr
1070
1071 } // anon namespace
1072
1073 rtl_opt_pass *
1074 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1075 {
1076 return new pass_rtl_fwprop_addr (ctxt);
1077 }