Daily bump.
[gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2021 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44
45
46 /* Decompose multi-word pseudo-registers into individual
47 pseudo-registers when possible and profitable. This is possible
48 when all the uses of a multi-word register are via SUBREG, or are
49 copies of the register to another location. Breaking apart the
50 register permits more CSE and permits better register allocation.
51 This is profitable if the machine does not have move instructions
52 to do this.
53
54 This pass only splits moves with modes that are wider than
55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56 integer modes that are twice the width of word_mode. The latter
57 could be generalized if there was a need to do this, but the trend in
58 architectures is to not need this.
59
60 There are two useful preprocessor defines for use by maintainers:
61
62 #define LOG_COSTS 1
63
64 if you wish to see the actual cost estimates that are being used
65 for each mode wider than word mode and the cost estimates for zero
66 extension and the shifts. This can be useful when port maintainers
67 are tuning insn rtx costs.
68
69 #define FORCE_LOWERING 1
70
71 if you wish to test the pass with all the transformation forced on.
72 This can be useful for finding bugs in the transformations. */
73
74 #define LOG_COSTS 0
75 #define FORCE_LOWERING 0
76
77 /* Bit N in this bitmap is set if regno N is used in a context in
78 which we can decompose it. */
79 static bitmap decomposable_context;
80
81 /* Bit N in this bitmap is set if regno N is used in a context in
82 which it cannot be decomposed. */
83 static bitmap non_decomposable_context;
84
85 /* Bit N in this bitmap is set if regno N is used in a subreg
86 which changes the mode but not the size. This typically happens
87 when the register is accessed as a floating-point value; we want to
88 avoid generating accesses to its subwords in integer modes. */
89 static bitmap subreg_context;
90
91 /* Bit N in the bitmap in element M of this array is set if there is a
92 copy from reg M to reg N. */
93 static vec<bitmap> reg_copy_graph;
94
95 struct target_lower_subreg default_target_lower_subreg;
96 #if SWITCHABLE_TARGET
97 struct target_lower_subreg *this_target_lower_subreg
98 = &default_target_lower_subreg;
99 #endif
100
101 #define twice_word_mode \
102 this_target_lower_subreg->x_twice_word_mode
103 #define choices \
104 this_target_lower_subreg->x_choices
105
106 /* Return true if MODE is a mode we know how to lower. When returning true,
107 store its byte size in *BYTES and its word size in *WORDS. */
108
109 static inline bool
110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 unsigned int *words)
112 {
113 if (!GET_MODE_SIZE (mode).is_constant (bytes))
114 return false;
115 *words = CEIL (*bytes, UNITS_PER_WORD);
116 return true;
117 }
118
119 /* RTXes used while computing costs. */
120 struct cost_rtxes {
121 /* Source and target registers. */
122 rtx source;
123 rtx target;
124
125 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
126 rtx zext;
127
128 /* A shift of SOURCE. */
129 rtx shift;
130
131 /* A SET of TARGET. */
132 rtx set;
133 };
134
135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
137
138 static int
139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
140 machine_mode mode, int op1)
141 {
142 PUT_CODE (rtxes->shift, code);
143 PUT_MODE (rtxes->shift, mode);
144 PUT_MODE (rtxes->source, mode);
145 XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
146 return set_src_cost (rtxes->shift, mode, speed_p);
147 }
148
149 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
150 to true if it is profitable to split a double-word CODE shift
151 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
152 for speed or size profitability.
153
154 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
155 the cost of moving zero into a word-mode register. WORD_MOVE_COST
156 is the cost of moving between word registers. */
157
158 static void
159 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
160 bool *splitting, enum rtx_code code,
161 int word_move_zero_cost, int word_move_cost)
162 {
163 int wide_cost, narrow_cost, upper_cost, i;
164
165 for (i = 0; i < BITS_PER_WORD; i++)
166 {
167 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
168 i + BITS_PER_WORD);
169 if (i == 0)
170 narrow_cost = word_move_cost;
171 else
172 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
173
174 if (code != ASHIFTRT)
175 upper_cost = word_move_zero_cost;
176 else if (i == BITS_PER_WORD - 1)
177 upper_cost = word_move_cost;
178 else
179 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
180 BITS_PER_WORD - 1);
181
182 if (LOG_COSTS)
183 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
184 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
185 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
186
187 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
188 splitting[i] = true;
189 }
190 }
191
192 /* Compute what we should do when optimizing for speed or size; SPEED_P
193 selects which. Use RTXES for computing costs. */
194
195 static void
196 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
197 {
198 unsigned int i;
199 int word_move_zero_cost, word_move_cost;
200
201 PUT_MODE (rtxes->target, word_mode);
202 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
203 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
204
205 SET_SRC (rtxes->set) = rtxes->source;
206 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
207
208 if (LOG_COSTS)
209 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
210 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
211
212 for (i = 0; i < MAX_MACHINE_MODE; i++)
213 {
214 machine_mode mode = (machine_mode) i;
215 unsigned int size, factor;
216 if (interesting_mode_p (mode, &size, &factor) && factor > 1)
217 {
218 unsigned int mode_move_cost;
219
220 PUT_MODE (rtxes->target, mode);
221 PUT_MODE (rtxes->source, mode);
222 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
223
224 if (LOG_COSTS)
225 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
226 GET_MODE_NAME (mode), mode_move_cost,
227 word_move_cost, factor);
228
229 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
230 {
231 choices[speed_p].move_modes_to_split[i] = true;
232 choices[speed_p].something_to_do = true;
233 }
234 }
235 }
236
237 /* For the moves and shifts, the only case that is checked is one
238 where the mode of the target is an integer mode twice the width
239 of the word_mode.
240
241 If it is not profitable to split a double word move then do not
242 even consider the shifts or the zero extension. */
243 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
244 {
245 int zext_cost;
246
247 /* The only case here to check to see if moving the upper part with a
248 zero is cheaper than doing the zext itself. */
249 PUT_MODE (rtxes->source, word_mode);
250 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
251
252 if (LOG_COSTS)
253 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
254 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
255 zext_cost, word_move_cost, word_move_zero_cost);
256
257 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
258 choices[speed_p].splitting_zext = true;
259
260 compute_splitting_shift (speed_p, rtxes,
261 choices[speed_p].splitting_ashift, ASHIFT,
262 word_move_zero_cost, word_move_cost);
263 compute_splitting_shift (speed_p, rtxes,
264 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
265 word_move_zero_cost, word_move_cost);
266 compute_splitting_shift (speed_p, rtxes,
267 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
268 word_move_zero_cost, word_move_cost);
269 }
270 }
271
272 /* Do one-per-target initialisation. This involves determining
273 which operations on the machine are profitable. If none are found,
274 then the pass just returns when called. */
275
276 void
277 init_lower_subreg (void)
278 {
279 struct cost_rtxes rtxes;
280
281 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
282
283 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
284
285 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
286 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
287 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
288 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
289 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
290
291 if (LOG_COSTS)
292 fprintf (stderr, "\nSize costs\n==========\n\n");
293 compute_costs (false, &rtxes);
294
295 if (LOG_COSTS)
296 fprintf (stderr, "\nSpeed costs\n===========\n\n");
297 compute_costs (true, &rtxes);
298 }
299
300 static bool
301 simple_move_operand (rtx x)
302 {
303 if (GET_CODE (x) == SUBREG)
304 x = SUBREG_REG (x);
305
306 if (!OBJECT_P (x))
307 return false;
308
309 if (GET_CODE (x) == LABEL_REF
310 || GET_CODE (x) == SYMBOL_REF
311 || GET_CODE (x) == HIGH
312 || GET_CODE (x) == CONST)
313 return false;
314
315 if (MEM_P (x)
316 && (MEM_VOLATILE_P (x)
317 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
318 return false;
319
320 return true;
321 }
322
323 /* If X is an operator that can be treated as a simple move that we
324 can split, then return the operand that is operated on. */
325
326 static rtx
327 operand_for_swap_move_operator (rtx x)
328 {
329 /* A word sized rotate of a register pair is equivalent to swapping
330 the registers in the register pair. */
331 if (GET_CODE (x) == ROTATE
332 && GET_MODE (x) == twice_word_mode
333 && simple_move_operand (XEXP (x, 0))
334 && CONST_INT_P (XEXP (x, 1))
335 && INTVAL (XEXP (x, 1)) == BITS_PER_WORD)
336 return XEXP (x, 0);
337
338 return NULL_RTX;
339 }
340
341 /* If INSN is a single set between two objects that we want to split,
342 return the single set. SPEED_P says whether we are optimizing
343 INSN for speed or size.
344
345 INSN should have been passed to recog and extract_insn before this
346 is called. */
347
348 static rtx
349 simple_move (rtx_insn *insn, bool speed_p)
350 {
351 rtx x, op;
352 rtx set;
353 machine_mode mode;
354
355 if (recog_data.n_operands != 2)
356 return NULL_RTX;
357
358 set = single_set (insn);
359 if (!set)
360 return NULL_RTX;
361
362 x = SET_DEST (set);
363 if (x != recog_data.operand[0] && x != recog_data.operand[1])
364 return NULL_RTX;
365 if (!simple_move_operand (x))
366 return NULL_RTX;
367
368 x = SET_SRC (set);
369 if ((op = operand_for_swap_move_operator (x)) != NULL_RTX)
370 x = op;
371
372 if (x != recog_data.operand[0] && x != recog_data.operand[1])
373 return NULL_RTX;
374 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
375 things like x86 rdtsc which returns a DImode value. */
376 if (GET_CODE (x) != ASM_OPERANDS
377 && !simple_move_operand (x))
378 return NULL_RTX;
379
380 /* We try to decompose in integer modes, to avoid generating
381 inefficient code copying between integer and floating point
382 registers. That means that we can't decompose if this is a
383 non-integer mode for which there is no integer mode of the same
384 size. */
385 mode = GET_MODE (SET_DEST (set));
386 if (!SCALAR_INT_MODE_P (mode)
387 && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
388 return NULL_RTX;
389
390 /* Reject PARTIAL_INT modes. They are used for processor specific
391 purposes and it's probably best not to tamper with them. */
392 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
393 return NULL_RTX;
394
395 if (!choices[speed_p].move_modes_to_split[(int) mode])
396 return NULL_RTX;
397
398 return set;
399 }
400
401 /* If SET is a copy from one multi-word pseudo-register to another,
402 record that in reg_copy_graph. Return whether it is such a
403 copy. */
404
405 static bool
406 find_pseudo_copy (rtx set)
407 {
408 rtx dest = SET_DEST (set);
409 rtx src = SET_SRC (set);
410 rtx op;
411 unsigned int rd, rs;
412 bitmap b;
413
414 if ((op = operand_for_swap_move_operator (src)) != NULL_RTX)
415 src = op;
416
417 if (!REG_P (dest) || !REG_P (src))
418 return false;
419
420 rd = REGNO (dest);
421 rs = REGNO (src);
422 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
423 return false;
424
425 b = reg_copy_graph[rs];
426 if (b == NULL)
427 {
428 b = BITMAP_ALLOC (NULL);
429 reg_copy_graph[rs] = b;
430 }
431
432 bitmap_set_bit (b, rd);
433
434 return true;
435 }
436
437 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
438 where they are copied to another register, add the register to
439 which they are copied to DECOMPOSABLE_CONTEXT. Use
440 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
441 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
442
443 static void
444 propagate_pseudo_copies (void)
445 {
446 auto_bitmap queue, propagate;
447
448 bitmap_copy (queue, decomposable_context);
449 do
450 {
451 bitmap_iterator iter;
452 unsigned int i;
453
454 bitmap_clear (propagate);
455
456 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
457 {
458 bitmap b = reg_copy_graph[i];
459 if (b)
460 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
461 }
462
463 bitmap_and_compl (queue, propagate, decomposable_context);
464 bitmap_ior_into (decomposable_context, propagate);
465 }
466 while (!bitmap_empty_p (queue));
467 }
468
/* How the insn being scanned has been classified.  A pointer to one of
   these values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,

  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,

  /* The insn is a simple move of any other kind.  */
  SIMPLE_MOVE
};
481
482 /* If we find a SUBREG in *LOC which we could use to decompose a
483 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
484 unadorned register which is not a simple pseudo-register copy,
485 DATA will point at the type of move, and we set a bit in
486 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
487
488 static void
489 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
490 {
491 subrtx_var_iterator::array_type array;
492 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
493 {
494 rtx x = *iter;
495 if (GET_CODE (x) == SUBREG)
496 {
497 rtx inner = SUBREG_REG (x);
498 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
499
500 if (!REG_P (inner))
501 continue;
502
503 regno = REGNO (inner);
504 if (HARD_REGISTER_NUM_P (regno))
505 {
506 iter.skip_subrtxes ();
507 continue;
508 }
509
510 if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
511 || !interesting_mode_p (GET_MODE (inner), &inner_size,
512 &inner_words))
513 continue;
514
515 /* We only try to decompose single word subregs of multi-word
516 registers. When we find one, we return -1 to avoid iterating
517 over the inner register.
518
519 ??? This doesn't allow, e.g., DImode subregs of TImode values
520 on 32-bit targets. We would need to record the way the
521 pseudo-register was used, and only decompose if all the uses
522 were the same number and size of pieces. Hopefully this
523 doesn't happen much. */
524
525 if (outer_words == 1
526 && inner_words > 1
527 /* Don't allow to decompose floating point subregs of
528 multi-word pseudos if the floating point mode does
529 not have word size, because otherwise we'd generate
530 a subreg with that floating mode from a different
531 sized integral pseudo which is not allowed by
532 validate_subreg. */
533 && (!FLOAT_MODE_P (GET_MODE (x))
534 || outer_size == UNITS_PER_WORD))
535 {
536 bitmap_set_bit (decomposable_context, regno);
537 iter.skip_subrtxes ();
538 continue;
539 }
540
541 /* If this is a cast from one mode to another, where the modes
542 have the same size, and they are not tieable, then mark this
543 register as non-decomposable. If we decompose it we are
544 likely to mess up whatever the backend is trying to do. */
545 if (outer_words > 1
546 && outer_size == inner_size
547 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
548 {
549 bitmap_set_bit (non_decomposable_context, regno);
550 bitmap_set_bit (subreg_context, regno);
551 iter.skip_subrtxes ();
552 continue;
553 }
554 }
555 else if (REG_P (x))
556 {
557 unsigned int regno, size, words;
558
559 /* We will see an outer SUBREG before we see the inner REG, so
560 when we see a plain REG here it means a direct reference to
561 the register.
562
563 If this is not a simple copy from one location to another,
564 then we cannot decompose this register. If this is a simple
565 copy we want to decompose, and the mode is right,
566 then we mark the register as decomposable.
567 Otherwise we don't say anything about this register --
568 it could be decomposed, but whether that would be
569 profitable depends upon how it is used elsewhere.
570
571 We only set bits in the bitmap for multi-word
572 pseudo-registers, since those are the only ones we care about
573 and it keeps the size of the bitmaps down. */
574
575 regno = REGNO (x);
576 if (!HARD_REGISTER_NUM_P (regno)
577 && interesting_mode_p (GET_MODE (x), &size, &words)
578 && words > 1)
579 {
580 switch (*pcmi)
581 {
582 case NOT_SIMPLE_MOVE:
583 bitmap_set_bit (non_decomposable_context, regno);
584 break;
585 case DECOMPOSABLE_SIMPLE_MOVE:
586 if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
587 bitmap_set_bit (decomposable_context, regno);
588 break;
589 case SIMPLE_MOVE:
590 break;
591 default:
592 gcc_unreachable ();
593 }
594 }
595 }
596 else if (MEM_P (x))
597 {
598 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
599
600 /* Any registers used in a MEM do not participate in a
601 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
602 here, and return -1 to block the parent's recursion. */
603 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
604 iter.skip_subrtxes ();
605 }
606 }
607 }
608
609 /* Decompose REGNO into word-sized components. We smash the REG node
610 in place. This ensures that (1) something goes wrong quickly if we
611 fail to make some replacement, and (2) the debug information inside
612 the symbol table is automatically kept up to date. */
613
614 static void
615 decompose_register (unsigned int regno)
616 {
617 rtx reg;
618 unsigned int size, words, i;
619 rtvec v;
620
621 reg = regno_reg_rtx[regno];
622
623 regno_reg_rtx[regno] = NULL_RTX;
624
625 if (!interesting_mode_p (GET_MODE (reg), &size, &words))
626 gcc_unreachable ();
627
628 v = rtvec_alloc (words);
629 for (i = 0; i < words; ++i)
630 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
631
632 PUT_CODE (reg, CONCATN);
633 XVEC (reg, 0) = v;
634
635 if (dump_file)
636 {
637 fprintf (dump_file, "; Splitting reg %u ->", regno);
638 for (i = 0; i < words; ++i)
639 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
640 fputc ('\n', dump_file);
641 }
642 }
643
644 /* Get a SUBREG of a CONCATN. */
645
646 static rtx
647 simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
648 {
649 unsigned int outer_size, outer_words, inner_size, inner_words;
650 machine_mode innermode, partmode;
651 rtx part;
652 unsigned int final_offset;
653 unsigned int byte;
654
655 innermode = GET_MODE (op);
656 if (!interesting_mode_p (outermode, &outer_size, &outer_words)
657 || !interesting_mode_p (innermode, &inner_size, &inner_words))
658 gcc_unreachable ();
659
660 /* Must be constant if interesting_mode_p passes. */
661 byte = orig_byte.to_constant ();
662 gcc_assert (GET_CODE (op) == CONCATN);
663 gcc_assert (byte % outer_size == 0);
664
665 gcc_assert (byte < inner_size);
666 if (outer_size > inner_size)
667 return NULL_RTX;
668
669 inner_size /= XVECLEN (op, 0);
670 part = XVECEXP (op, 0, byte / inner_size);
671 partmode = GET_MODE (part);
672
673 final_offset = byte % inner_size;
674 if (final_offset + outer_size > inner_size)
675 return NULL_RTX;
676
677 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
678 regular CONST_VECTORs. They have vector or integer modes, depending
679 on the capabilities of the target. Cope with them. */
680 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
681 partmode = GET_MODE_INNER (innermode);
682 else if (partmode == VOIDmode)
683 partmode = mode_for_size (inner_size * BITS_PER_UNIT,
684 GET_MODE_CLASS (innermode), 0).require ();
685
686 return simplify_gen_subreg (outermode, part, partmode, final_offset);
687 }
688
689 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
690
691 static rtx
692 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
693 machine_mode innermode, unsigned int byte)
694 {
695 rtx ret;
696
697 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
698 If OP is a SUBREG of a CONCATN, then it must be a simple mode
699 change with the same size and offset 0, or it must extract a
700 part. We shouldn't see anything else here. */
701 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
702 {
703 rtx op2;
704
705 if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
706 GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
707 && known_eq (SUBREG_BYTE (op), 0))
708 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
709 GET_MODE (SUBREG_REG (op)), byte);
710
711 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
712 SUBREG_BYTE (op));
713 if (op2 == NULL_RTX)
714 {
715 /* We don't handle paradoxical subregs here. */
716 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
717 gcc_assert (!paradoxical_subreg_p (op));
718 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
719 byte + SUBREG_BYTE (op));
720 gcc_assert (op2 != NULL_RTX);
721 return op2;
722 }
723
724 op = op2;
725 gcc_assert (op != NULL_RTX);
726 gcc_assert (innermode == GET_MODE (op));
727 }
728
729 if (GET_CODE (op) == CONCATN)
730 return simplify_subreg_concatn (outermode, op, byte);
731
732 ret = simplify_gen_subreg (outermode, op, innermode, byte);
733
734 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
735 resolve_simple_move will ask for the high part of the paradoxical
736 subreg, which does not have a value. Just return a zero. */
737 if (ret == NULL_RTX
738 && paradoxical_subreg_p (op))
739 return CONST0_RTX (outermode);
740
741 gcc_assert (ret != NULL_RTX);
742 return ret;
743 }
744
745 /* Return whether we should resolve X into the registers into which it
746 was decomposed. */
747
748 static bool
749 resolve_reg_p (rtx x)
750 {
751 return GET_CODE (x) == CONCATN;
752 }
753
754 /* Return whether X is a SUBREG of a register which we need to
755 resolve. */
756
757 static bool
758 resolve_subreg_p (rtx x)
759 {
760 if (GET_CODE (x) != SUBREG)
761 return false;
762 return resolve_reg_p (SUBREG_REG (x));
763 }
764
765 /* Look for SUBREGs in *LOC which need to be decomposed. */
766
767 static bool
768 resolve_subreg_use (rtx *loc, rtx insn)
769 {
770 subrtx_ptr_iterator::array_type array;
771 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
772 {
773 rtx *loc = *iter;
774 rtx x = *loc;
775 if (resolve_subreg_p (x))
776 {
777 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
778 SUBREG_BYTE (x));
779
780 /* It is possible for a note to contain a reference which we can
781 decompose. In this case, return 1 to the caller to indicate
782 that the note must be removed. */
783 if (!x)
784 {
785 gcc_assert (!insn);
786 return true;
787 }
788
789 validate_change (insn, loc, x, 1);
790 iter.skip_subrtxes ();
791 }
792 else if (resolve_reg_p (x))
793 /* Return 1 to the caller to indicate that we found a direct
794 reference to a register which is being decomposed. This can
795 happen inside notes, multiword shift or zero-extend
796 instructions. */
797 return true;
798 }
799
800 return false;
801 }
802
803 /* Resolve any decomposed registers which appear in register notes on
804 INSN. */
805
806 static void
807 resolve_reg_notes (rtx_insn *insn)
808 {
809 rtx *pnote, note;
810
811 note = find_reg_equal_equiv_note (insn);
812 if (note)
813 {
814 int old_count = num_validated_changes ();
815 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
816 remove_note (insn, note);
817 else
818 if (old_count != num_validated_changes ())
819 df_notes_rescan (insn);
820 }
821
822 pnote = &REG_NOTES (insn);
823 while (*pnote != NULL_RTX)
824 {
825 bool del = false;
826
827 note = *pnote;
828 switch (REG_NOTE_KIND (note))
829 {
830 case REG_DEAD:
831 case REG_UNUSED:
832 if (resolve_reg_p (XEXP (note, 0)))
833 del = true;
834 break;
835
836 default:
837 break;
838 }
839
840 if (del)
841 *pnote = XEXP (note, 1);
842 else
843 pnote = &XEXP (note, 1);
844 }
845 }
846
847 /* Return whether X can be decomposed into subwords. */
848
849 static bool
850 can_decompose_p (rtx x)
851 {
852 if (REG_P (x))
853 {
854 unsigned int regno = REGNO (x);
855
856 if (HARD_REGISTER_NUM_P (regno))
857 {
858 unsigned int byte, num_bytes, num_words;
859
860 if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
861 return false;
862 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
863 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
864 return false;
865 return true;
866 }
867 else
868 return !bitmap_bit_p (subreg_context, regno);
869 }
870
871 return true;
872 }
873
874 /* OPND is a concatn operand this is used with a simple move operator.
875 Return a new rtx with the concatn's operands swapped. */
876
877 static rtx
878 resolve_operand_for_swap_move_operator (rtx opnd)
879 {
880 gcc_assert (GET_CODE (opnd) == CONCATN);
881 rtx concatn = copy_rtx (opnd);
882 rtx op0 = XVECEXP (concatn, 0, 0);
883 rtx op1 = XVECEXP (concatn, 0, 1);
884 XVECEXP (concatn, 0, 0) = op1;
885 XVECEXP (concatn, 0, 1) = op0;
886 return concatn;
887 }
888
889 /* Decompose the registers used in a simple move SET within INSN. If
890 we don't change anything, return INSN, otherwise return the start
891 of the sequence of moves. */
892
893 static rtx_insn *
894 resolve_simple_move (rtx set, rtx_insn *insn)
895 {
896 rtx src, dest, real_dest, src_op;
897 rtx_insn *insns;
898 machine_mode orig_mode, dest_mode;
899 unsigned int orig_size, words;
900 bool pushing;
901
902 src = SET_SRC (set);
903 dest = SET_DEST (set);
904 orig_mode = GET_MODE (dest);
905
906 if (!interesting_mode_p (orig_mode, &orig_size, &words))
907 gcc_unreachable ();
908 gcc_assert (words > 1);
909
910 start_sequence ();
911
912 /* We have to handle copying from a SUBREG of a decomposed reg where
913 the SUBREG is larger than word size. Rather than assume that we
914 can take a word_mode SUBREG of the destination, we copy to a new
915 register and then copy that to the destination. */
916
917 real_dest = NULL_RTX;
918
919 if ((src_op = operand_for_swap_move_operator (src)) != NULL_RTX)
920 {
921 if (resolve_reg_p (dest))
922 {
923 /* DEST is a CONCATN, so swap its operands and strip
924 SRC's operator. */
925 dest = resolve_operand_for_swap_move_operator (dest);
926 src = src_op;
927 }
928 else if (resolve_reg_p (src_op))
929 {
930 /* SRC is an operation on a CONCATN, so strip the operator and
931 swap the CONCATN's operands. */
932 src = resolve_operand_for_swap_move_operator (src_op);
933 }
934 }
935
936 if (GET_CODE (src) == SUBREG
937 && resolve_reg_p (SUBREG_REG (src))
938 && (maybe_ne (SUBREG_BYTE (src), 0)
939 || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
940 {
941 real_dest = dest;
942 dest = gen_reg_rtx (orig_mode);
943 if (REG_P (real_dest))
944 REG_ATTRS (dest) = REG_ATTRS (real_dest);
945 }
946
947 /* Similarly if we are copying to a SUBREG of a decomposed reg where
948 the SUBREG is larger than word size. */
949
950 if (GET_CODE (dest) == SUBREG
951 && resolve_reg_p (SUBREG_REG (dest))
952 && (maybe_ne (SUBREG_BYTE (dest), 0)
953 || maybe_ne (orig_size,
954 GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
955 {
956 rtx reg, smove;
957 rtx_insn *minsn;
958
959 reg = gen_reg_rtx (orig_mode);
960 minsn = emit_move_insn (reg, src);
961 smove = single_set (minsn);
962 gcc_assert (smove != NULL_RTX);
963 resolve_simple_move (smove, minsn);
964 src = reg;
965 }
966
967 /* If we didn't have any big SUBREGS of decomposed registers, and
968 neither side of the move is a register we are decomposing, then
969 we don't have to do anything here. */
970
971 if (src == SET_SRC (set)
972 && dest == SET_DEST (set)
973 && !resolve_reg_p (src)
974 && !resolve_subreg_p (src)
975 && !resolve_reg_p (dest)
976 && !resolve_subreg_p (dest))
977 {
978 end_sequence ();
979 return insn;
980 }
981
982 /* It's possible for the code to use a subreg of a decomposed
983 register while forming an address. We need to handle that before
984 passing the address to emit_move_insn. We pass NULL_RTX as the
985 insn parameter to resolve_subreg_use because we cannot validate
986 the insn yet. */
987 if (MEM_P (src) || MEM_P (dest))
988 {
989 int acg;
990
991 if (MEM_P (src))
992 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
993 if (MEM_P (dest))
994 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
995 acg = apply_change_group ();
996 gcc_assert (acg);
997 }
998
999 /* If SRC is a register which we can't decompose, or has side
1000 effects, we need to move via a temporary register. */
1001
1002 if (!can_decompose_p (src)
1003 || side_effects_p (src)
1004 || GET_CODE (src) == ASM_OPERANDS)
1005 {
1006 rtx reg;
1007
1008 reg = gen_reg_rtx (orig_mode);
1009
1010 if (AUTO_INC_DEC)
1011 {
1012 rtx_insn *move = emit_move_insn (reg, src);
1013 if (MEM_P (src))
1014 {
1015 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1016 if (note)
1017 add_reg_note (move, REG_INC, XEXP (note, 0));
1018 }
1019 }
1020 else
1021 emit_move_insn (reg, src);
1022
1023 src = reg;
1024 }
1025
1026 /* If DEST is a register which we can't decompose, or has side
1027 effects, we need to first move to a temporary register. We
1028 handle the common case of pushing an operand directly. We also
1029 go through a temporary register if it holds a floating point
1030 value. This gives us better code on systems which can't move
1031 data easily between integer and floating point registers. */
1032
1033 dest_mode = orig_mode;
1034 pushing = push_operand (dest, dest_mode);
1035 if (!can_decompose_p (dest)
1036 || (side_effects_p (dest) && !pushing)
1037 || (!SCALAR_INT_MODE_P (dest_mode)
1038 && !resolve_reg_p (dest)
1039 && !resolve_subreg_p (dest)))
1040 {
1041 if (real_dest == NULL_RTX)
1042 real_dest = dest;
1043 if (!SCALAR_INT_MODE_P (dest_mode))
1044 dest_mode = int_mode_for_mode (dest_mode).require ();
1045 dest = gen_reg_rtx (dest_mode);
1046 if (REG_P (real_dest))
1047 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1048 }
1049
1050 if (pushing)
1051 {
1052 unsigned int i, j, jinc;
1053
1054 gcc_assert (orig_size % UNITS_PER_WORD == 0);
1055 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1056 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1057
1058 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1059 {
1060 j = 0;
1061 jinc = 1;
1062 }
1063 else
1064 {
1065 j = words - 1;
1066 jinc = -1;
1067 }
1068
1069 for (i = 0; i < words; ++i, j += jinc)
1070 {
1071 rtx temp;
1072
1073 temp = copy_rtx (XEXP (dest, 0));
1074 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1075 j * UNITS_PER_WORD);
1076 emit_move_insn (temp,
1077 simplify_gen_subreg_concatn (word_mode, src,
1078 orig_mode,
1079 j * UNITS_PER_WORD));
1080 }
1081 }
1082 else
1083 {
1084 unsigned int i;
1085
1086 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1087 emit_clobber (dest);
1088
1089 for (i = 0; i < words; ++i)
1090 {
1091 rtx t = simplify_gen_subreg_concatn (word_mode, dest,
1092 dest_mode,
1093 i * UNITS_PER_WORD);
1094 /* simplify_gen_subreg_concatn can return (const_int 0) for
1095 some sub-objects of paradoxical subregs. As a source operand,
1096 that's fine. As a destination it must be avoided. Those are
1097 supposed to be don't care bits, so we can just drop that store
1098 on the floor. */
1099 if (t != CONST0_RTX (word_mode))
1100 emit_move_insn (t,
1101 simplify_gen_subreg_concatn (word_mode, src,
1102 orig_mode,
1103 i * UNITS_PER_WORD));
1104 }
1105 }
1106
1107 if (real_dest != NULL_RTX)
1108 {
1109 rtx mdest, smove;
1110 rtx_insn *minsn;
1111
1112 if (dest_mode == orig_mode)
1113 mdest = dest;
1114 else
1115 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1116 minsn = emit_move_insn (real_dest, mdest);
1117
1118 if (AUTO_INC_DEC && MEM_P (real_dest)
1119 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1120 {
1121 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1122 if (note)
1123 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1124 }
1125
1126 smove = single_set (minsn);
1127 gcc_assert (smove != NULL_RTX);
1128
1129 resolve_simple_move (smove, minsn);
1130 }
1131
1132 insns = get_insns ();
1133 end_sequence ();
1134
1135 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1136
1137 emit_insn_before (insns, insn);
1138
1139 /* If we get here via self-recursion, then INSN is not yet in the insns
1140 chain and delete_insn will fail. We only want to remove INSN from the
1141 current sequence. See PR56738. */
1142 if (in_sequence_p ())
1143 remove_insn (insn);
1144 else
1145 delete_insn (insn);
1146
1147 return insns;
1148 }
1149
1150 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1151 component registers. Return whether we changed something. */
1152
1153 static bool
1154 resolve_clobber (rtx pat, rtx_insn *insn)
1155 {
1156 rtx reg;
1157 machine_mode orig_mode;
1158 unsigned int orig_size, words, i;
1159 int ret;
1160
1161 reg = XEXP (pat, 0);
1162 /* For clobbers we can look through paradoxical subregs which
1163 we do not handle in simplify_gen_subreg_concatn. */
1164 if (paradoxical_subreg_p (reg))
1165 reg = SUBREG_REG (reg);
1166 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1167 return false;
1168
1169 orig_mode = GET_MODE (reg);
1170 if (!interesting_mode_p (orig_mode, &orig_size, &words))
1171 gcc_unreachable ();
1172
1173 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1174 simplify_gen_subreg_concatn (word_mode, reg,
1175 orig_mode, 0),
1176 0);
1177 df_insn_rescan (insn);
1178 gcc_assert (ret != 0);
1179
1180 for (i = words - 1; i > 0; --i)
1181 {
1182 rtx x;
1183
1184 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1185 i * UNITS_PER_WORD);
1186 x = gen_rtx_CLOBBER (VOIDmode, x);
1187 emit_insn_after (x, insn);
1188 }
1189
1190 resolve_reg_notes (insn);
1191
1192 return true;
1193 }
1194
1195 /* A USE of a decomposed register is no longer meaningful. Return
1196 whether we changed something. */
1197
1198 static bool
1199 resolve_use (rtx pat, rtx_insn *insn)
1200 {
1201 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1202 {
1203 delete_insn (insn);
1204 return true;
1205 }
1206
1207 resolve_reg_notes (insn);
1208
1209 return false;
1210 }
1211
1212 /* A VAR_LOCATION can be simplified. */
1213
1214 static void
1215 resolve_debug (rtx_insn *insn)
1216 {
1217 subrtx_ptr_iterator::array_type array;
1218 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1219 {
1220 rtx *loc = *iter;
1221 rtx x = *loc;
1222 if (resolve_subreg_p (x))
1223 {
1224 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1225 SUBREG_BYTE (x));
1226
1227 if (x)
1228 *loc = x;
1229 else
1230 x = copy_rtx (*loc);
1231 }
1232 if (resolve_reg_p (x))
1233 *loc = copy_rtx (x);
1234 }
1235
1236 df_insn_rescan (insn);
1237
1238 resolve_reg_notes (insn);
1239 }
1240
1241 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1242 set the decomposable_context bitmap accordingly. SPEED_P is true
1243 if we are optimizing INSN for speed rather than size. Return true
1244 if INSN is decomposable. */
1245
1246 static bool
1247 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1248 {
1249 rtx set;
1250 rtx op;
1251 rtx op_operand;
1252
1253 set = single_set (insn);
1254 if (!set)
1255 return false;
1256
1257 op = SET_SRC (set);
1258 if (GET_CODE (op) != ASHIFT
1259 && GET_CODE (op) != LSHIFTRT
1260 && GET_CODE (op) != ASHIFTRT
1261 && GET_CODE (op) != ZERO_EXTEND)
1262 return false;
1263
1264 op_operand = XEXP (op, 0);
1265 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1266 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1267 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1268 || GET_MODE (op) != twice_word_mode)
1269 return false;
1270
1271 if (GET_CODE (op) == ZERO_EXTEND)
1272 {
1273 if (GET_MODE (op_operand) != word_mode
1274 || !choices[speed_p].splitting_zext)
1275 return false;
1276 }
1277 else /* left or right shift */
1278 {
1279 bool *splitting = (GET_CODE (op) == ASHIFT
1280 ? choices[speed_p].splitting_ashift
1281 : GET_CODE (op) == ASHIFTRT
1282 ? choices[speed_p].splitting_ashiftrt
1283 : choices[speed_p].splitting_lshiftrt);
1284 if (!CONST_INT_P (XEXP (op, 1))
1285 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1286 2 * BITS_PER_WORD - 1)
1287 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1288 return false;
1289
1290 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1291 }
1292
1293 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1294
1295 return true;
1296 }
1297
1298 /* Decompose a more than word wide shift (in INSN) of a multiword
1299 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1300 and 'set to zero' insn. Return a pointer to the new insn when a
1301 replacement was done. */
1302
1303 static rtx_insn *
1304 resolve_shift_zext (rtx_insn *insn)
1305 {
1306 rtx set;
1307 rtx op;
1308 rtx op_operand;
1309 rtx_insn *insns;
1310 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1311 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1312 scalar_int_mode inner_mode;
1313
1314 set = single_set (insn);
1315 if (!set)
1316 return NULL;
1317
1318 op = SET_SRC (set);
1319 if (GET_CODE (op) != ASHIFT
1320 && GET_CODE (op) != LSHIFTRT
1321 && GET_CODE (op) != ASHIFTRT
1322 && GET_CODE (op) != ZERO_EXTEND)
1323 return NULL;
1324
1325 op_operand = XEXP (op, 0);
1326 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
1327 return NULL;
1328
1329 /* We can tear this operation apart only if the regs were already
1330 torn apart. */
1331 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1332 return NULL;
1333
1334 /* src_reg_num is the number of the word mode register which we
1335 are operating on. For a left shift and a zero_extend on little
1336 endian machines this is register 0. */
1337 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1338 ? 1 : 0;
1339
1340 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
1341 src_reg_num = 1 - src_reg_num;
1342
1343 if (GET_CODE (op) == ZERO_EXTEND)
1344 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1345 else
1346 dest_reg_num = 1 - src_reg_num;
1347
1348 offset1 = UNITS_PER_WORD * dest_reg_num;
1349 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1350 src_offset = UNITS_PER_WORD * src_reg_num;
1351
1352 start_sequence ();
1353
1354 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1355 GET_MODE (SET_DEST (set)),
1356 offset1);
1357 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1358 GET_MODE (SET_DEST (set)),
1359 offset2);
1360 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1361 GET_MODE (op_operand),
1362 src_offset);
1363 if (GET_CODE (op) == ASHIFTRT
1364 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1365 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1366 BITS_PER_WORD - 1, NULL_RTX, 0);
1367
1368 if (GET_CODE (op) != ZERO_EXTEND)
1369 {
1370 int shift_count = INTVAL (XEXP (op, 1));
1371 if (shift_count > BITS_PER_WORD)
1372 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1373 LSHIFT_EXPR : RSHIFT_EXPR,
1374 word_mode, src_reg,
1375 shift_count - BITS_PER_WORD,
1376 dest_reg, GET_CODE (op) != ASHIFTRT);
1377 }
1378
1379 if (dest_reg != src_reg)
1380 emit_move_insn (dest_reg, src_reg);
1381 if (GET_CODE (op) != ASHIFTRT)
1382 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1383 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1384 emit_move_insn (dest_upper, copy_rtx (src_reg));
1385 else
1386 emit_move_insn (dest_upper, upper_src);
1387 insns = get_insns ();
1388
1389 end_sequence ();
1390
1391 emit_insn_before (insns, insn);
1392
1393 if (dump_file)
1394 {
1395 rtx_insn *in;
1396 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1397 for (in = insns; in != insn; in = NEXT_INSN (in))
1398 fprintf (dump_file, "%d ", INSN_UID (in));
1399 fprintf (dump_file, "\n");
1400 }
1401
1402 delete_insn (insn);
1403 return insns;
1404 }
1405
1406 /* Print to dump_file a description of what we're doing with shift code CODE.
1407 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1408
1409 static void
1410 dump_shift_choices (enum rtx_code code, bool *splitting)
1411 {
1412 int i;
1413 const char *sep;
1414
1415 fprintf (dump_file,
1416 " Splitting mode %s for %s lowering with shift amounts = ",
1417 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1418 sep = "";
1419 for (i = 0; i < BITS_PER_WORD; i++)
1420 if (splitting[i])
1421 {
1422 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1423 sep = ",";
1424 }
1425 fprintf (dump_file, "\n");
1426 }
1427
1428 /* Print to dump_file a description of what we're doing when optimizing
1429 for speed or size; SPEED_P says which. DESCRIPTION is a description
1430 of the SPEED_P choice. */
1431
1432 static void
1433 dump_choices (bool speed_p, const char *description)
1434 {
1435 unsigned int size, factor, i;
1436
1437 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1438
1439 for (i = 0; i < MAX_MACHINE_MODE; i++)
1440 if (interesting_mode_p ((machine_mode) i, &size, &factor)
1441 && factor > 1)
1442 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1443 choices[speed_p].move_modes_to_split[i]
1444 ? "Splitting"
1445 : "Skipping",
1446 GET_MODE_NAME ((machine_mode) i));
1447
1448 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1449 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1450 GET_MODE_NAME (twice_word_mode));
1451
1452 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1453 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1454 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1455 fprintf (dump_file, "\n");
1456 }
1457
1458 /* Look for registers which are always accessed via word-sized SUBREGs
1459 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1460 registers into several word-sized pseudo-registers. */
1461
1462 static void
1463 decompose_multiword_subregs (bool decompose_copies)
1464 {
1465 unsigned int max;
1466 basic_block bb;
1467 bool speed_p;
1468
1469 if (dump_file)
1470 {
1471 dump_choices (false, "size");
1472 dump_choices (true, "speed");
1473 }
1474
1475 /* Check if this target even has any modes to consider lowering. */
1476 if (!choices[false].something_to_do && !choices[true].something_to_do)
1477 {
1478 if (dump_file)
1479 fprintf (dump_file, "Nothing to do!\n");
1480 return;
1481 }
1482
1483 max = max_reg_num ();
1484
1485 /* First see if there are any multi-word pseudo-registers. If there
1486 aren't, there is nothing we can do. This should speed up this
1487 pass in the normal case, since it should be faster than scanning
1488 all the insns. */
1489 {
1490 unsigned int i;
1491 bool useful_modes_seen = false;
1492
1493 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1494 if (regno_reg_rtx[i] != NULL)
1495 {
1496 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1497 if (choices[false].move_modes_to_split[(int) mode]
1498 || choices[true].move_modes_to_split[(int) mode])
1499 {
1500 useful_modes_seen = true;
1501 break;
1502 }
1503 }
1504
1505 if (!useful_modes_seen)
1506 {
1507 if (dump_file)
1508 fprintf (dump_file, "Nothing to lower in this function.\n");
1509 return;
1510 }
1511 }
1512
1513 if (df)
1514 {
1515 df_set_flags (DF_DEFER_INSN_RESCAN);
1516 run_word_dce ();
1517 }
1518
1519 /* FIXME: It may be possible to change this code to look for each
1520 multi-word pseudo-register and to find each insn which sets or
1521 uses that register. That should be faster than scanning all the
1522 insns. */
1523
1524 decomposable_context = BITMAP_ALLOC (NULL);
1525 non_decomposable_context = BITMAP_ALLOC (NULL);
1526 subreg_context = BITMAP_ALLOC (NULL);
1527
1528 reg_copy_graph.create (max);
1529 reg_copy_graph.safe_grow_cleared (max, true);
1530 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1531
1532 speed_p = optimize_function_for_speed_p (cfun);
1533 FOR_EACH_BB_FN (bb, cfun)
1534 {
1535 rtx_insn *insn;
1536
1537 FOR_BB_INSNS (bb, insn)
1538 {
1539 rtx set;
1540 enum classify_move_insn cmi;
1541 int i, n;
1542
1543 if (!INSN_P (insn)
1544 || GET_CODE (PATTERN (insn)) == CLOBBER
1545 || GET_CODE (PATTERN (insn)) == USE)
1546 continue;
1547
1548 recog_memoized (insn);
1549
1550 if (find_decomposable_shift_zext (insn, speed_p))
1551 continue;
1552
1553 extract_insn (insn);
1554
1555 set = simple_move (insn, speed_p);
1556
1557 if (!set)
1558 cmi = NOT_SIMPLE_MOVE;
1559 else
1560 {
1561 /* We mark pseudo-to-pseudo copies as decomposable during the
1562 second pass only. The first pass is so early that there is
1563 good chance such moves will be optimized away completely by
1564 subsequent optimizations anyway.
1565
1566 However, we call find_pseudo_copy even during the first pass
1567 so as to properly set up the reg_copy_graph. */
1568 if (find_pseudo_copy (set))
1569 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1570 else
1571 cmi = SIMPLE_MOVE;
1572 }
1573
1574 n = recog_data.n_operands;
1575 for (i = 0; i < n; ++i)
1576 {
1577 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1578
1579 /* We handle ASM_OPERANDS as a special case to support
1580 things like x86 rdtsc which returns a DImode value.
1581 We can decompose the output, which will certainly be
1582 operand 0, but not the inputs. */
1583
1584 if (cmi == SIMPLE_MOVE
1585 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1586 {
1587 gcc_assert (i == 0);
1588 cmi = NOT_SIMPLE_MOVE;
1589 }
1590 }
1591 }
1592 }
1593
1594 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1595 if (!bitmap_empty_p (decomposable_context))
1596 {
1597 unsigned int i;
1598 sbitmap_iterator sbi;
1599 bitmap_iterator iter;
1600 unsigned int regno;
1601
1602 propagate_pseudo_copies ();
1603
1604 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
1605 bitmap_clear (sub_blocks);
1606
1607 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1608 decompose_register (regno);
1609
1610 FOR_EACH_BB_FN (bb, cfun)
1611 {
1612 rtx_insn *insn;
1613
1614 FOR_BB_INSNS (bb, insn)
1615 {
1616 rtx pat;
1617
1618 if (!INSN_P (insn))
1619 continue;
1620
1621 pat = PATTERN (insn);
1622 if (GET_CODE (pat) == CLOBBER)
1623 resolve_clobber (pat, insn);
1624 else if (GET_CODE (pat) == USE)
1625 resolve_use (pat, insn);
1626 else if (DEBUG_INSN_P (insn))
1627 resolve_debug (insn);
1628 else
1629 {
1630 rtx set;
1631 int i;
1632
1633 recog_memoized (insn);
1634 extract_insn (insn);
1635
1636 set = simple_move (insn, speed_p);
1637 if (set)
1638 {
1639 rtx_insn *orig_insn = insn;
1640 bool cfi = control_flow_insn_p (insn);
1641
1642 /* We can end up splitting loads to multi-word pseudos
1643 into separate loads to machine word size pseudos.
1644 When this happens, we first had one load that can
1645 throw, and after resolve_simple_move we'll have a
1646 bunch of loads (at least two). All those loads may
1647 trap if we can have non-call exceptions, so they
1648 all will end the current basic block. We split the
1649 block after the outer loop over all insns, but we
1650 make sure here that we will be able to split the
1651 basic block and still produce the correct control
1652 flow graph for it. */
1653 gcc_assert (!cfi
1654 || (cfun->can_throw_non_call_exceptions
1655 && can_throw_internal (insn)));
1656
1657 insn = resolve_simple_move (set, insn);
1658 if (insn != orig_insn)
1659 {
1660 recog_memoized (insn);
1661 extract_insn (insn);
1662
1663 if (cfi)
1664 bitmap_set_bit (sub_blocks, bb->index);
1665 }
1666 }
1667 else
1668 {
1669 rtx_insn *decomposed_shift;
1670
1671 decomposed_shift = resolve_shift_zext (insn);
1672 if (decomposed_shift != NULL_RTX)
1673 {
1674 insn = decomposed_shift;
1675 recog_memoized (insn);
1676 extract_insn (insn);
1677 }
1678 }
1679
1680 for (i = recog_data.n_operands - 1; i >= 0; --i)
1681 resolve_subreg_use (recog_data.operand_loc[i], insn);
1682
1683 resolve_reg_notes (insn);
1684
1685 if (num_validated_changes () > 0)
1686 {
1687 for (i = recog_data.n_dups - 1; i >= 0; --i)
1688 {
1689 rtx *pl = recog_data.dup_loc[i];
1690 int dup_num = recog_data.dup_num[i];
1691 rtx *px = recog_data.operand_loc[dup_num];
1692
1693 validate_unshare_change (insn, pl, *px, 1);
1694 }
1695
1696 i = apply_change_group ();
1697 gcc_assert (i);
1698 }
1699 }
1700 }
1701 }
1702
1703 /* If we had insns to split that caused control flow insns in the middle
1704 of a basic block, split those blocks now. Note that we only handle
1705 the case where splitting a load has caused multiple possibly trapping
1706 loads to appear. */
1707 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1708 {
1709 rtx_insn *insn, *end;
1710 edge fallthru;
1711
1712 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1713 insn = BB_HEAD (bb);
1714 end = BB_END (bb);
1715
1716 while (insn != end)
1717 {
1718 if (control_flow_insn_p (insn))
1719 {
1720 /* Split the block after insn. There will be a fallthru
1721 edge, which is OK so we keep it. We have to create the
1722 exception edges ourselves. */
1723 fallthru = split_block (bb, insn);
1724 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1725 bb = fallthru->dest;
1726 insn = BB_HEAD (bb);
1727 }
1728 else
1729 insn = NEXT_INSN (insn);
1730 }
1731 }
1732 }
1733
1734 {
1735 unsigned int i;
1736 bitmap b;
1737
1738 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1739 if (b)
1740 BITMAP_FREE (b);
1741 }
1742
1743 reg_copy_graph.release ();
1744
1745 BITMAP_FREE (decomposable_context);
1746 BITMAP_FREE (non_decomposable_context);
1747 BITMAP_FREE (subreg_context);
1748 }
1749 \f
1750 /* Implement first lower subreg pass. */
1751
1752 namespace {
1753
1754 const pass_data pass_data_lower_subreg =
1755 {
1756 RTL_PASS, /* type */
1757 "subreg1", /* name */
1758 OPTGROUP_NONE, /* optinfo_flags */
1759 TV_LOWER_SUBREG, /* tv_id */
1760 0, /* properties_required */
1761 0, /* properties_provided */
1762 0, /* properties_destroyed */
1763 0, /* todo_flags_start */
1764 0, /* todo_flags_finish */
1765 };
1766
1767 class pass_lower_subreg : public rtl_opt_pass
1768 {
1769 public:
1770 pass_lower_subreg (gcc::context *ctxt)
1771 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1772 {}
1773
1774 /* opt_pass methods: */
1775 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1776 virtual unsigned int execute (function *)
1777 {
1778 decompose_multiword_subregs (false);
1779 return 0;
1780 }
1781
1782 }; // class pass_lower_subreg
1783
1784 } // anon namespace
1785
1786 rtl_opt_pass *
1787 make_pass_lower_subreg (gcc::context *ctxt)
1788 {
1789 return new pass_lower_subreg (ctxt);
1790 }
1791
1792 /* Implement second lower subreg pass. */
1793
1794 namespace {
1795
1796 const pass_data pass_data_lower_subreg2 =
1797 {
1798 RTL_PASS, /* type */
1799 "subreg2", /* name */
1800 OPTGROUP_NONE, /* optinfo_flags */
1801 TV_LOWER_SUBREG, /* tv_id */
1802 0, /* properties_required */
1803 0, /* properties_provided */
1804 0, /* properties_destroyed */
1805 0, /* todo_flags_start */
1806 TODO_df_finish, /* todo_flags_finish */
1807 };
1808
1809 class pass_lower_subreg2 : public rtl_opt_pass
1810 {
1811 public:
1812 pass_lower_subreg2 (gcc::context *ctxt)
1813 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1814 {}
1815
1816 /* opt_pass methods: */
1817 virtual bool gate (function *) { return flag_split_wide_types
1818 && flag_split_wide_types_early; }
1819 virtual unsigned int execute (function *)
1820 {
1821 decompose_multiword_subregs (true);
1822 return 0;
1823 }
1824
1825 }; // class pass_lower_subreg2
1826
1827 } // anon namespace
1828
1829 rtl_opt_pass *
1830 make_pass_lower_subreg2 (gcc::context *ctxt)
1831 {
1832 return new pass_lower_subreg2 (ctxt);
1833 }
1834
1835 /* Implement third lower subreg pass. */
1836
1837 namespace {
1838
1839 const pass_data pass_data_lower_subreg3 =
1840 {
1841 RTL_PASS, /* type */
1842 "subreg3", /* name */
1843 OPTGROUP_NONE, /* optinfo_flags */
1844 TV_LOWER_SUBREG, /* tv_id */
1845 0, /* properties_required */
1846 0, /* properties_provided */
1847 0, /* properties_destroyed */
1848 0, /* todo_flags_start */
1849 TODO_df_finish, /* todo_flags_finish */
1850 };
1851
1852 class pass_lower_subreg3 : public rtl_opt_pass
1853 {
1854 public:
1855 pass_lower_subreg3 (gcc::context *ctxt)
1856 : rtl_opt_pass (pass_data_lower_subreg3, ctxt)
1857 {}
1858
1859 /* opt_pass methods: */
1860 virtual bool gate (function *) { return flag_split_wide_types; }
1861 virtual unsigned int execute (function *)
1862 {
1863 decompose_multiword_subregs (true);
1864 return 0;
1865 }
1866
1867 }; // class pass_lower_subreg3
1868
1869 } // anon namespace
1870
1871 rtl_opt_pass *
1872 make_pass_lower_subreg3 (gcc::context *ctxt)
1873 {
1874 return new pass_lower_subreg3 (ctxt);
1875 }