1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2021 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
26 #include "coretypes.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
47 struct target_expmed default_target_expmed
;
49 struct target_expmed
*this_target_expmed
= &default_target_expmed
;
52 static bool store_integral_bit_field (rtx
, opt_scalar_int_mode
,
53 unsigned HOST_WIDE_INT
,
54 unsigned HOST_WIDE_INT
,
55 poly_uint64
, poly_uint64
,
56 machine_mode
, rtx
, bool, bool);
57 static void store_fixed_bit_field (rtx
, opt_scalar_int_mode
,
58 unsigned HOST_WIDE_INT
,
59 unsigned HOST_WIDE_INT
,
60 poly_uint64
, poly_uint64
,
61 rtx
, scalar_int_mode
, bool);
62 static void store_fixed_bit_field_1 (rtx
, scalar_int_mode
,
63 unsigned HOST_WIDE_INT
,
64 unsigned HOST_WIDE_INT
,
65 rtx
, scalar_int_mode
, bool);
66 static void store_split_bit_field (rtx
, opt_scalar_int_mode
,
67 unsigned HOST_WIDE_INT
,
68 unsigned HOST_WIDE_INT
,
69 poly_uint64
, poly_uint64
,
70 rtx
, scalar_int_mode
, bool);
71 static rtx
extract_integral_bit_field (rtx
, opt_scalar_int_mode
,
72 unsigned HOST_WIDE_INT
,
73 unsigned HOST_WIDE_INT
, int, rtx
,
74 machine_mode
, machine_mode
, bool, bool);
75 static rtx
extract_fixed_bit_field (machine_mode
, rtx
, opt_scalar_int_mode
,
76 unsigned HOST_WIDE_INT
,
77 unsigned HOST_WIDE_INT
, rtx
, int, bool);
78 static rtx
extract_fixed_bit_field_1 (machine_mode
, rtx
, scalar_int_mode
,
79 unsigned HOST_WIDE_INT
,
80 unsigned HOST_WIDE_INT
, rtx
, int, bool);
81 static rtx
lshift_value (machine_mode
, unsigned HOST_WIDE_INT
, int);
82 static rtx
extract_split_bit_field (rtx
, opt_scalar_int_mode
,
83 unsigned HOST_WIDE_INT
,
84 unsigned HOST_WIDE_INT
, int, bool);
85 static void do_cmp_and_jump (rtx
, rtx
, enum rtx_code
, machine_mode
, rtx_code_label
*);
86 static rtx
expand_smod_pow2 (scalar_int_mode
, rtx
, HOST_WIDE_INT
);
87 static rtx
expand_sdiv_pow2 (scalar_int_mode
, rtx
, HOST_WIDE_INT
);
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
95 mask_rtx (scalar_int_mode mode
, int bitpos
, int bitsize
, bool complement
)
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos
, bitsize
, complement
,
99 GET_MODE_PRECISION (mode
)), mode
);
/* Test whether a value is zero or a power of two.  Relies on the
   classic x & (x - 1) trick, which clears the lowest set bit: the
   result is zero exactly when at most one bit was set.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
106 struct init_expmed_rtl
127 rtx pow2
[MAX_BITS_PER_WORD
];
128 rtx cint
[MAX_BITS_PER_WORD
];
132 init_expmed_one_conv (struct init_expmed_rtl
*all
, scalar_int_mode to_mode
,
133 scalar_int_mode from_mode
, bool speed
)
135 int to_size
, from_size
;
138 to_size
= GET_MODE_PRECISION (to_mode
);
139 from_size
= GET_MODE_PRECISION (from_mode
);
141 /* Most partial integers have a precision less than the "full"
142 integer it requires for storage. In case one doesn't, for
143 comparison purposes here, reduce the bit size by one in that
145 if (GET_MODE_CLASS (to_mode
) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size
))
148 if (GET_MODE_CLASS (from_mode
) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size
))
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which
= (to_size
< from_size
? all
->trunc
: all
->zext
);
155 PUT_MODE (all
->reg
, from_mode
);
156 set_convert_cost (to_mode
, from_mode
, speed
,
157 set_src_cost (which
, to_mode
, speed
));
158 /* Restore all->reg's mode. */
159 PUT_MODE (all
->reg
, to_mode
);
163 init_expmed_one_mode (struct init_expmed_rtl
*all
,
164 machine_mode mode
, int speed
)
166 int m
, n
, mode_bitsize
;
167 machine_mode mode_from
;
169 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
171 PUT_MODE (all
->reg
, mode
);
172 PUT_MODE (all
->plus
, mode
);
173 PUT_MODE (all
->neg
, mode
);
174 PUT_MODE (all
->mult
, mode
);
175 PUT_MODE (all
->sdiv
, mode
);
176 PUT_MODE (all
->udiv
, mode
);
177 PUT_MODE (all
->sdiv_32
, mode
);
178 PUT_MODE (all
->smod_32
, mode
);
179 PUT_MODE (all
->wide_trunc
, mode
);
180 PUT_MODE (all
->shift
, mode
);
181 PUT_MODE (all
->shift_mult
, mode
);
182 PUT_MODE (all
->shift_add
, mode
);
183 PUT_MODE (all
->shift_sub0
, mode
);
184 PUT_MODE (all
->shift_sub1
, mode
);
185 PUT_MODE (all
->zext
, mode
);
186 PUT_MODE (all
->trunc
, mode
);
188 set_add_cost (speed
, mode
, set_src_cost (all
->plus
, mode
, speed
));
189 set_neg_cost (speed
, mode
, set_src_cost (all
->neg
, mode
, speed
));
190 set_mul_cost (speed
, mode
, set_src_cost (all
->mult
, mode
, speed
));
191 set_sdiv_cost (speed
, mode
, set_src_cost (all
->sdiv
, mode
, speed
));
192 set_udiv_cost (speed
, mode
, set_src_cost (all
->udiv
, mode
, speed
));
194 set_sdiv_pow2_cheap (speed
, mode
, (set_src_cost (all
->sdiv_32
, mode
, speed
)
195 <= 2 * add_cost (speed
, mode
)));
196 set_smod_pow2_cheap (speed
, mode
, (set_src_cost (all
->smod_32
, mode
, speed
)
197 <= 4 * add_cost (speed
, mode
)));
199 set_shift_cost (speed
, mode
, 0, 0);
201 int cost
= add_cost (speed
, mode
);
202 set_shiftadd_cost (speed
, mode
, 0, cost
);
203 set_shiftsub0_cost (speed
, mode
, 0, cost
);
204 set_shiftsub1_cost (speed
, mode
, 0, cost
);
207 n
= MIN (MAX_BITS_PER_WORD
, mode_bitsize
);
208 for (m
= 1; m
< n
; m
++)
210 XEXP (all
->shift
, 1) = all
->cint
[m
];
211 XEXP (all
->shift_mult
, 1) = all
->pow2
[m
];
213 set_shift_cost (speed
, mode
, m
, set_src_cost (all
->shift
, mode
, speed
));
214 set_shiftadd_cost (speed
, mode
, m
, set_src_cost (all
->shift_add
, mode
,
216 set_shiftsub0_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub0
, mode
,
218 set_shiftsub1_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub1
, mode
,
222 scalar_int_mode int_mode_to
;
223 if (is_a
<scalar_int_mode
> (mode
, &int_mode_to
))
225 for (mode_from
= MIN_MODE_INT
; mode_from
<= MAX_MODE_INT
;
226 mode_from
= (machine_mode
)(mode_from
+ 1))
227 init_expmed_one_conv (all
, int_mode_to
,
228 as_a
<scalar_int_mode
> (mode_from
), speed
);
230 scalar_int_mode wider_mode
;
231 if (GET_MODE_CLASS (int_mode_to
) == MODE_INT
232 && GET_MODE_WIDER_MODE (int_mode_to
).exists (&wider_mode
))
234 PUT_MODE (all
->reg
, mode
);
235 PUT_MODE (all
->zext
, wider_mode
);
236 PUT_MODE (all
->wide_mult
, wider_mode
);
237 PUT_MODE (all
->wide_lshr
, wider_mode
);
238 XEXP (all
->wide_lshr
, 1)
239 = gen_int_shift_amount (wider_mode
, mode_bitsize
);
241 set_mul_widen_cost (speed
, wider_mode
,
242 set_src_cost (all
->wide_mult
, wider_mode
, speed
));
243 set_mul_highpart_cost (speed
, int_mode_to
,
244 set_src_cost (all
->wide_trunc
,
245 int_mode_to
, speed
));
253 struct init_expmed_rtl all
;
254 machine_mode mode
= QImode
;
257 memset (&all
, 0, sizeof all
);
258 for (m
= 1; m
< MAX_BITS_PER_WORD
; m
++)
260 all
.pow2
[m
] = GEN_INT (HOST_WIDE_INT_1
<< m
);
261 all
.cint
[m
] = GEN_INT (m
);
264 /* Avoid using hard regs in ways which may be unsupported. */
265 all
.reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
266 all
.plus
= gen_rtx_PLUS (mode
, all
.reg
, all
.reg
);
267 all
.neg
= gen_rtx_NEG (mode
, all
.reg
);
268 all
.mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
269 all
.sdiv
= gen_rtx_DIV (mode
, all
.reg
, all
.reg
);
270 all
.udiv
= gen_rtx_UDIV (mode
, all
.reg
, all
.reg
);
271 all
.sdiv_32
= gen_rtx_DIV (mode
, all
.reg
, all
.pow2
[5]);
272 all
.smod_32
= gen_rtx_MOD (mode
, all
.reg
, all
.pow2
[5]);
273 all
.zext
= gen_rtx_ZERO_EXTEND (mode
, all
.reg
);
274 all
.wide_mult
= gen_rtx_MULT (mode
, all
.zext
, all
.zext
);
275 all
.wide_lshr
= gen_rtx_LSHIFTRT (mode
, all
.wide_mult
, all
.reg
);
276 all
.wide_trunc
= gen_rtx_TRUNCATE (mode
, all
.wide_lshr
);
277 all
.shift
= gen_rtx_ASHIFT (mode
, all
.reg
, all
.reg
);
278 all
.shift_mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
279 all
.shift_add
= gen_rtx_PLUS (mode
, all
.shift_mult
, all
.reg
);
280 all
.shift_sub0
= gen_rtx_MINUS (mode
, all
.shift_mult
, all
.reg
);
281 all
.shift_sub1
= gen_rtx_MINUS (mode
, all
.reg
, all
.shift_mult
);
282 all
.trunc
= gen_rtx_TRUNCATE (mode
, all
.reg
);
284 for (speed
= 0; speed
< 2; speed
++)
286 crtl
->maybe_hot_insn_p
= speed
;
287 set_zero_cost (speed
, set_src_cost (const0_rtx
, mode
, speed
));
289 for (mode
= MIN_MODE_INT
; mode
<= MAX_MODE_INT
;
290 mode
= (machine_mode
)(mode
+ 1))
291 init_expmed_one_mode (&all
, mode
, speed
);
293 if (MIN_MODE_PARTIAL_INT
!= VOIDmode
)
294 for (mode
= MIN_MODE_PARTIAL_INT
; mode
<= MAX_MODE_PARTIAL_INT
;
295 mode
= (machine_mode
)(mode
+ 1))
296 init_expmed_one_mode (&all
, mode
, speed
);
298 if (MIN_MODE_VECTOR_INT
!= VOIDmode
)
299 for (mode
= MIN_MODE_VECTOR_INT
; mode
<= MAX_MODE_VECTOR_INT
;
300 mode
= (machine_mode
)(mode
+ 1))
301 init_expmed_one_mode (&all
, mode
, speed
);
304 if (alg_hash_used_p ())
306 struct alg_hash_entry
*p
= alg_hash_entry_ptr (0);
307 memset (p
, 0, sizeof (*p
) * NUM_ALG_HASH_ENTRIES
);
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
313 ggc_free (all
.trunc
);
314 ggc_free (all
.shift_sub1
);
315 ggc_free (all
.shift_sub0
);
316 ggc_free (all
.shift_add
);
317 ggc_free (all
.shift_mult
);
318 ggc_free (all
.shift
);
319 ggc_free (all
.wide_trunc
);
320 ggc_free (all
.wide_lshr
);
321 ggc_free (all
.wide_mult
);
323 ggc_free (all
.smod_32
);
324 ggc_free (all
.sdiv_32
);
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
338 negate_rtx (machine_mode mode
, rtx x
)
340 rtx result
= simplify_unary_operation (NEG
, mode
, x
, mode
);
343 result
= expand_unop (mode
, neg_optab
, x
, NULL_RTX
, 0);
348 /* Whether reverse storage order is supported on the target. */
349 static int reverse_storage_order_supported
= -1;
351 /* Check whether reverse storage order is supported on the target. */
354 check_reverse_storage_order_support (void)
356 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
358 reverse_storage_order_supported
= 0;
359 sorry ("reverse scalar storage order");
362 reverse_storage_order_supported
= 1;
365 /* Whether reverse FP storage order is supported on the target. */
366 static int reverse_float_storage_order_supported
= -1;
368 /* Check whether reverse FP storage order is supported on the target. */
371 check_reverse_float_storage_order_support (void)
373 if (FLOAT_WORDS_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
375 reverse_float_storage_order_supported
= 0;
376 sorry ("reverse floating-point scalar storage order");
379 reverse_float_storage_order_supported
= 1;
382 /* Return an rtx representing value of X with reverse storage order.
383 MODE is the intended mode of the result,
384 useful if X is a CONST_INT. */
387 flip_storage_order (machine_mode mode
, rtx x
)
389 scalar_int_mode int_mode
;
395 if (COMPLEX_MODE_P (mode
))
397 rtx real
= read_complex_part (x
, false);
398 rtx imag
= read_complex_part (x
, true);
400 real
= flip_storage_order (GET_MODE_INNER (mode
), real
);
401 imag
= flip_storage_order (GET_MODE_INNER (mode
), imag
);
403 return gen_rtx_CONCAT (mode
, real
, imag
);
406 if (__builtin_expect (reverse_storage_order_supported
< 0, 0))
407 check_reverse_storage_order_support ();
409 if (!is_a
<scalar_int_mode
> (mode
, &int_mode
))
411 if (FLOAT_MODE_P (mode
)
412 && __builtin_expect (reverse_float_storage_order_supported
< 0, 0))
413 check_reverse_float_storage_order_support ();
415 if (!int_mode_for_size (GET_MODE_PRECISION (mode
), 0).exists (&int_mode
)
416 || !targetm
.scalar_mode_supported_p (int_mode
))
418 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode
));
421 x
= gen_lowpart (int_mode
, x
);
424 result
= simplify_unary_operation (BSWAP
, int_mode
, x
, int_mode
);
426 result
= expand_unop (int_mode
, bswap_optab
, x
, NULL_RTX
, 1);
428 if (int_mode
!= mode
)
429 result
= gen_lowpart (mode
, result
);
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435 first unit of mode MODE that contains a bitfield of size BITSIZE at
436 bit position BITNUM. If MODE is not set, return a BLKmode reference
437 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
438 of the field within the new memory. */
441 narrow_bit_field_mem (rtx mem
, opt_scalar_int_mode mode
,
442 unsigned HOST_WIDE_INT bitsize
,
443 unsigned HOST_WIDE_INT bitnum
,
444 unsigned HOST_WIDE_INT
*new_bitnum
)
446 scalar_int_mode imode
;
447 if (mode
.exists (&imode
))
449 unsigned int unit
= GET_MODE_BITSIZE (imode
);
450 *new_bitnum
= bitnum
% unit
;
451 HOST_WIDE_INT offset
= (bitnum
- *new_bitnum
) / BITS_PER_UNIT
;
452 return adjust_bitfield_address (mem
, imode
, offset
);
456 *new_bitnum
= bitnum
% BITS_PER_UNIT
;
457 HOST_WIDE_INT offset
= bitnum
/ BITS_PER_UNIT
;
458 HOST_WIDE_INT size
= ((*new_bitnum
+ bitsize
+ BITS_PER_UNIT
- 1)
460 return adjust_bitfield_address_size (mem
, BLKmode
, offset
, size
);
464 /* The caller wants to perform insertion or extraction PATTERN on a
465 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466 BITREGION_START and BITREGION_END are as for store_bit_field
467 and FIELDMODE is the natural mode of the field.
469 Search for a mode that is compatible with the memory access
470 restrictions and (where applicable) with a register insertion or
471 extraction. Return the new memory on success, storing the adjusted
472 bit position in *NEW_BITNUM. Return null otherwise. */
475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern
,
476 rtx op0
, HOST_WIDE_INT bitsize
,
477 HOST_WIDE_INT bitnum
,
478 poly_uint64 bitregion_start
,
479 poly_uint64 bitregion_end
,
480 machine_mode fieldmode
,
481 unsigned HOST_WIDE_INT
*new_bitnum
)
483 bit_field_mode_iterator
iter (bitsize
, bitnum
, bitregion_start
,
484 bitregion_end
, MEM_ALIGN (op0
),
485 MEM_VOLATILE_P (op0
));
486 scalar_int_mode best_mode
;
487 if (iter
.next_mode (&best_mode
))
489 /* We can use a memory in BEST_MODE. See whether this is true for
490 any wider modes. All other things being equal, we prefer to
491 use the widest mode possible because it tends to expose more
492 CSE opportunities. */
493 if (!iter
.prefer_smaller_modes ())
495 /* Limit the search to the mode required by the corresponding
496 register insertion or extraction instruction, if any. */
497 scalar_int_mode limit_mode
= word_mode
;
498 extraction_insn insn
;
499 if (get_best_reg_extraction_insn (&insn
, pattern
,
500 GET_MODE_BITSIZE (best_mode
),
502 limit_mode
= insn
.field_mode
;
504 scalar_int_mode wider_mode
;
505 while (iter
.next_mode (&wider_mode
)
506 && GET_MODE_SIZE (wider_mode
) <= GET_MODE_SIZE (limit_mode
))
507 best_mode
= wider_mode
;
509 return narrow_bit_field_mem (op0
, best_mode
, bitsize
, bitnum
,
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
517 offset is then BITNUM / BITS_PER_UNIT. */
520 lowpart_bit_field_p (poly_uint64 bitnum
, poly_uint64 bitsize
,
521 machine_mode struct_mode
)
523 poly_uint64 regsize
= REGMODE_NATURAL_SIZE (struct_mode
);
524 if (BYTES_BIG_ENDIAN
)
525 return (multiple_p (bitnum
, BITS_PER_UNIT
)
526 && (known_eq (bitnum
+ bitsize
, GET_MODE_BITSIZE (struct_mode
))
527 || multiple_p (bitnum
+ bitsize
,
528 regsize
* BITS_PER_UNIT
)));
530 return multiple_p (bitnum
, regsize
* BITS_PER_UNIT
);
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535 Return false if the access would touch memory outside the range
536 BITREGION_START to BITREGION_END for conformance to the C++ memory
540 strict_volatile_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
541 unsigned HOST_WIDE_INT bitnum
,
542 scalar_int_mode fieldmode
,
543 poly_uint64 bitregion_start
,
544 poly_uint64 bitregion_end
)
546 unsigned HOST_WIDE_INT modesize
= GET_MODE_BITSIZE (fieldmode
);
548 /* -fstrict-volatile-bitfields must be enabled and we must have a
551 || !MEM_VOLATILE_P (op0
)
552 || flag_strict_volatile_bitfields
<= 0)
555 /* The bit size must not be larger than the field mode, and
556 the field mode must not be larger than a word. */
557 if (bitsize
> modesize
|| modesize
> BITS_PER_WORD
)
560 /* Check for cases of unaligned fields that must be split. */
561 if (bitnum
% modesize
+ bitsize
> modesize
)
564 /* The memory must be sufficiently aligned for a MODESIZE access.
565 This condition guarantees, that the memory access will not
566 touch anything after the end of the structure. */
567 if (MEM_ALIGN (op0
) < modesize
)
570 /* Check for cases where the C++ memory model applies. */
571 if (maybe_ne (bitregion_end
, 0U)
572 && (maybe_lt (bitnum
- bitnum
% modesize
, bitregion_start
)
573 || maybe_gt (bitnum
- bitnum
% modesize
+ modesize
- 1,
580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
581 bit number BITNUM can be treated as a simple value of mode MODE.
582 Store the byte offset in *BYTENUM if so. */
585 simple_mem_bitfield_p (rtx op0
, poly_uint64 bitsize
, poly_uint64 bitnum
,
586 machine_mode mode
, poly_uint64
*bytenum
)
589 && multiple_p (bitnum
, BITS_PER_UNIT
, bytenum
)
590 && known_eq (bitsize
, GET_MODE_BITSIZE (mode
))
591 && (!targetm
.slow_unaligned_access (mode
, MEM_ALIGN (op0
))
592 || (multiple_p (bitnum
, GET_MODE_ALIGNMENT (mode
))
593 && MEM_ALIGN (op0
) >= GET_MODE_ALIGNMENT (mode
))));
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
599 are as for store_bit_field. */
602 store_bit_field_using_insv (const extraction_insn
*insv
, rtx op0
,
603 opt_scalar_int_mode op0_mode
,
604 unsigned HOST_WIDE_INT bitsize
,
605 unsigned HOST_WIDE_INT bitnum
,
606 rtx value
, scalar_int_mode value_mode
)
608 class expand_operand ops
[4];
611 rtx_insn
*last
= get_last_insn ();
612 bool copy_back
= false;
614 scalar_int_mode op_mode
= insv
->field_mode
;
615 unsigned int unit
= GET_MODE_BITSIZE (op_mode
);
616 if (bitsize
== 0 || bitsize
> unit
)
620 /* Get a reference to the first byte of the field. */
621 xop0
= narrow_bit_field_mem (xop0
, insv
->struct_mode
, bitsize
, bitnum
,
625 /* Convert from counting within OP0 to counting in OP_MODE. */
626 if (BYTES_BIG_ENDIAN
)
627 bitnum
+= unit
- GET_MODE_BITSIZE (op0_mode
.require ());
629 /* If xop0 is a register, we need it in OP_MODE
630 to make it acceptable to the format of insv. */
631 if (GET_CODE (xop0
) == SUBREG
)
632 /* We can't just change the mode, because this might clobber op0,
633 and we will need the original value of op0 if insv fails. */
634 xop0
= gen_rtx_SUBREG (op_mode
, SUBREG_REG (xop0
), SUBREG_BYTE (xop0
));
635 if (REG_P (xop0
) && GET_MODE (xop0
) != op_mode
)
636 xop0
= gen_lowpart_SUBREG (op_mode
, xop0
);
639 /* If the destination is a paradoxical subreg such that we need a
640 truncate to the inner mode, perform the insertion on a temporary and
641 truncate the result to the original destination. Note that we can't
642 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
643 X) 0)) is (reg:N X). */
644 if (GET_CODE (xop0
) == SUBREG
645 && REG_P (SUBREG_REG (xop0
))
646 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0
)),
649 rtx tem
= gen_reg_rtx (op_mode
);
650 emit_move_insn (tem
, xop0
);
655 /* There are similar overflow check at the start of store_bit_field_1,
656 but that only check the situation where the field lies completely
657 outside the register, while there do have situation where the field
658 lies partialy in the register, we need to adjust bitsize for this
659 partial overflow situation. Without this fix, pr48335-2.c on big-endian
660 will broken on those arch support bit insert instruction, like arm, aarch64
662 if (bitsize
+ bitnum
> unit
&& bitnum
< unit
)
664 warning (OPT_Wextra
, "write of %wu-bit data outside the bound of "
665 "destination object, data truncated into %wu-bit",
666 bitsize
, unit
- bitnum
);
667 bitsize
= unit
- bitnum
;
670 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
671 "backwards" from the size of the unit we are inserting into.
672 Otherwise, we count bits from the most significant on a
673 BYTES/BITS_BIG_ENDIAN machine. */
675 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
676 bitnum
= unit
- bitsize
- bitnum
;
678 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
680 if (value_mode
!= op_mode
)
682 if (GET_MODE_BITSIZE (value_mode
) >= bitsize
)
685 /* Optimization: Don't bother really extending VALUE
686 if it has all the bits we will actually use. However,
687 if we must narrow it, be sure we do it correctly. */
689 if (GET_MODE_SIZE (value_mode
) < GET_MODE_SIZE (op_mode
))
691 tmp
= simplify_subreg (op_mode
, value1
, value_mode
, 0);
693 tmp
= simplify_gen_subreg (op_mode
,
694 force_reg (value_mode
, value1
),
699 tmp
= gen_lowpart_if_possible (op_mode
, value1
);
701 tmp
= gen_lowpart (op_mode
, force_reg (value_mode
, value1
));
705 else if (CONST_INT_P (value
))
706 value1
= gen_int_mode (INTVAL (value
), op_mode
);
708 /* Parse phase is supposed to make VALUE's data type
709 match that of the component reference, which is a type
710 at least as wide as the field; so VALUE should have
711 a mode that corresponds to that type. */
712 gcc_assert (CONSTANT_P (value
));
715 create_fixed_operand (&ops
[0], xop0
);
716 create_integer_operand (&ops
[1], bitsize
);
717 create_integer_operand (&ops
[2], bitnum
);
718 create_input_operand (&ops
[3], value1
, op_mode
);
719 if (maybe_expand_insn (insv
->icode
, 4, ops
))
722 convert_move (op0
, xop0
, true);
725 delete_insns_since (last
);
729 /* A subroutine of store_bit_field, with the same arguments. Return true
730 if the operation could be implemented.
732 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
733 no other way of implementing the operation. If FALLBACK_P is false,
734 return false instead. */
737 store_bit_field_1 (rtx str_rtx
, poly_uint64 bitsize
, poly_uint64 bitnum
,
738 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
,
739 machine_mode fieldmode
,
740 rtx value
, bool reverse
, bool fallback_p
)
744 while (GET_CODE (op0
) == SUBREG
)
746 bitnum
+= subreg_memory_offset (op0
) * BITS_PER_UNIT
;
747 op0
= SUBREG_REG (op0
);
750 /* No action is needed if the target is a register and if the field
751 lies completely outside that register. This can occur if the source
752 code contains an out-of-bounds access to a small array. */
753 if (REG_P (op0
) && known_ge (bitnum
, GET_MODE_BITSIZE (GET_MODE (op0
))))
756 /* Use vec_set patterns for inserting parts of vectors whenever
758 machine_mode outermode
= GET_MODE (op0
);
759 scalar_mode innermode
= GET_MODE_INNER (outermode
);
761 if (VECTOR_MODE_P (outermode
)
763 && optab_handler (vec_set_optab
, outermode
) != CODE_FOR_nothing
764 && fieldmode
== innermode
765 && known_eq (bitsize
, GET_MODE_BITSIZE (innermode
))
766 && multiple_p (bitnum
, GET_MODE_BITSIZE (innermode
), &pos
))
768 class expand_operand ops
[3];
769 enum insn_code icode
= optab_handler (vec_set_optab
, outermode
);
771 create_fixed_operand (&ops
[0], op0
);
772 create_input_operand (&ops
[1], value
, innermode
);
773 create_integer_operand (&ops
[2], pos
);
774 if (maybe_expand_insn (icode
, 3, ops
))
778 /* If the target is a register, overwriting the entire object, or storing
779 a full-word or multi-word field can be done with just a SUBREG. */
781 && known_eq (bitsize
, GET_MODE_BITSIZE (fieldmode
)))
783 /* Use the subreg machinery either to narrow OP0 to the required
784 words or to cope with mode punning between equal-sized modes.
785 In the latter case, use subreg on the rhs side, not lhs. */
787 HOST_WIDE_INT regnum
;
788 poly_uint64 regsize
= REGMODE_NATURAL_SIZE (GET_MODE (op0
));
789 if (known_eq (bitnum
, 0U)
790 && known_eq (bitsize
, GET_MODE_BITSIZE (GET_MODE (op0
))))
792 sub
= simplify_gen_subreg (GET_MODE (op0
), value
, fieldmode
, 0);
796 sub
= flip_storage_order (GET_MODE (op0
), sub
);
797 emit_move_insn (op0
, sub
);
801 else if (constant_multiple_p (bitnum
, regsize
* BITS_PER_UNIT
, ®num
)
802 && multiple_p (bitsize
, regsize
* BITS_PER_UNIT
))
804 sub
= simplify_gen_subreg (fieldmode
, op0
, GET_MODE (op0
),
809 value
= flip_storage_order (fieldmode
, value
);
810 emit_move_insn (sub
, value
);
816 /* If the target is memory, storing any naturally aligned field can be
817 done with a simple store. For targets that support fast unaligned
818 memory, any naturally sized, unit aligned field can be done directly. */
820 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, fieldmode
, &bytenum
))
822 op0
= adjust_bitfield_address (op0
, fieldmode
, bytenum
);
824 value
= flip_storage_order (fieldmode
, value
);
825 emit_move_insn (op0
, value
);
829 /* It's possible we'll need to handle other cases here for
830 polynomial bitnum and bitsize. */
832 /* From here on we need to be looking at a fixed-size insertion. */
833 unsigned HOST_WIDE_INT ibitsize
= bitsize
.to_constant ();
834 unsigned HOST_WIDE_INT ibitnum
= bitnum
.to_constant ();
836 /* Make sure we are playing with integral modes. Pun with subregs
837 if we aren't. This must come after the entire register case above,
838 since that case is valid for any mode. The following cases are only
839 valid for integral modes. */
840 opt_scalar_int_mode op0_mode
= int_mode_for_mode (GET_MODE (op0
));
841 scalar_int_mode imode
;
842 if (!op0_mode
.exists (&imode
) || imode
!= GET_MODE (op0
))
845 op0
= adjust_bitfield_address_size (op0
, op0_mode
.else_blk (),
847 else if (!op0_mode
.exists ())
850 && known_eq (ibitsize
, GET_MODE_BITSIZE (GET_MODE (op0
)))
854 value
= adjust_address (value
, GET_MODE (op0
), 0);
855 emit_move_insn (op0
, value
);
860 rtx temp
= assign_stack_temp (GET_MODE (op0
),
861 GET_MODE_SIZE (GET_MODE (op0
)));
862 emit_move_insn (temp
, op0
);
863 store_bit_field_1 (temp
, bitsize
, bitnum
, 0, 0, fieldmode
, value
,
864 reverse
, fallback_p
);
865 emit_move_insn (op0
, temp
);
869 op0
= gen_lowpart (op0_mode
.require (), op0
);
872 return store_integral_bit_field (op0
, op0_mode
, ibitsize
, ibitnum
,
873 bitregion_start
, bitregion_end
,
874 fieldmode
, value
, reverse
, fallback_p
);
877 /* Subroutine of store_bit_field_1, with the same arguments, except
878 that BITSIZE and BITNUM are constant. Handle cases specific to
879 integral modes. If OP0_MODE is defined, it is the mode of OP0,
880 otherwise OP0 is a BLKmode MEM. */
883 store_integral_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
884 unsigned HOST_WIDE_INT bitsize
,
885 unsigned HOST_WIDE_INT bitnum
,
886 poly_uint64 bitregion_start
,
887 poly_uint64 bitregion_end
,
888 machine_mode fieldmode
,
889 rtx value
, bool reverse
, bool fallback_p
)
891 /* Storing an lsb-aligned field in a register
892 can be done with a movstrict instruction. */
896 && lowpart_bit_field_p (bitnum
, bitsize
, op0_mode
.require ())
897 && known_eq (bitsize
, GET_MODE_BITSIZE (fieldmode
))
898 && optab_handler (movstrict_optab
, fieldmode
) != CODE_FOR_nothing
)
900 class expand_operand ops
[2];
901 enum insn_code icode
= optab_handler (movstrict_optab
, fieldmode
);
903 unsigned HOST_WIDE_INT subreg_off
;
905 if (GET_CODE (arg0
) == SUBREG
)
907 /* Else we've got some float mode source being extracted into
908 a different float mode destination -- this combination of
909 subregs results in Severe Tire Damage. */
910 gcc_assert (GET_MODE (SUBREG_REG (arg0
)) == fieldmode
911 || GET_MODE_CLASS (fieldmode
) == MODE_INT
912 || GET_MODE_CLASS (fieldmode
) == MODE_PARTIAL_INT
);
913 arg0
= SUBREG_REG (arg0
);
916 subreg_off
= bitnum
/ BITS_PER_UNIT
;
917 if (validate_subreg (fieldmode
, GET_MODE (arg0
), arg0
, subreg_off
))
919 arg0
= gen_rtx_SUBREG (fieldmode
, arg0
, subreg_off
);
921 create_fixed_operand (&ops
[0], arg0
);
922 /* Shrink the source operand to FIELDMODE. */
923 create_convert_operand_to (&ops
[1], value
, fieldmode
, false);
924 if (maybe_expand_insn (icode
, 2, ops
))
929 /* Handle fields bigger than a word. */
931 if (bitsize
> BITS_PER_WORD
)
933 /* Here we transfer the words of the field
934 in the order least significant first.
935 This is because the most significant word is the one which may
937 However, only do that if the value is not BLKmode. */
939 const bool backwards
= WORDS_BIG_ENDIAN
&& fieldmode
!= BLKmode
;
940 const int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
943 /* This is the mode we must force value to, so that there will be enough
944 subwords to extract. Note that fieldmode will often (always?) be
945 VOIDmode, because that is what store_field uses to indicate that this
946 is a bit field, but passing VOIDmode to operand_subword_force
949 The mode must be fixed-size, since insertions into variable-sized
950 objects are meant to be handled before calling this function. */
951 fixed_size_mode value_mode
= as_a
<fixed_size_mode
> (GET_MODE (value
));
952 if (value_mode
== VOIDmode
)
953 value_mode
= smallest_int_mode_for_size (nwords
* BITS_PER_WORD
);
955 last
= get_last_insn ();
956 for (int i
= 0; i
< nwords
; i
++)
958 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
959 except maybe for the last iteration. */
960 const unsigned HOST_WIDE_INT new_bitsize
961 = MIN (BITS_PER_WORD
, bitsize
- i
* BITS_PER_WORD
);
962 /* Bit offset from the starting bit number in the target. */
963 const unsigned int bit_offset
964 = backwards
^ reverse
965 ? MAX ((int) bitsize
- (i
+ 1) * BITS_PER_WORD
, 0)
967 /* Starting word number in the value. */
968 const unsigned int wordnum
970 ? GET_MODE_SIZE (value_mode
) / UNITS_PER_WORD
- (i
+ 1)
972 /* The chunk of the value in word_mode. We use bit-field extraction
973 in BLKmode to handle unaligned memory references and to shift the
974 last chunk right on big-endian machines if need be. */
976 = fieldmode
== BLKmode
977 ? extract_bit_field (value
, new_bitsize
, wordnum
* BITS_PER_WORD
,
978 1, NULL_RTX
, word_mode
, word_mode
, false,
980 : operand_subword_force (value
, wordnum
, value_mode
);
982 if (!store_bit_field_1 (op0
, new_bitsize
,
984 bitregion_start
, bitregion_end
,
986 value_word
, reverse
, fallback_p
))
988 delete_insns_since (last
);
995 /* If VALUE has a floating-point or complex mode, access it as an
996 integer of the corresponding size. This can occur on a machine
997 with 64 bit registers that uses SFmode for float. It can also
998 occur for unaligned float or complex fields. */
999 rtx orig_value
= value
;
1000 scalar_int_mode value_mode
;
1001 if (GET_MODE (value
) == VOIDmode
)
1002 /* By this point we've dealt with values that are bigger than a word,
1003 so word_mode is a conservatively correct choice. */
1004 value_mode
= word_mode
;
1005 else if (!is_a
<scalar_int_mode
> (GET_MODE (value
), &value_mode
))
1007 value_mode
= int_mode_for_mode (GET_MODE (value
)).require ();
1008 value
= gen_reg_rtx (value_mode
);
1009 emit_move_insn (gen_lowpart (GET_MODE (orig_value
), value
), orig_value
);
1012 /* If OP0 is a multi-word register, narrow it to the affected word.
1013 If the region spans two words, defer to store_split_bit_field.
1014 Don't do this if op0 is a single hard register wider than word
1015 such as a float or vector register. */
1017 && GET_MODE_SIZE (op0_mode
.require ()) > UNITS_PER_WORD
1019 || !HARD_REGISTER_P (op0
)
1020 || hard_regno_nregs (REGNO (op0
), op0_mode
.require ()) != 1))
1022 if (bitnum
% BITS_PER_WORD
+ bitsize
> BITS_PER_WORD
)
1027 store_split_bit_field (op0
, op0_mode
, bitsize
, bitnum
,
1028 bitregion_start
, bitregion_end
,
1029 value
, value_mode
, reverse
);
1032 op0
= simplify_gen_subreg (word_mode
, op0
, op0_mode
.require (),
1033 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
1035 op0_mode
= word_mode
;
1036 bitnum
%= BITS_PER_WORD
;
1039 /* From here on we can assume that the field to be stored in fits
1040 within a word. If the destination is a register, it too fits
1043 extraction_insn insv
;
1046 && get_best_reg_extraction_insn (&insv
, EP_insv
,
1047 GET_MODE_BITSIZE (op0_mode
.require ()),
1049 && store_bit_field_using_insv (&insv
, op0
, op0_mode
,
1050 bitsize
, bitnum
, value
, value_mode
))
1053 /* If OP0 is a memory, try copying it to a register and seeing if a
1054 cheap register alternative is available. */
1055 if (MEM_P (op0
) && !reverse
)
1057 if (get_best_mem_extraction_insn (&insv
, EP_insv
, bitsize
, bitnum
,
1059 && store_bit_field_using_insv (&insv
, op0
, op0_mode
,
1060 bitsize
, bitnum
, value
, value_mode
))
1063 rtx_insn
*last
= get_last_insn ();
1065 /* Try loading part of OP0 into a register, inserting the bitfield
1066 into that, and then copying the result back to OP0. */
1067 unsigned HOST_WIDE_INT bitpos
;
1068 rtx xop0
= adjust_bit_field_mem_for_reg (EP_insv
, op0
, bitsize
, bitnum
,
1069 bitregion_start
, bitregion_end
,
1070 fieldmode
, &bitpos
);
1073 rtx tempreg
= copy_to_reg (xop0
);
1074 if (store_bit_field_1 (tempreg
, bitsize
, bitpos
,
1075 bitregion_start
, bitregion_end
,
1076 fieldmode
, orig_value
, reverse
, false))
1078 emit_move_insn (xop0
, tempreg
);
1081 delete_insns_since (last
);
1088 store_fixed_bit_field (op0
, op0_mode
, bitsize
, bitnum
, bitregion_start
,
1089 bitregion_end
, value
, value_mode
, reverse
);
1093 /* Generate code to store value from rtx VALUE
1094 into a bit-field within structure STR_RTX
1095 containing BITSIZE bits starting at bit BITNUM.
1097 BITREGION_START is bitpos of the first bitfield in this region.
1098 BITREGION_END is the bitpos of the ending bitfield in this region.
1099 These two fields are 0, if the C++ memory model does not apply,
1100 or we are not interested in keeping track of bitfield regions.
1102 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1104 If REVERSE is true, the store is to be done in reverse order. */
1107 store_bit_field (rtx str_rtx
, poly_uint64 bitsize
, poly_uint64 bitnum
,
1108 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
,
1109 machine_mode fieldmode
,
1110 rtx value
, bool reverse
)
1112 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1113 unsigned HOST_WIDE_INT ibitsize
= 0, ibitnum
= 0;
1114 scalar_int_mode int_mode
;
1115 if (bitsize
.is_constant (&ibitsize
)
1116 && bitnum
.is_constant (&ibitnum
)
1117 && is_a
<scalar_int_mode
> (fieldmode
, &int_mode
)
1118 && strict_volatile_bitfield_p (str_rtx
, ibitsize
, ibitnum
, int_mode
,
1119 bitregion_start
, bitregion_end
))
1121 /* Storing of a full word can be done with a simple store.
1122 We know here that the field can be accessed with one single
1123 instruction. For targets that support unaligned memory,
1124 an unaligned access may be necessary. */
1125 if (ibitsize
== GET_MODE_BITSIZE (int_mode
))
1127 str_rtx
= adjust_bitfield_address (str_rtx
, int_mode
,
1128 ibitnum
/ BITS_PER_UNIT
);
1130 value
= flip_storage_order (int_mode
, value
);
1131 gcc_assert (ibitnum
% BITS_PER_UNIT
== 0);
1132 emit_move_insn (str_rtx
, value
);
1138 str_rtx
= narrow_bit_field_mem (str_rtx
, int_mode
, ibitsize
,
1140 gcc_assert (ibitnum
+ ibitsize
<= GET_MODE_BITSIZE (int_mode
));
1141 temp
= copy_to_reg (str_rtx
);
1142 if (!store_bit_field_1 (temp
, ibitsize
, ibitnum
, 0, 0,
1143 int_mode
, value
, reverse
, true))
1146 emit_move_insn (str_rtx
, temp
);
1152 /* Under the C++0x memory model, we must not touch bits outside the
1153 bit region. Adjust the address to start at the beginning of the
1155 if (MEM_P (str_rtx
) && maybe_ne (bitregion_start
, 0U))
1157 scalar_int_mode best_mode
;
1158 machine_mode addr_mode
= VOIDmode
;
1160 poly_uint64 offset
= exact_div (bitregion_start
, BITS_PER_UNIT
);
1161 bitnum
-= bitregion_start
;
1162 poly_int64 size
= bits_to_bytes_round_up (bitnum
+ bitsize
);
1163 bitregion_end
-= bitregion_start
;
1164 bitregion_start
= 0;
1165 if (bitsize
.is_constant (&ibitsize
)
1166 && bitnum
.is_constant (&ibitnum
)
1167 && get_best_mode (ibitsize
, ibitnum
,
1168 bitregion_start
, bitregion_end
,
1169 MEM_ALIGN (str_rtx
), INT_MAX
,
1170 MEM_VOLATILE_P (str_rtx
), &best_mode
))
1171 addr_mode
= best_mode
;
1172 str_rtx
= adjust_bitfield_address_size (str_rtx
, addr_mode
,
1176 if (!store_bit_field_1 (str_rtx
, bitsize
, bitnum
,
1177 bitregion_start
, bitregion_end
,
1178 fieldmode
, value
, reverse
, true))
1182 /* Use shifts and boolean operations to store VALUE into a bit field of
1183 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1184 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1187 If REVERSE is true, the store is to be done in reverse order. */
1190 store_fixed_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
1191 unsigned HOST_WIDE_INT bitsize
,
1192 unsigned HOST_WIDE_INT bitnum
,
1193 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
,
1194 rtx value
, scalar_int_mode value_mode
, bool reverse
)
1196 /* There is a case not handled here:
1197 a structure with a known alignment of just a halfword
1198 and a field split across two aligned halfwords within the structure.
1199 Or likewise a structure with a known alignment of just a byte
1200 and a field split across two bytes.
1201 Such cases are not supposed to be able to occur. */
1203 scalar_int_mode best_mode
;
1206 unsigned int max_bitsize
= BITS_PER_WORD
;
1207 scalar_int_mode imode
;
1208 if (op0_mode
.exists (&imode
) && GET_MODE_BITSIZE (imode
) < max_bitsize
)
1209 max_bitsize
= GET_MODE_BITSIZE (imode
);
1211 if (!get_best_mode (bitsize
, bitnum
, bitregion_start
, bitregion_end
,
1212 MEM_ALIGN (op0
), max_bitsize
, MEM_VOLATILE_P (op0
),
1215 /* The only way this should occur is if the field spans word
1217 store_split_bit_field (op0
, op0_mode
, bitsize
, bitnum
,
1218 bitregion_start
, bitregion_end
,
1219 value
, value_mode
, reverse
);
1223 op0
= narrow_bit_field_mem (op0
, best_mode
, bitsize
, bitnum
, &bitnum
);
1226 best_mode
= op0_mode
.require ();
1228 store_fixed_bit_field_1 (op0
, best_mode
, bitsize
, bitnum
,
1229 value
, value_mode
, reverse
);
1232 /* Helper function for store_fixed_bit_field, stores
1233 the bit field always using MODE, which is the mode of OP0. The other
1234 arguments are as for store_fixed_bit_field. */
1237 store_fixed_bit_field_1 (rtx op0
, scalar_int_mode mode
,
1238 unsigned HOST_WIDE_INT bitsize
,
1239 unsigned HOST_WIDE_INT bitnum
,
1240 rtx value
, scalar_int_mode value_mode
, bool reverse
)
1246 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1247 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1249 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1250 /* BITNUM is the distance between our msb
1251 and that of the containing datum.
1252 Convert it to the distance from the lsb. */
1253 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
1255 /* Now BITNUM is always the distance between our lsb
1258 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1259 we must first convert its mode to MODE. */
1261 if (CONST_INT_P (value
))
1263 unsigned HOST_WIDE_INT v
= UINTVAL (value
);
1265 if (bitsize
< HOST_BITS_PER_WIDE_INT
)
1266 v
&= (HOST_WIDE_INT_1U
<< bitsize
) - 1;
1270 else if ((bitsize
< HOST_BITS_PER_WIDE_INT
1271 && v
== (HOST_WIDE_INT_1U
<< bitsize
) - 1)
1272 || (bitsize
== HOST_BITS_PER_WIDE_INT
1273 && v
== HOST_WIDE_INT_M1U
))
1276 value
= lshift_value (mode
, v
, bitnum
);
1280 int must_and
= (GET_MODE_BITSIZE (value_mode
) != bitsize
1281 && bitnum
+ bitsize
!= GET_MODE_BITSIZE (mode
));
1283 if (value_mode
!= mode
)
1284 value
= convert_to_mode (mode
, value
, 1);
1287 value
= expand_binop (mode
, and_optab
, value
,
1288 mask_rtx (mode
, 0, bitsize
, 0),
1289 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1291 value
= expand_shift (LSHIFT_EXPR
, mode
, value
,
1292 bitnum
, NULL_RTX
, 1);
1296 value
= flip_storage_order (mode
, value
);
1298 /* Now clear the chosen bits in OP0,
1299 except that if VALUE is -1 we need not bother. */
1300 /* We keep the intermediates in registers to allow CSE to combine
1301 consecutive bitfield assignments. */
1303 temp
= force_reg (mode
, op0
);
1307 rtx mask
= mask_rtx (mode
, bitnum
, bitsize
, 1);
1309 mask
= flip_storage_order (mode
, mask
);
1310 temp
= expand_binop (mode
, and_optab
, temp
, mask
,
1311 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1312 temp
= force_reg (mode
, temp
);
1315 /* Now logical-or VALUE into OP0, unless it is zero. */
1319 temp
= expand_binop (mode
, ior_optab
, temp
, value
,
1320 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1321 temp
= force_reg (mode
, temp
);
1326 op0
= copy_rtx (op0
);
1327 emit_move_insn (op0
, temp
);
1331 /* Store a bit field that is split across multiple accessible memory objects.
1333 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1334 BITSIZE is the field width; BITPOS the position of its first bit
1336 VALUE is the value to store, which has mode VALUE_MODE.
1337 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1340 If REVERSE is true, the store is to be done in reverse order.
1342 This does not yet handle fields wider than BITS_PER_WORD. */
1345 store_split_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
1346 unsigned HOST_WIDE_INT bitsize
,
1347 unsigned HOST_WIDE_INT bitpos
,
1348 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
,
1349 rtx value
, scalar_int_mode value_mode
, bool reverse
)
1351 unsigned int unit
, total_bits
, bitsdone
= 0;
1353 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1355 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
1356 unit
= BITS_PER_WORD
;
1358 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
1360 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1361 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1362 again, and we will mutually recurse forever. */
1363 if (MEM_P (op0
) && op0_mode
.exists ())
1364 unit
= MIN (unit
, GET_MODE_BITSIZE (op0_mode
.require ()));
1366 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1367 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1368 that VALUE might be a floating-point constant. */
1369 if (CONSTANT_P (value
) && !CONST_INT_P (value
))
1371 rtx word
= gen_lowpart_common (word_mode
, value
);
1373 if (word
&& (value
!= word
))
1376 value
= gen_lowpart_common (word_mode
, force_reg (value_mode
, value
));
1377 value_mode
= word_mode
;
1380 total_bits
= GET_MODE_BITSIZE (value_mode
);
1382 while (bitsdone
< bitsize
)
1384 unsigned HOST_WIDE_INT thissize
;
1385 unsigned HOST_WIDE_INT thispos
;
1386 unsigned HOST_WIDE_INT offset
;
1389 offset
= (bitpos
+ bitsdone
) / unit
;
1390 thispos
= (bitpos
+ bitsdone
) % unit
;
1392 /* When region of bytes we can touch is restricted, decrease
1393 UNIT close to the end of the region as needed. If op0 is a REG
1394 or SUBREG of REG, don't do this, as there can't be data races
1395 on a register and we can expand shorter code in some cases. */
1396 if (maybe_ne (bitregion_end
, 0U)
1397 && unit
> BITS_PER_UNIT
1398 && maybe_gt (bitpos
+ bitsdone
- thispos
+ unit
, bitregion_end
+ 1)
1400 && (GET_CODE (op0
) != SUBREG
|| !REG_P (SUBREG_REG (op0
))))
1406 /* THISSIZE must not overrun a word boundary. Otherwise,
1407 store_fixed_bit_field will call us again, and we will mutually
1409 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
1410 thissize
= MIN (thissize
, unit
- thispos
);
1412 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1414 /* Fetch successively less significant portions. */
1415 if (CONST_INT_P (value
))
1416 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1417 >> (bitsize
- bitsdone
- thissize
))
1418 & ((HOST_WIDE_INT_1
<< thissize
) - 1));
1419 /* Likewise, but the source is little-endian. */
1421 part
= extract_fixed_bit_field (word_mode
, value
, value_mode
,
1423 bitsize
- bitsdone
- thissize
,
1424 NULL_RTX
, 1, false);
1426 /* The args are chosen so that the last part includes the
1427 lsb. Give extract_bit_field the value it needs (with
1428 endianness compensation) to fetch the piece we want. */
1429 part
= extract_fixed_bit_field (word_mode
, value
, value_mode
,
1431 total_bits
- bitsize
+ bitsdone
,
1432 NULL_RTX
, 1, false);
1436 /* Fetch successively more significant portions. */
1437 if (CONST_INT_P (value
))
1438 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1440 & ((HOST_WIDE_INT_1
<< thissize
) - 1));
1441 /* Likewise, but the source is big-endian. */
1443 part
= extract_fixed_bit_field (word_mode
, value
, value_mode
,
1445 total_bits
- bitsdone
- thissize
,
1446 NULL_RTX
, 1, false);
1448 part
= extract_fixed_bit_field (word_mode
, value
, value_mode
,
1449 thissize
, bitsdone
, NULL_RTX
,
1453 /* If OP0 is a register, then handle OFFSET here. */
1454 rtx op0_piece
= op0
;
1455 opt_scalar_int_mode op0_piece_mode
= op0_mode
;
1456 if (SUBREG_P (op0
) || REG_P (op0
))
1458 scalar_int_mode imode
;
1459 if (op0_mode
.exists (&imode
)
1460 && GET_MODE_SIZE (imode
) < UNITS_PER_WORD
)
1463 op0_piece
= const0_rtx
;
1467 op0_piece
= operand_subword_force (op0
,
1468 offset
* unit
/ BITS_PER_WORD
,
1470 op0_piece_mode
= word_mode
;
1472 offset
&= BITS_PER_WORD
/ unit
- 1;
1475 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1476 it is just an out-of-bounds access. Ignore it. */
1477 if (op0_piece
!= const0_rtx
)
1478 store_fixed_bit_field (op0_piece
, op0_piece_mode
, thissize
,
1479 offset
* unit
+ thispos
, bitregion_start
,
1480 bitregion_end
, part
, word_mode
, reverse
);
1481 bitsdone
+= thissize
;
1485 /* A subroutine of extract_bit_field_1 that converts return value X
1486 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1487 to extract_bit_field. */
1490 convert_extracted_bit_field (rtx x
, machine_mode mode
,
1491 machine_mode tmode
, bool unsignedp
)
1493 if (GET_MODE (x
) == tmode
|| GET_MODE (x
) == mode
)
1496 /* If the x mode is not a scalar integral, first convert to the
1497 integer mode of that size and then access it as a floating-point
1498 value via a SUBREG. */
1499 if (!SCALAR_INT_MODE_P (tmode
))
1501 scalar_int_mode int_mode
= int_mode_for_mode (tmode
).require ();
1502 x
= convert_to_mode (int_mode
, x
, unsignedp
);
1503 x
= force_reg (int_mode
, x
);
1504 return gen_lowpart (tmode
, x
);
1507 return convert_to_mode (tmode
, x
, unsignedp
);
1510 /* Try to use an ext(z)v pattern to extract a field from OP0.
1511 Return the extracted value on success, otherwise return null.
1512 EXTV describes the extraction instruction to use. If OP0_MODE
1513 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1514 The other arguments are as for extract_bit_field. */
1517 extract_bit_field_using_extv (const extraction_insn
*extv
, rtx op0
,
1518 opt_scalar_int_mode op0_mode
,
1519 unsigned HOST_WIDE_INT bitsize
,
1520 unsigned HOST_WIDE_INT bitnum
,
1521 int unsignedp
, rtx target
,
1522 machine_mode mode
, machine_mode tmode
)
1524 class expand_operand ops
[4];
1525 rtx spec_target
= target
;
1526 rtx spec_target_subreg
= 0;
1527 scalar_int_mode ext_mode
= extv
->field_mode
;
1528 unsigned unit
= GET_MODE_BITSIZE (ext_mode
);
1530 if (bitsize
== 0 || unit
< bitsize
)
1534 /* Get a reference to the first byte of the field. */
1535 op0
= narrow_bit_field_mem (op0
, extv
->struct_mode
, bitsize
, bitnum
,
1539 /* Convert from counting within OP0 to counting in EXT_MODE. */
1540 if (BYTES_BIG_ENDIAN
)
1541 bitnum
+= unit
- GET_MODE_BITSIZE (op0_mode
.require ());
1543 /* If op0 is a register, we need it in EXT_MODE to make it
1544 acceptable to the format of ext(z)v. */
1545 if (GET_CODE (op0
) == SUBREG
&& op0_mode
.require () != ext_mode
)
1547 if (REG_P (op0
) && op0_mode
.require () != ext_mode
)
1548 op0
= gen_lowpart_SUBREG (ext_mode
, op0
);
1551 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1552 "backwards" from the size of the unit we are extracting from.
1553 Otherwise, we count bits from the most significant on a
1554 BYTES/BITS_BIG_ENDIAN machine. */
1556 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
1557 bitnum
= unit
- bitsize
- bitnum
;
1560 target
= spec_target
= gen_reg_rtx (tmode
);
1562 if (GET_MODE (target
) != ext_mode
)
1564 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1565 between the mode of the extraction (word_mode) and the target
1566 mode. Instead, create a temporary and use convert_move to set
1569 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target
), ext_mode
))
1571 target
= gen_lowpart (ext_mode
, target
);
1572 if (partial_subreg_p (GET_MODE (spec_target
), ext_mode
))
1573 spec_target_subreg
= target
;
1576 target
= gen_reg_rtx (ext_mode
);
1579 create_output_operand (&ops
[0], target
, ext_mode
);
1580 create_fixed_operand (&ops
[1], op0
);
1581 create_integer_operand (&ops
[2], bitsize
);
1582 create_integer_operand (&ops
[3], bitnum
);
1583 if (maybe_expand_insn (extv
->icode
, 4, ops
))
1585 target
= ops
[0].value
;
1586 if (target
== spec_target
)
1588 if (target
== spec_target_subreg
)
1590 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1595 /* See whether it would be valid to extract the part of OP0 described
1596 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1597 operation. Return the subreg if so, otherwise return null. */
1600 extract_bit_field_as_subreg (machine_mode mode
, rtx op0
,
1601 poly_uint64 bitsize
, poly_uint64 bitnum
)
1603 poly_uint64 bytenum
;
1604 if (multiple_p (bitnum
, BITS_PER_UNIT
, &bytenum
)
1605 && known_eq (bitsize
, GET_MODE_BITSIZE (mode
))
1606 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
1607 && TRULY_NOOP_TRUNCATION_MODES_P (mode
, GET_MODE (op0
)))
1608 return simplify_gen_subreg (mode
, op0
, GET_MODE (op0
), bytenum
);
1612 /* A subroutine of extract_bit_field, with the same arguments.
1613 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1614 if we can find no other means of implementing the operation.
1615 if FALLBACK_P is false, return NULL instead. */
1618 extract_bit_field_1 (rtx str_rtx
, poly_uint64 bitsize
, poly_uint64 bitnum
,
1619 int unsignedp
, rtx target
, machine_mode mode
,
1620 machine_mode tmode
, bool reverse
, bool fallback_p
,
1626 if (tmode
== VOIDmode
)
1629 while (GET_CODE (op0
) == SUBREG
)
1631 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1632 op0
= SUBREG_REG (op0
);
1635 /* If we have an out-of-bounds access to a register, just return an
1636 uninitialized register of the required mode. This can occur if the
1637 source code contains an out-of-bounds access to a small array. */
1638 if (REG_P (op0
) && known_ge (bitnum
, GET_MODE_BITSIZE (GET_MODE (op0
))))
1639 return gen_reg_rtx (tmode
);
1642 && mode
== GET_MODE (op0
)
1643 && known_eq (bitnum
, 0U)
1644 && known_eq (bitsize
, GET_MODE_BITSIZE (GET_MODE (op0
))))
1647 op0
= flip_storage_order (mode
, op0
);
1648 /* We're trying to extract a full register from itself. */
1652 /* First try to check for vector from vector extractions. */
1653 if (VECTOR_MODE_P (GET_MODE (op0
))
1655 && VECTOR_MODE_P (tmode
)
1656 && known_eq (bitsize
, GET_MODE_BITSIZE (tmode
))
1657 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0
)), GET_MODE_SIZE (tmode
)))
1659 machine_mode new_mode
= GET_MODE (op0
);
1660 if (GET_MODE_INNER (new_mode
) != GET_MODE_INNER (tmode
))
1662 scalar_mode inner_mode
= GET_MODE_INNER (tmode
);
1664 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0
)),
1665 GET_MODE_UNIT_BITSIZE (tmode
), &nunits
)
1666 || !related_vector_mode (tmode
, inner_mode
,
1667 nunits
).exists (&new_mode
)
1668 || maybe_ne (GET_MODE_SIZE (new_mode
),
1669 GET_MODE_SIZE (GET_MODE (op0
))))
1670 new_mode
= VOIDmode
;
1673 if (new_mode
!= VOIDmode
1674 && (convert_optab_handler (vec_extract_optab
, new_mode
, tmode
)
1675 != CODE_FOR_nothing
)
1676 && multiple_p (bitnum
, GET_MODE_BITSIZE (tmode
), &pos
))
1678 class expand_operand ops
[3];
1679 machine_mode outermode
= new_mode
;
1680 machine_mode innermode
= tmode
;
1681 enum insn_code icode
1682 = convert_optab_handler (vec_extract_optab
, outermode
, innermode
);
1684 if (new_mode
!= GET_MODE (op0
))
1685 op0
= gen_lowpart (new_mode
, op0
);
1686 create_output_operand (&ops
[0], target
, innermode
);
1688 create_input_operand (&ops
[1], op0
, outermode
);
1689 create_integer_operand (&ops
[2], pos
);
1690 if (maybe_expand_insn (icode
, 3, ops
))
1692 if (alt_rtl
&& ops
[0].target
)
1694 target
= ops
[0].value
;
1695 if (GET_MODE (target
) != mode
)
1696 return gen_lowpart (tmode
, target
);
1702 /* See if we can get a better vector mode before extracting. */
1703 if (VECTOR_MODE_P (GET_MODE (op0
))
1705 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1707 machine_mode new_mode
;
1709 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1710 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1711 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1712 new_mode
= MIN_MODE_VECTOR_FRACT
;
1713 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1714 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1715 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1716 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1717 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1718 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1720 new_mode
= MIN_MODE_VECTOR_INT
;
1722 FOR_EACH_MODE_FROM (new_mode
, new_mode
)
1723 if (known_eq (GET_MODE_SIZE (new_mode
), GET_MODE_SIZE (GET_MODE (op0
)))
1724 && known_eq (GET_MODE_UNIT_SIZE (new_mode
), GET_MODE_SIZE (tmode
))
1725 && targetm
.vector_mode_supported_p (new_mode
))
1727 if (new_mode
!= VOIDmode
)
1728 op0
= gen_lowpart (new_mode
, op0
);
1731 /* Use vec_extract patterns for extracting parts of vectors whenever
1732 available. If that fails, see whether the current modes and bitregion
1733 give a natural subreg. */
1734 machine_mode outermode
= GET_MODE (op0
);
1735 if (VECTOR_MODE_P (outermode
) && !MEM_P (op0
))
1737 scalar_mode innermode
= GET_MODE_INNER (outermode
);
1738 enum insn_code icode
1739 = convert_optab_handler (vec_extract_optab
, outermode
, innermode
);
1741 if (icode
!= CODE_FOR_nothing
1742 && known_eq (bitsize
, GET_MODE_BITSIZE (innermode
))
1743 && multiple_p (bitnum
, GET_MODE_BITSIZE (innermode
), &pos
))
1745 class expand_operand ops
[3];
1747 create_output_operand (&ops
[0], target
, innermode
);
1749 create_input_operand (&ops
[1], op0
, outermode
);
1750 create_integer_operand (&ops
[2], pos
);
1751 if (maybe_expand_insn (icode
, 3, ops
))
1753 if (alt_rtl
&& ops
[0].target
)
1755 target
= ops
[0].value
;
1756 if (GET_MODE (target
) != mode
)
1757 return gen_lowpart (tmode
, target
);
1761 /* Using subregs is useful if we're extracting one register vector
1762 from a multi-register vector. extract_bit_field_as_subreg checks
1763 for valid bitsize and bitnum, so we don't need to do that here. */
1764 if (VECTOR_MODE_P (mode
))
1766 rtx sub
= extract_bit_field_as_subreg (mode
, op0
, bitsize
, bitnum
);
1772 /* Make sure we are playing with integral modes. Pun with subregs
1774 opt_scalar_int_mode op0_mode
= int_mode_for_mode (GET_MODE (op0
));
1775 scalar_int_mode imode
;
1776 if (!op0_mode
.exists (&imode
) || imode
!= GET_MODE (op0
))
1779 op0
= adjust_bitfield_address_size (op0
, op0_mode
.else_blk (),
1781 else if (op0_mode
.exists (&imode
))
1783 op0
= gen_lowpart (imode
, op0
);
1785 /* If we got a SUBREG, force it into a register since we
1786 aren't going to be able to do another SUBREG on it. */
1787 if (GET_CODE (op0
) == SUBREG
)
1788 op0
= force_reg (imode
, op0
);
1792 poly_int64 size
= GET_MODE_SIZE (GET_MODE (op0
));
1793 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1794 emit_move_insn (mem
, op0
);
1795 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
1799 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1800 If that's wrong, the solution is to test for it and set TARGET to 0
1803 /* Get the mode of the field to use for atomic access or subreg
1805 if (!SCALAR_INT_MODE_P (tmode
)
1806 || !mode_for_size (bitsize
, GET_MODE_CLASS (tmode
), 0).exists (&mode1
))
1808 gcc_assert (mode1
!= BLKmode
);
1810 /* Extraction of a full MODE1 value can be done with a subreg as long
1811 as the least significant bit of the value is the least significant
1812 bit of either OP0 or a word of OP0. */
1813 if (!MEM_P (op0
) && !reverse
)
1815 rtx sub
= extract_bit_field_as_subreg (mode1
, op0
, bitsize
, bitnum
);
1817 return convert_extracted_bit_field (sub
, mode
, tmode
, unsignedp
);
1820 /* Extraction of a full MODE1 value can be done with a load as long as
1821 the field is on a byte boundary and is sufficiently aligned. */
1822 poly_uint64 bytenum
;
1823 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, mode1
, &bytenum
))
1825 op0
= adjust_bitfield_address (op0
, mode1
, bytenum
);
1827 op0
= flip_storage_order (mode1
, op0
);
1828 return convert_extracted_bit_field (op0
, mode
, tmode
, unsignedp
);
1831 /* If we have a memory source and a non-constant bit offset, restrict
1832 the memory to the referenced bytes. This is a worst-case fallback
1833 but is useful for things like vector booleans. */
1834 if (MEM_P (op0
) && !bitnum
.is_constant ())
1836 bytenum
= bits_to_bytes_round_down (bitnum
);
1837 bitnum
= num_trailing_bits (bitnum
);
1838 poly_uint64 bytesize
= bits_to_bytes_round_up (bitnum
+ bitsize
);
1839 op0
= adjust_bitfield_address_size (op0
, BLKmode
, bytenum
, bytesize
);
1840 op0_mode
= opt_scalar_int_mode ();
1843 /* It's possible we'll need to handle other cases here for
1844 polynomial bitnum and bitsize. */
1846 /* From here on we need to be looking at a fixed-size insertion. */
1847 return extract_integral_bit_field (op0
, op0_mode
, bitsize
.to_constant (),
1848 bitnum
.to_constant (), unsignedp
,
1849 target
, mode
, tmode
, reverse
, fallback_p
);
1852 /* Subroutine of extract_bit_field_1, with the same arguments, except
1853 that BITSIZE and BITNUM are constant. Handle cases specific to
1854 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1855 otherwise OP0 is a BLKmode MEM. */
1858 extract_integral_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
1859 unsigned HOST_WIDE_INT bitsize
,
1860 unsigned HOST_WIDE_INT bitnum
, int unsignedp
,
1861 rtx target
, machine_mode mode
, machine_mode tmode
,
1862 bool reverse
, bool fallback_p
)
1864 /* Handle fields bigger than a word. */
1866 if (bitsize
> BITS_PER_WORD
)
1868 /* Here we transfer the words of the field
1869 in the order least significant first.
1870 This is because the most significant word is the one which may
1871 be less than full. */
1873 const bool backwards
= WORDS_BIG_ENDIAN
;
1874 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1878 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1879 target
= gen_reg_rtx (mode
);
1881 /* In case we're about to clobber a base register or something
1882 (see gcc.c-torture/execute/20040625-1.c). */
1883 if (reg_mentioned_p (target
, op0
))
1884 target
= gen_reg_rtx (mode
);
1886 /* Indicate for flow that the entire target reg is being set. */
1887 emit_clobber (target
);
1889 /* The mode must be fixed-size, since extract_bit_field_1 handles
1890 extractions from variable-sized objects before calling this
1892 unsigned int target_size
1893 = GET_MODE_SIZE (GET_MODE (target
)).to_constant ();
1894 last
= get_last_insn ();
1895 for (i
= 0; i
< nwords
; i
++)
1897 /* If I is 0, use the low-order word in both field and target;
1898 if I is 1, use the next to lowest word; and so on. */
1899 /* Word number in TARGET to use. */
1900 unsigned int wordnum
1901 = (backwards
? target_size
/ UNITS_PER_WORD
- i
- 1 : i
);
1902 /* Offset from start of field in OP0. */
1903 unsigned int bit_offset
= (backwards
^ reverse
1904 ? MAX ((int) bitsize
- ((int) i
+ 1)
1907 : (int) i
* BITS_PER_WORD
);
1908 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1910 = extract_bit_field_1 (op0
, MIN (BITS_PER_WORD
,
1911 bitsize
- i
* BITS_PER_WORD
),
1912 bitnum
+ bit_offset
, 1, target_part
,
1913 mode
, word_mode
, reverse
, fallback_p
, NULL
);
1915 gcc_assert (target_part
);
1918 delete_insns_since (last
);
1922 if (result_part
!= target_part
)
1923 emit_move_insn (target_part
, result_part
);
1928 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1929 need to be zero'd out. */
1930 if (target_size
> nwords
* UNITS_PER_WORD
)
1932 unsigned int i
, total_words
;
1934 total_words
= target_size
/ UNITS_PER_WORD
;
1935 for (i
= nwords
; i
< total_words
; i
++)
1937 (operand_subword (target
,
1938 backwards
? total_words
- i
- 1 : i
,
1945 /* Signed bit field: sign-extend with two arithmetic shifts. */
1946 target
= expand_shift (LSHIFT_EXPR
, mode
, target
,
1947 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1948 return expand_shift (RSHIFT_EXPR
, mode
, target
,
1949 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1952 /* If OP0 is a multi-word register, narrow it to the affected word.
1953 If the region spans two words, defer to extract_split_bit_field. */
1954 if (!MEM_P (op0
) && GET_MODE_SIZE (op0_mode
.require ()) > UNITS_PER_WORD
)
1956 if (bitnum
% BITS_PER_WORD
+ bitsize
> BITS_PER_WORD
)
1960 target
= extract_split_bit_field (op0
, op0_mode
, bitsize
, bitnum
,
1961 unsignedp
, reverse
);
1962 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1964 op0
= simplify_gen_subreg (word_mode
, op0
, op0_mode
.require (),
1965 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
1966 op0_mode
= word_mode
;
1967 bitnum
%= BITS_PER_WORD
;
1970 /* From here on we know the desired field is smaller than a word.
1971 If OP0 is a register, it too fits within a word. */
1972 enum extraction_pattern pattern
= unsignedp
? EP_extzv
: EP_extv
;
1973 extraction_insn extv
;
1976 /* ??? We could limit the structure size to the part of OP0 that
1977 contains the field, with appropriate checks for endianness
1978 and TARGET_TRULY_NOOP_TRUNCATION. */
1979 && get_best_reg_extraction_insn (&extv
, pattern
,
1980 GET_MODE_BITSIZE (op0_mode
.require ()),
1983 rtx result
= extract_bit_field_using_extv (&extv
, op0
, op0_mode
,
1985 unsignedp
, target
, mode
,
1991 /* If OP0 is a memory, try copying it to a register and seeing if a
1992 cheap register alternative is available. */
1993 if (MEM_P (op0
) & !reverse
)
1995 if (get_best_mem_extraction_insn (&extv
, pattern
, bitsize
, bitnum
,
1998 rtx result
= extract_bit_field_using_extv (&extv
, op0
, op0_mode
,
2000 unsignedp
, target
, mode
,
2006 rtx_insn
*last
= get_last_insn ();
2008 /* Try loading part of OP0 into a register and extracting the
2009 bitfield from that. */
2010 unsigned HOST_WIDE_INT bitpos
;
2011 rtx xop0
= adjust_bit_field_mem_for_reg (pattern
, op0
, bitsize
, bitnum
,
2012 0, 0, tmode
, &bitpos
);
2015 xop0
= copy_to_reg (xop0
);
2016 rtx result
= extract_bit_field_1 (xop0
, bitsize
, bitpos
,
2018 mode
, tmode
, reverse
, false, NULL
);
2021 delete_insns_since (last
);
2028 /* Find a correspondingly-sized integer field, so we can apply
2029 shifts and masks to it. */
2030 scalar_int_mode int_mode
;
2031 if (!int_mode_for_mode (tmode
).exists (&int_mode
))
2032 /* If this fails, we should probably push op0 out to memory and then
2034 int_mode
= int_mode_for_mode (mode
).require ();
2036 target
= extract_fixed_bit_field (int_mode
, op0
, op0_mode
, bitsize
,
2037 bitnum
, target
, unsignedp
, reverse
);
2039 /* Complex values must be reversed piecewise, so we need to undo the global
2040 reversal, convert to the complex mode and reverse again. */
2041 if (reverse
&& COMPLEX_MODE_P (tmode
))
2043 target
= flip_storage_order (int_mode
, target
);
2044 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
2045 target
= flip_storage_order (tmode
, target
);
2048 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
2053 /* Generate code to extract a byte-field from STR_RTX
2054 containing BITSIZE bits, starting at BITNUM,
2055 and put it in TARGET if possible (if TARGET is nonzero).
2056 Regardless of TARGET, we return the rtx for where the value is placed.
2058 STR_RTX is the structure containing the byte (a REG or MEM).
2059 UNSIGNEDP is nonzero if this is an unsigned bit field.
2060 MODE is the natural mode of the field value once extracted.
2061 TMODE is the mode the caller would like the value to have;
2062 but the value may be returned with type MODE instead.
2064 If REVERSE is true, the extraction is to be done in reverse order.
2066 If a TARGET is specified and we can store in it at no extra cost,
2067 we do so, and return TARGET.
2068 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2069 if they are equally easy.
2071 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2072 then *ALT_RTL is set to TARGET (before legitimziation). */
2075 extract_bit_field (rtx str_rtx
, poly_uint64 bitsize
, poly_uint64 bitnum
,
2076 int unsignedp
, rtx target
, machine_mode mode
,
2077 machine_mode tmode
, bool reverse
, rtx
*alt_rtl
)
2081 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2082 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx
)), 0))
2083 mode1
= GET_MODE (str_rtx
);
2084 else if (target
&& maybe_ne (GET_MODE_BITSIZE (GET_MODE (target
)), 0))
2085 mode1
= GET_MODE (target
);
2089 unsigned HOST_WIDE_INT ibitsize
, ibitnum
;
2090 scalar_int_mode int_mode
;
2091 if (bitsize
.is_constant (&ibitsize
)
2092 && bitnum
.is_constant (&ibitnum
)
2093 && is_a
<scalar_int_mode
> (mode1
, &int_mode
)
2094 && strict_volatile_bitfield_p (str_rtx
, ibitsize
, ibitnum
,
2097 /* Extraction of a full INT_MODE value can be done with a simple load.
2098 We know here that the field can be accessed with one single
2099 instruction. For targets that support unaligned memory,
2100 an unaligned access may be necessary. */
2101 if (ibitsize
== GET_MODE_BITSIZE (int_mode
))
2103 rtx result
= adjust_bitfield_address (str_rtx
, int_mode
,
2104 ibitnum
/ BITS_PER_UNIT
);
2106 result
= flip_storage_order (int_mode
, result
);
2107 gcc_assert (ibitnum
% BITS_PER_UNIT
== 0);
2108 return convert_extracted_bit_field (result
, mode
, tmode
, unsignedp
);
2111 str_rtx
= narrow_bit_field_mem (str_rtx
, int_mode
, ibitsize
, ibitnum
,
2113 gcc_assert (ibitnum
+ ibitsize
<= GET_MODE_BITSIZE (int_mode
));
2114 str_rtx
= copy_to_reg (str_rtx
);
2115 return extract_bit_field_1 (str_rtx
, ibitsize
, ibitnum
, unsignedp
,
2116 target
, mode
, tmode
, reverse
, true, alt_rtl
);
2119 return extract_bit_field_1 (str_rtx
, bitsize
, bitnum
, unsignedp
,
2120 target
, mode
, tmode
, reverse
, true, alt_rtl
);
2123 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2124 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2125 otherwise OP0 is a BLKmode MEM.
2127 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2128 If REVERSE is true, the extraction is to be done in reverse order.
2130 If TARGET is nonzero, attempts to store the value there
2131 and return TARGET, but this is not guaranteed.
2132 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2135 extract_fixed_bit_field (machine_mode tmode
, rtx op0
,
2136 opt_scalar_int_mode op0_mode
,
2137 unsigned HOST_WIDE_INT bitsize
,
2138 unsigned HOST_WIDE_INT bitnum
, rtx target
,
2139 int unsignedp
, bool reverse
)
2141 scalar_int_mode mode
;
2144 if (!get_best_mode (bitsize
, bitnum
, 0, 0, MEM_ALIGN (op0
),
2145 BITS_PER_WORD
, MEM_VOLATILE_P (op0
), &mode
))
2146 /* The only way this should occur is if the field spans word
2148 return extract_split_bit_field (op0
, op0_mode
, bitsize
, bitnum
,
2149 unsignedp
, reverse
);
2151 op0
= narrow_bit_field_mem (op0
, mode
, bitsize
, bitnum
, &bitnum
);
2154 mode
= op0_mode
.require ();
2156 return extract_fixed_bit_field_1 (tmode
, op0
, mode
, bitsize
, bitnum
,
2157 target
, unsignedp
, reverse
);
2160 /* Helper function for extract_fixed_bit_field, extracts
2161 the bit field always using MODE, which is the mode of OP0.
2162 The other arguments are as for extract_fixed_bit_field. */
2165 extract_fixed_bit_field_1 (machine_mode tmode
, rtx op0
, scalar_int_mode mode
,
2166 unsigned HOST_WIDE_INT bitsize
,
2167 unsigned HOST_WIDE_INT bitnum
, rtx target
,
2168 int unsignedp
, bool reverse
)
2170 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2171 for invalid input, such as extract equivalent of f5 from
2172 gcc.dg/pr48335-2.c. */
2174 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2175 /* BITNUM is the distance between our msb and that of OP0.
2176 Convert it to the distance from the lsb. */
2177 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
2179 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2180 We have reduced the big-endian case to the little-endian case. */
2182 op0
= flip_storage_order (mode
, op0
);
2188 /* If the field does not already start at the lsb,
2189 shift it so it does. */
2190 /* Maybe propagate the target for the shift. */
2191 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2194 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
, bitnum
, subtarget
, 1);
2196 /* Convert the value to the desired mode. TMODE must also be a
2197 scalar integer for this conversion to make sense, since we
2198 shouldn't reinterpret the bits. */
2199 scalar_int_mode new_mode
= as_a
<scalar_int_mode
> (tmode
);
2200 if (mode
!= new_mode
)
2201 op0
= convert_to_mode (new_mode
, op0
, 1);
2203 /* Unless the msb of the field used to be the msb when we shifted,
2204 mask out the upper bits. */
2206 if (GET_MODE_BITSIZE (mode
) != bitnum
+ bitsize
)
2207 return expand_binop (new_mode
, and_optab
, op0
,
2208 mask_rtx (new_mode
, 0, bitsize
, 0),
2209 target
, 1, OPTAB_LIB_WIDEN
);
2213 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2214 then arithmetic-shift its lsb to the lsb of the word. */
2215 op0
= force_reg (mode
, op0
);
2217 /* Find the narrowest integer mode that contains the field. */
2219 opt_scalar_int_mode mode_iter
;
2220 FOR_EACH_MODE_IN_CLASS (mode_iter
, MODE_INT
)
2221 if (GET_MODE_BITSIZE (mode_iter
.require ()) >= bitsize
+ bitnum
)
2224 mode
= mode_iter
.require ();
2225 op0
= convert_to_mode (mode
, op0
, 0);
2230 if (GET_MODE_BITSIZE (mode
) != (bitsize
+ bitnum
))
2232 int amount
= GET_MODE_BITSIZE (mode
) - (bitsize
+ bitnum
);
2233 /* Maybe propagate the target for the shift. */
2234 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2235 op0
= expand_shift (LSHIFT_EXPR
, mode
, op0
, amount
, subtarget
, 1);
2238 return expand_shift (RSHIFT_EXPR
, mode
, op0
,
2239 GET_MODE_BITSIZE (mode
) - bitsize
, target
, 0);
2242 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2246 lshift_value (machine_mode mode
, unsigned HOST_WIDE_INT value
,
2249 return immed_wide_int_const (wi::lshift (value
, bitpos
), mode
);
2252 /* Extract a bit field that is split across two words
2253 and return an RTX for the result.
2255 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2256 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2257 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2258 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2261 If REVERSE is true, the extraction is to be done in reverse order. */
2264 extract_split_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
2265 unsigned HOST_WIDE_INT bitsize
,
2266 unsigned HOST_WIDE_INT bitpos
, int unsignedp
,
2270 unsigned int bitsdone
= 0;
2271 rtx result
= NULL_RTX
;
2274 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2276 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
2277 unit
= BITS_PER_WORD
;
2279 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
2281 while (bitsdone
< bitsize
)
2283 unsigned HOST_WIDE_INT thissize
;
2285 unsigned HOST_WIDE_INT thispos
;
2286 unsigned HOST_WIDE_INT offset
;
2288 offset
= (bitpos
+ bitsdone
) / unit
;
2289 thispos
= (bitpos
+ bitsdone
) % unit
;
2291 /* THISSIZE must not overrun a word boundary. Otherwise,
2292 extract_fixed_bit_field will call us again, and we will mutually
2294 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
2295 thissize
= MIN (thissize
, unit
- thispos
);
2297 /* If OP0 is a register, then handle OFFSET here. */
2298 rtx op0_piece
= op0
;
2299 opt_scalar_int_mode op0_piece_mode
= op0_mode
;
2300 if (SUBREG_P (op0
) || REG_P (op0
))
2302 op0_piece
= operand_subword_force (op0
, offset
, op0_mode
.require ());
2303 op0_piece_mode
= word_mode
;
2307 /* Extract the parts in bit-counting order,
2308 whose meaning is determined by BYTES_PER_UNIT.
2309 OFFSET is in UNITs, and UNIT is in bits. */
2310 part
= extract_fixed_bit_field (word_mode
, op0_piece
, op0_piece_mode
,
2311 thissize
, offset
* unit
+ thispos
,
2313 bitsdone
+= thissize
;
2315 /* Shift this part into place for the result. */
2316 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2318 if (bitsize
!= bitsdone
)
2319 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2320 bitsize
- bitsdone
, 0, 1);
2324 if (bitsdone
!= thissize
)
2325 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2326 bitsdone
- thissize
, 0, 1);
2332 /* Combine the parts with bitwise or. This works
2333 because we extracted each part as an unsigned bit field. */
2334 result
= expand_binop (word_mode
, ior_optab
, part
, result
, NULL_RTX
, 1,
2340 /* Unsigned bit field: we are done. */
2343 /* Signed bit field: sign-extend with two arithmetic shifts. */
2344 result
= expand_shift (LSHIFT_EXPR
, word_mode
, result
,
2345 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2346 return expand_shift (RSHIFT_EXPR
, word_mode
, result
,
2347 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2350 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2351 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2352 MODE, fill the upper bits with zeros. Fail if the layout of either
2353 mode is unknown (as for CC modes) or if the extraction would involve
2354 unprofitable mode punning. Return the value on success, otherwise
2357 This is different from gen_lowpart* in these respects:
2359 - the returned value must always be considered an rvalue
2361 - when MODE is wider than SRC_MODE, the extraction involves
2364 - when MODE is smaller than SRC_MODE, the extraction involves
2365 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2367 In other words, this routine performs a computation, whereas the
2368 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2372 extract_low_bits (machine_mode mode
, machine_mode src_mode
, rtx src
)
2374 scalar_int_mode int_mode
, src_int_mode
;
2376 if (mode
== src_mode
)
2379 if (CONSTANT_P (src
))
2381 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2382 fails, it will happily create (subreg (symbol_ref)) or similar
2384 poly_uint64 byte
= subreg_lowpart_offset (mode
, src_mode
);
2385 rtx ret
= simplify_subreg (mode
, src
, src_mode
, byte
);
2389 if (GET_MODE (src
) == VOIDmode
2390 || !validate_subreg (mode
, src_mode
, src
, byte
))
2393 src
= force_reg (GET_MODE (src
), src
);
2394 return gen_rtx_SUBREG (mode
, src
, byte
);
2397 if (GET_MODE_CLASS (mode
) == MODE_CC
|| GET_MODE_CLASS (src_mode
) == MODE_CC
)
2400 if (known_eq (GET_MODE_BITSIZE (mode
), GET_MODE_BITSIZE (src_mode
))
2401 && targetm
.modes_tieable_p (mode
, src_mode
))
2403 rtx x
= gen_lowpart_common (mode
, src
);
2408 if (!int_mode_for_mode (src_mode
).exists (&src_int_mode
)
2409 || !int_mode_for_mode (mode
).exists (&int_mode
))
2412 if (!targetm
.modes_tieable_p (src_int_mode
, src_mode
))
2414 if (!targetm
.modes_tieable_p (int_mode
, mode
))
2417 src
= gen_lowpart (src_int_mode
, src
);
2418 if (!validate_subreg (int_mode
, src_int_mode
, src
,
2419 subreg_lowpart_offset (int_mode
, src_int_mode
)))
2422 src
= convert_modes (int_mode
, src_int_mode
, src
, true);
2423 src
= gen_lowpart (mode
, src
);
2427 /* Add INC into TARGET. */
2430 expand_inc (rtx target
, rtx inc
)
2432 rtx value
= expand_binop (GET_MODE (target
), add_optab
,
2434 target
, 0, OPTAB_LIB_WIDEN
);
2435 if (value
!= target
)
2436 emit_move_insn (target
, value
);
2439 /* Subtract DEC from TARGET. */
2442 expand_dec (rtx target
, rtx dec
)
2444 rtx value
= expand_binop (GET_MODE (target
), sub_optab
,
2446 target
, 0, OPTAB_LIB_WIDEN
);
2447 if (value
!= target
)
2448 emit_move_insn (target
, value
);
2451 /* Output a shift instruction for expression code CODE,
2452 with SHIFTED being the rtx for the value to shift,
2453 and AMOUNT the rtx for the amount to shift by.
2454 Store the result in the rtx TARGET, if that is convenient.
2455 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2456 Return the rtx for where the value is.
2457 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2458 in which case 0 is returned. */
2461 expand_shift_1 (enum tree_code code
, machine_mode mode
, rtx shifted
,
2462 rtx amount
, rtx target
, int unsignedp
, bool may_fail
= false)
2465 int left
= (code
== LSHIFT_EXPR
|| code
== LROTATE_EXPR
);
2466 int rotate
= (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
);
2467 optab lshift_optab
= ashl_optab
;
2468 optab rshift_arith_optab
= ashr_optab
;
2469 optab rshift_uns_optab
= lshr_optab
;
2470 optab lrotate_optab
= rotl_optab
;
2471 optab rrotate_optab
= rotr_optab
;
2472 machine_mode op1_mode
;
2473 scalar_mode scalar_mode
= GET_MODE_INNER (mode
);
2475 bool speed
= optimize_insn_for_speed_p ();
2478 op1_mode
= GET_MODE (op1
);
2480 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2481 shift amount is a vector, use the vector/vector shift patterns. */
2482 if (VECTOR_MODE_P (mode
) && VECTOR_MODE_P (op1_mode
))
2484 lshift_optab
= vashl_optab
;
2485 rshift_arith_optab
= vashr_optab
;
2486 rshift_uns_optab
= vlshr_optab
;
2487 lrotate_optab
= vrotl_optab
;
2488 rrotate_optab
= vrotr_optab
;
2491 /* Previously detected shift-counts computed by NEGATE_EXPR
2492 and shifted in the other direction; but that does not work
2495 if (SHIFT_COUNT_TRUNCATED
)
2497 if (CONST_INT_P (op1
)
2498 && ((unsigned HOST_WIDE_INT
) INTVAL (op1
) >=
2499 (unsigned HOST_WIDE_INT
) GET_MODE_BITSIZE (scalar_mode
)))
2500 op1
= gen_int_shift_amount (mode
,
2501 (unsigned HOST_WIDE_INT
) INTVAL (op1
)
2502 % GET_MODE_BITSIZE (scalar_mode
));
2503 else if (GET_CODE (op1
) == SUBREG
2504 && subreg_lowpart_p (op1
)
2505 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1
)))
2506 && SCALAR_INT_MODE_P (GET_MODE (op1
)))
2507 op1
= SUBREG_REG (op1
);
2510 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2511 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2512 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2515 && CONST_INT_P (op1
)
2516 && IN_RANGE (INTVAL (op1
), GET_MODE_BITSIZE (scalar_mode
) / 2 + left
,
2517 GET_MODE_BITSIZE (scalar_mode
) - 1))
2519 op1
= gen_int_shift_amount (mode
, (GET_MODE_BITSIZE (scalar_mode
)
2522 code
= left
? LROTATE_EXPR
: RROTATE_EXPR
;
2525 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2526 Note that this is not the case for bigger values. For instance a rotation
2527 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2528 0x04030201 (bswapsi). */
2530 && CONST_INT_P (op1
)
2531 && INTVAL (op1
) == BITS_PER_UNIT
2532 && GET_MODE_SIZE (scalar_mode
) == 2
2533 && optab_handler (bswap_optab
, mode
) != CODE_FOR_nothing
)
2534 return expand_unop (mode
, bswap_optab
, shifted
, NULL_RTX
, unsignedp
);
2536 if (op1
== const0_rtx
)
2539 /* Check whether its cheaper to implement a left shift by a constant
2540 bit count by a sequence of additions. */
2541 if (code
== LSHIFT_EXPR
2542 && CONST_INT_P (op1
)
2544 && INTVAL (op1
) < GET_MODE_PRECISION (scalar_mode
)
2545 && INTVAL (op1
) < MAX_BITS_PER_WORD
2546 && (shift_cost (speed
, mode
, INTVAL (op1
))
2547 > INTVAL (op1
) * add_cost (speed
, mode
))
2548 && shift_cost (speed
, mode
, INTVAL (op1
)) != MAX_COST
)
2551 for (i
= 0; i
< INTVAL (op1
); i
++)
2553 temp
= force_reg (mode
, shifted
);
2554 shifted
= expand_binop (mode
, add_optab
, temp
, temp
, NULL_RTX
,
2555 unsignedp
, OPTAB_LIB_WIDEN
);
2560 for (attempt
= 0; temp
== 0 && attempt
< 3; attempt
++)
2562 enum optab_methods methods
;
2565 methods
= OPTAB_DIRECT
;
2566 else if (attempt
== 1)
2567 methods
= OPTAB_WIDEN
;
2569 methods
= OPTAB_LIB_WIDEN
;
2573 /* Widening does not work for rotation. */
2574 if (methods
== OPTAB_WIDEN
)
2576 else if (methods
== OPTAB_LIB_WIDEN
)
2578 /* If we have been unable to open-code this by a rotation,
2579 do it as the IOR of two shifts. I.e., to rotate A
2581 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2582 where C is the bitsize of A.
2584 It is theoretically possible that the target machine might
2585 not be able to perform either shift and hence we would
2586 be making two libcalls rather than just the one for the
2587 shift (similarly if IOR could not be done). We will allow
2588 this extremely unlikely lossage to avoid complicating the
2591 rtx subtarget
= target
== shifted
? 0 : target
;
2592 rtx new_amount
, other_amount
;
2596 if (op1
== const0_rtx
)
2598 else if (CONST_INT_P (op1
))
2599 other_amount
= gen_int_shift_amount
2600 (mode
, GET_MODE_BITSIZE (scalar_mode
) - INTVAL (op1
));
2604 = simplify_gen_unary (NEG
, GET_MODE (op1
),
2605 op1
, GET_MODE (op1
));
2606 HOST_WIDE_INT mask
= GET_MODE_PRECISION (scalar_mode
) - 1;
2608 = simplify_gen_binary (AND
, GET_MODE (op1
), other_amount
,
2609 gen_int_mode (mask
, GET_MODE (op1
)));
2612 shifted
= force_reg (mode
, shifted
);
2614 temp
= expand_shift_1 (left
? LSHIFT_EXPR
: RSHIFT_EXPR
,
2615 mode
, shifted
, new_amount
, 0, 1);
2616 temp1
= expand_shift_1 (left
? RSHIFT_EXPR
: LSHIFT_EXPR
,
2617 mode
, shifted
, other_amount
,
2619 return expand_binop (mode
, ior_optab
, temp
, temp1
, target
,
2620 unsignedp
, methods
);
2623 temp
= expand_binop (mode
,
2624 left
? lrotate_optab
: rrotate_optab
,
2625 shifted
, op1
, target
, unsignedp
, methods
);
2628 temp
= expand_binop (mode
,
2629 left
? lshift_optab
: rshift_uns_optab
,
2630 shifted
, op1
, target
, unsignedp
, methods
);
2632 /* Do arithmetic shifts.
2633 Also, if we are going to widen the operand, we can just as well
2634 use an arithmetic right-shift instead of a logical one. */
2635 if (temp
== 0 && ! rotate
2636 && (! unsignedp
|| (! left
&& methods
== OPTAB_WIDEN
)))
2638 enum optab_methods methods1
= methods
;
2640 /* If trying to widen a log shift to an arithmetic shift,
2641 don't accept an arithmetic shift of the same size. */
2643 methods1
= OPTAB_MUST_WIDEN
;
2645 /* Arithmetic shift */
2647 temp
= expand_binop (mode
,
2648 left
? lshift_optab
: rshift_arith_optab
,
2649 shifted
, op1
, target
, unsignedp
, methods1
);
2652 /* We used to try extzv here for logical right shifts, but that was
2653 only useful for one machine, the VAX, and caused poor code
2654 generation there for lshrdi3, so the code was deleted and a
2655 define_expand for lshrsi3 was added to vax.md. */
2658 gcc_assert (temp
!= NULL_RTX
|| may_fail
);
2662 /* Output a shift instruction for expression code CODE,
2663 with SHIFTED being the rtx for the value to shift,
2664 and AMOUNT the amount to shift by.
2665 Store the result in the rtx TARGET, if that is convenient.
2666 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2667 Return the rtx for where the value is. */
2670 expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2671 poly_int64 amount
, rtx target
, int unsignedp
)
2673 return expand_shift_1 (code
, mode
, shifted
,
2674 gen_int_shift_amount (mode
, amount
),
2678 /* Likewise, but return 0 if that cannot be done. */
2681 maybe_expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2682 int amount
, rtx target
, int unsignedp
)
2684 return expand_shift_1 (code
, mode
,
2685 shifted
, GEN_INT (amount
), target
, unsignedp
, true);
2688 /* Output a shift instruction for expression code CODE,
2689 with SHIFTED being the rtx for the value to shift,
2690 and AMOUNT the tree for the amount to shift by.
2691 Store the result in the rtx TARGET, if that is convenient.
2692 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2693 Return the rtx for where the value is. */
2696 expand_variable_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2697 tree amount
, rtx target
, int unsignedp
)
2699 return expand_shift_1 (code
, mode
,
2700 shifted
, expand_normal (amount
), target
, unsignedp
);
2704 static void synth_mult (struct algorithm
*, unsigned HOST_WIDE_INT
,
2705 const struct mult_cost
*, machine_mode mode
);
2706 static rtx
expand_mult_const (machine_mode
, rtx
, HOST_WIDE_INT
, rtx
,
2707 const struct algorithm
*, enum mult_variant
);
2708 static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT
, int);
2709 static rtx
extract_high_half (scalar_int_mode
, rtx
);
2710 static rtx
expmed_mult_highpart (scalar_int_mode
, rtx
, rtx
, rtx
, int, int);
2711 static rtx
expmed_mult_highpart_optab (scalar_int_mode
, rtx
, rtx
, rtx
,
2713 /* Compute and return the best algorithm for multiplying by T.
2714 The algorithm must cost less than cost_limit
2715 If retval.cost >= COST_LIMIT, no algorithm was found and all
2716 other field of the returned struct are undefined.
2717 MODE is the machine mode of the multiplication. */
2720 synth_mult (struct algorithm
*alg_out
, unsigned HOST_WIDE_INT t
,
2721 const struct mult_cost
*cost_limit
, machine_mode mode
)
2724 struct algorithm
*alg_in
, *best_alg
;
2725 struct mult_cost best_cost
;
2726 struct mult_cost new_limit
;
2727 int op_cost
, op_latency
;
2728 unsigned HOST_WIDE_INT orig_t
= t
;
2729 unsigned HOST_WIDE_INT q
;
2730 int maxm
, hash_index
;
2731 bool cache_hit
= false;
2732 enum alg_code cache_alg
= alg_zero
;
2733 bool speed
= optimize_insn_for_speed_p ();
2734 scalar_int_mode imode
;
2735 struct alg_hash_entry
*entry_ptr
;
2737 /* Indicate that no algorithm is yet found. If no algorithm
2738 is found, this value will be returned and indicate failure. */
2739 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2740 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2742 if (cost_limit
->cost
< 0
2743 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2746 /* Be prepared for vector modes. */
2747 imode
= as_a
<scalar_int_mode
> (GET_MODE_INNER (mode
));
2749 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2751 /* Restrict the bits of "t" to the multiplication's mode. */
2752 t
&= GET_MODE_MASK (imode
);
2754 /* t == 1 can be done in zero cost. */
2758 alg_out
->cost
.cost
= 0;
2759 alg_out
->cost
.latency
= 0;
2760 alg_out
->op
[0] = alg_m
;
2764 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2768 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2773 alg_out
->cost
.cost
= zero_cost (speed
);
2774 alg_out
->cost
.latency
= zero_cost (speed
);
2775 alg_out
->op
[0] = alg_zero
;
2780 /* We'll be needing a couple extra algorithm structures now. */
2782 alg_in
= XALLOCA (struct algorithm
);
2783 best_alg
= XALLOCA (struct algorithm
);
2784 best_cost
= *cost_limit
;
2786 /* Compute the hash index. */
2787 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2789 /* See if we already know what to do for T. */
2790 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2791 if (entry_ptr
->t
== t
2792 && entry_ptr
->mode
== mode
2793 && entry_ptr
->speed
== speed
2794 && entry_ptr
->alg
!= alg_unknown
)
2796 cache_alg
= entry_ptr
->alg
;
2798 if (cache_alg
== alg_impossible
)
2800 /* The cache tells us that it's impossible to synthesize
2801 multiplication by T within entry_ptr->cost. */
2802 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2803 /* COST_LIMIT is at least as restrictive as the one
2804 recorded in the hash table, in which case we have no
2805 hope of synthesizing a multiplication. Just
2809 /* If we get here, COST_LIMIT is less restrictive than the
2810 one recorded in the hash table, so we may be able to
2811 synthesize a multiplication. Proceed as if we didn't
2812 have the cache entry. */
2816 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2817 /* The cached algorithm shows that this multiplication
2818 requires more cost than COST_LIMIT. Just return. This
2819 way, we don't clobber this cache entry with
2820 alg_impossible but retain useful information. */
2832 goto do_alg_addsub_t_m2
;
2834 case alg_add_factor
:
2835 case alg_sub_factor
:
2836 goto do_alg_addsub_factor
;
2839 goto do_alg_add_t2_m
;
2842 goto do_alg_sub_t2_m
;
2850 /* If we have a group of zero bits at the low-order part of T, try
2851 multiplying by the remaining bits and then doing a shift. */
2856 m
= ctz_or_zero (t
); /* m = number of low zero bits */
2860 /* The function expand_shift will choose between a shift and
2861 a sequence of additions, so the observed cost is given as
2862 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2863 op_cost
= m
* add_cost (speed
, mode
);
2864 if (shift_cost (speed
, mode
, m
) < op_cost
)
2865 op_cost
= shift_cost (speed
, mode
, m
);
2866 new_limit
.cost
= best_cost
.cost
- op_cost
;
2867 new_limit
.latency
= best_cost
.latency
- op_cost
;
2868 synth_mult (alg_in
, q
, &new_limit
, mode
);
2870 alg_in
->cost
.cost
+= op_cost
;
2871 alg_in
->cost
.latency
+= op_cost
;
2872 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2874 best_cost
= alg_in
->cost
;
2875 std::swap (alg_in
, best_alg
);
2876 best_alg
->log
[best_alg
->ops
] = m
;
2877 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2880 /* See if treating ORIG_T as a signed number yields a better
2881 sequence. Try this sequence only for a negative ORIG_T
2882 as it would be useless for a non-negative ORIG_T. */
2883 if ((HOST_WIDE_INT
) orig_t
< 0)
2885 /* Shift ORIG_T as follows because a right shift of a
2886 negative-valued signed type is implementation
2888 q
= ~(~orig_t
>> m
);
2889 /* The function expand_shift will choose between a shift
2890 and a sequence of additions, so the observed cost is
2891 given as MIN (m * add_cost(speed, mode),
2892 shift_cost(speed, mode, m)). */
2893 op_cost
= m
* add_cost (speed
, mode
);
2894 if (shift_cost (speed
, mode
, m
) < op_cost
)
2895 op_cost
= shift_cost (speed
, mode
, m
);
2896 new_limit
.cost
= best_cost
.cost
- op_cost
;
2897 new_limit
.latency
= best_cost
.latency
- op_cost
;
2898 synth_mult (alg_in
, q
, &new_limit
, mode
);
2900 alg_in
->cost
.cost
+= op_cost
;
2901 alg_in
->cost
.latency
+= op_cost
;
2902 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2904 best_cost
= alg_in
->cost
;
2905 std::swap (alg_in
, best_alg
);
2906 best_alg
->log
[best_alg
->ops
] = m
;
2907 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2915 /* If we have an odd number, add or subtract one. */
2918 unsigned HOST_WIDE_INT w
;
2921 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2923 /* If T was -1, then W will be zero after the loop. This is another
2924 case where T ends with ...111. Handling this with (T + 1) and
2925 subtract 1 produces slightly better code and results in algorithm
2926 selection much faster than treating it like the ...0111 case
2930 /* Reject the case where t is 3.
2931 Thus we prefer addition in that case. */
2934 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2936 op_cost
= add_cost (speed
, mode
);
2937 new_limit
.cost
= best_cost
.cost
- op_cost
;
2938 new_limit
.latency
= best_cost
.latency
- op_cost
;
2939 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2941 alg_in
->cost
.cost
+= op_cost
;
2942 alg_in
->cost
.latency
+= op_cost
;
2943 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2945 best_cost
= alg_in
->cost
;
2946 std::swap (alg_in
, best_alg
);
2947 best_alg
->log
[best_alg
->ops
] = 0;
2948 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2953 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2955 op_cost
= add_cost (speed
, mode
);
2956 new_limit
.cost
= best_cost
.cost
- op_cost
;
2957 new_limit
.latency
= best_cost
.latency
- op_cost
;
2958 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2960 alg_in
->cost
.cost
+= op_cost
;
2961 alg_in
->cost
.latency
+= op_cost
;
2962 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2964 best_cost
= alg_in
->cost
;
2965 std::swap (alg_in
, best_alg
);
2966 best_alg
->log
[best_alg
->ops
] = 0;
2967 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2971 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2972 quickly with a - a * n for some appropriate constant n. */
2973 m
= exact_log2 (-orig_t
+ 1);
2974 if (m
>= 0 && m
< maxm
)
2976 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2977 /* If the target has a cheap shift-and-subtract insn use
2978 that in preference to a shift insn followed by a sub insn.
2979 Assume that the shift-and-sub is "atomic" with a latency
2980 equal to it's cost, otherwise assume that on superscalar
2981 hardware the shift may be executed concurrently with the
2982 earlier steps in the algorithm. */
2983 if (shiftsub1_cost (speed
, mode
, m
) <= op_cost
)
2985 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2986 op_latency
= op_cost
;
2989 op_latency
= add_cost (speed
, mode
);
2991 new_limit
.cost
= best_cost
.cost
- op_cost
;
2992 new_limit
.latency
= best_cost
.latency
- op_latency
;
2993 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2996 alg_in
->cost
.cost
+= op_cost
;
2997 alg_in
->cost
.latency
+= op_latency
;
2998 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3000 best_cost
= alg_in
->cost
;
3001 std::swap (alg_in
, best_alg
);
3002 best_alg
->log
[best_alg
->ops
] = m
;
3003 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
3011 /* Look for factors of t of the form
3012 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3013 If we find such a factor, we can multiply by t using an algorithm that
3014 multiplies by q, shift the result by m and add/subtract it to itself.
3016 We search for large factors first and loop down, even if large factors
3017 are less probable than small; if we find a large factor we will find a
3018 good sequence quickly, and therefore be able to prune (by decreasing
3019 COST_LIMIT) the search. */
3021 do_alg_addsub_factor
:
3022 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
3024 unsigned HOST_WIDE_INT d
;
3026 d
= (HOST_WIDE_INT_1U
<< m
) + 1;
3027 if (t
% d
== 0 && t
> d
&& m
< maxm
3028 && (!cache_hit
|| cache_alg
== alg_add_factor
))
3030 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3031 if (shiftadd_cost (speed
, mode
, m
) <= op_cost
)
3032 op_cost
= shiftadd_cost (speed
, mode
, m
);
3034 op_latency
= op_cost
;
3037 new_limit
.cost
= best_cost
.cost
- op_cost
;
3038 new_limit
.latency
= best_cost
.latency
- op_latency
;
3039 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
3041 alg_in
->cost
.cost
+= op_cost
;
3042 alg_in
->cost
.latency
+= op_latency
;
3043 if (alg_in
->cost
.latency
< op_cost
)
3044 alg_in
->cost
.latency
= op_cost
;
3045 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3047 best_cost
= alg_in
->cost
;
3048 std::swap (alg_in
, best_alg
);
3049 best_alg
->log
[best_alg
->ops
] = m
;
3050 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
3052 /* Other factors will have been taken care of in the recursion. */
3056 d
= (HOST_WIDE_INT_1U
<< m
) - 1;
3057 if (t
% d
== 0 && t
> d
&& m
< maxm
3058 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
3060 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3061 if (shiftsub0_cost (speed
, mode
, m
) <= op_cost
)
3062 op_cost
= shiftsub0_cost (speed
, mode
, m
);
3064 op_latency
= op_cost
;
3066 new_limit
.cost
= best_cost
.cost
- op_cost
;
3067 new_limit
.latency
= best_cost
.latency
- op_latency
;
3068 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
3070 alg_in
->cost
.cost
+= op_cost
;
3071 alg_in
->cost
.latency
+= op_latency
;
3072 if (alg_in
->cost
.latency
< op_cost
)
3073 alg_in
->cost
.latency
= op_cost
;
3074 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3076 best_cost
= alg_in
->cost
;
3077 std::swap (alg_in
, best_alg
);
3078 best_alg
->log
[best_alg
->ops
] = m
;
3079 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
3087 /* Try shift-and-add (load effective address) instructions,
3088 i.e. do a*3, a*5, a*9. */
3096 op_cost
= shiftadd_cost (speed
, mode
, m
);
3097 new_limit
.cost
= best_cost
.cost
- op_cost
;
3098 new_limit
.latency
= best_cost
.latency
- op_cost
;
3099 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
3101 alg_in
->cost
.cost
+= op_cost
;
3102 alg_in
->cost
.latency
+= op_cost
;
3103 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3105 best_cost
= alg_in
->cost
;
3106 std::swap (alg_in
, best_alg
);
3107 best_alg
->log
[best_alg
->ops
] = m
;
3108 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
3119 op_cost
= shiftsub0_cost (speed
, mode
, m
);
3120 new_limit
.cost
= best_cost
.cost
- op_cost
;
3121 new_limit
.latency
= best_cost
.latency
- op_cost
;
3122 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
3124 alg_in
->cost
.cost
+= op_cost
;
3125 alg_in
->cost
.latency
+= op_cost
;
3126 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3128 best_cost
= alg_in
->cost
;
3129 std::swap (alg_in
, best_alg
);
3130 best_alg
->log
[best_alg
->ops
] = m
;
3131 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
3139 /* If best_cost has not decreased, we have not found any algorithm. */
3140 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
3142 /* We failed to find an algorithm. Record alg_impossible for
3143 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3144 we are asked to find an algorithm for T within the same or
3145 lower COST_LIMIT, we can immediately return to the
3148 entry_ptr
->mode
= mode
;
3149 entry_ptr
->speed
= speed
;
3150 entry_ptr
->alg
= alg_impossible
;
3151 entry_ptr
->cost
= *cost_limit
;
3155 /* Cache the result. */
3159 entry_ptr
->mode
= mode
;
3160 entry_ptr
->speed
= speed
;
3161 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
3162 entry_ptr
->cost
.cost
= best_cost
.cost
;
3163 entry_ptr
->cost
.latency
= best_cost
.latency
;
3166 /* If we are getting a too long sequence for `struct algorithm'
3167 to record, make this search fail. */
3168 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
3171 /* Copy the algorithm from temporary space to the space at alg_out.
3172 We avoid using structure assignment because the majority of
3173 best_alg is normally undefined, and this is a critical function. */
3174 alg_out
->ops
= best_alg
->ops
+ 1;
3175 alg_out
->cost
= best_cost
;
3176 memcpy (alg_out
->op
, best_alg
->op
,
3177 alg_out
->ops
* sizeof *alg_out
->op
);
3178 memcpy (alg_out
->log
, best_alg
->log
,
3179 alg_out
->ops
* sizeof *alg_out
->log
);
3182 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3183 Try three variations:
3185 - a shift/add sequence based on VAL itself
3186 - a shift/add sequence based on -VAL, followed by a negation
3187 - a shift/add sequence based on VAL - 1, followed by an addition.
3189 Return true if the cheapest of these cost less than MULT_COST,
3190 describing the algorithm in *ALG and final fixup in *VARIANT. */
3193 choose_mult_variant (machine_mode mode
, HOST_WIDE_INT val
,
3194 struct algorithm
*alg
, enum mult_variant
*variant
,
3197 struct algorithm alg2
;
3198 struct mult_cost limit
;
3200 bool speed
= optimize_insn_for_speed_p ();
3202 /* Fail quickly for impossible bounds. */
3206 /* Ensure that mult_cost provides a reasonable upper bound.
3207 Any constant multiplication can be performed with less
3208 than 2 * bits additions. */
3209 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
3210 if (mult_cost
> op_cost
)
3211 mult_cost
= op_cost
;
3213 *variant
= basic_variant
;
3214 limit
.cost
= mult_cost
;
3215 limit
.latency
= mult_cost
;
3216 synth_mult (alg
, val
, &limit
, mode
);
3218 /* This works only if the inverted value actually fits in an
3220 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
3222 op_cost
= neg_cost (speed
, mode
);
3223 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3225 limit
.cost
= alg
->cost
.cost
- op_cost
;
3226 limit
.latency
= alg
->cost
.latency
- op_cost
;
3230 limit
.cost
= mult_cost
- op_cost
;
3231 limit
.latency
= mult_cost
- op_cost
;
3234 synth_mult (&alg2
, -val
, &limit
, mode
);
3235 alg2
.cost
.cost
+= op_cost
;
3236 alg2
.cost
.latency
+= op_cost
;
3237 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3238 *alg
= alg2
, *variant
= negate_variant
;
3241 /* This proves very useful for division-by-constant. */
3242 op_cost
= add_cost (speed
, mode
);
3243 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3245 limit
.cost
= alg
->cost
.cost
- op_cost
;
3246 limit
.latency
= alg
->cost
.latency
- op_cost
;
3250 limit
.cost
= mult_cost
- op_cost
;
3251 limit
.latency
= mult_cost
- op_cost
;
3254 synth_mult (&alg2
, val
- 1, &limit
, mode
);
3255 alg2
.cost
.cost
+= op_cost
;
3256 alg2
.cost
.latency
+= op_cost
;
3257 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3258 *alg
= alg2
, *variant
= add_variant
;
3260 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
3263 /* A subroutine of expand_mult, used for constant multiplications.
3264 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3265 convenient. Use the shift/add sequence described by ALG and apply
3266 the final fixup specified by VARIANT. */
3269 expand_mult_const (machine_mode mode
, rtx op0
, HOST_WIDE_INT val
,
3270 rtx target
, const struct algorithm
*alg
,
3271 enum mult_variant variant
)
3273 unsigned HOST_WIDE_INT val_so_far
;
3279 /* Avoid referencing memory over and over and invalid sharing
3281 op0
= force_reg (mode
, op0
);
3283 /* ACCUM starts out either as OP0 or as a zero, depending on
3284 the first operation. */
3286 if (alg
->op
[0] == alg_zero
)
3288 accum
= copy_to_mode_reg (mode
, CONST0_RTX (mode
));
3291 else if (alg
->op
[0] == alg_m
)
3293 accum
= copy_to_mode_reg (mode
, op0
);
3299 for (opno
= 1; opno
< alg
->ops
; opno
++)
3301 int log
= alg
->log
[opno
];
3302 rtx shift_subtarget
= optimize
? 0 : accum
;
3304 = (opno
== alg
->ops
- 1 && target
!= 0 && variant
!= add_variant
3307 rtx accum_target
= optimize
? 0 : accum
;
3310 switch (alg
->op
[opno
])
3313 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3314 /* REG_EQUAL note will be attached to the following insn. */
3315 emit_move_insn (accum
, tem
);
3320 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3321 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3322 add_target
? add_target
: accum_target
);
3323 val_so_far
+= HOST_WIDE_INT_1U
<< log
;
3327 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3328 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, tem
),
3329 add_target
? add_target
: accum_target
);
3330 val_so_far
-= HOST_WIDE_INT_1U
<< log
;
3334 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3335 log
, shift_subtarget
, 0);
3336 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
),
3337 add_target
? add_target
: accum_target
);
3338 val_so_far
= (val_so_far
<< log
) + 1;
3342 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3343 log
, shift_subtarget
, 0);
3344 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, op0
),
3345 add_target
? add_target
: accum_target
);
3346 val_so_far
= (val_so_far
<< log
) - 1;
3349 case alg_add_factor
:
3350 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3351 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3352 add_target
? add_target
: accum_target
);
3353 val_so_far
+= val_so_far
<< log
;
3356 case alg_sub_factor
:
3357 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3358 accum
= force_operand (gen_rtx_MINUS (mode
, tem
, accum
),
3360 ? add_target
: (optimize
? 0 : tem
)));
3361 val_so_far
= (val_so_far
<< log
) - val_so_far
;
3368 if (SCALAR_INT_MODE_P (mode
))
3370 /* Write a REG_EQUAL note on the last insn so that we can cse
3371 multiplication sequences. Note that if ACCUM is a SUBREG,
3372 we've set the inner register and must properly indicate that. */
3373 tem
= op0
, nmode
= mode
;
3374 accum_inner
= accum
;
3375 if (GET_CODE (accum
) == SUBREG
)
3377 accum_inner
= SUBREG_REG (accum
);
3378 nmode
= GET_MODE (accum_inner
);
3379 tem
= gen_lowpart (nmode
, op0
);
3382 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3383 In that case, only the low bits of accum would be guaranteed to
3384 be equal to the content of the REG_EQUAL note, the upper bits
3386 if (!paradoxical_subreg_p (tem
))
3388 insn
= get_last_insn ();
3389 wide_int wval_so_far
3390 = wi::uhwi (val_so_far
,
3391 GET_MODE_PRECISION (as_a
<scalar_mode
> (nmode
)));
3392 rtx c
= immed_wide_int_const (wval_so_far
, nmode
);
3393 set_dst_reg_note (insn
, REG_EQUAL
, gen_rtx_MULT (nmode
, tem
, c
),
3399 if (variant
== negate_variant
)
3401 val_so_far
= -val_so_far
;
3402 accum
= expand_unop (mode
, neg_optab
, accum
, target
, 0);
3404 else if (variant
== add_variant
)
3406 val_so_far
= val_so_far
+ 1;
3407 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
), target
);
3410 /* Compare only the bits of val and val_so_far that are significant
3411 in the result mode, to avoid sign-/zero-extension confusion. */
3412 nmode
= GET_MODE_INNER (mode
);
3413 val
&= GET_MODE_MASK (nmode
);
3414 val_so_far
&= GET_MODE_MASK (nmode
);
3415 gcc_assert (val
== (HOST_WIDE_INT
) val_so_far
);
3420 /* Perform a multiplication and return an rtx for the result.
3421 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3422 TARGET is a suggestion for where to store the result (an rtx).
3424 We check specially for a constant integer as OP1.
3425 If you want this check for OP0 as well, then before calling
3426 you should swap the two operands if OP0 would be constant. */
3429 expand_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3430 int unsignedp
, bool no_libcall
)
3432 enum mult_variant variant
;
3433 struct algorithm algorithm
;
3436 bool speed
= optimize_insn_for_speed_p ();
3437 bool do_trapv
= flag_trapv
&& SCALAR_INT_MODE_P (mode
) && !unsignedp
;
3439 if (CONSTANT_P (op0
))
3440 std::swap (op0
, op1
);
3442 /* For vectors, there are several simplifications that can be made if
3443 all elements of the vector constant are identical. */
3444 scalar_op1
= unwrap_const_vec_duplicate (op1
);
3446 if (INTEGRAL_MODE_P (mode
))
3449 HOST_WIDE_INT coeff
;
3453 if (op1
== CONST0_RTX (mode
))
3455 if (op1
== CONST1_RTX (mode
))
3457 if (op1
== CONSTM1_RTX (mode
))
3458 return expand_unop (mode
, do_trapv
? negv_optab
: neg_optab
,
3464 /* If mode is integer vector mode, check if the backend supports
3465 vector lshift (by scalar or vector) at all. If not, we can't use
3466 synthetized multiply. */
3467 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3468 && optab_handler (vashl_optab
, mode
) == CODE_FOR_nothing
3469 && optab_handler (ashl_optab
, mode
) == CODE_FOR_nothing
)
3472 /* These are the operations that are potentially turned into
3473 a sequence of shifts and additions. */
3474 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
3476 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3477 less than or equal in size to `unsigned int' this doesn't matter.
3478 If the mode is larger than `unsigned int', then synth_mult works
3479 only if the constant value exactly fits in an `unsigned int' without
3480 any truncation. This means that multiplying by negative values does
3481 not work; results are off by 2^32 on a 32 bit machine. */
3482 if (CONST_INT_P (scalar_op1
))
3484 coeff
= INTVAL (scalar_op1
);
3487 #if TARGET_SUPPORTS_WIDE_INT
3488 else if (CONST_WIDE_INT_P (scalar_op1
))
3490 else if (CONST_DOUBLE_AS_INT_P (scalar_op1
))
3493 int shift
= wi::exact_log2 (rtx_mode_t (scalar_op1
, mode
));
3494 /* Perfect power of 2 (other than 1, which is handled above). */
3496 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3497 shift
, target
, unsignedp
);
3504 /* We used to test optimize here, on the grounds that it's better to
3505 produce a smaller program when -O is not used. But this causes
3506 such a terrible slowdown sometimes that it seems better to always
3509 /* Special case powers of two. */
3510 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
)
3511 && !(is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
))
3512 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3513 floor_log2 (coeff
), target
, unsignedp
);
3515 fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3517 /* Attempt to handle multiplication of DImode values by negative
3518 coefficients, by performing the multiplication by a positive
3519 multiplier and then inverting the result. */
3520 if (is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
)
3522 /* Its safe to use -coeff even for INT_MIN, as the
3523 result is interpreted as an unsigned coefficient.
3524 Exclude cost of op0 from max_cost to match the cost
3525 calculation of the synth_mult. */
3526 coeff
= -(unsigned HOST_WIDE_INT
) coeff
;
3527 max_cost
= (set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
),
3529 - neg_cost (speed
, mode
));
3533 /* Special case powers of two. */
3534 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3536 rtx temp
= expand_shift (LSHIFT_EXPR
, mode
, op0
,
3537 floor_log2 (coeff
), target
, unsignedp
);
3538 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3541 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3544 rtx temp
= expand_mult_const (mode
, op0
, coeff
, NULL_RTX
,
3545 &algorithm
, variant
);
3546 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3551 /* Exclude cost of op0 from max_cost to match the cost
3552 calculation of the synth_mult. */
3553 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), mode
, speed
);
3554 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3555 return expand_mult_const (mode
, op0
, coeff
, target
,
3556 &algorithm
, variant
);
3560 /* Expand x*2.0 as x+x. */
3561 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1
)
3562 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1
), &dconst2
))
3564 op0
= force_reg (GET_MODE (op0
), op0
);
3565 return expand_binop (mode
, add_optab
, op0
, op0
,
3567 no_libcall
? OPTAB_WIDEN
: OPTAB_LIB_WIDEN
);
3570 /* This used to use umul_optab if unsigned, but for non-widening multiply
3571 there is no difference between signed and unsigned. */
3572 op0
= expand_binop (mode
, do_trapv
? smulv_optab
: smul_optab
,
3573 op0
, op1
, target
, unsignedp
,
3574 no_libcall
? OPTAB_WIDEN
: OPTAB_LIB_WIDEN
);
3575 gcc_assert (op0
|| no_libcall
);
3579 /* Return a cost estimate for multiplying a register by the given
3580 COEFFicient in the given MODE and SPEED. */
3583 mult_by_coeff_cost (HOST_WIDE_INT coeff
, machine_mode mode
, bool speed
)
3586 struct algorithm algorithm
;
3587 enum mult_variant variant
;
3589 rtx fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3590 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, fake_reg
),
3592 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3593 return algorithm
.cost
.cost
;
3598 /* Perform a widening multiplication and return an rtx for the result.
3599 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3600 TARGET is a suggestion for where to store the result (an rtx).
3601 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3602 or smul_widen_optab.
3604 We check specially for a constant integer as OP1, comparing the
3605 cost of a widening multiply against the cost of a sequence of shifts
3609 expand_widening_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3610 int unsignedp
, optab this_optab
)
3612 bool speed
= optimize_insn_for_speed_p ();
3615 if (CONST_INT_P (op1
)
3616 && GET_MODE (op0
) != VOIDmode
3617 && (cop1
= convert_modes (mode
, GET_MODE (op0
), op1
,
3618 this_optab
== umul_widen_optab
))
3619 && CONST_INT_P (cop1
)
3620 && (INTVAL (cop1
) >= 0
3621 || HWI_COMPUTABLE_MODE_P (mode
)))
3623 HOST_WIDE_INT coeff
= INTVAL (cop1
);
3625 enum mult_variant variant
;
3626 struct algorithm algorithm
;
3629 return CONST0_RTX (mode
);
3631 /* Special case powers of two. */
3632 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3634 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3635 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3636 floor_log2 (coeff
), target
, unsignedp
);
3639 /* Exclude cost of op0 from max_cost to match the cost
3640 calculation of the synth_mult. */
3641 max_cost
= mul_widen_cost (speed
, mode
);
3642 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3645 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3646 return expand_mult_const (mode
, op0
, coeff
, target
,
3647 &algorithm
, variant
);
3650 return expand_binop (mode
, this_optab
, op0
, op1
, target
,
3651 unsignedp
, OPTAB_LIB_WIDEN
);
3654 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3655 replace division by D, and put the least significant N bits of the result
3656 in *MULTIPLIER_PTR and return the most significant bit.
3658 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3659 needed precision is in PRECISION (should be <= N).
3661 PRECISION should be as small as possible so this function can choose
3662 multiplier more freely.
3664 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3665 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3667 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3668 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3670 unsigned HOST_WIDE_INT
3671 choose_multiplier (unsigned HOST_WIDE_INT d
, int n
, int precision
,
3672 unsigned HOST_WIDE_INT
*multiplier_ptr
,
3673 int *post_shift_ptr
, int *lgup_ptr
)
3675 int lgup
, post_shift
;
3678 /* lgup = ceil(log2(divisor)); */
3679 lgup
= ceil_log2 (d
);
3681 gcc_assert (lgup
<= n
);
3684 pow2
= n
+ lgup
- precision
;
3686 /* mlow = 2^(N + lgup)/d */
3687 wide_int val
= wi::set_bit_in_zero (pow
, HOST_BITS_PER_DOUBLE_INT
);
3688 wide_int mlow
= wi::udiv_trunc (val
, d
);
3690 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3691 val
|= wi::set_bit_in_zero (pow2
, HOST_BITS_PER_DOUBLE_INT
);
3692 wide_int mhigh
= wi::udiv_trunc (val
, d
);
3694 /* If precision == N, then mlow, mhigh exceed 2^N
3695 (but they do not exceed 2^(N+1)). */
3697 /* Reduce to lowest terms. */
3698 for (post_shift
= lgup
; post_shift
> 0; post_shift
--)
3700 unsigned HOST_WIDE_INT ml_lo
= wi::extract_uhwi (mlow
, 1,
3701 HOST_BITS_PER_WIDE_INT
);
3702 unsigned HOST_WIDE_INT mh_lo
= wi::extract_uhwi (mhigh
, 1,
3703 HOST_BITS_PER_WIDE_INT
);
3707 mlow
= wi::uhwi (ml_lo
, HOST_BITS_PER_DOUBLE_INT
);
3708 mhigh
= wi::uhwi (mh_lo
, HOST_BITS_PER_DOUBLE_INT
);
3711 *post_shift_ptr
= post_shift
;
3713 if (n
< HOST_BITS_PER_WIDE_INT
)
3715 unsigned HOST_WIDE_INT mask
= (HOST_WIDE_INT_1U
<< n
) - 1;
3716 *multiplier_ptr
= mhigh
.to_uhwi () & mask
;
3717 return mhigh
.to_uhwi () > mask
;
3721 *multiplier_ptr
= mhigh
.to_uhwi ();
3722 return wi::extract_uhwi (mhigh
, HOST_BITS_PER_WIDE_INT
, 1);
3726 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3727 congruent to 1 (mod 2**N). */
3729 static unsigned HOST_WIDE_INT
3730 invert_mod2n (unsigned HOST_WIDE_INT x
, int n
)
3732 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3734 /* The algorithm notes that the choice y = x satisfies
3735 x*y == 1 mod 2^3, since x is assumed odd.
3736 Each iteration doubles the number of bits of significance in y. */
3738 unsigned HOST_WIDE_INT mask
;
3739 unsigned HOST_WIDE_INT y
= x
;
3742 mask
= (n
== HOST_BITS_PER_WIDE_INT
3744 : (HOST_WIDE_INT_1U
<< n
) - 1);
3748 y
= y
* (2 - x
*y
) & mask
; /* Modulo 2^N */
3754 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3755 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3756 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3757 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3760 The result is put in TARGET if that is convenient.
3762 MODE is the mode of operation. */
3765 expand_mult_highpart_adjust (scalar_int_mode mode
, rtx adj_operand
, rtx op0
,
3766 rtx op1
, rtx target
, int unsignedp
)
3769 enum rtx_code adj_code
= unsignedp
? PLUS
: MINUS
;
3771 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
3772 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3773 tem
= expand_and (mode
, tem
, op1
, NULL_RTX
);
3775 = force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3778 tem
= expand_shift (RSHIFT_EXPR
, mode
, op1
,
3779 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3780 tem
= expand_and (mode
, tem
, op0
, NULL_RTX
);
3781 target
= force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3787 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3790 extract_high_half (scalar_int_mode mode
, rtx op
)
3792 if (mode
== word_mode
)
3793 return gen_highpart (mode
, op
);
3795 scalar_int_mode wider_mode
= GET_MODE_WIDER_MODE (mode
).require ();
3797 op
= expand_shift (RSHIFT_EXPR
, wider_mode
, op
,
3798 GET_MODE_BITSIZE (mode
), 0, 1);
3799 return convert_modes (mode
, wider_mode
, op
, 0);
3802 /* Like expmed_mult_highpart, but only consider using a multiplication
3803 optab. OP1 is an rtx for the constant operand. */
3806 expmed_mult_highpart_optab (scalar_int_mode mode
, rtx op0
, rtx op1
,
3807 rtx target
, int unsignedp
, int max_cost
)
3809 rtx narrow_op1
= gen_int_mode (INTVAL (op1
), mode
);
3813 bool speed
= optimize_insn_for_speed_p ();
3815 scalar_int_mode wider_mode
= GET_MODE_WIDER_MODE (mode
).require ();
3817 size
= GET_MODE_BITSIZE (mode
);
3819 /* Firstly, try using a multiplication insn that only generates the needed
3820 high part of the product, and in the sign flavor of unsignedp. */
3821 if (mul_highpart_cost (speed
, mode
) < max_cost
)
3823 moptab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
3824 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3825 unsignedp
, OPTAB_DIRECT
);
3830 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3831 Need to adjust the result after the multiplication. */
3832 if (size
- 1 < BITS_PER_WORD
3833 && (mul_highpart_cost (speed
, mode
)
3834 + 2 * shift_cost (speed
, mode
, size
-1)
3835 + 4 * add_cost (speed
, mode
) < max_cost
))
3837 moptab
= unsignedp
? smul_highpart_optab
: umul_highpart_optab
;
3838 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3839 unsignedp
, OPTAB_DIRECT
);
3841 /* We used the wrong signedness. Adjust the result. */
3842 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3846 /* Try widening multiplication. */
3847 moptab
= unsignedp
? umul_widen_optab
: smul_widen_optab
;
3848 if (convert_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3849 && mul_widen_cost (speed
, wider_mode
) < max_cost
)
3851 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
, 0,
3852 unsignedp
, OPTAB_WIDEN
);
3854 return extract_high_half (mode
, tem
);
3857 /* Try widening the mode and perform a non-widening multiplication. */
3858 if (optab_handler (smul_optab
, wider_mode
) != CODE_FOR_nothing
3859 && size
- 1 < BITS_PER_WORD
3860 && (mul_cost (speed
, wider_mode
) + shift_cost (speed
, mode
, size
-1)
3866 /* We need to widen the operands, for example to ensure the
3867 constant multiplier is correctly sign or zero extended.
3868 Use a sequence to clean-up any instructions emitted by
3869 the conversions if things don't work out. */
3871 wop0
= convert_modes (wider_mode
, mode
, op0
, unsignedp
);
3872 wop1
= convert_modes (wider_mode
, mode
, op1
, unsignedp
);
3873 tem
= expand_binop (wider_mode
, smul_optab
, wop0
, wop1
, 0,
3874 unsignedp
, OPTAB_WIDEN
);
3875 insns
= get_insns ();
3881 return extract_high_half (mode
, tem
);
3885 /* Try widening multiplication of opposite signedness, and adjust. */
3886 moptab
= unsignedp
? smul_widen_optab
: umul_widen_optab
;
3887 if (convert_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3888 && size
- 1 < BITS_PER_WORD
3889 && (mul_widen_cost (speed
, wider_mode
)
3890 + 2 * shift_cost (speed
, mode
, size
-1)
3891 + 4 * add_cost (speed
, mode
) < max_cost
))
3893 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
,
3894 NULL_RTX
, ! unsignedp
, OPTAB_WIDEN
);
3897 tem
= extract_high_half (mode
, tem
);
3898 /* We used the wrong signedness. Adjust the result. */
3899 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3907 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3908 putting the high half of the result in TARGET if that is convenient,
3909 and return where the result is. If the operation cannot be performed,
3912 MODE is the mode of operation and result.
3914 UNSIGNEDP nonzero means unsigned multiply.
3916 MAX_COST is the total allowed cost for the expanded RTL. */
3919 expmed_mult_highpart (scalar_int_mode mode
, rtx op0
, rtx op1
,
3920 rtx target
, int unsignedp
, int max_cost
)
3922 unsigned HOST_WIDE_INT cnst1
;
3924 bool sign_adjust
= false;
3925 enum mult_variant variant
;
3926 struct algorithm alg
;
3928 bool speed
= optimize_insn_for_speed_p ();
3930 /* We can't support modes wider than HOST_BITS_PER_INT. */
3931 gcc_assert (HWI_COMPUTABLE_MODE_P (mode
));
3933 cnst1
= INTVAL (op1
) & GET_MODE_MASK (mode
);
3935 /* We can't optimize modes wider than BITS_PER_WORD.
3936 ??? We might be able to perform double-word arithmetic if
3937 mode == word_mode, however all the cost calculations in
3938 synth_mult etc. assume single-word operations. */
3939 scalar_int_mode wider_mode
= GET_MODE_WIDER_MODE (mode
).require ();
3940 if (GET_MODE_BITSIZE (wider_mode
) > BITS_PER_WORD
)
3941 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3942 unsignedp
, max_cost
);
3944 extra_cost
= shift_cost (speed
, mode
, GET_MODE_BITSIZE (mode
) - 1);
3946 /* Check whether we try to multiply by a negative constant. */
3947 if (!unsignedp
&& ((cnst1
>> (GET_MODE_BITSIZE (mode
) - 1)) & 1))
3950 extra_cost
+= add_cost (speed
, mode
);
3953 /* See whether shift/add multiplication is cheap enough. */
3954 if (choose_mult_variant (wider_mode
, cnst1
, &alg
, &variant
,
3955 max_cost
- extra_cost
))
3957 /* See whether the specialized multiplication optabs are
3958 cheaper than the shift/add version. */
3959 tem
= expmed_mult_highpart_optab (mode
, op0
, op1
, target
, unsignedp
,
3960 alg
.cost
.cost
+ extra_cost
);
3964 tem
= convert_to_mode (wider_mode
, op0
, unsignedp
);
3965 tem
= expand_mult_const (wider_mode
, tem
, cnst1
, 0, &alg
, variant
);
3966 tem
= extract_high_half (mode
, tem
);
3968 /* Adjust result for signedness. */
3970 tem
= force_operand (gen_rtx_MINUS (mode
, tem
, op0
), tem
);
3974 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3975 unsignedp
, max_cost
);
3979 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3982 expand_smod_pow2 (scalar_int_mode mode
, rtx op0
, HOST_WIDE_INT d
)
3984 rtx result
, temp
, shift
;
3985 rtx_code_label
*label
;
3987 int prec
= GET_MODE_PRECISION (mode
);
3989 logd
= floor_log2 (d
);
3990 result
= gen_reg_rtx (mode
);
3992 /* Avoid conditional branches when they're expensive. */
3993 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3994 && optimize_insn_for_speed_p ())
3996 rtx signmask
= emit_store_flag (result
, LT
, op0
, const0_rtx
,
4000 HOST_WIDE_INT masklow
= (HOST_WIDE_INT_1
<< logd
) - 1;
4001 signmask
= force_reg (mode
, signmask
);
4002 shift
= gen_int_shift_amount (mode
, GET_MODE_BITSIZE (mode
) - logd
);
4004 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4005 which instruction sequence to use. If logical right shifts
4006 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4007 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4009 temp
= gen_rtx_LSHIFTRT (mode
, result
, shift
);
4010 if (optab_handler (lshr_optab
, mode
) == CODE_FOR_nothing
4011 || (set_src_cost (temp
, mode
, optimize_insn_for_speed_p ())
4012 > COSTS_N_INSNS (2)))
4014 temp
= expand_binop (mode
, xor_optab
, op0
, signmask
,
4015 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4016 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
4017 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4018 temp
= expand_binop (mode
, and_optab
, temp
,
4019 gen_int_mode (masklow
, mode
),
4020 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4021 temp
= expand_binop (mode
, xor_optab
, temp
, signmask
,
4022 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4023 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
4024 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4028 signmask
= expand_binop (mode
, lshr_optab
, signmask
, shift
,
4029 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4030 signmask
= force_reg (mode
, signmask
);
4032 temp
= expand_binop (mode
, add_optab
, op0
, signmask
,
4033 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4034 temp
= expand_binop (mode
, and_optab
, temp
,
4035 gen_int_mode (masklow
, mode
),
4036 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4037 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
4038 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4044 /* Mask contains the mode's signbit and the significant bits of the
4045 modulus. By including the signbit in the operation, many targets
4046 can avoid an explicit compare operation in the following comparison
4048 wide_int mask
= wi::mask (logd
, false, prec
);
4049 mask
= wi::set_bit (mask
, prec
- 1);
4051 temp
= expand_binop (mode
, and_optab
, op0
,
4052 immed_wide_int_const (mask
, mode
),
4053 result
, 1, OPTAB_LIB_WIDEN
);
4055 emit_move_insn (result
, temp
);
4057 label
= gen_label_rtx ();
4058 do_cmp_and_jump (result
, const0_rtx
, GE
, mode
, label
);
4060 temp
= expand_binop (mode
, sub_optab
, result
, const1_rtx
, result
,
4061 0, OPTAB_LIB_WIDEN
);
4063 mask
= wi::mask (logd
, true, prec
);
4064 temp
= expand_binop (mode
, ior_optab
, temp
,
4065 immed_wide_int_const (mask
, mode
),
4066 result
, 1, OPTAB_LIB_WIDEN
);
4067 temp
= expand_binop (mode
, add_optab
, temp
, const1_rtx
, result
,
4068 0, OPTAB_LIB_WIDEN
);
4070 emit_move_insn (result
, temp
);
4075 /* Expand signed division of OP0 by a power of two D in mode MODE.
4076 This routine is only called for positive values of D. */
4079 expand_sdiv_pow2 (scalar_int_mode mode
, rtx op0
, HOST_WIDE_INT d
)
4082 rtx_code_label
*label
;
4085 logd
= floor_log2 (d
);
4088 && BRANCH_COST (optimize_insn_for_speed_p (),
4091 temp
= gen_reg_rtx (mode
);
4092 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, 1);
4093 if (temp
!= NULL_RTX
)
4095 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
4096 0, OPTAB_LIB_WIDEN
);
4097 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
4101 if (HAVE_conditional_move
4102 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4107 temp2
= copy_to_mode_reg (mode
, op0
);
4108 temp
= expand_binop (mode
, add_optab
, temp2
, gen_int_mode (d
- 1, mode
),
4109 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4110 temp
= force_reg (mode
, temp
);
4112 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4113 temp2
= emit_conditional_move (temp2
, LT
, temp2
, const0_rtx
,
4114 mode
, temp
, temp2
, mode
, 0);
4117 rtx_insn
*seq
= get_insns ();
4120 return expand_shift (RSHIFT_EXPR
, mode
, temp2
, logd
, NULL_RTX
, 0);
4125 if (BRANCH_COST (optimize_insn_for_speed_p (),
4128 int ushift
= GET_MODE_BITSIZE (mode
) - logd
;
4130 temp
= gen_reg_rtx (mode
);
4131 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, -1);
4132 if (temp
!= NULL_RTX
)
4134 if (GET_MODE_BITSIZE (mode
) >= BITS_PER_WORD
4135 || shift_cost (optimize_insn_for_speed_p (), mode
, ushift
)
4136 > COSTS_N_INSNS (1))
4137 temp
= expand_binop (mode
, and_optab
, temp
,
4138 gen_int_mode (d
- 1, mode
),
4139 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4141 temp
= expand_shift (RSHIFT_EXPR
, mode
, temp
,
4142 ushift
, NULL_RTX
, 1);
4143 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
4144 0, OPTAB_LIB_WIDEN
);
4145 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
4149 label
= gen_label_rtx ();
4150 temp
= copy_to_mode_reg (mode
, op0
);
4151 do_cmp_and_jump (temp
, const0_rtx
, GE
, mode
, label
);
4152 expand_inc (temp
, gen_int_mode (d
- 1, mode
));
4154 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
4157 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4158 if that is convenient, and returning where the result is.
4159 You may request either the quotient or the remainder as the result;
4160 specify REM_FLAG nonzero to get the remainder.
4162 CODE is the expression code for which kind of division this is;
4163 it controls how rounding is done. MODE is the machine mode to use.
4164 UNSIGNEDP nonzero means do unsigned division. */
4166 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4167 and then correct it by or'ing in missing high bits
4168 if result of ANDI is nonzero.
4169 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4170 This could optimize to a bfexts instruction.
4171 But C doesn't use these operations, so their optimizations are
4173 /* ??? For modulo, we don't actually need the highpart of the first product,
4174 the low part will do nicely. And for small divisors, the second multiply
4175 can also be a low-part only multiply or even be completely left out.
4176 E.g. to calculate the remainder of a division by 3 with a 32 bit
4177 multiply, multiply with 0x55555556 and extract the upper two bits;
4178 the result is exact for inputs up to 0x1fffffff.
4179 The input range can be reduced by using cross-sum rules.
4180 For odd divisors >= 3, the following table gives right shift counts
4181 so that if a number is shifted by an integer multiple of the given
4182 amount, the remainder stays the same:
4183 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4184 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4185 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4186 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4187 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4189 Cross-sum rules for even numbers can be derived by leaving as many bits
4190 to the right alone as the divisor has zeros to the right.
4191 E.g. if x is an unsigned 32 bit number:
4192 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4196 expand_divmod (int rem_flag
, enum tree_code code
, machine_mode mode
,
4197 rtx op0
, rtx op1
, rtx target
, int unsignedp
,
4198 enum optab_methods methods
)
4200 machine_mode compute_mode
;
4202 rtx quotient
= 0, remainder
= 0;
4205 optab optab1
, optab2
;
4206 int op1_is_constant
, op1_is_pow2
= 0;
4207 int max_cost
, extra_cost
;
4208 static HOST_WIDE_INT last_div_const
= 0;
4209 bool speed
= optimize_insn_for_speed_p ();
4211 op1_is_constant
= CONST_INT_P (op1
);
4212 if (op1_is_constant
)
4214 wide_int ext_op1
= rtx_mode_t (op1
, mode
);
4215 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4217 && wi::popcount (wi::neg (ext_op1
)) == 1));
4221 This is the structure of expand_divmod:
4223 First comes code to fix up the operands so we can perform the operations
4224 correctly and efficiently.
4226 Second comes a switch statement with code specific for each rounding mode.
4227 For some special operands this code emits all RTL for the desired
4228 operation, for other cases, it generates only a quotient and stores it in
4229 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4230 to indicate that it has not done anything.
4232 Last comes code that finishes the operation. If QUOTIENT is set and
4233 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4234 QUOTIENT is not set, it is computed using trunc rounding.
4236 We try to generate special code for division and remainder when OP1 is a
4237 constant. If |OP1| = 2**n we can use shifts and some other fast
4238 operations. For other values of OP1, we compute a carefully selected
4239 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4242 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4243 half of the product. Different strategies for generating the product are
4244 implemented in expmed_mult_highpart.
4246 If what we actually want is the remainder, we generate that by another
4247 by-constant multiplication and a subtraction. */
4249 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4250 code below will malfunction if we are, so check here and handle
4251 the special case if so. */
4252 if (op1
== const1_rtx
)
4253 return rem_flag
? const0_rtx
: op0
;
4255 /* When dividing by -1, we could get an overflow.
4256 negv_optab can handle overflows. */
4257 if (! unsignedp
&& op1
== constm1_rtx
)
4261 return expand_unop (mode
, flag_trapv
&& GET_MODE_CLASS (mode
) == MODE_INT
4262 ? negv_optab
: neg_optab
, op0
, target
, 0);
4266 /* Don't use the function value register as a target
4267 since we have to read it as well as write it,
4268 and function-inlining gets confused by this. */
4269 && ((REG_P (target
) && REG_FUNCTION_VALUE_P (target
))
4270 /* Don't clobber an operand while doing a multi-step calculation. */
4271 || ((rem_flag
|| op1_is_constant
)
4272 && (reg_mentioned_p (target
, op0
)
4273 || (MEM_P (op0
) && MEM_P (target
))))
4274 || reg_mentioned_p (target
, op1
)
4275 || (MEM_P (op1
) && MEM_P (target
))))
4278 /* Get the mode in which to perform this computation. Normally it will
4279 be MODE, but sometimes we can't do the desired operation in MODE.
4280 If so, pick a wider mode in which we can do the operation. Convert
4281 to that mode at the start to avoid repeated conversions.
4283 First see what operations we need. These depend on the expression
4284 we are evaluating. (We assume that divxx3 insns exist under the
4285 same conditions that modxx3 insns and that these insns don't normally
4286 fail. If these assumptions are not correct, we may generate less
4287 efficient code in some cases.)
4289 Then see if we find a mode in which we can open-code that operation
4290 (either a division, modulus, or shift). Finally, check for the smallest
4291 mode for which we can do the operation with a library call. */
4293 /* We might want to refine this now that we have division-by-constant
4294 optimization. Since expmed_mult_highpart tries so many variants, it is
4295 not straightforward to generalize this. Maybe we should make an array
4296 of possible modes in init_expmed? Save this for GCC 2.7. */
4298 optab1
= (op1_is_pow2
4299 ? (unsignedp
? lshr_optab
: ashr_optab
)
4300 : (unsignedp
? udiv_optab
: sdiv_optab
));
4301 optab2
= (op1_is_pow2
? optab1
4302 : (unsignedp
? udivmod_optab
: sdivmod_optab
));
4304 if (methods
== OPTAB_WIDEN
|| methods
== OPTAB_LIB_WIDEN
)
4306 FOR_EACH_MODE_FROM (compute_mode
, mode
)
4307 if (optab_handler (optab1
, compute_mode
) != CODE_FOR_nothing
4308 || optab_handler (optab2
, compute_mode
) != CODE_FOR_nothing
)
4311 if (compute_mode
== VOIDmode
&& methods
== OPTAB_LIB_WIDEN
)
4312 FOR_EACH_MODE_FROM (compute_mode
, mode
)
4313 if (optab_libfunc (optab1
, compute_mode
)
4314 || optab_libfunc (optab2
, compute_mode
))
4318 compute_mode
= mode
;
4320 /* If we still couldn't find a mode, use MODE, but expand_binop will
4322 if (compute_mode
== VOIDmode
)
4323 compute_mode
= mode
;
4325 if (target
&& GET_MODE (target
) == compute_mode
)
4328 tquotient
= gen_reg_rtx (compute_mode
);
4331 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4332 (mode), and thereby get better code when OP1 is a constant. Do that
4333 later. It will require going over all usages of SIZE below. */
4334 size
= GET_MODE_BITSIZE (mode
);
4337 /* Only deduct something for a REM if the last divide done was
4338 for a different constant. Then set the constant of the last
4340 max_cost
= (unsignedp
4341 ? udiv_cost (speed
, compute_mode
)
4342 : sdiv_cost (speed
, compute_mode
));
4343 if (rem_flag
&& ! (last_div_const
!= 0 && op1_is_constant
4344 && INTVAL (op1
) == last_div_const
))
4345 max_cost
-= (mul_cost (speed
, compute_mode
)
4346 + add_cost (speed
, compute_mode
));
4348 last_div_const
= ! rem_flag
&& op1_is_constant
? INTVAL (op1
) : 0;
4350 /* Now convert to the best mode to use. */
4351 if (compute_mode
!= mode
)
4353 op0
= convert_modes (compute_mode
, mode
, op0
, unsignedp
);
4354 op1
= convert_modes (compute_mode
, mode
, op1
, unsignedp
);
4356 /* convert_modes may have placed op1 into a register, so we
4357 must recompute the following. */
4358 op1_is_constant
= CONST_INT_P (op1
);
4359 if (op1_is_constant
)
4361 wide_int ext_op1
= rtx_mode_t (op1
, compute_mode
);
4362 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4364 && wi::popcount (wi::neg (ext_op1
)) == 1));
4370 /* If one of the operands is a volatile MEM, copy it into a register. */
4372 if (MEM_P (op0
) && MEM_VOLATILE_P (op0
))
4373 op0
= force_reg (compute_mode
, op0
);
4374 if (MEM_P (op1
) && MEM_VOLATILE_P (op1
))
4375 op1
= force_reg (compute_mode
, op1
);
4377 /* If we need the remainder or if OP1 is constant, we need to
4378 put OP0 in a register in case it has any queued subexpressions. */
4379 if (rem_flag
|| op1_is_constant
)
4380 op0
= force_reg (compute_mode
, op0
);
4382 last
= get_last_insn ();
4384 /* Promote floor rounding to trunc rounding for unsigned operations. */
4387 if (code
== FLOOR_DIV_EXPR
)
4388 code
= TRUNC_DIV_EXPR
;
4389 if (code
== FLOOR_MOD_EXPR
)
4390 code
= TRUNC_MOD_EXPR
;
4391 if (code
== EXACT_DIV_EXPR
&& op1_is_pow2
)
4392 code
= TRUNC_DIV_EXPR
;
4395 if (op1
!= const0_rtx
)
4398 case TRUNC_MOD_EXPR
:
4399 case TRUNC_DIV_EXPR
:
4400 if (op1_is_constant
)
4402 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
4403 int size
= GET_MODE_BITSIZE (int_mode
);
4406 unsigned HOST_WIDE_INT mh
, ml
;
4407 int pre_shift
, post_shift
;
4409 wide_int wd
= rtx_mode_t (op1
, int_mode
);
4410 unsigned HOST_WIDE_INT d
= wd
.to_uhwi ();
4412 if (wi::popcount (wd
) == 1)
4414 pre_shift
= floor_log2 (d
);
4417 unsigned HOST_WIDE_INT mask
4418 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4420 = expand_binop (int_mode
, and_optab
, op0
,
4421 gen_int_mode (mask
, int_mode
),
4422 remainder
, 1, methods
);
4424 return gen_lowpart (mode
, remainder
);
4426 quotient
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
4427 pre_shift
, tquotient
, 1);
4429 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4431 if (d
>= (HOST_WIDE_INT_1U
<< (size
- 1)))
4433 /* Most significant bit of divisor is set; emit an scc
4435 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4440 /* Find a suitable multiplier and right shift count
4441 instead of multiplying with D. */
4443 mh
= choose_multiplier (d
, size
, size
,
4444 &ml
, &post_shift
, &dummy
);
4446 /* If the suggested multiplier is more than SIZE bits,
4447 we can do better for even divisors, using an
4448 initial right shift. */
4449 if (mh
!= 0 && (d
& 1) == 0)
4451 pre_shift
= ctz_or_zero (d
);
4452 mh
= choose_multiplier (d
>> pre_shift
, size
,
4454 &ml
, &post_shift
, &dummy
);
4464 if (post_shift
- 1 >= BITS_PER_WORD
)
4468 = (shift_cost (speed
, int_mode
, post_shift
- 1)
4469 + shift_cost (speed
, int_mode
, 1)
4470 + 2 * add_cost (speed
, int_mode
));
4471 t1
= expmed_mult_highpart
4472 (int_mode
, op0
, gen_int_mode (ml
, int_mode
),
4473 NULL_RTX
, 1, max_cost
- extra_cost
);
4476 t2
= force_operand (gen_rtx_MINUS (int_mode
,
4479 t3
= expand_shift (RSHIFT_EXPR
, int_mode
,
4480 t2
, 1, NULL_RTX
, 1);
4481 t4
= force_operand (gen_rtx_PLUS (int_mode
,
4484 quotient
= expand_shift
4485 (RSHIFT_EXPR
, int_mode
, t4
,
4486 post_shift
- 1, tquotient
, 1);
4492 if (pre_shift
>= BITS_PER_WORD
4493 || post_shift
>= BITS_PER_WORD
)
4497 (RSHIFT_EXPR
, int_mode
, op0
,
4498 pre_shift
, NULL_RTX
, 1);
4500 = (shift_cost (speed
, int_mode
, pre_shift
)
4501 + shift_cost (speed
, int_mode
, post_shift
));
4502 t2
= expmed_mult_highpart
4504 gen_int_mode (ml
, int_mode
),
4505 NULL_RTX
, 1, max_cost
- extra_cost
);
4508 quotient
= expand_shift
4509 (RSHIFT_EXPR
, int_mode
, t2
,
4510 post_shift
, tquotient
, 1);
4514 else /* Too wide mode to use tricky code */
4517 insn
= get_last_insn ();
4519 set_dst_reg_note (insn
, REG_EQUAL
,
4520 gen_rtx_UDIV (int_mode
, op0
, op1
),
4523 else /* TRUNC_DIV, signed */
4525 unsigned HOST_WIDE_INT ml
;
4526 int lgup
, post_shift
;
4528 HOST_WIDE_INT d
= INTVAL (op1
);
4529 unsigned HOST_WIDE_INT abs_d
;
4531 /* Not prepared to handle division/remainder by
4532 0xffffffffffffffff8000000000000000 etc. */
4533 if (d
== HOST_WIDE_INT_MIN
&& size
> HOST_BITS_PER_WIDE_INT
)
4536 /* Since d might be INT_MIN, we have to cast to
4537 unsigned HOST_WIDE_INT before negating to avoid
4538 undefined signed overflow. */
4540 ? (unsigned HOST_WIDE_INT
) d
4541 : - (unsigned HOST_WIDE_INT
) d
);
4543 /* n rem d = n rem -d */
4544 if (rem_flag
&& d
< 0)
4547 op1
= gen_int_mode (abs_d
, int_mode
);
4553 quotient
= expand_unop (int_mode
, neg_optab
, op0
,
4555 else if (size
<= HOST_BITS_PER_WIDE_INT
4556 && abs_d
== HOST_WIDE_INT_1U
<< (size
- 1))
4558 /* This case is not handled correctly below. */
4559 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4564 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4565 && (size
<= HOST_BITS_PER_WIDE_INT
|| d
>= 0)
4567 ? smod_pow2_cheap (speed
, int_mode
)
4568 : sdiv_pow2_cheap (speed
, int_mode
))
4569 /* We assume that cheap metric is true if the
4570 optab has an expander for this mode. */
4571 && ((optab_handler ((rem_flag
? smod_optab
4574 != CODE_FOR_nothing
)
4575 || (optab_handler (sdivmod_optab
, int_mode
)
4576 != CODE_FOR_nothing
)))
4578 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
))
4582 remainder
= expand_smod_pow2 (int_mode
, op0
, d
);
4584 return gen_lowpart (mode
, remainder
);
4587 if (sdiv_pow2_cheap (speed
, int_mode
)
4588 && ((optab_handler (sdiv_optab
, int_mode
)
4589 != CODE_FOR_nothing
)
4590 || (optab_handler (sdivmod_optab
, int_mode
)
4591 != CODE_FOR_nothing
)))
4592 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4594 gen_int_mode (abs_d
,
4598 quotient
= expand_sdiv_pow2 (int_mode
, op0
, abs_d
);
4600 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4601 negate the quotient. */
4604 insn
= get_last_insn ();
4606 && abs_d
< (HOST_WIDE_INT_1U
4607 << (HOST_BITS_PER_WIDE_INT
- 1)))
4608 set_dst_reg_note (insn
, REG_EQUAL
,
4609 gen_rtx_DIV (int_mode
, op0
,
4615 quotient
= expand_unop (int_mode
, neg_optab
,
4616 quotient
, quotient
, 0);
4619 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4621 choose_multiplier (abs_d
, size
, size
- 1,
4622 &ml
, &post_shift
, &lgup
);
4623 if (ml
< HOST_WIDE_INT_1U
<< (size
- 1))
4627 if (post_shift
>= BITS_PER_WORD
4628 || size
- 1 >= BITS_PER_WORD
)
4631 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4632 + shift_cost (speed
, int_mode
, size
- 1)
4633 + add_cost (speed
, int_mode
));
4634 t1
= expmed_mult_highpart
4635 (int_mode
, op0
, gen_int_mode (ml
, int_mode
),
4636 NULL_RTX
, 0, max_cost
- extra_cost
);
4640 (RSHIFT_EXPR
, int_mode
, t1
,
4641 post_shift
, NULL_RTX
, 0);
4643 (RSHIFT_EXPR
, int_mode
, op0
,
4644 size
- 1, NULL_RTX
, 0);
4647 = force_operand (gen_rtx_MINUS (int_mode
, t3
, t2
),
4651 = force_operand (gen_rtx_MINUS (int_mode
, t2
, t3
),
4658 if (post_shift
>= BITS_PER_WORD
4659 || size
- 1 >= BITS_PER_WORD
)
4662 ml
|= HOST_WIDE_INT_M1U
<< (size
- 1);
4663 mlr
= gen_int_mode (ml
, int_mode
);
4664 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4665 + shift_cost (speed
, int_mode
, size
- 1)
4666 + 2 * add_cost (speed
, int_mode
));
4667 t1
= expmed_mult_highpart (int_mode
, op0
, mlr
,
4669 max_cost
- extra_cost
);
4672 t2
= force_operand (gen_rtx_PLUS (int_mode
, t1
, op0
),
4675 (RSHIFT_EXPR
, int_mode
, t2
,
4676 post_shift
, NULL_RTX
, 0);
4678 (RSHIFT_EXPR
, int_mode
, op0
,
4679 size
- 1, NULL_RTX
, 0);
4682 = force_operand (gen_rtx_MINUS (int_mode
, t4
, t3
),
4686 = force_operand (gen_rtx_MINUS (int_mode
, t3
, t4
),
4690 else /* Too wide mode to use tricky code */
4693 insn
= get_last_insn ();
4695 set_dst_reg_note (insn
, REG_EQUAL
,
4696 gen_rtx_DIV (int_mode
, op0
, op1
),
4702 delete_insns_since (last
);
4705 case FLOOR_DIV_EXPR
:
4706 case FLOOR_MOD_EXPR
:
4707 /* We will come here only for signed operations. */
4708 if (op1_is_constant
&& HWI_COMPUTABLE_MODE_P (compute_mode
))
4710 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
4711 int size
= GET_MODE_BITSIZE (int_mode
);
4712 unsigned HOST_WIDE_INT mh
, ml
;
4713 int pre_shift
, lgup
, post_shift
;
4714 HOST_WIDE_INT d
= INTVAL (op1
);
4718 /* We could just as easily deal with negative constants here,
4719 but it does not seem worth the trouble for GCC 2.6. */
4720 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4722 pre_shift
= floor_log2 (d
);
4725 unsigned HOST_WIDE_INT mask
4726 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4727 remainder
= expand_binop
4728 (int_mode
, and_optab
, op0
,
4729 gen_int_mode (mask
, int_mode
),
4730 remainder
, 0, methods
);
4732 return gen_lowpart (mode
, remainder
);
4734 quotient
= expand_shift
4735 (RSHIFT_EXPR
, int_mode
, op0
,
4736 pre_shift
, tquotient
, 0);
4742 mh
= choose_multiplier (d
, size
, size
- 1,
4743 &ml
, &post_shift
, &lgup
);
4746 if (post_shift
< BITS_PER_WORD
4747 && size
- 1 < BITS_PER_WORD
)
4750 (RSHIFT_EXPR
, int_mode
, op0
,
4751 size
- 1, NULL_RTX
, 0);
4752 t2
= expand_binop (int_mode
, xor_optab
, op0
, t1
,
4753 NULL_RTX
, 0, OPTAB_WIDEN
);
4754 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4755 + shift_cost (speed
, int_mode
, size
- 1)
4756 + 2 * add_cost (speed
, int_mode
));
4757 t3
= expmed_mult_highpart
4758 (int_mode
, t2
, gen_int_mode (ml
, int_mode
),
4759 NULL_RTX
, 1, max_cost
- extra_cost
);
4763 (RSHIFT_EXPR
, int_mode
, t3
,
4764 post_shift
, NULL_RTX
, 1);
4765 quotient
= expand_binop (int_mode
, xor_optab
,
4766 t4
, t1
, tquotient
, 0,
4774 rtx nsign
, t1
, t2
, t3
, t4
;
4775 t1
= force_operand (gen_rtx_PLUS (int_mode
,
4776 op0
, constm1_rtx
), NULL_RTX
);
4777 t2
= expand_binop (int_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4779 nsign
= expand_shift (RSHIFT_EXPR
, int_mode
, t2
,
4780 size
- 1, NULL_RTX
, 0);
4781 t3
= force_operand (gen_rtx_MINUS (int_mode
, t1
, nsign
),
4783 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, int_mode
, t3
, op1
,
4788 t5
= expand_unop (int_mode
, one_cmpl_optab
, nsign
,
4790 quotient
= force_operand (gen_rtx_PLUS (int_mode
, t4
, t5
),
4798 delete_insns_since (last
);
4800 /* Try using an instruction that produces both the quotient and
4801 remainder, using truncation. We can easily compensate the quotient
4802 or remainder to get floor rounding, once we have the remainder.
4803 Notice that we compute also the final remainder value here,
4804 and return the result right away. */
4805 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4806 target
= gen_reg_rtx (compute_mode
);
4811 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4812 quotient
= gen_reg_rtx (compute_mode
);
4817 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4818 remainder
= gen_reg_rtx (compute_mode
);
4821 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4822 quotient
, remainder
, 0))
4824 /* This could be computed with a branch-less sequence.
4825 Save that for later. */
4827 rtx_code_label
*label
= gen_label_rtx ();
4828 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4829 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4830 NULL_RTX
, 0, OPTAB_WIDEN
);
4831 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4832 expand_dec (quotient
, const1_rtx
);
4833 expand_inc (remainder
, op1
);
4835 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4838 /* No luck with division elimination or divmod. Have to do it
4839 by conditionally adjusting op0 *and* the result. */
4841 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4845 quotient
= gen_reg_rtx (compute_mode
);
4846 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4847 label1
= gen_label_rtx ();
4848 label2
= gen_label_rtx ();
4849 label3
= gen_label_rtx ();
4850 label4
= gen_label_rtx ();
4851 label5
= gen_label_rtx ();
4852 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4853 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4854 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4855 quotient
, 0, methods
);
4856 if (tem
!= quotient
)
4857 emit_move_insn (quotient
, tem
);
4858 emit_jump_insn (targetm
.gen_jump (label5
));
4860 emit_label (label1
);
4861 expand_inc (adjusted_op0
, const1_rtx
);
4862 emit_jump_insn (targetm
.gen_jump (label4
));
4864 emit_label (label2
);
4865 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4866 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4867 quotient
, 0, methods
);
4868 if (tem
!= quotient
)
4869 emit_move_insn (quotient
, tem
);
4870 emit_jump_insn (targetm
.gen_jump (label5
));
4872 emit_label (label3
);
4873 expand_dec (adjusted_op0
, const1_rtx
);
4874 emit_label (label4
);
4875 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4876 quotient
, 0, methods
);
4877 if (tem
!= quotient
)
4878 emit_move_insn (quotient
, tem
);
4879 expand_dec (quotient
, const1_rtx
);
4880 emit_label (label5
);
4889 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4890 && (HWI_COMPUTABLE_MODE_P (compute_mode
)
4891 || INTVAL (op1
) >= 0))
4893 scalar_int_mode int_mode
4894 = as_a
<scalar_int_mode
> (compute_mode
);
4896 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4897 t1
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
4898 floor_log2 (d
), tquotient
, 1);
4899 t2
= expand_binop (int_mode
, and_optab
, op0
,
4900 gen_int_mode (d
- 1, int_mode
),
4901 NULL_RTX
, 1, methods
);
4902 t3
= gen_reg_rtx (int_mode
);
4903 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
, int_mode
, 1, 1);
4906 rtx_code_label
*lab
;
4907 lab
= gen_label_rtx ();
4908 do_cmp_and_jump (t2
, const0_rtx
, EQ
, int_mode
, lab
);
4909 expand_inc (t1
, const1_rtx
);
4914 quotient
= force_operand (gen_rtx_PLUS (int_mode
, t1
, t3
),
4919 /* Try using an instruction that produces both the quotient and
4920 remainder, using truncation. We can easily compensate the
4921 quotient or remainder to get ceiling rounding, once we have the
4922 remainder. Notice that we compute also the final remainder
4923 value here, and return the result right away. */
4924 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4925 target
= gen_reg_rtx (compute_mode
);
4929 remainder
= (REG_P (target
)
4930 ? target
: gen_reg_rtx (compute_mode
));
4931 quotient
= gen_reg_rtx (compute_mode
);
4935 quotient
= (REG_P (target
)
4936 ? target
: gen_reg_rtx (compute_mode
));
4937 remainder
= gen_reg_rtx (compute_mode
);
4940 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4943 /* This could be computed with a branch-less sequence.
4944 Save that for later. */
4945 rtx_code_label
*label
= gen_label_rtx ();
4946 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4947 compute_mode
, label
);
4948 expand_inc (quotient
, const1_rtx
);
4949 expand_dec (remainder
, op1
);
4951 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4954 /* No luck with division elimination or divmod. Have to do it
4955 by conditionally adjusting op0 *and* the result. */
4957 rtx_code_label
*label1
, *label2
;
4958 rtx adjusted_op0
, tem
;
4960 quotient
= gen_reg_rtx (compute_mode
);
4961 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4962 label1
= gen_label_rtx ();
4963 label2
= gen_label_rtx ();
4964 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4965 compute_mode
, label1
);
4966 emit_move_insn (quotient
, const0_rtx
);
4967 emit_jump_insn (targetm
.gen_jump (label2
));
4969 emit_label (label1
);
4970 expand_dec (adjusted_op0
, const1_rtx
);
4971 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4972 quotient
, 1, methods
);
4973 if (tem
!= quotient
)
4974 emit_move_insn (quotient
, tem
);
4975 expand_inc (quotient
, const1_rtx
);
4976 emit_label (label2
);
4981 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4982 && INTVAL (op1
) >= 0)
4984 /* This is extremely similar to the code for the unsigned case
4985 above. For 2.7 we should merge these variants, but for
4986 2.6.1 I don't want to touch the code for unsigned since that
4987 get used in C. The signed case will only be used by other
4991 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4992 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4993 floor_log2 (d
), tquotient
, 0);
4994 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4995 gen_int_mode (d
- 1, compute_mode
),
4996 NULL_RTX
, 1, methods
);
4997 t3
= gen_reg_rtx (compute_mode
);
4998 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4999 compute_mode
, 1, 1);
5002 rtx_code_label
*lab
;
5003 lab
= gen_label_rtx ();
5004 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
5005 expand_inc (t1
, const1_rtx
);
5010 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
5016 /* Try using an instruction that produces both the quotient and
5017 remainder, using truncation. We can easily compensate the
5018 quotient or remainder to get ceiling rounding, once we have the
5019 remainder. Notice that we compute also the final remainder
5020 value here, and return the result right away. */
5021 if (target
== 0 || GET_MODE (target
) != compute_mode
)
5022 target
= gen_reg_rtx (compute_mode
);
5025 remainder
= (REG_P (target
)
5026 ? target
: gen_reg_rtx (compute_mode
));
5027 quotient
= gen_reg_rtx (compute_mode
);
5031 quotient
= (REG_P (target
)
5032 ? target
: gen_reg_rtx (compute_mode
));
5033 remainder
= gen_reg_rtx (compute_mode
);
5036 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
5039 /* This could be computed with a branch-less sequence.
5040 Save that for later. */
5042 rtx_code_label
*label
= gen_label_rtx ();
5043 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
5044 compute_mode
, label
);
5045 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
5046 NULL_RTX
, 0, OPTAB_WIDEN
);
5047 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
5048 expand_inc (quotient
, const1_rtx
);
5049 expand_dec (remainder
, op1
);
5051 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5054 /* No luck with division elimination or divmod. Have to do it
5055 by conditionally adjusting op0 *and* the result. */
5057 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
5061 quotient
= gen_reg_rtx (compute_mode
);
5062 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
5063 label1
= gen_label_rtx ();
5064 label2
= gen_label_rtx ();
5065 label3
= gen_label_rtx ();
5066 label4
= gen_label_rtx ();
5067 label5
= gen_label_rtx ();
5068 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
5069 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
5070 compute_mode
, label1
);
5071 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5072 quotient
, 0, methods
);
5073 if (tem
!= quotient
)
5074 emit_move_insn (quotient
, tem
);
5075 emit_jump_insn (targetm
.gen_jump (label5
));
5077 emit_label (label1
);
5078 expand_dec (adjusted_op0
, const1_rtx
);
5079 emit_jump_insn (targetm
.gen_jump (label4
));
5081 emit_label (label2
);
5082 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
5083 compute_mode
, label3
);
5084 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5085 quotient
, 0, methods
);
5086 if (tem
!= quotient
)
5087 emit_move_insn (quotient
, tem
);
5088 emit_jump_insn (targetm
.gen_jump (label5
));
5090 emit_label (label3
);
5091 expand_inc (adjusted_op0
, const1_rtx
);
5092 emit_label (label4
);
5093 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5094 quotient
, 0, methods
);
5095 if (tem
!= quotient
)
5096 emit_move_insn (quotient
, tem
);
5097 expand_inc (quotient
, const1_rtx
);
5098 emit_label (label5
);
5103 case EXACT_DIV_EXPR
:
5104 if (op1_is_constant
&& HWI_COMPUTABLE_MODE_P (compute_mode
))
5106 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5107 int size
= GET_MODE_BITSIZE (int_mode
);
5108 HOST_WIDE_INT d
= INTVAL (op1
);
5109 unsigned HOST_WIDE_INT ml
;
5113 pre_shift
= ctz_or_zero (d
);
5114 ml
= invert_mod2n (d
>> pre_shift
, size
);
5115 t1
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
5116 pre_shift
, NULL_RTX
, unsignedp
);
5117 quotient
= expand_mult (int_mode
, t1
, gen_int_mode (ml
, int_mode
),
5120 insn
= get_last_insn ();
5121 set_dst_reg_note (insn
, REG_EQUAL
,
5122 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5123 int_mode
, op0
, op1
),
5128 case ROUND_DIV_EXPR
:
5129 case ROUND_MOD_EXPR
:
5132 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5134 rtx_code_label
*label
;
5135 label
= gen_label_rtx ();
5136 quotient
= gen_reg_rtx (int_mode
);
5137 remainder
= gen_reg_rtx (int_mode
);
5138 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
5141 quotient
= expand_binop (int_mode
, udiv_optab
, op0
, op1
,
5142 quotient
, 1, methods
);
5143 tem
= expand_mult (int_mode
, quotient
, op1
, NULL_RTX
, 1);
5144 remainder
= expand_binop (int_mode
, sub_optab
, op0
, tem
,
5145 remainder
, 1, methods
);
5147 tem
= plus_constant (int_mode
, op1
, -1);
5148 tem
= expand_shift (RSHIFT_EXPR
, int_mode
, tem
, 1, NULL_RTX
, 1);
5149 do_cmp_and_jump (remainder
, tem
, LEU
, int_mode
, label
);
5150 expand_inc (quotient
, const1_rtx
);
5151 expand_dec (remainder
, op1
);
5156 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5157 int size
= GET_MODE_BITSIZE (int_mode
);
5158 rtx abs_rem
, abs_op1
, tem
, mask
;
5159 rtx_code_label
*label
;
5160 label
= gen_label_rtx ();
5161 quotient
= gen_reg_rtx (int_mode
);
5162 remainder
= gen_reg_rtx (int_mode
);
5163 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
5166 quotient
= expand_binop (int_mode
, sdiv_optab
, op0
, op1
,
5167 quotient
, 0, methods
);
5168 tem
= expand_mult (int_mode
, quotient
, op1
, NULL_RTX
, 0);
5169 remainder
= expand_binop (int_mode
, sub_optab
, op0
, tem
,
5170 remainder
, 0, methods
);
5172 abs_rem
= expand_abs (int_mode
, remainder
, NULL_RTX
, 1, 0);
5173 abs_op1
= expand_abs (int_mode
, op1
, NULL_RTX
, 1, 0);
5174 tem
= expand_shift (LSHIFT_EXPR
, int_mode
, abs_rem
,
5176 do_cmp_and_jump (tem
, abs_op1
, LTU
, int_mode
, label
);
5177 tem
= expand_binop (int_mode
, xor_optab
, op0
, op1
,
5178 NULL_RTX
, 0, OPTAB_WIDEN
);
5179 mask
= expand_shift (RSHIFT_EXPR
, int_mode
, tem
,
5180 size
- 1, NULL_RTX
, 0);
5181 tem
= expand_binop (int_mode
, xor_optab
, mask
, const1_rtx
,
5182 NULL_RTX
, 0, OPTAB_WIDEN
);
5183 tem
= expand_binop (int_mode
, sub_optab
, tem
, mask
,
5184 NULL_RTX
, 0, OPTAB_WIDEN
);
5185 expand_inc (quotient
, tem
);
5186 tem
= expand_binop (int_mode
, xor_optab
, mask
, op1
,
5187 NULL_RTX
, 0, OPTAB_WIDEN
);
5188 tem
= expand_binop (int_mode
, sub_optab
, tem
, mask
,
5189 NULL_RTX
, 0, OPTAB_WIDEN
);
5190 expand_dec (remainder
, tem
);
5193 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5201 if (target
&& GET_MODE (target
) != compute_mode
)
5206 /* Try to produce the remainder without producing the quotient.
5207 If we seem to have a divmod pattern that does not require widening,
5208 don't try widening here. We should really have a WIDEN argument
5209 to expand_twoval_binop, since what we'd really like to do here is
5210 1) try a mod insn in compute_mode
5211 2) try a divmod insn in compute_mode
5212 3) try a div insn in compute_mode and multiply-subtract to get
5214 4) try the same things with widening allowed. */
5216 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5219 ((optab_handler (optab2
, compute_mode
)
5220 != CODE_FOR_nothing
)
5221 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5224 /* No luck there. Can we do remainder and divide at once
5225 without a library call? */
5226 remainder
= gen_reg_rtx (compute_mode
);
5227 if (! expand_twoval_binop ((unsignedp
5231 NULL_RTX
, remainder
, unsignedp
))
5236 return gen_lowpart (mode
, remainder
);
5239 /* Produce the quotient. Try a quotient insn, but not a library call.
5240 If we have a divmod in this mode, use it in preference to widening
5241 the div (for this test we assume it will not fail). Note that optab2
5242 is set to the one of the two optabs that the call below will use. */
5244 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
5245 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5247 ((optab_handler (optab2
, compute_mode
)
5248 != CODE_FOR_nothing
)
5249 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5253 /* No luck there. Try a quotient-and-remainder insn,
5254 keeping the quotient alone. */
5255 quotient
= gen_reg_rtx (compute_mode
);
5256 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5258 quotient
, NULL_RTX
, unsignedp
))
5262 /* Still no luck. If we are not computing the remainder,
5263 use a library call for the quotient. */
5264 quotient
= sign_expand_binop (compute_mode
,
5265 udiv_optab
, sdiv_optab
,
5267 unsignedp
, methods
);
5274 if (target
&& GET_MODE (target
) != compute_mode
)
5279 /* No divide instruction either. Use library for remainder. */
5280 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5282 unsignedp
, methods
);
5283 /* No remainder function. Try a quotient-and-remainder
5284 function, keeping the remainder. */
5286 && (methods
== OPTAB_LIB
|| methods
== OPTAB_LIB_WIDEN
))
5288 remainder
= gen_reg_rtx (compute_mode
);
5289 if (!expand_twoval_binop_libfunc
5290 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5292 NULL_RTX
, remainder
,
5293 unsignedp
? UMOD
: MOD
))
5294 remainder
= NULL_RTX
;
5299 /* We divided. Now finish doing X - Y * (X / Y). */
5300 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5301 NULL_RTX
, unsignedp
);
5302 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5303 remainder
, target
, unsignedp
,
5308 if (methods
!= OPTAB_LIB_WIDEN
5309 && (rem_flag
? remainder
: quotient
) == NULL_RTX
)
5312 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5315 /* Return a tree node with data type TYPE, describing the value of X.
5316 Usually this is an VAR_DECL, if there is no obvious better choice.
5317 X may be an expression, however we only support those expressions
5318 generated by loop.c. */
5321 make_tree (tree type
, rtx x
)
5325 switch (GET_CODE (x
))
5328 case CONST_WIDE_INT
:
5329 t
= wide_int_to_tree (type
, rtx_mode_t (x
, TYPE_MODE (type
)));
5333 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT
* 2 <= MAX_BITSIZE_MODE_ANY_INT
);
5334 if (TARGET_SUPPORTS_WIDE_INT
== 0 && GET_MODE (x
) == VOIDmode
)
5335 t
= wide_int_to_tree (type
,
5336 wide_int::from_array (&CONST_DOUBLE_LOW (x
), 2,
5337 HOST_BITS_PER_WIDE_INT
* 2));
5339 t
= build_real (type
, *CONST_DOUBLE_REAL_VALUE (x
));
5345 unsigned int npatterns
= CONST_VECTOR_NPATTERNS (x
);
5346 unsigned int nelts_per_pattern
= CONST_VECTOR_NELTS_PER_PATTERN (x
);
5347 tree itype
= TREE_TYPE (type
);
5349 /* Build a tree with vector elements. */
5350 tree_vector_builder
elts (type
, npatterns
, nelts_per_pattern
);
5351 unsigned int count
= elts
.encoded_nelts ();
5352 for (unsigned int i
= 0; i
< count
; ++i
)
5354 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5355 elts
.quick_push (make_tree (itype
, elt
));
5358 return elts
.build ();
5362 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5363 make_tree (type
, XEXP (x
, 1)));
5366 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5367 make_tree (type
, XEXP (x
, 1)));
5370 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5373 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5374 make_tree (type
, XEXP (x
, 1)));
5377 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5378 make_tree (type
, XEXP (x
, 1)));
5381 t
= unsigned_type_for (type
);
5382 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5383 make_tree (t
, XEXP (x
, 0)),
5384 make_tree (type
, XEXP (x
, 1))));
5387 t
= signed_type_for (type
);
5388 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5389 make_tree (t
, XEXP (x
, 0)),
5390 make_tree (type
, XEXP (x
, 1))));
5393 if (TREE_CODE (type
) != REAL_TYPE
)
5394 t
= signed_type_for (type
);
5398 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5399 make_tree (t
, XEXP (x
, 0)),
5400 make_tree (t
, XEXP (x
, 1))));
5402 t
= unsigned_type_for (type
);
5403 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5404 make_tree (t
, XEXP (x
, 0)),
5405 make_tree (t
, XEXP (x
, 1))));
5409 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5410 GET_CODE (x
) == ZERO_EXTEND
);
5411 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
5414 return make_tree (type
, XEXP (x
, 0));
5417 t
= SYMBOL_REF_DECL (x
);
5419 return fold_convert (type
, build_fold_addr_expr (t
));
5423 if (CONST_POLY_INT_P (x
))
5424 return wide_int_to_tree (t
, const_poly_int_value (x
));
5426 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5428 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5429 address mode to pointer mode. */
5430 if (POINTER_TYPE_P (type
))
5431 x
= convert_memory_address_addr_space
5432 (SCALAR_INT_TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5434 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5435 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5436 t
->decl_with_rtl
.rtl
= x
;
5442 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5443 and returning TARGET.
5445 If TARGET is 0, a pseudo-register or constant is returned. */
5448 expand_and (machine_mode mode
, rtx op0
, rtx op1
, rtx target
)
5452 if (GET_MODE (op0
) == VOIDmode
&& GET_MODE (op1
) == VOIDmode
)
5453 tem
= simplify_binary_operation (AND
, mode
, op0
, op1
);
5455 tem
= expand_binop (mode
, and_optab
, op0
, op1
, target
, 0, OPTAB_LIB_WIDEN
);
5459 else if (tem
!= target
)
5460 emit_move_insn (target
, tem
);
5464 /* Helper function for emit_store_flag. */
5466 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5467 machine_mode mode
, machine_mode compare_mode
,
5468 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5469 machine_mode target_mode
)
5471 class expand_operand ops
[4];
5472 rtx op0
, comparison
, subtarget
;
5474 scalar_int_mode result_mode
= targetm
.cstore_mode (icode
);
5475 scalar_int_mode int_target_mode
;
5477 last
= get_last_insn ();
5478 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5479 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
5482 delete_insns_since (last
);
5486 if (target_mode
== VOIDmode
)
5487 int_target_mode
= result_mode
;
5489 int_target_mode
= as_a
<scalar_int_mode
> (target_mode
);
5491 target
= gen_reg_rtx (int_target_mode
);
5493 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
5495 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5496 create_fixed_operand (&ops
[1], comparison
);
5497 create_fixed_operand (&ops
[2], x
);
5498 create_fixed_operand (&ops
[3], y
);
5499 if (!maybe_expand_insn (icode
, 4, ops
))
5501 delete_insns_since (last
);
5504 subtarget
= ops
[0].value
;
5506 /* If we are converting to a wider mode, first convert to
5507 INT_TARGET_MODE, then normalize. This produces better combining
5508 opportunities on machines that have a SIGN_EXTRACT when we are
5509 testing a single bit. This mostly benefits the 68k.
5511 If STORE_FLAG_VALUE does not have the sign bit set when
5512 interpreted in MODE, we can do this conversion as unsigned, which
5513 is usually more efficient. */
5514 if (GET_MODE_PRECISION (int_target_mode
) > GET_MODE_PRECISION (result_mode
))
5516 gcc_assert (GET_MODE_PRECISION (result_mode
) != 1
5517 || STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1);
5519 bool unsignedp
= (STORE_FLAG_VALUE
>= 0);
5520 convert_move (target
, subtarget
, unsignedp
);
5523 result_mode
= int_target_mode
;
5528 /* If we want to keep subexpressions around, don't reuse our last
5533 /* Now normalize to the proper value in MODE. Sometimes we don't
5534 have to do anything. */
5535 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5537 /* STORE_FLAG_VALUE might be the most negative number, so write
5538 the comparison this way to avoid a compiler-time warning. */
5539 else if (- normalizep
== STORE_FLAG_VALUE
)
5540 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5542 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5543 it hard to use a value of just the sign bit due to ANSI integer
5544 constant typing rules. */
5545 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
5546 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5547 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
5551 gcc_assert (STORE_FLAG_VALUE
& 1);
5553 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5554 if (normalizep
== -1)
5555 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5558 /* If we were converting to a smaller mode, do the conversion now. */
5559 if (int_target_mode
!= result_mode
)
5561 convert_move (target
, op0
, 0);
5569 /* A subroutine of emit_store_flag only including "tricks" that do not
5570 need a recursive call. These are kept separate to avoid infinite
5574 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5575 machine_mode mode
, int unsignedp
, int normalizep
,
5576 machine_mode target_mode
)
5579 enum insn_code icode
;
5580 machine_mode compare_mode
;
5581 enum mode_class mclass
;
5582 enum rtx_code scode
;
5585 code
= unsigned_condition (code
);
5586 scode
= swap_condition (code
);
5588 /* If one operand is constant, make it the second one. Only do this
5589 if the other operand is not constant as well. */
5591 if (swap_commutative_operands_p (op0
, op1
))
5593 std::swap (op0
, op1
);
5594 code
= swap_condition (code
);
5597 if (mode
== VOIDmode
)
5598 mode
= GET_MODE (op0
);
5600 if (CONST_SCALAR_INT_P (op1
))
5601 canonicalize_comparison (mode
, &code
, &op1
);
5603 /* For some comparisons with 1 and -1, we can convert this to
5604 comparisons with zero. This will often produce more opportunities for
5605 store-flag insns. */
5610 if (op1
== const1_rtx
)
5611 op1
= const0_rtx
, code
= LE
;
5614 if (op1
== constm1_rtx
)
5615 op1
= const0_rtx
, code
= LT
;
5618 if (op1
== const1_rtx
)
5619 op1
= const0_rtx
, code
= GT
;
5622 if (op1
== constm1_rtx
)
5623 op1
= const0_rtx
, code
= GE
;
5626 if (op1
== const1_rtx
)
5627 op1
= const0_rtx
, code
= NE
;
5630 if (op1
== const1_rtx
)
5631 op1
= const0_rtx
, code
= EQ
;
5637 /* If we are comparing a double-word integer with zero or -1, we can
5638 convert the comparison into one involving a single word. */
5639 scalar_int_mode int_mode
;
5640 if (is_int_mode (mode
, &int_mode
)
5641 && GET_MODE_BITSIZE (int_mode
) == BITS_PER_WORD
* 2
5642 && (!MEM_P (op0
) || ! MEM_VOLATILE_P (op0
)))
5645 if ((code
== EQ
|| code
== NE
)
5646 && (op1
== const0_rtx
|| op1
== constm1_rtx
))
5650 /* Do a logical OR or AND of the two words and compare the
5652 op00
= simplify_gen_subreg (word_mode
, op0
, int_mode
, 0);
5653 op01
= simplify_gen_subreg (word_mode
, op0
, int_mode
, UNITS_PER_WORD
);
5654 tem
= expand_binop (word_mode
,
5655 op1
== const0_rtx
? ior_optab
: and_optab
,
5656 op00
, op01
, NULL_RTX
, unsignedp
,
5660 tem
= emit_store_flag (NULL_RTX
, code
, tem
, op1
, word_mode
,
5661 unsignedp
, normalizep
);
5663 else if ((code
== LT
|| code
== GE
) && op1
== const0_rtx
)
5667 /* If testing the sign bit, can just test on high word. */
5668 op0h
= simplify_gen_subreg (word_mode
, op0
, int_mode
,
5669 subreg_highpart_offset (word_mode
,
5671 tem
= emit_store_flag (NULL_RTX
, code
, op0h
, op1
, word_mode
,
5672 unsignedp
, normalizep
);
5679 if (target_mode
== VOIDmode
|| GET_MODE (tem
) == target_mode
)
5682 target
= gen_reg_rtx (target_mode
);
5684 convert_move (target
, tem
,
5685 !val_signbit_known_set_p (word_mode
,
5686 (normalizep
? normalizep
5687 : STORE_FLAG_VALUE
)));
5692 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5693 complement of A (for GE) and shifting the sign bit to the low bit. */
5694 if (op1
== const0_rtx
&& (code
== LT
|| code
== GE
)
5695 && is_int_mode (mode
, &int_mode
)
5696 && (normalizep
|| STORE_FLAG_VALUE
== 1
5697 || val_signbit_p (int_mode
, STORE_FLAG_VALUE
)))
5699 scalar_int_mode int_target_mode
;
5703 int_target_mode
= int_mode
;
5706 /* If the result is to be wider than OP0, it is best to convert it
5707 first. If it is to be narrower, it is *incorrect* to convert it
5709 int_target_mode
= as_a
<scalar_int_mode
> (target_mode
);
5710 if (GET_MODE_SIZE (int_target_mode
) > GET_MODE_SIZE (int_mode
))
5712 op0
= convert_modes (int_target_mode
, int_mode
, op0
, 0);
5713 int_mode
= int_target_mode
;
5717 if (int_target_mode
!= int_mode
)
5721 op0
= expand_unop (int_mode
, one_cmpl_optab
, op0
,
5722 ((STORE_FLAG_VALUE
== 1 || normalizep
)
5723 ? 0 : subtarget
), 0);
5725 if (STORE_FLAG_VALUE
== 1 || normalizep
)
5726 /* If we are supposed to produce a 0/1 value, we want to do
5727 a logical shift from the sign bit to the low-order bit; for
5728 a -1/0 value, we do an arithmetic shift. */
5729 op0
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
5730 GET_MODE_BITSIZE (int_mode
) - 1,
5731 subtarget
, normalizep
!= -1);
5733 if (int_mode
!= int_target_mode
)
5734 op0
= convert_modes (int_target_mode
, int_mode
, op0
, 0);
5739 mclass
= GET_MODE_CLASS (mode
);
5740 FOR_EACH_MODE_FROM (compare_mode
, mode
)
5742 machine_mode optab_mode
= mclass
== MODE_CC
? CCmode
: compare_mode
;
5743 icode
= optab_handler (cstore_optab
, optab_mode
);
5744 if (icode
!= CODE_FOR_nothing
)
5746 do_pending_stack_adjust ();
5747 rtx tem
= emit_cstore (target
, icode
, code
, mode
, compare_mode
,
5748 unsignedp
, op0
, op1
, normalizep
, target_mode
);
5752 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5754 tem
= emit_cstore (target
, icode
, scode
, mode
, compare_mode
,
5755 unsignedp
, op1
, op0
, normalizep
, target_mode
);
5766 /* Subroutine of emit_store_flag that handles cases in which the operands
5767 are scalar integers. SUBTARGET is the target to use for temporary
5768 operations and TRUEVAL is the value to store when the condition is
5769 true. All other arguments are as for emit_store_flag. */
5772 emit_store_flag_int (rtx target
, rtx subtarget
, enum rtx_code code
, rtx op0
,
5773 rtx op1
, scalar_int_mode mode
, int unsignedp
,
5774 int normalizep
, rtx trueval
)
5776 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5777 rtx_insn
*last
= get_last_insn ();
5779 /* If this is an equality comparison of integers, we can try to exclusive-or
5780 (or subtract) the two operands and use a recursive call to try the
5781 comparison with zero. Don't do any of these cases if branches are
5784 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5786 rtx tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
5790 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
5793 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5794 mode
, unsignedp
, normalizep
);
5798 delete_insns_since (last
);
5801 /* For integer comparisons, try the reverse comparison. However, for
5802 small X and if we'd have anyway to extend, implementing "X != 0"
5803 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5804 rtx_code rcode
= reverse_condition (code
);
5805 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5806 && ! (optab_handler (cstore_optab
, mode
) == CODE_FOR_nothing
5808 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
5809 && op1
== const0_rtx
))
5811 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5812 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5814 /* Again, for the reverse comparison, use either an addition or a XOR. */
5816 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5817 optimize_insn_for_speed_p ()) == 0)
5819 rtx tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5820 STORE_FLAG_VALUE
, target_mode
);
5822 tem
= expand_binop (target_mode
, add_optab
, tem
,
5823 gen_int_mode (normalizep
, target_mode
),
5824 target
, 0, OPTAB_WIDEN
);
5829 && rtx_cost (trueval
, mode
, XOR
, 1,
5830 optimize_insn_for_speed_p ()) == 0)
5832 rtx tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5833 normalizep
, target_mode
);
5835 tem
= expand_binop (target_mode
, xor_optab
, tem
, trueval
, target
,
5836 INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5841 delete_insns_since (last
);
5844 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5845 the constant zero. Reject all other comparisons at this point. Only
5846 do LE and GT if branches are expensive since they are expensive on
5847 2-operand machines. */
5849 if (op1
!= const0_rtx
5850 || (code
!= EQ
&& code
!= NE
5851 && (BRANCH_COST (optimize_insn_for_speed_p (),
5852 false) <= 1 || (code
!= LE
&& code
!= GT
))))
5855 /* Try to put the result of the comparison in the sign bit. Assume we can't
5856 do the necessary operation below. */
5860 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5861 the sign bit set. */
5865 /* This is destructive, so SUBTARGET can't be OP0. */
5866 if (rtx_equal_p (subtarget
, op0
))
5869 tem
= expand_binop (mode
, sub_optab
, op0
, const1_rtx
, subtarget
, 0,
5872 tem
= expand_binop (mode
, ior_optab
, op0
, tem
, subtarget
, 0,
5876 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5877 number of bits in the mode of OP0, minus one. */
5881 if (rtx_equal_p (subtarget
, op0
))
5884 tem
= maybe_expand_shift (RSHIFT_EXPR
, mode
, op0
,
5885 GET_MODE_BITSIZE (mode
) - 1,
5888 tem
= expand_binop (mode
, sub_optab
, tem
, op0
, subtarget
, 0,
5892 if (code
== EQ
|| code
== NE
)
5894 /* For EQ or NE, one way to do the comparison is to apply an operation
5895 that converts the operand into a positive number if it is nonzero
5896 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5897 for NE we negate. This puts the result in the sign bit. Then we
5898 normalize with a shift, if needed.
5900 Two operations that can do the above actions are ABS and FFS, so try
5901 them. If that doesn't work, and MODE is smaller than a full word,
5902 we can use zero-extension to the wider mode (an unsigned conversion)
5903 as the operation. */
5905 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5906 that is compensated by the subsequent overflow when subtracting
5909 if (optab_handler (abs_optab
, mode
) != CODE_FOR_nothing
)
5910 tem
= expand_unop (mode
, abs_optab
, op0
, subtarget
, 1);
5911 else if (optab_handler (ffs_optab
, mode
) != CODE_FOR_nothing
)
5912 tem
= expand_unop (mode
, ffs_optab
, op0
, subtarget
, 1);
5913 else if (GET_MODE_SIZE (mode
) < UNITS_PER_WORD
)
5915 tem
= convert_modes (word_mode
, mode
, op0
, 1);
5922 tem
= expand_binop (mode
, sub_optab
, tem
, const1_rtx
, subtarget
,
5925 tem
= expand_unop (mode
, neg_optab
, tem
, subtarget
, 0);
5928 /* If we couldn't do it that way, for NE we can "or" the two's complement
5929 of the value with itself. For EQ, we take the one's complement of
5930 that "or", which is an extra insn, so we only handle EQ if branches
5935 || BRANCH_COST (optimize_insn_for_speed_p (),
5938 if (rtx_equal_p (subtarget
, op0
))
5941 tem
= expand_unop (mode
, neg_optab
, op0
, subtarget
, 0);
5942 tem
= expand_binop (mode
, ior_optab
, tem
, op0
, subtarget
, 0,
5945 if (tem
&& code
== EQ
)
5946 tem
= expand_unop (mode
, one_cmpl_optab
, tem
, subtarget
, 0);
5950 if (tem
&& normalizep
)
5951 tem
= maybe_expand_shift (RSHIFT_EXPR
, mode
, tem
,
5952 GET_MODE_BITSIZE (mode
) - 1,
5953 subtarget
, normalizep
== 1);
5959 else if (GET_MODE (tem
) != target_mode
)
5961 convert_move (target
, tem
, 0);
5964 else if (!subtarget
)
5966 emit_move_insn (target
, tem
);
5971 delete_insns_since (last
);
5976 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5977 and storing in TARGET. Normally return TARGET.
5978 Return 0 if that cannot be done.
5980 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5981 it is VOIDmode, they cannot both be CONST_INT.
5983 UNSIGNEDP is for the case where we have to widen the operands
5984 to perform the operation. It says to use zero-extension.
5986 NORMALIZEP is 1 if we should convert the result to be either zero
5987 or one. Normalize is -1 if we should convert the result to be
5988 either zero or -1. If NORMALIZEP is zero, the result will be left
5989 "raw" out of the scc insn. */
5992 emit_store_flag (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5993 machine_mode mode
, int unsignedp
, int normalizep
)
5995 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5996 enum rtx_code rcode
;
6001 /* If we compare constants, we shouldn't use a store-flag operation,
6002 but a constant load. We can get there via the vanilla route that
6003 usually generates a compare-branch sequence, but will in this case
6004 fold the comparison to a constant, and thus elide the branch. */
6005 if (CONSTANT_P (op0
) && CONSTANT_P (op1
))
6008 tem
= emit_store_flag_1 (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
,
6013 /* If we reached here, we can't do this with a scc insn, however there
6014 are some comparisons that can be done in other ways. Don't do any
6015 of these cases if branches are very cheap. */
6016 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6019 /* See what we need to return. We can only return a 1, -1, or the
6022 if (normalizep
== 0)
6024 if (STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1)
6025 normalizep
= STORE_FLAG_VALUE
;
6027 else if (val_signbit_p (mode
, STORE_FLAG_VALUE
))
6033 last
= get_last_insn ();
6035 /* If optimizing, use different pseudo registers for each insn, instead
6036 of reusing the same pseudo. This leads to better CSE, but slows
6037 down the compiler, since there are more pseudos. */
6038 subtarget
= (!optimize
6039 && (target_mode
== mode
)) ? target
: NULL_RTX
;
6040 trueval
= GEN_INT (normalizep
? normalizep
: STORE_FLAG_VALUE
);
6042 /* For floating-point comparisons, try the reverse comparison or try
6043 changing the "orderedness" of the comparison. */
6044 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6046 enum rtx_code first_code
;
6049 rcode
= reverse_condition_maybe_unordered (code
);
6050 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
6051 && (code
== ORDERED
|| code
== UNORDERED
6052 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
6053 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
6055 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
6056 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
6058 /* For the reverse comparison, use either an addition or a XOR. */
6060 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
6061 optimize_insn_for_speed_p ()) == 0)
6063 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
6064 STORE_FLAG_VALUE
, target_mode
);
6066 return expand_binop (target_mode
, add_optab
, tem
,
6067 gen_int_mode (normalizep
, target_mode
),
6068 target
, 0, OPTAB_WIDEN
);
6071 && rtx_cost (trueval
, mode
, XOR
, 1,
6072 optimize_insn_for_speed_p ()) == 0)
6074 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
6075 normalizep
, target_mode
);
6077 return expand_binop (target_mode
, xor_optab
, tem
, trueval
,
6078 target
, INTVAL (trueval
) >= 0,
6083 delete_insns_since (last
);
6085 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6086 if (code
== ORDERED
|| code
== UNORDERED
)
6089 and_them
= split_comparison (code
, mode
, &first_code
, &code
);
6091 /* If there are no NaNs, the first comparison should always fall through.
6092 Effectively change the comparison to the other one. */
6093 if (!HONOR_NANS (mode
))
6095 gcc_assert (first_code
== (and_them
? ORDERED
: UNORDERED
));
6096 return emit_store_flag_1 (target
, code
, op0
, op1
, mode
, 0, normalizep
,
6100 if (!HAVE_conditional_move
)
6103 /* Do not turn a trapping comparison into a non-trapping one. */
6104 if ((code
!= EQ
&& code
!= NE
&& code
!= UNEQ
&& code
!= LTGT
)
6105 && flag_trapping_math
)
6108 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6109 conditional move. */
6110 tem
= emit_store_flag_1 (subtarget
, first_code
, op0
, op1
, mode
, 0,
6111 normalizep
, target_mode
);
6116 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
6117 tem
, const0_rtx
, GET_MODE (tem
), 0);
6119 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
6120 trueval
, tem
, GET_MODE (tem
), 0);
6123 delete_insns_since (last
);
6127 /* The remaining tricks only apply to integer comparisons. */
6129 scalar_int_mode int_mode
;
6130 if (is_int_mode (mode
, &int_mode
))
6131 return emit_store_flag_int (target
, subtarget
, code
, op0
, op1
, int_mode
,
6132 unsignedp
, normalizep
, trueval
);
6137 /* Like emit_store_flag, but always succeeds. */
6140 emit_store_flag_force (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
6141 machine_mode mode
, int unsignedp
, int normalizep
)
6144 rtx_code_label
*label
;
6145 rtx trueval
, falseval
;
6147 /* First see if emit_store_flag can do the job. */
6148 tem
= emit_store_flag (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
);
6152 /* If one operand is constant, make it the second one. Only do this
6153 if the other operand is not constant as well. */
6154 if (swap_commutative_operands_p (op0
, op1
))
6156 std::swap (op0
, op1
);
6157 code
= swap_condition (code
);
6160 if (mode
== VOIDmode
)
6161 mode
= GET_MODE (op0
);
6164 target
= gen_reg_rtx (word_mode
);
6166 /* If this failed, we have to do this with set/compare/jump/set code.
6167 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6168 trueval
= normalizep
? GEN_INT (normalizep
) : const1_rtx
;
6170 && GET_MODE_CLASS (mode
) == MODE_INT
6173 && op1
== const0_rtx
)
6175 label
= gen_label_rtx ();
6176 do_compare_rtx_and_jump (target
, const0_rtx
, EQ
, unsignedp
, mode
,
6177 NULL_RTX
, NULL
, label
,
6178 profile_probability::uninitialized ());
6179 emit_move_insn (target
, trueval
);
6185 || reg_mentioned_p (target
, op0
) || reg_mentioned_p (target
, op1
))
6186 target
= gen_reg_rtx (GET_MODE (target
));
6188 /* Jump in the right direction if the target cannot implement CODE
6189 but can jump on its reverse condition. */
6190 falseval
= const0_rtx
;
6191 if (! can_compare_p (code
, mode
, ccp_jump
)
6192 && (! FLOAT_MODE_P (mode
)
6193 || code
== ORDERED
|| code
== UNORDERED
6194 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
6195 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
6197 enum rtx_code rcode
;
6198 if (FLOAT_MODE_P (mode
))
6199 rcode
= reverse_condition_maybe_unordered (code
);
6201 rcode
= reverse_condition (code
);
6203 /* Canonicalize to UNORDERED for the libcall. */
6204 if (can_compare_p (rcode
, mode
, ccp_jump
)
6205 || (code
== ORDERED
&& ! can_compare_p (ORDERED
, mode
, ccp_jump
)))
6208 trueval
= const0_rtx
;
6213 emit_move_insn (target
, trueval
);
6214 label
= gen_label_rtx ();
6215 do_compare_rtx_and_jump (op0
, op1
, code
, unsignedp
, mode
, NULL_RTX
, NULL
,
6216 label
, profile_probability::uninitialized ());
6218 emit_move_insn (target
, falseval
);
6224 /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6225 and exclusive ranges in order to create an equivalent comparison. See
6226 canonicalize_cmp_for_target for the possible cases. */
6228 static enum rtx_code
6229 equivalent_cmp_code (enum rtx_code code
)
6255 /* Choose the more appropiate immediate in scalar integer comparisons. The
6256 purpose of this is to end up with an immediate which can be loaded into a
6257 register in fewer moves, if possible.
6259 For each integer comparison there exists an equivalent choice:
6260 i) a > b or a >= b + 1
6261 ii) a <= b or a < b + 1
6262 iii) a >= b or a > b - 1
6263 iv) a < b or a <= b - 1
6265 MODE is the mode of the first operand.
6266 CODE points to the comparison code.
6267 IMM points to the rtx containing the immediate. *IMM must satisfy
6268 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6272 canonicalize_comparison (machine_mode mode
, enum rtx_code
*code
, rtx
*imm
)
6274 if (!SCALAR_INT_MODE_P (mode
))
6278 enum signop sgn
= unsigned_condition_p (*code
) ? UNSIGNED
: SIGNED
;
6280 /* Extract the immediate value from the rtx. */
6281 wide_int imm_val
= rtx_mode_t (*imm
, mode
);
6283 if (*code
== GT
|| *code
== GTU
|| *code
== LE
|| *code
== LEU
)
6285 else if (*code
== GE
|| *code
== GEU
|| *code
== LT
|| *code
== LTU
)
6290 /* Check for overflow/underflow in the case of signed values and
6291 wrapping around in the case of unsigned values. If any occur
6292 cancel the optimization. */
6293 wi::overflow_type overflow
= wi::OVF_NONE
;
6297 imm_modif
= wi::add (imm_val
, 1, sgn
, &overflow
);
6299 imm_modif
= wi::sub (imm_val
, 1, sgn
, &overflow
);
6304 /* The following creates a pseudo; if we cannot do that, bail out. */
6305 if (!can_create_pseudo_p ())
6308 rtx reg
= gen_rtx_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
6309 rtx new_imm
= immed_wide_int_const (imm_modif
, mode
);
6311 rtx_insn
*old_rtx
= gen_move_insn (reg
, *imm
);
6312 rtx_insn
*new_rtx
= gen_move_insn (reg
, new_imm
);
6314 /* Update the immediate and the code. */
6315 if (insn_cost (old_rtx
, true) > insn_cost (new_rtx
, true))
6317 *code
= equivalent_cmp_code (*code
);
6324 /* Perform possibly multi-word comparison and conditional jump to LABEL
6325 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
6326 now a thin wrapper around do_compare_rtx_and_jump. */
6329 do_cmp_and_jump (rtx arg1
, rtx arg2
, enum rtx_code op
, machine_mode mode
,
6330 rtx_code_label
*label
)
6332 int unsignedp
= (op
== LTU
|| op
== LEU
|| op
== GTU
|| op
== GEU
);
6333 do_compare_rtx_and_jump (arg1
, arg2
, op
, unsignedp
, mode
, NULL_RTX
,
6334 NULL
, label
, profile_probability::uninitialized ());