[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2021 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
93
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
100 }
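/* As an illustration, assuming a 32-bit SImode:
     mask_rtx (SImode, 4, 8, false)  -> (const_int 0x00000ff0)
     mask_rtx (SImode, 4, 8, true)   -> (const_int 0xfffff00f)
   i.e. eight ones shifted left by four bits, or the complement of that.  */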
101
 102 /* Test whether a value is zero or a power of two.  */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
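/* For example, 8 & 7 == 0 and 0 & -1 == 0, so 8 and 0 satisfy the test,
   whereas 12 & 11 == 8, so 12 does not.  */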
105
106 struct init_expmed_rtl
107 {
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
126
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
129 };
130
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
134 {
135 int to_size, from_size;
136 rtx which;
137
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
140
 141 /* Most partial integers have a precision less than the "full"
 142 integer they require for storage.  If one does not, reduce its
 143 bit size by one here so that, for comparison purposes, it still
 144 counts as narrower than the corresponding full integer mode.  */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
151
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
154
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
158 /* Restore all->reg's mode. */
159 PUT_MODE (all->reg, to_mode);
160 }
161
162 static void
163 init_expmed_one_mode (struct init_expmed_rtl *all,
164 machine_mode mode, int speed)
165 {
166 int m, n, mode_bitsize;
167 machine_mode mode_from;
168
169 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
170
171 PUT_MODE (all->reg, mode);
172 PUT_MODE (all->plus, mode);
173 PUT_MODE (all->neg, mode);
174 PUT_MODE (all->mult, mode);
175 PUT_MODE (all->sdiv, mode);
176 PUT_MODE (all->udiv, mode);
177 PUT_MODE (all->sdiv_32, mode);
178 PUT_MODE (all->smod_32, mode);
179 PUT_MODE (all->wide_trunc, mode);
180 PUT_MODE (all->shift, mode);
181 PUT_MODE (all->shift_mult, mode);
182 PUT_MODE (all->shift_add, mode);
183 PUT_MODE (all->shift_sub0, mode);
184 PUT_MODE (all->shift_sub1, mode);
185 PUT_MODE (all->zext, mode);
186 PUT_MODE (all->trunc, mode);
187
188 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
189 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
190 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
191 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
192 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
193
194 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
195 <= 2 * add_cost (speed, mode)));
196 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
197 <= 4 * add_cost (speed, mode)));
198
199 set_shift_cost (speed, mode, 0, 0);
200 {
201 int cost = add_cost (speed, mode);
202 set_shiftadd_cost (speed, mode, 0, cost);
203 set_shiftsub0_cost (speed, mode, 0, cost);
204 set_shiftsub1_cost (speed, mode, 0, cost);
205 }
206
207 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
208 for (m = 1; m < n; m++)
209 {
210 XEXP (all->shift, 1) = all->cint[m];
211 XEXP (all->shift_mult, 1) = all->pow2[m];
212
213 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
214 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
215 speed));
216 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
217 speed));
218 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
219 speed));
220 }
221
222 scalar_int_mode int_mode_to;
223 if (is_a <scalar_int_mode> (mode, &int_mode_to))
224 {
225 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
226 mode_from = (machine_mode)(mode_from + 1))
227 init_expmed_one_conv (all, int_mode_to,
228 as_a <scalar_int_mode> (mode_from), speed);
229
230 scalar_int_mode wider_mode;
231 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
232 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
233 {
234 PUT_MODE (all->reg, mode);
235 PUT_MODE (all->zext, wider_mode);
236 PUT_MODE (all->wide_mult, wider_mode);
237 PUT_MODE (all->wide_lshr, wider_mode);
238 XEXP (all->wide_lshr, 1)
239 = gen_int_shift_amount (wider_mode, mode_bitsize);
240
241 set_mul_widen_cost (speed, wider_mode,
242 set_src_cost (all->wide_mult, wider_mode, speed));
243 set_mul_highpart_cost (speed, int_mode_to,
244 set_src_cost (all->wide_trunc,
245 int_mode_to, speed));
246 }
247 }
248 }
249
250 void
251 init_expmed (void)
252 {
253 struct init_expmed_rtl all;
254 machine_mode mode = QImode;
255 int m, speed;
256
257 memset (&all, 0, sizeof all);
258 for (m = 1; m < MAX_BITS_PER_WORD; m++)
259 {
260 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
261 all.cint[m] = GEN_INT (m);
262 }
263
264 /* Avoid using hard regs in ways which may be unsupported. */
265 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
266 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
267 all.neg = gen_rtx_NEG (mode, all.reg);
268 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
269 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
270 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
271 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
272 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
273 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
274 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
275 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
276 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
277 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
278 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
279 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
280 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
281 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
282 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
283
284 for (speed = 0; speed < 2; speed++)
285 {
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
288
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
292
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
297
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
302 }
303
304 if (alg_hash_used_p ())
305 {
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
308 }
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
312
313 ggc_free (all.trunc);
314 ggc_free (all.shift_sub1);
315 ggc_free (all.shift_sub0);
316 ggc_free (all.shift_add);
317 ggc_free (all.shift_mult);
318 ggc_free (all.shift);
319 ggc_free (all.wide_trunc);
320 ggc_free (all.wide_lshr);
321 ggc_free (all.wide_mult);
322 ggc_free (all.zext);
323 ggc_free (all.smod_32);
324 ggc_free (all.sdiv_32);
325 ggc_free (all.udiv);
326 ggc_free (all.sdiv);
327 ggc_free (all.mult);
328 ggc_free (all.neg);
329 ggc_free (all.plus);
330 ggc_free (all.reg);
331 }
332
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
336
337 rtx
338 negate_rtx (machine_mode mode, rtx x)
339 {
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
341
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
344
345 return result;
346 }
347
348 /* Whether reverse storage order is supported on the target. */
349 static int reverse_storage_order_supported = -1;
350
351 /* Check whether reverse storage order is supported on the target. */
352
353 static void
354 check_reverse_storage_order_support (void)
355 {
356 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
357 {
358 reverse_storage_order_supported = 0;
359 sorry ("reverse scalar storage order");
360 }
361 else
362 reverse_storage_order_supported = 1;
363 }
364
365 /* Whether reverse FP storage order is supported on the target. */
366 static int reverse_float_storage_order_supported = -1;
367
368 /* Check whether reverse FP storage order is supported on the target. */
369
370 static void
371 check_reverse_float_storage_order_support (void)
372 {
373 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
374 {
375 reverse_float_storage_order_supported = 0;
376 sorry ("reverse floating-point scalar storage order");
377 }
378 else
379 reverse_float_storage_order_supported = 1;
380 }
381
382 /* Return an rtx representing value of X with reverse storage order.
383 MODE is the intended mode of the result,
384 useful if X is a CONST_INT. */
385
386 rtx
387 flip_storage_order (machine_mode mode, rtx x)
388 {
389 scalar_int_mode int_mode;
390 rtx result;
391
392 if (mode == QImode)
393 return x;
394
395 if (COMPLEX_MODE_P (mode))
396 {
397 rtx real = read_complex_part (x, false);
398 rtx imag = read_complex_part (x, true);
399
400 real = flip_storage_order (GET_MODE_INNER (mode), real);
401 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
402
403 return gen_rtx_CONCAT (mode, real, imag);
404 }
405
406 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
407 check_reverse_storage_order_support ();
408
409 if (!is_a <scalar_int_mode> (mode, &int_mode))
410 {
411 if (FLOAT_MODE_P (mode)
412 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
413 check_reverse_float_storage_order_support ();
414
415 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
416 || !targetm.scalar_mode_supported_p (int_mode))
417 {
418 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
419 return x;
420 }
421 x = gen_lowpart (int_mode, x);
422 }
423
424 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
425 if (result == 0)
426 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
427
428 if (int_mode != mode)
429 result = gen_lowpart (mode, result);
430
431 return result;
432 }
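/* As an illustration, flipping the storage order of an SImode value is a
   plain byte swap: 0x11223344 becomes 0x44332211.  QImode values are
   returned unchanged, and complex values have each part flipped
   separately.  */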
433
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435 first unit of mode MODE that contains a bitfield of size BITSIZE at
436 bit position BITNUM. If MODE is not set, return a BLKmode reference
437 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
438 of the field within the new memory. */
439
440 static rtx
441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
442 unsigned HOST_WIDE_INT bitsize,
443 unsigned HOST_WIDE_INT bitnum,
444 unsigned HOST_WIDE_INT *new_bitnum)
445 {
446 scalar_int_mode imode;
447 if (mode.exists (&imode))
448 {
449 unsigned int unit = GET_MODE_BITSIZE (imode);
450 *new_bitnum = bitnum % unit;
451 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
452 return adjust_bitfield_address (mem, imode, offset);
453 }
454 else
455 {
456 *new_bitnum = bitnum % BITS_PER_UNIT;
457 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
458 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
459 / BITS_PER_UNIT);
460 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
461 }
462 }
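/* As a worked example, with MODE = SImode (32-bit units) and BITNUM = 37,
   the memory is advanced by (37 - 37 % 32) / 8 = 4 bytes and *NEW_BITNUM
   becomes 5.  When no mode is given, the BLKmode reference instead covers
   only the bytes that actually hold the field.  */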
463
464 /* The caller wants to perform insertion or extraction PATTERN on a
465 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466 BITREGION_START and BITREGION_END are as for store_bit_field
467 and FIELDMODE is the natural mode of the field.
468
469 Search for a mode that is compatible with the memory access
470 restrictions and (where applicable) with a register insertion or
471 extraction. Return the new memory on success, storing the adjusted
472 bit position in *NEW_BITNUM. Return null otherwise. */
473
474 static rtx
475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
476 rtx op0, HOST_WIDE_INT bitsize,
477 HOST_WIDE_INT bitnum,
478 poly_uint64 bitregion_start,
479 poly_uint64 bitregion_end,
480 machine_mode fieldmode,
481 unsigned HOST_WIDE_INT *new_bitnum)
482 {
483 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
484 bitregion_end, MEM_ALIGN (op0),
485 MEM_VOLATILE_P (op0));
486 scalar_int_mode best_mode;
487 if (iter.next_mode (&best_mode))
488 {
489 /* We can use a memory in BEST_MODE. See whether this is true for
490 any wider modes. All other things being equal, we prefer to
491 use the widest mode possible because it tends to expose more
492 CSE opportunities. */
493 if (!iter.prefer_smaller_modes ())
494 {
495 /* Limit the search to the mode required by the corresponding
496 register insertion or extraction instruction, if any. */
497 scalar_int_mode limit_mode = word_mode;
498 extraction_insn insn;
499 if (get_best_reg_extraction_insn (&insn, pattern,
500 GET_MODE_BITSIZE (best_mode),
501 fieldmode))
502 limit_mode = insn.field_mode;
503
504 scalar_int_mode wider_mode;
505 while (iter.next_mode (&wider_mode)
506 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
507 best_mode = wider_mode;
508 }
509 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
510 new_bitnum);
511 }
512 return NULL_RTX;
513 }
514
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
517 offset is then BITNUM / BITS_PER_UNIT. */
518
519 static bool
520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
521 machine_mode struct_mode)
522 {
523 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
524 if (BYTES_BIG_ENDIAN)
525 return (multiple_p (bitnum, BITS_PER_UNIT)
526 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
527 || multiple_p (bitnum + bitsize,
528 regsize * BITS_PER_UNIT)));
529 else
530 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
531 }
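/* For example, on a little-endian target whose register-natural size is
   4 bytes, a field starting at bit 32 is a lowpart (subreg offset 4),
   while a field starting at bit 8 is not.  */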
532
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535 Return false if the access would touch memory outside the range
536 BITREGION_START to BITREGION_END for conformance to the C++ memory
537 model. */
538
539 static bool
540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
541 unsigned HOST_WIDE_INT bitnum,
542 scalar_int_mode fieldmode,
543 poly_uint64 bitregion_start,
544 poly_uint64 bitregion_end)
545 {
546 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
547
548 /* -fstrict-volatile-bitfields must be enabled and we must have a
549 volatile MEM. */
550 if (!MEM_P (op0)
551 || !MEM_VOLATILE_P (op0)
552 || flag_strict_volatile_bitfields <= 0)
553 return false;
554
555 /* The bit size must not be larger than the field mode, and
556 the field mode must not be larger than a word. */
557 if (bitsize > modesize || modesize > BITS_PER_WORD)
558 return false;
559
560 /* Check for cases of unaligned fields that must be split. */
561 if (bitnum % modesize + bitsize > modesize)
562 return false;
563
564 /* The memory must be sufficiently aligned for a MODESIZE access.
 565 This condition guarantees that the memory access will not
566 touch anything after the end of the structure. */
567 if (MEM_ALIGN (op0) < modesize)
568 return false;
569
570 /* Check for cases where the C++ memory model applies. */
571 if (maybe_ne (bitregion_end, 0U)
572 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
573 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
574 bitregion_end)))
575 return false;
576
577 return true;
578 }
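/* As a source-level illustration (hypothetical example), given

     struct s { volatile unsigned int f : 8; } *p;

   an access to p->f compiled with -fstrict-volatile-bitfields is done as a
   single access in the declared field mode (typically SImode here), as
   long as the checks above succeed; the function returns false when the
   field would have to be split or the access would touch bits outside the
   C++ memory model's bit region.  */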
579
580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
581 bit number BITNUM can be treated as a simple value of mode MODE.
582 Store the byte offset in *BYTENUM if so. */
583
584 static bool
585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
586 machine_mode mode, poly_uint64 *bytenum)
587 {
588 return (MEM_P (op0)
589 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
590 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
591 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
592 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
593 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
594 }
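/* For example, a 32-bit field that starts on a 32-bit-aligned byte
   boundary within a sufficiently aligned MEM can simply be accessed as an
   SImode MEM at byte offset BITNUM / BITS_PER_UNIT.  */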
595 \f
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
599 are as for store_bit_field. */
600
601 static bool
602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
603 opt_scalar_int_mode op0_mode,
604 unsigned HOST_WIDE_INT bitsize,
605 unsigned HOST_WIDE_INT bitnum,
606 rtx value, scalar_int_mode value_mode)
607 {
608 class expand_operand ops[4];
609 rtx value1;
610 rtx xop0 = op0;
611 rtx_insn *last = get_last_insn ();
612 bool copy_back = false;
613
614 scalar_int_mode op_mode = insv->field_mode;
615 unsigned int unit = GET_MODE_BITSIZE (op_mode);
616 if (bitsize == 0 || bitsize > unit)
617 return false;
618
619 if (MEM_P (xop0))
620 /* Get a reference to the first byte of the field. */
621 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
622 &bitnum);
623 else
624 {
625 /* Convert from counting within OP0 to counting in OP_MODE. */
626 if (BYTES_BIG_ENDIAN)
627 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
628
629 /* If xop0 is a register, we need it in OP_MODE
630 to make it acceptable to the format of insv. */
631 if (GET_CODE (xop0) == SUBREG)
632 /* We can't just change the mode, because this might clobber op0,
633 and we will need the original value of op0 if insv fails. */
634 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
635 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
636 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
637 }
638
639 /* If the destination is a paradoxical subreg such that we need a
640 truncate to the inner mode, perform the insertion on a temporary and
641 truncate the result to the original destination. Note that we can't
642 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
643 X) 0)) is (reg:N X). */
644 if (GET_CODE (xop0) == SUBREG
645 && REG_P (SUBREG_REG (xop0))
646 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
647 op_mode))
648 {
649 rtx tem = gen_reg_rtx (op_mode);
650 emit_move_insn (tem, xop0);
651 xop0 = tem;
652 copy_back = true;
653 }
654
 655 /* There is a similar overflow check at the start of store_bit_field_1,
 656 but it only covers the case where the field lies completely outside
 657 the register.  The field may also lie partially in the register, in
 658 which case we need to adjust BITSIZE for the partial overflow.
 659 Without this fix, pr48335-2.c is broken on big-endian targets whose
 660 architectures provide a bit-insert instruction, such as arm and
 661 aarch64.  */
662 if (bitsize + bitnum > unit && bitnum < unit)
663 {
664 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
665 "destination object, data truncated into %wu-bit",
666 bitsize, unit - bitnum);
667 bitsize = unit - bitnum;
668 }
669
670 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
671 "backwards" from the size of the unit we are inserting into.
672 Otherwise, we count bits from the most significant on a
673 BYTES/BITS_BIG_ENDIAN machine. */
674
675 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
676 bitnum = unit - bitsize - bitnum;
677
678 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
679 value1 = value;
680 if (value_mode != op_mode)
681 {
682 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
683 {
684 rtx tmp;
685 /* Optimization: Don't bother really extending VALUE
686 if it has all the bits we will actually use. However,
687 if we must narrow it, be sure we do it correctly. */
688
689 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
690 {
691 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
692 if (! tmp)
693 tmp = simplify_gen_subreg (op_mode,
694 force_reg (value_mode, value1),
695 value_mode, 0);
696 }
697 else
698 {
699 tmp = gen_lowpart_if_possible (op_mode, value1);
700 if (! tmp)
701 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
702 }
703 value1 = tmp;
704 }
705 else if (CONST_INT_P (value))
706 value1 = gen_int_mode (INTVAL (value), op_mode);
707 else
708 /* Parse phase is supposed to make VALUE's data type
709 match that of the component reference, which is a type
710 at least as wide as the field; so VALUE should have
711 a mode that corresponds to that type. */
712 gcc_assert (CONSTANT_P (value));
713 }
714
715 create_fixed_operand (&ops[0], xop0);
716 create_integer_operand (&ops[1], bitsize);
717 create_integer_operand (&ops[2], bitnum);
718 create_input_operand (&ops[3], value1, op_mode);
719 if (maybe_expand_insn (insv->icode, 4, ops))
720 {
721 if (copy_back)
722 convert_move (op0, xop0, true);
723 return true;
724 }
725 delete_insns_since (last);
726 return false;
727 }
728
729 /* A subroutine of store_bit_field, with the same arguments. Return true
730 if the operation could be implemented.
731
732 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
733 no other way of implementing the operation. If FALLBACK_P is false,
734 return false instead. */
735
736 static bool
737 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
738 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
739 machine_mode fieldmode,
740 rtx value, bool reverse, bool fallback_p)
741 {
742 rtx op0 = str_rtx;
743
744 while (GET_CODE (op0) == SUBREG)
745 {
746 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
747 op0 = SUBREG_REG (op0);
748 }
749
750 /* No action is needed if the target is a register and if the field
751 lies completely outside that register. This can occur if the source
752 code contains an out-of-bounds access to a small array. */
753 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
754 return true;
755
756 /* Use vec_set patterns for inserting parts of vectors whenever
757 available. */
758 machine_mode outermode = GET_MODE (op0);
759 scalar_mode innermode = GET_MODE_INNER (outermode);
760 poly_uint64 pos;
761 if (VECTOR_MODE_P (outermode)
762 && !MEM_P (op0)
763 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
764 && fieldmode == innermode
765 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
766 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
767 {
768 class expand_operand ops[3];
769 enum insn_code icode = optab_handler (vec_set_optab, outermode);
770
771 create_fixed_operand (&ops[0], op0);
772 create_input_operand (&ops[1], value, innermode);
773 create_integer_operand (&ops[2], pos);
774 if (maybe_expand_insn (icode, 3, ops))
775 return true;
776 }
777
778 /* If the target is a register, overwriting the entire object, or storing
779 a full-word or multi-word field can be done with just a SUBREG. */
780 if (!MEM_P (op0)
781 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
782 {
783 /* Use the subreg machinery either to narrow OP0 to the required
784 words or to cope with mode punning between equal-sized modes.
785 In the latter case, use subreg on the rhs side, not lhs. */
786 rtx sub;
787 HOST_WIDE_INT regnum;
788 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
789 if (known_eq (bitnum, 0U)
790 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
791 {
792 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
793 if (sub)
794 {
795 if (reverse)
796 sub = flip_storage_order (GET_MODE (op0), sub);
797 emit_move_insn (op0, sub);
798 return true;
799 }
800 }
801 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
802 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
803 {
804 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
805 regnum * regsize);
806 if (sub)
807 {
808 if (reverse)
809 value = flip_storage_order (fieldmode, value);
810 emit_move_insn (sub, value);
811 return true;
812 }
813 }
814 }
815
816 /* If the target is memory, storing any naturally aligned field can be
817 done with a simple store. For targets that support fast unaligned
818 memory, any naturally sized, unit aligned field can be done directly. */
819 poly_uint64 bytenum;
820 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
821 {
822 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
823 if (reverse)
824 value = flip_storage_order (fieldmode, value);
825 emit_move_insn (op0, value);
826 return true;
827 }
828
829 /* It's possible we'll need to handle other cases here for
830 polynomial bitnum and bitsize. */
831
832 /* From here on we need to be looking at a fixed-size insertion. */
833 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
834 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
835
836 /* Make sure we are playing with integral modes. Pun with subregs
837 if we aren't. This must come after the entire register case above,
838 since that case is valid for any mode. The following cases are only
839 valid for integral modes. */
840 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
841 scalar_int_mode imode;
842 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
843 {
844 if (MEM_P (op0))
845 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
846 0, MEM_SIZE (op0));
847 else if (!op0_mode.exists ())
848 {
849 if (ibitnum == 0
850 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
851 && MEM_P (value)
852 && !reverse)
853 {
854 value = adjust_address (value, GET_MODE (op0), 0);
855 emit_move_insn (op0, value);
856 return true;
857 }
858 if (!fallback_p)
859 return false;
860 rtx temp = assign_stack_temp (GET_MODE (op0),
861 GET_MODE_SIZE (GET_MODE (op0)));
862 emit_move_insn (temp, op0);
863 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
864 reverse, fallback_p);
865 emit_move_insn (op0, temp);
866 return true;
867 }
868 else
869 op0 = gen_lowpart (op0_mode.require (), op0);
870 }
871
872 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
873 bitregion_start, bitregion_end,
874 fieldmode, value, reverse, fallback_p);
875 }
876
877 /* Subroutine of store_bit_field_1, with the same arguments, except
878 that BITSIZE and BITNUM are constant. Handle cases specific to
879 integral modes. If OP0_MODE is defined, it is the mode of OP0,
880 otherwise OP0 is a BLKmode MEM. */
881
882 static bool
883 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
884 unsigned HOST_WIDE_INT bitsize,
885 unsigned HOST_WIDE_INT bitnum,
886 poly_uint64 bitregion_start,
887 poly_uint64 bitregion_end,
888 machine_mode fieldmode,
889 rtx value, bool reverse, bool fallback_p)
890 {
891 /* Storing an lsb-aligned field in a register
892 can be done with a movstrict instruction. */
893
894 if (!MEM_P (op0)
895 && !reverse
896 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
897 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
898 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
899 {
900 class expand_operand ops[2];
901 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
902 rtx arg0 = op0;
903 unsigned HOST_WIDE_INT subreg_off;
904
905 if (GET_CODE (arg0) == SUBREG)
906 {
907 /* Else we've got some float mode source being extracted into
908 a different float mode destination -- this combination of
909 subregs results in Severe Tire Damage. */
910 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
911 || GET_MODE_CLASS (fieldmode) == MODE_INT
912 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
913 arg0 = SUBREG_REG (arg0);
914 }
915
916 subreg_off = bitnum / BITS_PER_UNIT;
917 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
918 {
919 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
920
921 create_fixed_operand (&ops[0], arg0);
922 /* Shrink the source operand to FIELDMODE. */
923 create_convert_operand_to (&ops[1], value, fieldmode, false);
924 if (maybe_expand_insn (icode, 2, ops))
925 return true;
926 }
927 }
928
929 /* Handle fields bigger than a word. */
930
931 if (bitsize > BITS_PER_WORD)
932 {
933 /* Here we transfer the words of the field
934 in the order least significant first.
935 This is because the most significant word is the one which may
936 be less than full.
937 However, only do that if the value is not BLKmode. */
938
939 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
940 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
941 rtx_insn *last;
942
943 /* This is the mode we must force value to, so that there will be enough
944 subwords to extract. Note that fieldmode will often (always?) be
945 VOIDmode, because that is what store_field uses to indicate that this
946 is a bit field, but passing VOIDmode to operand_subword_force
947 is not allowed.
948
949 The mode must be fixed-size, since insertions into variable-sized
950 objects are meant to be handled before calling this function. */
951 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
952 if (value_mode == VOIDmode)
953 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
954
955 last = get_last_insn ();
956 for (int i = 0; i < nwords; i++)
957 {
958 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
959 except maybe for the last iteration. */
960 const unsigned HOST_WIDE_INT new_bitsize
961 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
962 /* Bit offset from the starting bit number in the target. */
963 const unsigned int bit_offset
964 = backwards ^ reverse
965 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
966 : i * BITS_PER_WORD;
967 /* Starting word number in the value. */
968 const unsigned int wordnum
969 = backwards
970 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
971 : i;
972 /* The chunk of the value in word_mode. We use bit-field extraction
973 in BLKmode to handle unaligned memory references and to shift the
974 last chunk right on big-endian machines if need be. */
975 rtx value_word
976 = fieldmode == BLKmode
977 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
978 1, NULL_RTX, word_mode, word_mode, false,
979 NULL)
980 : operand_subword_force (value, wordnum, value_mode);
981
982 if (!store_bit_field_1 (op0, new_bitsize,
983 bitnum + bit_offset,
984 bitregion_start, bitregion_end,
985 word_mode,
986 value_word, reverse, fallback_p))
987 {
988 delete_insns_since (last);
989 return false;
990 }
991 }
992 return true;
993 }
994
995 /* If VALUE has a floating-point or complex mode, access it as an
996 integer of the corresponding size. This can occur on a machine
997 with 64 bit registers that uses SFmode for float. It can also
998 occur for unaligned float or complex fields. */
999 rtx orig_value = value;
1000 scalar_int_mode value_mode;
1001 if (GET_MODE (value) == VOIDmode)
1002 /* By this point we've dealt with values that are bigger than a word,
1003 so word_mode is a conservatively correct choice. */
1004 value_mode = word_mode;
1005 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1006 {
1007 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1008 value = gen_reg_rtx (value_mode);
1009 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1010 }
1011
1012 /* If OP0 is a multi-word register, narrow it to the affected word.
1013 If the region spans two words, defer to store_split_bit_field.
1014 Don't do this if op0 is a single hard register wider than word
1015 such as a float or vector register. */
1016 if (!MEM_P (op0)
1017 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1018 && (!REG_P (op0)
1019 || !HARD_REGISTER_P (op0)
1020 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1021 {
1022 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1023 {
1024 if (!fallback_p)
1025 return false;
1026
1027 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1028 bitregion_start, bitregion_end,
1029 value, value_mode, reverse);
1030 return true;
1031 }
1032 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1033 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1034 gcc_assert (op0);
1035 op0_mode = word_mode;
1036 bitnum %= BITS_PER_WORD;
1037 }
1038
1039 /* From here on we can assume that the field to be stored in fits
1040 within a word. If the destination is a register, it too fits
1041 in a word. */
1042
1043 extraction_insn insv;
1044 if (!MEM_P (op0)
1045 && !reverse
1046 && get_best_reg_extraction_insn (&insv, EP_insv,
1047 GET_MODE_BITSIZE (op0_mode.require ()),
1048 fieldmode)
1049 && store_bit_field_using_insv (&insv, op0, op0_mode,
1050 bitsize, bitnum, value, value_mode))
1051 return true;
1052
1053 /* If OP0 is a memory, try copying it to a register and seeing if a
1054 cheap register alternative is available. */
1055 if (MEM_P (op0) && !reverse)
1056 {
1057 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1058 fieldmode)
1059 && store_bit_field_using_insv (&insv, op0, op0_mode,
1060 bitsize, bitnum, value, value_mode))
1061 return true;
1062
1063 rtx_insn *last = get_last_insn ();
1064
1065 /* Try loading part of OP0 into a register, inserting the bitfield
1066 into that, and then copying the result back to OP0. */
1067 unsigned HOST_WIDE_INT bitpos;
1068 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1069 bitregion_start, bitregion_end,
1070 fieldmode, &bitpos);
1071 if (xop0)
1072 {
1073 rtx tempreg = copy_to_reg (xop0);
1074 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1075 bitregion_start, bitregion_end,
1076 fieldmode, orig_value, reverse, false))
1077 {
1078 emit_move_insn (xop0, tempreg);
1079 return true;
1080 }
1081 delete_insns_since (last);
1082 }
1083 }
1084
1085 if (!fallback_p)
1086 return false;
1087
1088 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1089 bitregion_end, value, value_mode, reverse);
1090 return true;
1091 }
1092
1093 /* Generate code to store value from rtx VALUE
1094 into a bit-field within structure STR_RTX
1095 containing BITSIZE bits starting at bit BITNUM.
1096
1097 BITREGION_START is bitpos of the first bitfield in this region.
1098 BITREGION_END is the bitpos of the ending bitfield in this region.
1099 These two fields are 0, if the C++ memory model does not apply,
1100 or we are not interested in keeping track of bitfield regions.
1101
1102 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1103
1104 If REVERSE is true, the store is to be done in reverse order. */
1105
1106 void
1107 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1108 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1109 machine_mode fieldmode,
1110 rtx value, bool reverse)
1111 {
1112 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1113 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1114 scalar_int_mode int_mode;
1115 if (bitsize.is_constant (&ibitsize)
1116 && bitnum.is_constant (&ibitnum)
1117 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1118 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1119 bitregion_start, bitregion_end))
1120 {
1121 /* Storing of a full word can be done with a simple store.
1122 We know here that the field can be accessed with one single
1123 instruction. For targets that support unaligned memory,
1124 an unaligned access may be necessary. */
1125 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1126 {
1127 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1128 ibitnum / BITS_PER_UNIT);
1129 if (reverse)
1130 value = flip_storage_order (int_mode, value);
1131 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1132 emit_move_insn (str_rtx, value);
1133 }
1134 else
1135 {
1136 rtx temp;
1137
1138 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1139 ibitnum, &ibitnum);
1140 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1141 temp = copy_to_reg (str_rtx);
1142 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1143 int_mode, value, reverse, true))
1144 gcc_unreachable ();
1145
1146 emit_move_insn (str_rtx, temp);
1147 }
1148
1149 return;
1150 }
1151
1152 /* Under the C++0x memory model, we must not touch bits outside the
1153 bit region. Adjust the address to start at the beginning of the
1154 bit region. */
1155 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1156 {
1157 scalar_int_mode best_mode;
1158 machine_mode addr_mode = VOIDmode;
1159
1160 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1161 bitnum -= bitregion_start;
1162 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1163 bitregion_end -= bitregion_start;
1164 bitregion_start = 0;
1165 if (bitsize.is_constant (&ibitsize)
1166 && bitnum.is_constant (&ibitnum)
1167 && get_best_mode (ibitsize, ibitnum,
1168 bitregion_start, bitregion_end,
1169 MEM_ALIGN (str_rtx), INT_MAX,
1170 MEM_VOLATILE_P (str_rtx), &best_mode))
1171 addr_mode = best_mode;
1172 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1173 offset, size);
1174 }
1175
1176 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1177 bitregion_start, bitregion_end,
1178 fieldmode, value, reverse, true))
1179 gcc_unreachable ();
1180 }
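/* An illustrative call (hypothetical operands): storing the 8-bit VALUE at
   bit offset 5 of MEM, with no active bit region and no storage-order
   reversal, is written as

     store_bit_field (mem, 8, 5, 0, 0, QImode, value, false);  */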
1181 \f
1182 /* Use shifts and boolean operations to store VALUE into a bit field of
1183 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1184 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1185 the mode of VALUE.
1186
1187 If REVERSE is true, the store is to be done in reverse order. */
1188
1189 static void
1190 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1191 unsigned HOST_WIDE_INT bitsize,
1192 unsigned HOST_WIDE_INT bitnum,
1193 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1194 rtx value, scalar_int_mode value_mode, bool reverse)
1195 {
1196 /* There is a case not handled here:
1197 a structure with a known alignment of just a halfword
1198 and a field split across two aligned halfwords within the structure.
1199 Or likewise a structure with a known alignment of just a byte
1200 and a field split across two bytes.
1201 Such cases are not supposed to be able to occur. */
1202
1203 scalar_int_mode best_mode;
1204 if (MEM_P (op0))
1205 {
1206 unsigned int max_bitsize = BITS_PER_WORD;
1207 scalar_int_mode imode;
1208 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1209 max_bitsize = GET_MODE_BITSIZE (imode);
1210
1211 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1212 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1213 &best_mode))
1214 {
1215 /* The only way this should occur is if the field spans word
1216 boundaries. */
1217 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1218 bitregion_start, bitregion_end,
1219 value, value_mode, reverse);
1220 return;
1221 }
1222
1223 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1224 }
1225 else
1226 best_mode = op0_mode.require ();
1227
1228 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1229 value, value_mode, reverse);
1230 }
1231
1232 /* Helper function for store_fixed_bit_field, stores
1233 the bit field always using MODE, which is the mode of OP0. The other
1234 arguments are as for store_fixed_bit_field. */
1235
1236 static void
1237 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1238 unsigned HOST_WIDE_INT bitsize,
1239 unsigned HOST_WIDE_INT bitnum,
1240 rtx value, scalar_int_mode value_mode, bool reverse)
1241 {
1242 rtx temp;
1243 int all_zero = 0;
1244 int all_one = 0;
1245
1246 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1247 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1248
1249 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1250 /* BITNUM is the distance between our msb
1251 and that of the containing datum.
1252 Convert it to the distance from the lsb. */
1253 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1254
1255 /* Now BITNUM is always the distance between our lsb
1256 and that of OP0. */
1257
1258 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1259 we must first convert its mode to MODE. */
1260
1261 if (CONST_INT_P (value))
1262 {
1263 unsigned HOST_WIDE_INT v = UINTVAL (value);
1264
1265 if (bitsize < HOST_BITS_PER_WIDE_INT)
1266 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1267
1268 if (v == 0)
1269 all_zero = 1;
1270 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1271 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1272 || (bitsize == HOST_BITS_PER_WIDE_INT
1273 && v == HOST_WIDE_INT_M1U))
1274 all_one = 1;
1275
1276 value = lshift_value (mode, v, bitnum);
1277 }
1278 else
1279 {
1280 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1281 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1282
1283 if (value_mode != mode)
1284 value = convert_to_mode (mode, value, 1);
1285
1286 if (must_and)
1287 value = expand_binop (mode, and_optab, value,
1288 mask_rtx (mode, 0, bitsize, 0),
1289 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1290 if (bitnum > 0)
1291 value = expand_shift (LSHIFT_EXPR, mode, value,
1292 bitnum, NULL_RTX, 1);
1293 }
1294
1295 if (reverse)
1296 value = flip_storage_order (mode, value);
1297
1298 /* Now clear the chosen bits in OP0,
1299 except that if VALUE is -1 we need not bother. */
1300 /* We keep the intermediates in registers to allow CSE to combine
1301 consecutive bitfield assignments. */
1302
1303 temp = force_reg (mode, op0);
1304
1305 if (! all_one)
1306 {
1307 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1308 if (reverse)
1309 mask = flip_storage_order (mode, mask);
1310 temp = expand_binop (mode, and_optab, temp, mask,
1311 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1312 temp = force_reg (mode, temp);
1313 }
1314
1315 /* Now logical-or VALUE into OP0, unless it is zero. */
1316
1317 if (! all_zero)
1318 {
1319 temp = expand_binop (mode, ior_optab, temp, value,
1320 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1321 temp = force_reg (mode, temp);
1322 }
1323
1324 if (op0 != temp)
1325 {
1326 op0 = copy_rtx (op0);
1327 emit_move_insn (op0, temp);
1328 }
1329 }
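/* The RTL generated above amounts to the following C-level sketch, for a
   little-endian bit numbering and BITSIZE narrower than the word (the AND
   is skipped when VALUE is all ones, the IOR when it is zero):

     mask = ((HOST_WIDE_INT_1U << bitsize) - 1) << bitnum;
     op0  = (op0 & ~mask) | ((value << bitnum) & mask);  */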
1330 \f
1331 /* Store a bit field that is split across multiple accessible memory objects.
1332
1333 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1334 BITSIZE is the field width; BITPOS the position of its first bit
1335 (within the word).
1336 VALUE is the value to store, which has mode VALUE_MODE.
1337 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1338 a BLKmode MEM.
1339
1340 If REVERSE is true, the store is to be done in reverse order.
1341
1342 This does not yet handle fields wider than BITS_PER_WORD. */
1343
1344 static void
1345 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1346 unsigned HOST_WIDE_INT bitsize,
1347 unsigned HOST_WIDE_INT bitpos,
1348 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1349 rtx value, scalar_int_mode value_mode, bool reverse)
1350 {
1351 unsigned int unit, total_bits, bitsdone = 0;
1352
 1353 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1354 much at a time. */
1355 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1356 unit = BITS_PER_WORD;
1357 else
1358 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1359
1360 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1361 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1362 again, and we will mutually recurse forever. */
1363 if (MEM_P (op0) && op0_mode.exists ())
1364 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1365
1366 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1367 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1368 that VALUE might be a floating-point constant. */
1369 if (CONSTANT_P (value) && !CONST_INT_P (value))
1370 {
1371 rtx word = gen_lowpart_common (word_mode, value);
1372
1373 if (word && (value != word))
1374 value = word;
1375 else
1376 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1377 value_mode = word_mode;
1378 }
1379
1380 total_bits = GET_MODE_BITSIZE (value_mode);
1381
1382 while (bitsdone < bitsize)
1383 {
1384 unsigned HOST_WIDE_INT thissize;
1385 unsigned HOST_WIDE_INT thispos;
1386 unsigned HOST_WIDE_INT offset;
1387 rtx part;
1388
1389 offset = (bitpos + bitsdone) / unit;
1390 thispos = (bitpos + bitsdone) % unit;
1391
1392 /* When region of bytes we can touch is restricted, decrease
1393 UNIT close to the end of the region as needed. If op0 is a REG
1394 or SUBREG of REG, don't do this, as there can't be data races
1395 on a register and we can expand shorter code in some cases. */
1396 if (maybe_ne (bitregion_end, 0U)
1397 && unit > BITS_PER_UNIT
1398 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1399 && !REG_P (op0)
1400 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1401 {
1402 unit = unit / 2;
1403 continue;
1404 }
1405
1406 /* THISSIZE must not overrun a word boundary. Otherwise,
1407 store_fixed_bit_field will call us again, and we will mutually
1408 recurse forever. */
1409 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1410 thissize = MIN (thissize, unit - thispos);
1411
1412 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1413 {
1414 /* Fetch successively less significant portions. */
1415 if (CONST_INT_P (value))
1416 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1417 >> (bitsize - bitsdone - thissize))
1418 & ((HOST_WIDE_INT_1 << thissize) - 1));
1419 /* Likewise, but the source is little-endian. */
1420 else if (reverse)
1421 part = extract_fixed_bit_field (word_mode, value, value_mode,
1422 thissize,
1423 bitsize - bitsdone - thissize,
1424 NULL_RTX, 1, false);
1425 else
1426 /* The args are chosen so that the last part includes the
1427 lsb. Give extract_bit_field the value it needs (with
1428 endianness compensation) to fetch the piece we want. */
1429 part = extract_fixed_bit_field (word_mode, value, value_mode,
1430 thissize,
1431 total_bits - bitsize + bitsdone,
1432 NULL_RTX, 1, false);
1433 }
1434 else
1435 {
1436 /* Fetch successively more significant portions. */
1437 if (CONST_INT_P (value))
1438 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1439 >> bitsdone)
1440 & ((HOST_WIDE_INT_1 << thissize) - 1));
1441 /* Likewise, but the source is big-endian. */
1442 else if (reverse)
1443 part = extract_fixed_bit_field (word_mode, value, value_mode,
1444 thissize,
1445 total_bits - bitsdone - thissize,
1446 NULL_RTX, 1, false);
1447 else
1448 part = extract_fixed_bit_field (word_mode, value, value_mode,
1449 thissize, bitsdone, NULL_RTX,
1450 1, false);
1451 }
1452
1453 /* If OP0 is a register, then handle OFFSET here. */
1454 rtx op0_piece = op0;
1455 opt_scalar_int_mode op0_piece_mode = op0_mode;
1456 if (SUBREG_P (op0) || REG_P (op0))
1457 {
1458 scalar_int_mode imode;
1459 if (op0_mode.exists (&imode)
1460 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1461 {
1462 if (offset)
1463 op0_piece = const0_rtx;
1464 }
1465 else
1466 {
1467 op0_piece = operand_subword_force (op0,
1468 offset * unit / BITS_PER_WORD,
1469 GET_MODE (op0));
1470 op0_piece_mode = word_mode;
1471 }
1472 offset &= BITS_PER_WORD / unit - 1;
1473 }
1474
1475 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1476 it is just an out-of-bounds access. Ignore it. */
1477 if (op0_piece != const0_rtx)
1478 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1479 offset * unit + thispos, bitregion_start,
1480 bitregion_end, part, word_mode, reverse);
1481 bitsdone += thissize;
1482 }
1483 }
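/* For example, with 32-bit words and UNIT == BITS_PER_WORD, storing a
   20-bit field at bit position 28 is done in two pieces: a 4-bit piece at
   bits 28..31 of the first word, then a 16-bit piece at bits 0..15 of the
   second word, each piece going through store_fixed_bit_field.  */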
1484 \f
1485 /* A subroutine of extract_bit_field_1 that converts return value X
1486 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1487 to extract_bit_field. */
1488
1489 static rtx
1490 convert_extracted_bit_field (rtx x, machine_mode mode,
1491 machine_mode tmode, bool unsignedp)
1492 {
1493 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1494 return x;
1495
1496 /* If the x mode is not a scalar integral, first convert to the
1497 integer mode of that size and then access it as a floating-point
1498 value via a SUBREG. */
1499 if (!SCALAR_INT_MODE_P (tmode))
1500 {
1501 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1502 x = convert_to_mode (int_mode, x, unsignedp);
1503 x = force_reg (int_mode, x);
1504 return gen_lowpart (tmode, x);
1505 }
1506
1507 return convert_to_mode (tmode, x, unsignedp);
1508 }
1509
1510 /* Try to use an ext(z)v pattern to extract a field from OP0.
1511 Return the extracted value on success, otherwise return null.
1512 EXTV describes the extraction instruction to use. If OP0_MODE
1513 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1514 The other arguments are as for extract_bit_field. */
1515
1516 static rtx
1517 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1518 opt_scalar_int_mode op0_mode,
1519 unsigned HOST_WIDE_INT bitsize,
1520 unsigned HOST_WIDE_INT bitnum,
1521 int unsignedp, rtx target,
1522 machine_mode mode, machine_mode tmode)
1523 {
1524 class expand_operand ops[4];
1525 rtx spec_target = target;
1526 rtx spec_target_subreg = 0;
1527 scalar_int_mode ext_mode = extv->field_mode;
1528 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1529
1530 if (bitsize == 0 || unit < bitsize)
1531 return NULL_RTX;
1532
1533 if (MEM_P (op0))
1534 /* Get a reference to the first byte of the field. */
1535 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1536 &bitnum);
1537 else
1538 {
1539 /* Convert from counting within OP0 to counting in EXT_MODE. */
1540 if (BYTES_BIG_ENDIAN)
1541 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1542
1543 /* If op0 is a register, we need it in EXT_MODE to make it
1544 acceptable to the format of ext(z)v. */
1545 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1546 return NULL_RTX;
1547 if (REG_P (op0) && op0_mode.require () != ext_mode)
1548 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1549 }
1550
1551 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1552 "backwards" from the size of the unit we are extracting from.
1553 Otherwise, we count bits from the most significant on a
1554 BYTES/BITS_BIG_ENDIAN machine. */
1555
1556 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1557 bitnum = unit - bitsize - bitnum;
1558
1559 if (target == 0)
1560 target = spec_target = gen_reg_rtx (tmode);
1561
1562 if (GET_MODE (target) != ext_mode)
1563 {
1564 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1565 between the mode of the extraction (word_mode) and the target
1566 mode. Instead, create a temporary and use convert_move to set
1567 the target. */
1568 if (REG_P (target)
1569 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1570 {
1571 target = gen_lowpart (ext_mode, target);
1572 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1573 spec_target_subreg = target;
1574 }
1575 else
1576 target = gen_reg_rtx (ext_mode);
1577 }
1578
1579 create_output_operand (&ops[0], target, ext_mode);
1580 create_fixed_operand (&ops[1], op0);
1581 create_integer_operand (&ops[2], bitsize);
1582 create_integer_operand (&ops[3], bitnum);
1583 if (maybe_expand_insn (extv->icode, 4, ops))
1584 {
1585 target = ops[0].value;
1586 if (target == spec_target)
1587 return target;
1588 if (target == spec_target_subreg)
1589 return spec_target;
1590 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1591 }
1592 return NULL_RTX;
1593 }
1594
1595 /* See whether it would be valid to extract the part of OP0 described
1596 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1597 operation. Return the subreg if so, otherwise return null. */
1598
1599 static rtx
1600 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1601 poly_uint64 bitsize, poly_uint64 bitnum)
1602 {
1603 poly_uint64 bytenum;
1604 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1605 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1606 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1607 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1608 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1609 return NULL_RTX;
1610 }
1611
1612 /* A subroutine of extract_bit_field, with the same arguments.
1613 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1614 if we can find no other means of implementing the operation.
 1615 If FALLBACK_P is false, return NULL instead. */
1616
1617 static rtx
1618 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1619 int unsignedp, rtx target, machine_mode mode,
1620 machine_mode tmode, bool reverse, bool fallback_p,
1621 rtx *alt_rtl)
1622 {
1623 rtx op0 = str_rtx;
1624 machine_mode mode1;
1625
1626 if (tmode == VOIDmode)
1627 tmode = mode;
1628
1629 while (GET_CODE (op0) == SUBREG)
1630 {
1631 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1632 op0 = SUBREG_REG (op0);
1633 }
1634
1635 /* If we have an out-of-bounds access to a register, just return an
1636 uninitialized register of the required mode. This can occur if the
1637 source code contains an out-of-bounds access to a small array. */
1638 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1639 return gen_reg_rtx (tmode);
1640
1641 if (REG_P (op0)
1642 && mode == GET_MODE (op0)
1643 && known_eq (bitnum, 0U)
1644 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1645 {
1646 if (reverse)
1647 op0 = flip_storage_order (mode, op0);
1648 /* We're trying to extract a full register from itself. */
1649 return op0;
1650 }
1651
1652 /* First try to check for vector from vector extractions. */
1653 if (VECTOR_MODE_P (GET_MODE (op0))
1654 && !MEM_P (op0)
1655 && VECTOR_MODE_P (tmode)
1656 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1657 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1658 {
1659 machine_mode new_mode = GET_MODE (op0);
1660 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1661 {
1662 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1663 poly_uint64 nunits;
1664 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1665 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1666 || !related_vector_mode (tmode, inner_mode,
1667 nunits).exists (&new_mode)
1668 || maybe_ne (GET_MODE_SIZE (new_mode),
1669 GET_MODE_SIZE (GET_MODE (op0))))
1670 new_mode = VOIDmode;
1671 }
1672 poly_uint64 pos;
1673 if (new_mode != VOIDmode
1674 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1675 != CODE_FOR_nothing)
1676 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1677 {
1678 class expand_operand ops[3];
1679 machine_mode outermode = new_mode;
1680 machine_mode innermode = tmode;
1681 enum insn_code icode
1682 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1683
1684 if (new_mode != GET_MODE (op0))
1685 op0 = gen_lowpart (new_mode, op0);
1686 create_output_operand (&ops[0], target, innermode);
1687 ops[0].target = 1;
1688 create_input_operand (&ops[1], op0, outermode);
1689 create_integer_operand (&ops[2], pos);
1690 if (maybe_expand_insn (icode, 3, ops))
1691 {
1692 if (alt_rtl && ops[0].target)
1693 *alt_rtl = target;
1694 target = ops[0].value;
1695 if (GET_MODE (target) != mode)
1696 return gen_lowpart (tmode, target);
1697 return target;
1698 }
1699 }
1700 }
1701
1702 /* See if we can get a better vector mode before extracting. */
1703 if (VECTOR_MODE_P (GET_MODE (op0))
1704 && !MEM_P (op0)
1705 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1706 {
1707 machine_mode new_mode;
1708
1709 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1710 new_mode = MIN_MODE_VECTOR_FLOAT;
1711 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1712 new_mode = MIN_MODE_VECTOR_FRACT;
1713 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1714 new_mode = MIN_MODE_VECTOR_UFRACT;
1715 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1716 new_mode = MIN_MODE_VECTOR_ACCUM;
1717 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1718 new_mode = MIN_MODE_VECTOR_UACCUM;
1719 else
1720 new_mode = MIN_MODE_VECTOR_INT;
1721
1722 FOR_EACH_MODE_FROM (new_mode, new_mode)
1723 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1724 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1725 && targetm.vector_mode_supported_p (new_mode))
1726 break;
1727 if (new_mode != VOIDmode)
1728 op0 = gen_lowpart (new_mode, op0);
1729 }
1730
1731 /* Use vec_extract patterns for extracting parts of vectors whenever
1732 available. If that fails, see whether the current modes and bitregion
1733 give a natural subreg. */
1734 machine_mode outermode = GET_MODE (op0);
1735 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1736 {
1737 scalar_mode innermode = GET_MODE_INNER (outermode);
1738 enum insn_code icode
1739 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1740 poly_uint64 pos;
1741 if (icode != CODE_FOR_nothing
1742 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1743 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1744 {
1745 class expand_operand ops[3];
1746
1747 create_output_operand (&ops[0], target, innermode);
1748 ops[0].target = 1;
1749 create_input_operand (&ops[1], op0, outermode);
1750 create_integer_operand (&ops[2], pos);
1751 if (maybe_expand_insn (icode, 3, ops))
1752 {
1753 if (alt_rtl && ops[0].target)
1754 *alt_rtl = target;
1755 target = ops[0].value;
1756 if (GET_MODE (target) != mode)
1757 return gen_lowpart (tmode, target);
1758 return target;
1759 }
1760 }
1761 /* Using subregs is useful if we're extracting one register vector
1762 from a multi-register vector. extract_bit_field_as_subreg checks
1763 for valid bitsize and bitnum, so we don't need to do that here. */
1764 if (VECTOR_MODE_P (mode))
1765 {
1766 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1767 if (sub)
1768 return sub;
1769 }
1770 }
1771
1772 /* Make sure we are playing with integral modes. Pun with subregs
1773 if we aren't. */
1774 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1775 scalar_int_mode imode;
1776 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1777 {
1778 if (MEM_P (op0))
1779 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1780 0, MEM_SIZE (op0));
1781 else if (op0_mode.exists (&imode))
1782 {
1783 op0 = gen_lowpart (imode, op0);
1784
1785 /* If we got a SUBREG, force it into a register since we
1786 aren't going to be able to do another SUBREG on it. */
1787 if (GET_CODE (op0) == SUBREG)
1788 op0 = force_reg (imode, op0);
1789 }
1790 else
1791 {
1792 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1793 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1794 emit_move_insn (mem, op0);
1795 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1796 }
1797 }
1798
1799 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1800 If that's wrong, the solution is to test for it and set TARGET to 0
1801 if needed. */
1802
1803 /* Get the mode of the field to use for atomic access or subreg
1804 conversion. */
1805 if (!SCALAR_INT_MODE_P (tmode)
1806 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1807 mode1 = mode;
1808 gcc_assert (mode1 != BLKmode);
1809
1810 /* Extraction of a full MODE1 value can be done with a subreg as long
1811 as the least significant bit of the value is the least significant
1812 bit of either OP0 or a word of OP0. */
1813 if (!MEM_P (op0) && !reverse)
1814 {
1815 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1816 if (sub)
1817 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1818 }
1819
1820 /* Extraction of a full MODE1 value can be done with a load as long as
1821 the field is on a byte boundary and is sufficiently aligned. */
1822 poly_uint64 bytenum;
1823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1824 {
1825 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1826 if (reverse)
1827 op0 = flip_storage_order (mode1, op0);
1828 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1829 }
1830
1831 /* If we have a memory source and a non-constant bit offset, restrict
1832 the memory to the referenced bytes. This is a worst-case fallback
1833 but is useful for things like vector booleans. */
1834 if (MEM_P (op0) && !bitnum.is_constant ())
1835 {
1836 bytenum = bits_to_bytes_round_down (bitnum);
1837 bitnum = num_trailing_bits (bitnum);
1838 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1839 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1840 op0_mode = opt_scalar_int_mode ();
1841 }
1842
1843 /* It's possible we'll need to handle other cases here for
1844 polynomial bitnum and bitsize. */
1845
1846 /* From here on we need to be looking at a fixed-size extraction. */
1847 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1848 bitnum.to_constant (), unsignedp,
1849 target, mode, tmode, reverse, fallback_p);
1850 }
1851
1852 /* Subroutine of extract_bit_field_1, with the same arguments, except
1853 that BITSIZE and BITNUM are constant. Handle cases specific to
1854 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1855 otherwise OP0 is a BLKmode MEM. */
1856
1857 static rtx
1858 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1859 unsigned HOST_WIDE_INT bitsize,
1860 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1861 rtx target, machine_mode mode, machine_mode tmode,
1862 bool reverse, bool fallback_p)
1863 {
1864 /* Handle fields bigger than a word. */
1865
1866 if (bitsize > BITS_PER_WORD)
1867 {
1868 /* Here we transfer the words of the field
1869 in the order least significant first.
1870 This is because the most significant word is the one which may
1871 be less than full. */
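      /* For example, a 40-bit field on a target with 32-bit words needs
	 nwords == 2: a full low-order word is moved first, then the
	 remaining 8-bit high-order part.  */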
1872
1873 const bool backwards = WORDS_BIG_ENDIAN;
1874 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1875 unsigned int i;
1876 rtx_insn *last;
1877
1878 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1879 target = gen_reg_rtx (mode);
1880
1881 /* In case we're about to clobber a base register or something
1882 (see gcc.c-torture/execute/20040625-1.c). */
1883 if (reg_mentioned_p (target, op0))
1884 target = gen_reg_rtx (mode);
1885
1886 /* Indicate for flow that the entire target reg is being set. */
1887 emit_clobber (target);
1888
1889 /* The mode must be fixed-size, since extract_bit_field_1 handles
1890 extractions from variable-sized objects before calling this
1891 function. */
1892 unsigned int target_size
1893 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1894 last = get_last_insn ();
1895 for (i = 0; i < nwords; i++)
1896 {
1897 /* If I is 0, use the low-order word in both field and target;
1898 if I is 1, use the next to lowest word; and so on. */
1899 /* Word number in TARGET to use. */
1900 unsigned int wordnum
1901 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1902 /* Offset from start of field in OP0. */
1903 unsigned int bit_offset = (backwards ^ reverse
1904 ? MAX ((int) bitsize - ((int) i + 1)
1905 * BITS_PER_WORD,
1906 0)
1907 : (int) i * BITS_PER_WORD);
1908 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1909 rtx result_part
1910 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1911 bitsize - i * BITS_PER_WORD),
1912 bitnum + bit_offset, 1, target_part,
1913 mode, word_mode, reverse, fallback_p, NULL);
1914
1915 gcc_assert (target_part);
1916 if (!result_part)
1917 {
1918 delete_insns_since (last);
1919 return NULL;
1920 }
1921
1922 if (result_part != target_part)
1923 emit_move_insn (target_part, result_part);
1924 }
1925
1926 if (unsignedp)
1927 {
1928 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1929 need to be zeroed out. */
1930 if (target_size > nwords * UNITS_PER_WORD)
1931 {
1932 unsigned int i, total_words;
1933
1934 total_words = target_size / UNITS_PER_WORD;
1935 for (i = nwords; i < total_words; i++)
1936 emit_move_insn
1937 (operand_subword (target,
1938 backwards ? total_words - i - 1 : i,
1939 1, VOIDmode),
1940 const0_rtx);
1941 }
1942 return target;
1943 }
1944
1945 /* Signed bit field: sign-extend with two arithmetic shifts. */
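      /* For example, an 8-bit signed field sitting in the low bits of a
	 32-bit MODE is shifted left by 24 so that its sign bit becomes the
	 msb, then arithmetic-shifted right by 24, which replicates that
	 sign bit through the upper 24 bits.  */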
1946 target = expand_shift (LSHIFT_EXPR, mode, target,
1947 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1948 return expand_shift (RSHIFT_EXPR, mode, target,
1949 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1950 }
1951
1952 /* If OP0 is a multi-word register, narrow it to the affected word.
1953 If the region spans two words, defer to extract_split_bit_field. */
1954 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1955 {
1956 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1957 {
1958 if (!fallback_p)
1959 return NULL_RTX;
1960 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1961 unsignedp, reverse);
1962 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1963 }
1964 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1965 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1966 op0_mode = word_mode;
1967 bitnum %= BITS_PER_WORD;
1968 }
1969
1970 /* From here on we know the desired field is smaller than a word.
1971 If OP0 is a register, it too fits within a word. */
1972 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1973 extraction_insn extv;
1974 if (!MEM_P (op0)
1975 && !reverse
1976 /* ??? We could limit the structure size to the part of OP0 that
1977 contains the field, with appropriate checks for endianness
1978 and TARGET_TRULY_NOOP_TRUNCATION. */
1979 && get_best_reg_extraction_insn (&extv, pattern,
1980 GET_MODE_BITSIZE (op0_mode.require ()),
1981 tmode))
1982 {
1983 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1984 bitsize, bitnum,
1985 unsignedp, target, mode,
1986 tmode);
1987 if (result)
1988 return result;
1989 }
1990
1991 /* If OP0 is a memory, try copying it to a register and seeing if a
1992 cheap register alternative is available. */
1993 if (MEM_P (op0) && !reverse)
1994 {
1995 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1996 tmode))
1997 {
1998 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1999 bitsize, bitnum,
2000 unsignedp, target, mode,
2001 tmode);
2002 if (result)
2003 return result;
2004 }
2005
2006 rtx_insn *last = get_last_insn ();
2007
2008 /* Try loading part of OP0 into a register and extracting the
2009 bitfield from that. */
2010 unsigned HOST_WIDE_INT bitpos;
2011 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2012 0, 0, tmode, &bitpos);
2013 if (xop0)
2014 {
2015 xop0 = copy_to_reg (xop0);
2016 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2017 unsignedp, target,
2018 mode, tmode, reverse, false, NULL);
2019 if (result)
2020 return result;
2021 delete_insns_since (last);
2022 }
2023 }
2024
2025 if (!fallback_p)
2026 return NULL;
2027
2028 /* Find a correspondingly-sized integer field, so we can apply
2029 shifts and masks to it. */
2030 scalar_int_mode int_mode;
2031 if (!int_mode_for_mode (tmode).exists (&int_mode))
2032 /* If this fails, we should probably push op0 out to memory and then
2033 do a load. */
2034 int_mode = int_mode_for_mode (mode).require ();
2035
2036 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2037 bitnum, target, unsignedp, reverse);
2038
2039 /* Complex values must be reversed piecewise, so we need to undo the global
2040 reversal, convert to the complex mode and reverse again. */
2041 if (reverse && COMPLEX_MODE_P (tmode))
2042 {
2043 target = flip_storage_order (int_mode, target);
2044 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2045 target = flip_storage_order (tmode, target);
2046 }
2047 else
2048 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2049
2050 return target;
2051 }
2052
2053 /* Generate code to extract a bit-field from STR_RTX
2054 containing BITSIZE bits, starting at BITNUM,
2055 and put it in TARGET if possible (if TARGET is nonzero).
2056 Regardless of TARGET, we return the rtx for where the value is placed.
2057
2058 STR_RTX is the structure containing the byte (a REG or MEM).
2059 UNSIGNEDP is nonzero if this is an unsigned bit field.
2060 MODE is the natural mode of the field value once extracted.
2061 TMODE is the mode the caller would like the value to have;
2062 but the value may be returned with type MODE instead.
2063
2064 If REVERSE is true, the extraction is to be done in reverse order.
2065
2066 If a TARGET is specified and we can store in it at no extra cost,
2067 we do so, and return TARGET.
2068 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2069 if they are equally easy.
2070
2071 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2072 then *ALT_RTL is set to TARGET (before legitimization). */
2073
2074 rtx
2075 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2076 int unsignedp, rtx target, machine_mode mode,
2077 machine_mode tmode, bool reverse, rtx *alt_rtl)
2078 {
2079 machine_mode mode1;
2080
2081 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2082 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2083 mode1 = GET_MODE (str_rtx);
2084 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2085 mode1 = GET_MODE (target);
2086 else
2087 mode1 = tmode;
2088
2089 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2090 scalar_int_mode int_mode;
2091 if (bitsize.is_constant (&ibitsize)
2092 && bitnum.is_constant (&ibitnum)
2093 && is_a <scalar_int_mode> (mode1, &int_mode)
2094 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2095 int_mode, 0, 0))
2096 {
2097 /* Extraction of a full INT_MODE value can be done with a simple load.
2098 We know here that the field can be accessed with one single
2099 instruction. For targets that support unaligned memory,
2100 an unaligned access may be necessary. */
2101 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2102 {
2103 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2104 ibitnum / BITS_PER_UNIT);
2105 if (reverse)
2106 result = flip_storage_order (int_mode, result);
2107 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2108 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2109 }
2110
2111 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2112 &ibitnum);
2113 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2114 str_rtx = copy_to_reg (str_rtx);
2115 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2116 target, mode, tmode, reverse, true, alt_rtl);
2117 }
2118
2119 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2120 target, mode, tmode, reverse, true, alt_rtl);
2121 }
2122 \f
2123 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2124 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2125 otherwise OP0 is a BLKmode MEM.
2126
2127 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2128 If REVERSE is true, the extraction is to be done in reverse order.
2129
2130 If TARGET is nonzero, attempts to store the value there
2131 and return TARGET, but this is not guaranteed.
2132 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2133
2134 static rtx
2135 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2136 opt_scalar_int_mode op0_mode,
2137 unsigned HOST_WIDE_INT bitsize,
2138 unsigned HOST_WIDE_INT bitnum, rtx target,
2139 int unsignedp, bool reverse)
2140 {
2141 scalar_int_mode mode;
2142 if (MEM_P (op0))
2143 {
2144 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2145 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2146 /* The only way this should occur is if the field spans word
2147 boundaries. */
2148 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2149 unsignedp, reverse);
2150
2151 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2152 }
2153 else
2154 mode = op0_mode.require ();
2155
2156 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2157 target, unsignedp, reverse);
2158 }
2159
2160 /* Helper function for extract_fixed_bit_field, extracts
2161 the bit field always using MODE, which is the mode of OP0.
2162 The other arguments are as for extract_fixed_bit_field. */
2163
2164 static rtx
2165 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2166 unsigned HOST_WIDE_INT bitsize,
2167 unsigned HOST_WIDE_INT bitnum, rtx target,
2168 int unsignedp, bool reverse)
2169 {
2170 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2171 for invalid input, such as extract equivalent of f5 from
2172 gcc.dg/pr48335-2.c. */
2173
2174 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2175 /* BITNUM is the distance between our msb and that of OP0.
2176 Convert it to the distance from the lsb. */
2177 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
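    /* For example, with a 32-bit MODE, BITSIZE == 8 and a big-endian
       BITNUM of 4, the field's lsb ends up at bit 32 - 8 - 4 == 20 when
       counted from OP0's lsb.  */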
2178
2179 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2180 We have reduced the big-endian case to the little-endian case. */
2181 if (reverse)
2182 op0 = flip_storage_order (mode, op0);
2183
2184 if (unsignedp)
2185 {
2186 if (bitnum)
2187 {
2188 /* If the field does not already start at the lsb,
2189 shift it so it does. */
2190 /* Maybe propagate the target for the shift. */
2191 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2192 if (tmode != mode)
2193 subtarget = 0;
2194 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2195 }
2196 /* Convert the value to the desired mode. TMODE must also be a
2197 scalar integer for this conversion to make sense, since we
2198 shouldn't reinterpret the bits. */
2199 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2200 if (mode != new_mode)
2201 op0 = convert_to_mode (new_mode, op0, 1);
2202
2203 /* Unless the msb of the field used to be the msb when we shifted,
2204 mask out the upper bits. */
2205
2206 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2207 return expand_binop (new_mode, and_optab, op0,
2208 mask_rtx (new_mode, 0, bitsize, 0),
2209 target, 1, OPTAB_LIB_WIDEN);
2210 return op0;
2211 }
2212
2213 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2214 then arithmetic-shift its lsb to the lsb of the word. */
2215 op0 = force_reg (mode, op0);
2216
2217 /* Find the narrowest integer mode that contains the field. */
2218
2219 opt_scalar_int_mode mode_iter;
2220 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2221 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2222 break;
2223
2224 mode = mode_iter.require ();
2225 op0 = convert_to_mode (mode, op0, 0);
2226
2227 if (mode != tmode)
2228 target = 0;
2229
2230 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2231 {
2232 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2233 /* Maybe propagate the target for the shift. */
2234 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2235 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2236 }
2237
2238 return expand_shift (RSHIFT_EXPR, mode, op0,
2239 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2240 }
2241
2242 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2243 VALUE << BITPOS. */
2244
2245 static rtx
2246 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2247 int bitpos)
2248 {
2249 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2250 }
2251 \f
2252 /* Extract a bit field that is split across two words
2253 and return an RTX for the result.
2254
2255 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2256 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2257 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2258 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2259 a BLKmode MEM.
2260
2261 If REVERSE is true, the extraction is to be done in reverse order. */
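/* Worked example (illustrative only): with 32-bit words, a 16-bit field
   starting at bit 24 of a REG is read below as an 8-bit piece from word 0
   and an 8-bit piece from word 1, each shifted into position and combined
   with IOR.  */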
2262
2263 static rtx
2264 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2265 unsigned HOST_WIDE_INT bitsize,
2266 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2267 bool reverse)
2268 {
2269 unsigned int unit;
2270 unsigned int bitsdone = 0;
2271 rtx result = NULL_RTX;
2272 int first = 1;
2273
2274 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2275 much at a time. */
2276 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2277 unit = BITS_PER_WORD;
2278 else
2279 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2280
2281 while (bitsdone < bitsize)
2282 {
2283 unsigned HOST_WIDE_INT thissize;
2284 rtx part;
2285 unsigned HOST_WIDE_INT thispos;
2286 unsigned HOST_WIDE_INT offset;
2287
2288 offset = (bitpos + bitsdone) / unit;
2289 thispos = (bitpos + bitsdone) % unit;
2290
2291 /* THISSIZE must not overrun a word boundary. Otherwise,
2292 extract_fixed_bit_field will call us again, and we will mutually
2293 recurse forever. */
2294 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2295 thissize = MIN (thissize, unit - thispos);
2296
2297 /* If OP0 is a register, then handle OFFSET here. */
2298 rtx op0_piece = op0;
2299 opt_scalar_int_mode op0_piece_mode = op0_mode;
2300 if (SUBREG_P (op0) || REG_P (op0))
2301 {
2302 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2303 op0_piece_mode = word_mode;
2304 offset = 0;
2305 }
2306
2307 /* Extract the parts in bit-counting order,
2308 whose meaning is determined by BYTES_BIG_ENDIAN.
2309 OFFSET is in UNITs, and UNIT is in bits. */
2310 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2311 thissize, offset * unit + thispos,
2312 0, 1, reverse);
2313 bitsdone += thissize;
2314
2315 /* Shift this part into place for the result. */
2316 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2317 {
2318 if (bitsize != bitsdone)
2319 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2320 bitsize - bitsdone, 0, 1);
2321 }
2322 else
2323 {
2324 if (bitsdone != thissize)
2325 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2326 bitsdone - thissize, 0, 1);
2327 }
2328
2329 if (first)
2330 result = part;
2331 else
2332 /* Combine the parts with bitwise or. This works
2333 because we extracted each part as an unsigned bit field. */
2334 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2335 OPTAB_LIB_WIDEN);
2336
2337 first = 0;
2338 }
2339
2340 /* Unsigned bit field: we are done. */
2341 if (unsignedp)
2342 return result;
2343 /* Signed bit field: sign-extend with two arithmetic shifts. */
2344 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2345 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2346 return expand_shift (RSHIFT_EXPR, word_mode, result,
2347 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2348 }
2349 \f
2350 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2351 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2352 MODE, fill the upper bits with zeros. Fail if the layout of either
2353 mode is unknown (as for CC modes) or if the extraction would involve
2354 unprofitable mode punning. Return the value on success, otherwise
2355 return null.
2356
2357 This is different from gen_lowpart* in these respects:
2358
2359 - the returned value must always be considered an rvalue
2360
2361 - when MODE is wider than SRC_MODE, the extraction involves
2362 a zero extension
2363
2364 - when MODE is smaller than SRC_MODE, the extraction involves
2365 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2366
2367 In other words, this routine performs a computation, whereas the
2368 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2369 operations. */
2370
2371 rtx
2372 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2373 {
2374 scalar_int_mode int_mode, src_int_mode;
2375
2376 if (mode == src_mode)
2377 return src;
2378
2379 if (CONSTANT_P (src))
2380 {
2381 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2382 fails, it will happily create (subreg (symbol_ref)) or similar
2383 invalid SUBREGs. */
2384 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2385 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2386 if (ret)
2387 return ret;
2388
2389 if (GET_MODE (src) == VOIDmode
2390 || !validate_subreg (mode, src_mode, src, byte))
2391 return NULL_RTX;
2392
2393 src = force_reg (GET_MODE (src), src);
2394 return gen_rtx_SUBREG (mode, src, byte);
2395 }
2396
2397 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2398 return NULL_RTX;
2399
2400 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2401 && targetm.modes_tieable_p (mode, src_mode))
2402 {
2403 rtx x = gen_lowpart_common (mode, src);
2404 if (x)
2405 return x;
2406 }
2407
2408 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2409 || !int_mode_for_mode (mode).exists (&int_mode))
2410 return NULL_RTX;
2411
2412 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2413 return NULL_RTX;
2414 if (!targetm.modes_tieable_p (int_mode, mode))
2415 return NULL_RTX;
2416
2417 src = gen_lowpart (src_int_mode, src);
2418 if (!validate_subreg (int_mode, src_int_mode, src,
2419 subreg_lowpart_offset (int_mode, src_int_mode)))
2420 return NULL_RTX;
2421
2422 src = convert_modes (int_mode, src_int_mode, src, true);
2423 src = gen_lowpart (mode, src);
2424 return src;
2425 }
2426 \f
2427 /* Add INC into TARGET. */
2428
2429 void
2430 expand_inc (rtx target, rtx inc)
2431 {
2432 rtx value = expand_binop (GET_MODE (target), add_optab,
2433 target, inc,
2434 target, 0, OPTAB_LIB_WIDEN);
2435 if (value != target)
2436 emit_move_insn (target, value);
2437 }
2438
2439 /* Subtract DEC from TARGET. */
2440
2441 void
2442 expand_dec (rtx target, rtx dec)
2443 {
2444 rtx value = expand_binop (GET_MODE (target), sub_optab,
2445 target, dec,
2446 target, 0, OPTAB_LIB_WIDEN);
2447 if (value != target)
2448 emit_move_insn (target, value);
2449 }
2450 \f
2451 /* Output a shift instruction for expression code CODE,
2452 with SHIFTED being the rtx for the value to shift,
2453 and AMOUNT the rtx for the amount to shift by.
2454 Store the result in the rtx TARGET, if that is convenient.
2455 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2456 Return the rtx for where the value is.
2457 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2458 in which case 0 is returned. */
2459
2460 static rtx
2461 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2462 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2463 {
2464 rtx op1, temp = 0;
2465 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2466 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2467 optab lshift_optab = ashl_optab;
2468 optab rshift_arith_optab = ashr_optab;
2469 optab rshift_uns_optab = lshr_optab;
2470 optab lrotate_optab = rotl_optab;
2471 optab rrotate_optab = rotr_optab;
2472 machine_mode op1_mode;
2473 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2474 int attempt;
2475 bool speed = optimize_insn_for_speed_p ();
2476
2477 op1 = amount;
2478 op1_mode = GET_MODE (op1);
2479
2480 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2481 shift amount is a vector, use the vector/vector shift patterns. */
2482 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2483 {
2484 lshift_optab = vashl_optab;
2485 rshift_arith_optab = vashr_optab;
2486 rshift_uns_optab = vlshr_optab;
2487 lrotate_optab = vrotl_optab;
2488 rrotate_optab = vrotr_optab;
2489 }
2490
2491 /* Previously detected shift-counts computed by NEGATE_EXPR
2492 and shifted in the other direction; but that does not work
2493 on all machines. */
2494
2495 if (SHIFT_COUNT_TRUNCATED)
2496 {
2497 if (CONST_INT_P (op1)
2498 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2499 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2500 op1 = gen_int_shift_amount (mode,
2501 (unsigned HOST_WIDE_INT) INTVAL (op1)
2502 % GET_MODE_BITSIZE (scalar_mode));
2503 else if (GET_CODE (op1) == SUBREG
2504 && subreg_lowpart_p (op1)
2505 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2506 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2507 op1 = SUBREG_REG (op1);
2508 }
2509
2510 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2511 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2512 bitsize - 1, use the other rotate direction with an amount of
2513 1 .. bitsize / 2 - 1 instead. */
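  /* For example, a 32-bit rotate by 24 in one direction becomes a rotate
     by 32 - 24 == 8 in the other direction.  */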
2514 if (rotate
2515 && CONST_INT_P (op1)
2516 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2517 GET_MODE_BITSIZE (scalar_mode) - 1))
2518 {
2519 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2520 - INTVAL (op1)));
2521 left = !left;
2522 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2523 }
2524
2525 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2526 Note that this is not the case for bigger values. For instance a rotation
2527 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2528 0x04030201 (bswapsi). */
2529 if (rotate
2530 && CONST_INT_P (op1)
2531 && INTVAL (op1) == BITS_PER_UNIT
2532 && GET_MODE_SIZE (scalar_mode) == 2
2533 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2534 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2535
2536 if (op1 == const0_rtx)
2537 return shifted;
2538
2539 /* Check whether it's cheaper to implement a left shift by a constant
2540 bit count by a sequence of additions. */
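  /* For example, x << 2 can be computed as two successive additions of the
     value to itself (x + x, then that result added to itself) when the two
     adds are cheaper than the shift.  */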
2541 if (code == LSHIFT_EXPR
2542 && CONST_INT_P (op1)
2543 && INTVAL (op1) > 0
2544 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2545 && INTVAL (op1) < MAX_BITS_PER_WORD
2546 && (shift_cost (speed, mode, INTVAL (op1))
2547 > INTVAL (op1) * add_cost (speed, mode))
2548 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2549 {
2550 int i;
2551 for (i = 0; i < INTVAL (op1); i++)
2552 {
2553 temp = force_reg (mode, shifted);
2554 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2555 unsignedp, OPTAB_LIB_WIDEN);
2556 }
2557 return shifted;
2558 }
2559
2560 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2561 {
2562 enum optab_methods methods;
2563
2564 if (attempt == 0)
2565 methods = OPTAB_DIRECT;
2566 else if (attempt == 1)
2567 methods = OPTAB_WIDEN;
2568 else
2569 methods = OPTAB_LIB_WIDEN;
2570
2571 if (rotate)
2572 {
2573 /* Widening does not work for rotation. */
2574 if (methods == OPTAB_WIDEN)
2575 continue;
2576 else if (methods == OPTAB_LIB_WIDEN)
2577 {
2578 /* If we have been unable to open-code this by a rotation,
2579 do it as the IOR of two shifts. I.e., to rotate A
2580 by N bits, compute
2581 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2582 where C is the bitsize of A.
2583
2584 It is theoretically possible that the target machine might
2585 not be able to perform either shift and hence we would
2586 be making two libcalls rather than just the one for the
2587 shift (similarly if IOR could not be done). We will allow
2588 this extremely unlikely lossage to avoid complicating the
2589 code below. */
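	      /* For example, rotating a 32-bit value A left by 8 becomes
		 (A << 8) | ((unsigned) A >> 24); the (-N) & (C - 1) mask
		 keeps the right-shift amount in range when N is a
		 variable.  */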
2590
2591 rtx subtarget = target == shifted ? 0 : target;
2592 rtx new_amount, other_amount;
2593 rtx temp1;
2594
2595 new_amount = op1;
2596 if (op1 == const0_rtx)
2597 return shifted;
2598 else if (CONST_INT_P (op1))
2599 other_amount = gen_int_shift_amount
2600 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2601 else
2602 {
2603 other_amount
2604 = simplify_gen_unary (NEG, GET_MODE (op1),
2605 op1, GET_MODE (op1));
2606 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2607 other_amount
2608 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2609 gen_int_mode (mask, GET_MODE (op1)));
2610 }
2611
2612 shifted = force_reg (mode, shifted);
2613
2614 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2615 mode, shifted, new_amount, 0, 1);
2616 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2617 mode, shifted, other_amount,
2618 subtarget, 1);
2619 return expand_binop (mode, ior_optab, temp, temp1, target,
2620 unsignedp, methods);
2621 }
2622
2623 temp = expand_binop (mode,
2624 left ? lrotate_optab : rrotate_optab,
2625 shifted, op1, target, unsignedp, methods);
2626 }
2627 else if (unsignedp)
2628 temp = expand_binop (mode,
2629 left ? lshift_optab : rshift_uns_optab,
2630 shifted, op1, target, unsignedp, methods);
2631
2632 /* Do arithmetic shifts.
2633 Also, if we are going to widen the operand, we can just as well
2634 use an arithmetic right-shift instead of a logical one. */
2635 if (temp == 0 && ! rotate
2636 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2637 {
2638 enum optab_methods methods1 = methods;
2639
2640 /* If trying to widen a log shift to an arithmetic shift,
2641 don't accept an arithmetic shift of the same size. */
2642 if (unsignedp)
2643 methods1 = OPTAB_MUST_WIDEN;
2644
2645 /* Arithmetic shift */
2646
2647 temp = expand_binop (mode,
2648 left ? lshift_optab : rshift_arith_optab,
2649 shifted, op1, target, unsignedp, methods1);
2650 }
2651
2652 /* We used to try extzv here for logical right shifts, but that was
2653 only useful for one machine, the VAX, and caused poor code
2654 generation there for lshrdi3, so the code was deleted and a
2655 define_expand for lshrsi3 was added to vax.md. */
2656 }
2657
2658 gcc_assert (temp != NULL_RTX || may_fail);
2659 return temp;
2660 }
2661
2662 /* Output a shift instruction for expression code CODE,
2663 with SHIFTED being the rtx for the value to shift,
2664 and AMOUNT the amount to shift by.
2665 Store the result in the rtx TARGET, if that is convenient.
2666 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2667 Return the rtx for where the value is. */
2668
2669 rtx
2670 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2671 poly_int64 amount, rtx target, int unsignedp)
2672 {
2673 return expand_shift_1 (code, mode, shifted,
2674 gen_int_shift_amount (mode, amount),
2675 target, unsignedp);
2676 }
2677
2678 /* Likewise, but return 0 if that cannot be done. */
2679
2680 static rtx
2681 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2682 int amount, rtx target, int unsignedp)
2683 {
2684 return expand_shift_1 (code, mode,
2685 shifted, GEN_INT (amount), target, unsignedp, true);
2686 }
2687
2688 /* Output a shift instruction for expression code CODE,
2689 with SHIFTED being the rtx for the value to shift,
2690 and AMOUNT the tree for the amount to shift by.
2691 Store the result in the rtx TARGET, if that is convenient.
2692 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2693 Return the rtx for where the value is. */
2694
2695 rtx
2696 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2697 tree amount, rtx target, int unsignedp)
2698 {
2699 return expand_shift_1 (code, mode,
2700 shifted, expand_normal (amount), target, unsignedp);
2701 }
2702
2703 \f
2704 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2705 const struct mult_cost *, machine_mode mode);
2706 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2707 const struct algorithm *, enum mult_variant);
2708 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2709 static rtx extract_high_half (scalar_int_mode, rtx);
2710 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2711 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2712 int, int);
2713 /* Compute and return the best algorithm for multiplying by T.
2714 The algorithm must cost less than cost_limit.
2715 If retval.cost >= COST_LIMIT, no algorithm was found and all
2716 other fields of the returned struct are undefined.
2717 MODE is the machine mode of the multiplication. */
2718
2719 static void
2720 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2721 const struct mult_cost *cost_limit, machine_mode mode)
2722 {
2723 int m;
2724 struct algorithm *alg_in, *best_alg;
2725 struct mult_cost best_cost;
2726 struct mult_cost new_limit;
2727 int op_cost, op_latency;
2728 unsigned HOST_WIDE_INT orig_t = t;
2729 unsigned HOST_WIDE_INT q;
2730 int maxm, hash_index;
2731 bool cache_hit = false;
2732 enum alg_code cache_alg = alg_zero;
2733 bool speed = optimize_insn_for_speed_p ();
2734 scalar_int_mode imode;
2735 struct alg_hash_entry *entry_ptr;
2736
2737 /* Indicate that no algorithm is yet found. If no algorithm
2738 is found, this value will be returned and indicate failure. */
2739 alg_out->cost.cost = cost_limit->cost + 1;
2740 alg_out->cost.latency = cost_limit->latency + 1;
2741
2742 if (cost_limit->cost < 0
2743 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2744 return;
2745
2746 /* Be prepared for vector modes. */
2747 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2748
2749 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2750
2751 /* Restrict the bits of "t" to the multiplication's mode. */
2752 t &= GET_MODE_MASK (imode);
2753
2754 /* t == 1 can be done in zero cost. */
2755 if (t == 1)
2756 {
2757 alg_out->ops = 1;
2758 alg_out->cost.cost = 0;
2759 alg_out->cost.latency = 0;
2760 alg_out->op[0] = alg_m;
2761 return;
2762 }
2763
2764 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2765 fail now. */
2766 if (t == 0)
2767 {
2768 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2769 return;
2770 else
2771 {
2772 alg_out->ops = 1;
2773 alg_out->cost.cost = zero_cost (speed);
2774 alg_out->cost.latency = zero_cost (speed);
2775 alg_out->op[0] = alg_zero;
2776 return;
2777 }
2778 }
2779
2780 /* We'll be needing a couple extra algorithm structures now. */
2781
2782 alg_in = XALLOCA (struct algorithm);
2783 best_alg = XALLOCA (struct algorithm);
2784 best_cost = *cost_limit;
2785
2786 /* Compute the hash index. */
2787 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2788
2789 /* See if we already know what to do for T. */
2790 entry_ptr = alg_hash_entry_ptr (hash_index);
2791 if (entry_ptr->t == t
2792 && entry_ptr->mode == mode
2793 && entry_ptr->speed == speed
2794 && entry_ptr->alg != alg_unknown)
2795 {
2796 cache_alg = entry_ptr->alg;
2797
2798 if (cache_alg == alg_impossible)
2799 {
2800 /* The cache tells us that it's impossible to synthesize
2801 multiplication by T within entry_ptr->cost. */
2802 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2803 /* COST_LIMIT is at least as restrictive as the one
2804 recorded in the hash table, in which case we have no
2805 hope of synthesizing a multiplication. Just
2806 return. */
2807 return;
2808
2809 /* If we get here, COST_LIMIT is less restrictive than the
2810 one recorded in the hash table, so we may be able to
2811 synthesize a multiplication. Proceed as if we didn't
2812 have the cache entry. */
2813 }
2814 else
2815 {
2816 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2817 /* The cached algorithm shows that this multiplication
2818 requires more cost than COST_LIMIT. Just return. This
2819 way, we don't clobber this cache entry with
2820 alg_impossible but retain useful information. */
2821 return;
2822
2823 cache_hit = true;
2824
2825 switch (cache_alg)
2826 {
2827 case alg_shift:
2828 goto do_alg_shift;
2829
2830 case alg_add_t_m2:
2831 case alg_sub_t_m2:
2832 goto do_alg_addsub_t_m2;
2833
2834 case alg_add_factor:
2835 case alg_sub_factor:
2836 goto do_alg_addsub_factor;
2837
2838 case alg_add_t2_m:
2839 goto do_alg_add_t2_m;
2840
2841 case alg_sub_t2_m:
2842 goto do_alg_sub_t2_m;
2843
2844 default:
2845 gcc_unreachable ();
2846 }
2847 }
2848 }
2849
2850 /* If we have a group of zero bits at the low-order part of T, try
2851 multiplying by the remaining bits and then doing a shift. */
2852
2853 if ((t & 1) == 0)
2854 {
2855 do_alg_shift:
2856 m = ctz_or_zero (t); /* m = number of low zero bits */
2857 if (m < maxm)
2858 {
2859 q = t >> m;
2860 /* The function expand_shift will choose between a shift and
2861 a sequence of additions, so the observed cost is given as
2862 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2863 op_cost = m * add_cost (speed, mode);
2864 if (shift_cost (speed, mode, m) < op_cost)
2865 op_cost = shift_cost (speed, mode, m);
2866 new_limit.cost = best_cost.cost - op_cost;
2867 new_limit.latency = best_cost.latency - op_cost;
2868 synth_mult (alg_in, q, &new_limit, mode);
2869
2870 alg_in->cost.cost += op_cost;
2871 alg_in->cost.latency += op_cost;
2872 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2873 {
2874 best_cost = alg_in->cost;
2875 std::swap (alg_in, best_alg);
2876 best_alg->log[best_alg->ops] = m;
2877 best_alg->op[best_alg->ops] = alg_shift;
2878 }
2879
2880 /* See if treating ORIG_T as a signed number yields a better
2881 sequence. Try this sequence only for a negative ORIG_T
2882 as it would be useless for a non-negative ORIG_T. */
2883 if ((HOST_WIDE_INT) orig_t < 0)
2884 {
2885 /* Shift ORIG_T as follows because a right shift of a
2886 negative-valued signed type is implementation
2887 defined. */
2888 q = ~(~orig_t >> m);
2889 /* The function expand_shift will choose between a shift
2890 and a sequence of additions, so the observed cost is
2891 given as MIN (m * add_cost(speed, mode),
2892 shift_cost(speed, mode, m)). */
2893 op_cost = m * add_cost (speed, mode);
2894 if (shift_cost (speed, mode, m) < op_cost)
2895 op_cost = shift_cost (speed, mode, m);
2896 new_limit.cost = best_cost.cost - op_cost;
2897 new_limit.latency = best_cost.latency - op_cost;
2898 synth_mult (alg_in, q, &new_limit, mode);
2899
2900 alg_in->cost.cost += op_cost;
2901 alg_in->cost.latency += op_cost;
2902 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2903 {
2904 best_cost = alg_in->cost;
2905 std::swap (alg_in, best_alg);
2906 best_alg->log[best_alg->ops] = m;
2907 best_alg->op[best_alg->ops] = alg_shift;
2908 }
2909 }
2910 }
2911 if (cache_hit)
2912 goto done;
2913 }
2914
2915 /* If we have an odd number, add or subtract one. */
2916 if ((t & 1) != 0)
2917 {
2918 unsigned HOST_WIDE_INT w;
2919
2920 do_alg_addsub_t_m2:
2921 for (w = 1; (w & t) != 0; w <<= 1)
2922 ;
2923 /* If T was -1, then W will be zero after the loop. This is another
2924 case where T ends with ...111. Handling it by synthesizing (T + 1)
2925 and then subtracting produces slightly better code and makes
2926 algorithm selection much faster than treating it like the
2927 ...0111 case below. */
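      /* For example, for T == 7 (binary ...111) this tries synthesizing 8
	 (a single shift) followed by one subtraction of the multiplicand:
	 8*x - x == 7*x.  */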
2928 if (w == 0
2929 || (w > 2
2930 /* Reject the case where t is 3.
2931 Thus we prefer addition in that case. */
2932 && t != 3))
2933 {
2934 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2935
2936 op_cost = add_cost (speed, mode);
2937 new_limit.cost = best_cost.cost - op_cost;
2938 new_limit.latency = best_cost.latency - op_cost;
2939 synth_mult (alg_in, t + 1, &new_limit, mode);
2940
2941 alg_in->cost.cost += op_cost;
2942 alg_in->cost.latency += op_cost;
2943 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2944 {
2945 best_cost = alg_in->cost;
2946 std::swap (alg_in, best_alg);
2947 best_alg->log[best_alg->ops] = 0;
2948 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2949 }
2950 }
2951 else
2952 {
2953 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2954
2955 op_cost = add_cost (speed, mode);
2956 new_limit.cost = best_cost.cost - op_cost;
2957 new_limit.latency = best_cost.latency - op_cost;
2958 synth_mult (alg_in, t - 1, &new_limit, mode);
2959
2960 alg_in->cost.cost += op_cost;
2961 alg_in->cost.latency += op_cost;
2962 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2963 {
2964 best_cost = alg_in->cost;
2965 std::swap (alg_in, best_alg);
2966 best_alg->log[best_alg->ops] = 0;
2967 best_alg->op[best_alg->ops] = alg_add_t_m2;
2968 }
2969 }
2970
2971 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2972 quickly with a - a * n for some appropriate constant n. */
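      /* For example, a * -7 can be computed as a - (a << 3), since
	 -7 == 1 - 8.  */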
2973 m = exact_log2 (-orig_t + 1);
2974 if (m >= 0 && m < maxm)
2975 {
2976 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2977 /* If the target has a cheap shift-and-subtract insn use
2978 that in preference to a shift insn followed by a sub insn.
2979 Assume that the shift-and-sub is "atomic" with a latency
2980 equal to its cost, otherwise assume that on superscalar
2981 hardware the shift may be executed concurrently with the
2982 earlier steps in the algorithm. */
2983 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2984 {
2985 op_cost = shiftsub1_cost (speed, mode, m);
2986 op_latency = op_cost;
2987 }
2988 else
2989 op_latency = add_cost (speed, mode);
2990
2991 new_limit.cost = best_cost.cost - op_cost;
2992 new_limit.latency = best_cost.latency - op_latency;
2993 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2994 &new_limit, mode);
2995
2996 alg_in->cost.cost += op_cost;
2997 alg_in->cost.latency += op_latency;
2998 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2999 {
3000 best_cost = alg_in->cost;
3001 std::swap (alg_in, best_alg);
3002 best_alg->log[best_alg->ops] = m;
3003 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3004 }
3005 }
3006
3007 if (cache_hit)
3008 goto done;
3009 }
3010
3011 /* Look for factors of t of the form
3012 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3013 If we find such a factor, we can multiply by t using an algorithm that
3014 multiplies by q, shift the result by m and add/subtract it to itself.
3015
3016 We search for large factors first and loop down, even if large factors
3017 are less probable than small; if we find a large factor we will find a
3018 good sequence quickly, and therefore be able to prune (by decreasing
3019 COST_LIMIT) the search. */
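  /* For example, t == 45 factors as 5 * (2**3 + 1), so 45*x can be
     computed as y = 5*x followed by y + (y << 3).  */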
3020
3021 do_alg_addsub_factor:
3022 for (m = floor_log2 (t - 1); m >= 2; m--)
3023 {
3024 unsigned HOST_WIDE_INT d;
3025
3026 d = (HOST_WIDE_INT_1U << m) + 1;
3027 if (t % d == 0 && t > d && m < maxm
3028 && (!cache_hit || cache_alg == alg_add_factor))
3029 {
3030 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3031 if (shiftadd_cost (speed, mode, m) <= op_cost)
3032 op_cost = shiftadd_cost (speed, mode, m);
3033
3034 op_latency = op_cost;
3035
3036
3037 new_limit.cost = best_cost.cost - op_cost;
3038 new_limit.latency = best_cost.latency - op_latency;
3039 synth_mult (alg_in, t / d, &new_limit, mode);
3040
3041 alg_in->cost.cost += op_cost;
3042 alg_in->cost.latency += op_latency;
3043 if (alg_in->cost.latency < op_cost)
3044 alg_in->cost.latency = op_cost;
3045 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3046 {
3047 best_cost = alg_in->cost;
3048 std::swap (alg_in, best_alg);
3049 best_alg->log[best_alg->ops] = m;
3050 best_alg->op[best_alg->ops] = alg_add_factor;
3051 }
3052 /* Other factors will have been taken care of in the recursion. */
3053 break;
3054 }
3055
3056 d = (HOST_WIDE_INT_1U << m) - 1;
3057 if (t % d == 0 && t > d && m < maxm
3058 && (!cache_hit || cache_alg == alg_sub_factor))
3059 {
3060 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3061 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3062 op_cost = shiftsub0_cost (speed, mode, m);
3063
3064 op_latency = op_cost;
3065
3066 new_limit.cost = best_cost.cost - op_cost;
3067 new_limit.latency = best_cost.latency - op_latency;
3068 synth_mult (alg_in, t / d, &new_limit, mode);
3069
3070 alg_in->cost.cost += op_cost;
3071 alg_in->cost.latency += op_latency;
3072 if (alg_in->cost.latency < op_cost)
3073 alg_in->cost.latency = op_cost;
3074 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3075 {
3076 best_cost = alg_in->cost;
3077 std::swap (alg_in, best_alg);
3078 best_alg->log[best_alg->ops] = m;
3079 best_alg->op[best_alg->ops] = alg_sub_factor;
3080 }
3081 break;
3082 }
3083 }
3084 if (cache_hit)
3085 goto done;
3086
3087 /* Try shift-and-add (load effective address) instructions,
3088 i.e. do a*3, a*5, a*9. */
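  /* For example, t == 9 gives q == 8 and m == 3, so 9*x can be done as
     (x << 3) + x, a single shift-and-add.  */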
3089 if ((t & 1) != 0)
3090 {
3091 do_alg_add_t2_m:
3092 q = t - 1;
3093 m = ctz_hwi (q);
3094 if (q && m < maxm)
3095 {
3096 op_cost = shiftadd_cost (speed, mode, m);
3097 new_limit.cost = best_cost.cost - op_cost;
3098 new_limit.latency = best_cost.latency - op_cost;
3099 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3100
3101 alg_in->cost.cost += op_cost;
3102 alg_in->cost.latency += op_cost;
3103 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3104 {
3105 best_cost = alg_in->cost;
3106 std::swap (alg_in, best_alg);
3107 best_alg->log[best_alg->ops] = m;
3108 best_alg->op[best_alg->ops] = alg_add_t2_m;
3109 }
3110 }
3111 if (cache_hit)
3112 goto done;
3113
3114 do_alg_sub_t2_m:
3115 q = t + 1;
3116 m = ctz_hwi (q);
3117 if (q && m < maxm)
3118 {
3119 op_cost = shiftsub0_cost (speed, mode, m);
3120 new_limit.cost = best_cost.cost - op_cost;
3121 new_limit.latency = best_cost.latency - op_cost;
3122 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3123
3124 alg_in->cost.cost += op_cost;
3125 alg_in->cost.latency += op_cost;
3126 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3127 {
3128 best_cost = alg_in->cost;
3129 std::swap (alg_in, best_alg);
3130 best_alg->log[best_alg->ops] = m;
3131 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3132 }
3133 }
3134 if (cache_hit)
3135 goto done;
3136 }
3137
3138 done:
3139 /* If best_cost has not decreased, we have not found any algorithm. */
3140 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3141 {
3142 /* We failed to find an algorithm. Record alg_impossible for
3143 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3144 we are asked to find an algorithm for T within the same or
3145 lower COST_LIMIT, we can immediately return to the
3146 caller. */
3147 entry_ptr->t = t;
3148 entry_ptr->mode = mode;
3149 entry_ptr->speed = speed;
3150 entry_ptr->alg = alg_impossible;
3151 entry_ptr->cost = *cost_limit;
3152 return;
3153 }
3154
3155 /* Cache the result. */
3156 if (!cache_hit)
3157 {
3158 entry_ptr->t = t;
3159 entry_ptr->mode = mode;
3160 entry_ptr->speed = speed;
3161 entry_ptr->alg = best_alg->op[best_alg->ops];
3162 entry_ptr->cost.cost = best_cost.cost;
3163 entry_ptr->cost.latency = best_cost.latency;
3164 }
3165
3166 /* If we are getting a too long sequence for `struct algorithm'
3167 to record, make this search fail. */
3168 if (best_alg->ops == MAX_BITS_PER_WORD)
3169 return;
3170
3171 /* Copy the algorithm from temporary space to the space at alg_out.
3172 We avoid using structure assignment because the majority of
3173 best_alg is normally undefined, and this is a critical function. */
3174 alg_out->ops = best_alg->ops + 1;
3175 alg_out->cost = best_cost;
3176 memcpy (alg_out->op, best_alg->op,
3177 alg_out->ops * sizeof *alg_out->op);
3178 memcpy (alg_out->log, best_alg->log,
3179 alg_out->ops * sizeof *alg_out->log);
3180 }
3181 \f
3182 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3183 Try three variations:
3184
3185 - a shift/add sequence based on VAL itself
3186 - a shift/add sequence based on -VAL, followed by a negation
3187 - a shift/add sequence based on VAL - 1, followed by an addition.
3188
3189 Return true if the cheapest of these cost less than MULT_COST,
3190 describing the algorithm in *ALG and final fixup in *VARIANT. */
3191
3192 bool
3193 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3194 struct algorithm *alg, enum mult_variant *variant,
3195 int mult_cost)
3196 {
3197 struct algorithm alg2;
3198 struct mult_cost limit;
3199 int op_cost;
3200 bool speed = optimize_insn_for_speed_p ();
3201
3202 /* Fail quickly for impossible bounds. */
3203 if (mult_cost < 0)
3204 return false;
3205
3206 /* Ensure that mult_cost provides a reasonable upper bound.
3207 Any constant multiplication can be performed with less
3208 than 2 * bits additions. */
3209 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3210 if (mult_cost > op_cost)
3211 mult_cost = op_cost;
3212
3213 *variant = basic_variant;
3214 limit.cost = mult_cost;
3215 limit.latency = mult_cost;
3216 synth_mult (alg, val, &limit, mode);
3217
3218 /* This works only if the inverted value actually fits in an
3219 `unsigned int'. */
3220 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3221 {
3222 op_cost = neg_cost (speed, mode);
3223 if (MULT_COST_LESS (&alg->cost, mult_cost))
3224 {
3225 limit.cost = alg->cost.cost - op_cost;
3226 limit.latency = alg->cost.latency - op_cost;
3227 }
3228 else
3229 {
3230 limit.cost = mult_cost - op_cost;
3231 limit.latency = mult_cost - op_cost;
3232 }
3233
3234 synth_mult (&alg2, -val, &limit, mode);
3235 alg2.cost.cost += op_cost;
3236 alg2.cost.latency += op_cost;
3237 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3238 *alg = alg2, *variant = negate_variant;
3239 }
3240
3241 /* This proves very useful for division-by-constant. */
3242 op_cost = add_cost (speed, mode);
3243 if (MULT_COST_LESS (&alg->cost, mult_cost))
3244 {
3245 limit.cost = alg->cost.cost - op_cost;
3246 limit.latency = alg->cost.latency - op_cost;
3247 }
3248 else
3249 {
3250 limit.cost = mult_cost - op_cost;
3251 limit.latency = mult_cost - op_cost;
3252 }
3253
3254 synth_mult (&alg2, val - 1, &limit, mode);
3255 alg2.cost.cost += op_cost;
3256 alg2.cost.latency += op_cost;
3257 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3258 *alg = alg2, *variant = add_variant;
3259
3260 return MULT_COST_LESS (&alg->cost, mult_cost);
3261 }
3262
3263 /* A subroutine of expand_mult, used for constant multiplications.
3264 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3265 convenient. Use the shift/add sequence described by ALG and apply
3266 the final fixup specified by VARIANT. */
3267
3268 static rtx
3269 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3270 rtx target, const struct algorithm *alg,
3271 enum mult_variant variant)
3272 {
3273 unsigned HOST_WIDE_INT val_so_far;
3274 rtx_insn *insn;
3275 rtx accum, tem;
3276 int opno;
3277 machine_mode nmode;
3278
3279 /* Avoid referencing memory over and over and invalid sharing
3280 on SUBREGs. */
3281 op0 = force_reg (mode, op0);
3282
3283 /* ACCUM starts out either as OP0 or as a zero, depending on
3284 the first operation. */
3285
3286 if (alg->op[0] == alg_zero)
3287 {
3288 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3289 val_so_far = 0;
3290 }
3291 else if (alg->op[0] == alg_m)
3292 {
3293 accum = copy_to_mode_reg (mode, op0);
3294 val_so_far = 1;
3295 }
3296 else
3297 gcc_unreachable ();
3298
3299 for (opno = 1; opno < alg->ops; opno++)
3300 {
3301 int log = alg->log[opno];
3302 rtx shift_subtarget = optimize ? 0 : accum;
3303 rtx add_target
3304 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3305 && !optimize)
3306 ? target : 0;
3307 rtx accum_target = optimize ? 0 : accum;
3308 rtx accum_inner;
3309
3310 switch (alg->op[opno])
3311 {
3312 case alg_shift:
3313 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3314 /* REG_EQUAL note will be attached to the following insn. */
3315 emit_move_insn (accum, tem);
3316 val_so_far <<= log;
3317 break;
3318
3319 case alg_add_t_m2:
3320 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3321 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3322 add_target ? add_target : accum_target);
3323 val_so_far += HOST_WIDE_INT_1U << log;
3324 break;
3325
3326 case alg_sub_t_m2:
3327 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3328 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3329 add_target ? add_target : accum_target);
3330 val_so_far -= HOST_WIDE_INT_1U << log;
3331 break;
3332
3333 case alg_add_t2_m:
3334 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3335 log, shift_subtarget, 0);
3336 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3337 add_target ? add_target : accum_target);
3338 val_so_far = (val_so_far << log) + 1;
3339 break;
3340
3341 case alg_sub_t2_m:
3342 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3343 log, shift_subtarget, 0);
3344 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3345 add_target ? add_target : accum_target);
3346 val_so_far = (val_so_far << log) - 1;
3347 break;
3348
3349 case alg_add_factor:
3350 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3351 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3352 add_target ? add_target : accum_target);
3353 val_so_far += val_so_far << log;
3354 break;
3355
3356 case alg_sub_factor:
3357 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3358 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3359 (add_target
3360 ? add_target : (optimize ? 0 : tem)));
3361 val_so_far = (val_so_far << log) - val_so_far;
3362 break;
3363
3364 default:
3365 gcc_unreachable ();
3366 }
3367
3368 if (SCALAR_INT_MODE_P (mode))
3369 {
3370 /* Write a REG_EQUAL note on the last insn so that we can cse
3371 multiplication sequences. Note that if ACCUM is a SUBREG,
3372 we've set the inner register and must properly indicate that. */
3373 tem = op0, nmode = mode;
3374 accum_inner = accum;
3375 if (GET_CODE (accum) == SUBREG)
3376 {
3377 accum_inner = SUBREG_REG (accum);
3378 nmode = GET_MODE (accum_inner);
3379 tem = gen_lowpart (nmode, op0);
3380 }
3381
3382 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3383 In that case, only the low bits of accum would be guaranteed to
3384 be equal to the content of the REG_EQUAL note, the upper bits
3385 can be anything. */
3386 if (!paradoxical_subreg_p (tem))
3387 {
3388 insn = get_last_insn ();
3389 wide_int wval_so_far
3390 = wi::uhwi (val_so_far,
3391 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3392 rtx c = immed_wide_int_const (wval_so_far, nmode);
3393 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3394 accum_inner);
3395 }
3396 }
3397 }
3398
3399 if (variant == negate_variant)
3400 {
3401 val_so_far = -val_so_far;
3402 accum = expand_unop (mode, neg_optab, accum, target, 0);
3403 }
3404 else if (variant == add_variant)
3405 {
3406 val_so_far = val_so_far + 1;
3407 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3408 }
3409
3410 /* Compare only the bits of val and val_so_far that are significant
3411 in the result mode, to avoid sign-/zero-extension confusion. */
3412 nmode = GET_MODE_INNER (mode);
3413 val &= GET_MODE_MASK (nmode);
3414 val_so_far &= GET_MODE_MASK (nmode);
3415 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3416
3417 return accum;
3418 }
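
/* Worked example (illustrative only): multiplying by 10 with the
   algorithm { alg_m, alg_add_t_m2 (log 2), alg_shift (log 1) } proceeds
   as

     accum = x;                  val_so_far = 1
     accum = accum + (x << 2);   val_so_far = 1 + 4  = 5
     accum = accum << 1;         val_so_far = 5 << 1 = 10

   so the assertion above merely checks that the synthesized sequence
   really computed the constant that was asked for.  */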
3419
3420 /* Perform a multiplication and return an rtx for the result.
3421 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3422 TARGET is a suggestion for where to store the result (an rtx).
3423
3424 We check specially for a constant integer as OP1.
3425 If you want this check for OP0 as well, then before calling
3426 you should swap the two operands if OP0 would be constant. */
3427
3428 rtx
3429 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3430 int unsignedp, bool no_libcall)
3431 {
3432 enum mult_variant variant;
3433 struct algorithm algorithm;
3434 rtx scalar_op1;
3435 int max_cost;
3436 bool speed = optimize_insn_for_speed_p ();
3437 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3438
3439 if (CONSTANT_P (op0))
3440 std::swap (op0, op1);
3441
3442 /* For vectors, there are several simplifications that can be made if
3443 all elements of the vector constant are identical. */
3444 scalar_op1 = unwrap_const_vec_duplicate (op1);
3445
3446 if (INTEGRAL_MODE_P (mode))
3447 {
3448 rtx fake_reg;
3449 HOST_WIDE_INT coeff;
3450 bool is_neg;
3451 int mode_bitsize;
3452
3453 if (op1 == CONST0_RTX (mode))
3454 return op1;
3455 if (op1 == CONST1_RTX (mode))
3456 return op0;
3457 if (op1 == CONSTM1_RTX (mode))
3458 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3459 op0, target, 0);
3460
3461 if (do_trapv)
3462 goto skip_synth;
3463
3464 /* If mode is an integer vector mode, check if the backend supports
3465 vector lshift (by scalar or vector) at all. If not, we can't use
3466 synthesized multiply. */
3467 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3468 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3469 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3470 goto skip_synth;
3471
3472 /* These are the operations that are potentially turned into
3473 a sequence of shifts and additions. */
3474 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3475
3476 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3477 less than or equal in size to `unsigned int' this doesn't matter.
3478 If the mode is larger than `unsigned int', then synth_mult works
3479 only if the constant value exactly fits in an `unsigned int' without
3480 any truncation. This means that multiplying by negative values does
3481 not work; results are off by 2^32 on a 32 bit machine. */
3482 if (CONST_INT_P (scalar_op1))
3483 {
3484 coeff = INTVAL (scalar_op1);
3485 is_neg = coeff < 0;
3486 }
3487 #if TARGET_SUPPORTS_WIDE_INT
3488 else if (CONST_WIDE_INT_P (scalar_op1))
3489 #else
3490 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3491 #endif
3492 {
3493 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3494 /* Perfect power of 2 (other than 1, which is handled above). */
3495 if (shift > 0)
3496 return expand_shift (LSHIFT_EXPR, mode, op0,
3497 shift, target, unsignedp);
3498 else
3499 goto skip_synth;
3500 }
3501 else
3502 goto skip_synth;
3503
3504 /* We used to test optimize here, on the grounds that it's better to
3505 produce a smaller program when -O is not used. But this causes
3506 such a terrible slowdown sometimes that it seems better to always
3507 use synth_mult. */
3508
3509 /* Special case powers of two. */
3510 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3511 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3512 return expand_shift (LSHIFT_EXPR, mode, op0,
3513 floor_log2 (coeff), target, unsignedp);
3514
3515 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3516
3517 /* Attempt to handle multiplication of DImode values by negative
3518 coefficients, by performing the multiplication by a positive
3519 multiplier and then inverting the result. */
3520 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3521 {
3522 /* It's safe to use -coeff even for INT_MIN, as the
3523 result is interpreted as an unsigned coefficient.
3524 Exclude cost of op0 from max_cost to match the cost
3525 calculation of the synth_mult. */
3526 coeff = -(unsigned HOST_WIDE_INT) coeff;
3527 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3528 mode, speed)
3529 - neg_cost (speed, mode));
3530 if (max_cost <= 0)
3531 goto skip_synth;
3532
3533 /* Special case powers of two. */
3534 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3535 {
3536 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3537 floor_log2 (coeff), target, unsignedp);
3538 return expand_unop (mode, neg_optab, temp, target, 0);
3539 }
3540
3541 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3542 max_cost))
3543 {
3544 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3545 &algorithm, variant);
3546 return expand_unop (mode, neg_optab, temp, target, 0);
3547 }
3548 goto skip_synth;
3549 }
3550
3551 /* Exclude cost of op0 from max_cost to match the cost
3552 calculation of the synth_mult. */
3553 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3554 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3555 return expand_mult_const (mode, op0, coeff, target,
3556 &algorithm, variant);
3557 }
3558 skip_synth:
3559
3560 /* Expand x*2.0 as x+x. */
3561 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3562 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3563 {
3564 op0 = force_reg (GET_MODE (op0), op0);
3565 return expand_binop (mode, add_optab, op0, op0,
3566 target, unsignedp,
3567 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3568 }
3569
3570 /* This used to use umul_optab if unsigned, but for non-widening multiply
3571 there is no difference between signed and unsigned. */
3572 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3573 op0, op1, target, unsignedp,
3574 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3575 gcc_assert (op0 || no_libcall);
3576 return op0;
3577 }
3578
3579 /* Return a cost estimate for multiplying a register by the given
3580 COEFFicient in the given MODE and SPEED. */
3581
3582 int
3583 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3584 {
3585 int max_cost;
3586 struct algorithm algorithm;
3587 enum mult_variant variant;
3588
3589 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3590 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3591 mode, speed);
3592 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3593 return algorithm.cost.cost;
3594 else
3595 return max_cost;
3596 }
3597
3598 /* Perform a widening multiplication and return an rtx for the result.
3599 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3600 TARGET is a suggestion for where to store the result (an rtx).
3601 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3602 or smul_widen_optab.
3603
3604 We check specially for a constant integer as OP1, comparing the
3605 cost of a widening multiply against the cost of a sequence of shifts
3606 and adds. */
3607
3608 rtx
3609 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3610 int unsignedp, optab this_optab)
3611 {
3612 bool speed = optimize_insn_for_speed_p ();
3613 rtx cop1;
3614
3615 if (CONST_INT_P (op1)
3616 && GET_MODE (op0) != VOIDmode
3617 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3618 this_optab == umul_widen_optab))
3619 && CONST_INT_P (cop1)
3620 && (INTVAL (cop1) >= 0
3621 || HWI_COMPUTABLE_MODE_P (mode)))
3622 {
3623 HOST_WIDE_INT coeff = INTVAL (cop1);
3624 int max_cost;
3625 enum mult_variant variant;
3626 struct algorithm algorithm;
3627
3628 if (coeff == 0)
3629 return CONST0_RTX (mode);
3630
3631 /* Special case powers of two. */
3632 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3633 {
3634 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3635 return expand_shift (LSHIFT_EXPR, mode, op0,
3636 floor_log2 (coeff), target, unsignedp);
3637 }
3638
3639 /* Exclude cost of op0 from max_cost to match the cost
3640 calculation of the synth_mult. */
3641 max_cost = mul_widen_cost (speed, mode);
3642 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3643 max_cost))
3644 {
3645 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3646 return expand_mult_const (mode, op0, coeff, target,
3647 &algorithm, variant);
3648 }
3649 }
3650 return expand_binop (mode, this_optab, op0, op1, target,
3651 unsignedp, OPTAB_LIB_WIDEN);
3652 }
3653 \f
3654 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3655 replace division by D, and put the least significant N bits of the result
3656 in *MULTIPLIER_PTR and return the most significant bit.
3657
3658 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3659 needed precision is in PRECISION (should be <= N).
3660
3661 PRECISION should be as small as possible so this function can choose
3662 multiplier more freely.
3663
3664 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3665 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3666
3667 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3668 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3669
3670 unsigned HOST_WIDE_INT
3671 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3672 unsigned HOST_WIDE_INT *multiplier_ptr,
3673 int *post_shift_ptr, int *lgup_ptr)
3674 {
3675 int lgup, post_shift;
3676 int pow, pow2;
3677
3678 /* lgup = ceil(log2(divisor)); */
3679 lgup = ceil_log2 (d);
3680
3681 gcc_assert (lgup <= n);
3682
3683 pow = n + lgup;
3684 pow2 = n + lgup - precision;
3685
3686 /* mlow = 2^(N + lgup)/d */
3687 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3688 wide_int mlow = wi::udiv_trunc (val, d);
3689
3690 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3691 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3692 wide_int mhigh = wi::udiv_trunc (val, d);
3693
3694 /* If precision == N, then mlow, mhigh exceed 2^N
3695 (but they do not exceed 2^(N+1)). */
3696
3697 /* Reduce to lowest terms. */
3698 for (post_shift = lgup; post_shift > 0; post_shift--)
3699 {
3700 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3701 HOST_BITS_PER_WIDE_INT);
3702 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3703 HOST_BITS_PER_WIDE_INT);
3704 if (ml_lo >= mh_lo)
3705 break;
3706
3707 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3708 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3709 }
3710
3711 *post_shift_ptr = post_shift;
3712 *lgup_ptr = lgup;
3713 if (n < HOST_BITS_PER_WIDE_INT)
3714 {
3715 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3716 *multiplier_ptr = mhigh.to_uhwi () & mask;
3717 return mhigh.to_uhwi () > mask;
3718 }
3719 else
3720 {
3721 *multiplier_ptr = mhigh.to_uhwi ();
3722 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3723 }
3724 }
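
/* Example (illustrative, assuming N == 32 and a 64-bit host type): for
   D == 3 this returns *MULTIPLIER_PTR == 0xAAAAAAAB, *POST_SHIFT_PTR == 1
   and a zero most significant bit, so for any 32-bit x

     x / 3 == (unsigned int) (((unsigned long long) x * 0xAAAAAAABu)
                              >> 32 >> 1)

   When the returned most significant bit is nonzero the full N + 1 bit
   multiplier no longer fits in N bits, and the callers in expand_divmod
   compensate with an extra subtract/shift/add step (the mh != 0 paths
   below).  */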
3725
3726 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3727 congruent to 1 (mod 2**N). */
3728
3729 static unsigned HOST_WIDE_INT
3730 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3731 {
3732 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3733
3734 /* The algorithm notes that the choice y = x satisfies
3735 x*y == 1 mod 2^3, since x is assumed odd.
3736 Each iteration doubles the number of bits of significance in y. */
3737
3738 unsigned HOST_WIDE_INT mask;
3739 unsigned HOST_WIDE_INT y = x;
3740 int nbit = 3;
3741
3742 mask = (n == HOST_BITS_PER_WIDE_INT
3743 ? HOST_WIDE_INT_M1U
3744 : (HOST_WIDE_INT_1U << n) - 1);
3745
3746 while (nbit < n)
3747 {
3748 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3749 nbit *= 2;
3750 }
3751 return y;
3752 }
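
/* Example (illustrative): each round doubles the number of correct
   low-order bits in Y, e.g. with N == 8

     invert_mod2n (3, 8) == 0xAB,   and 3 * 0xAB == 0x201 == 1 (mod 256).

   expand_divmod uses this in its EXACT_DIV_EXPR handling: when OP0 is
   known to be a multiple of an odd constant D, OP0 / D is just the low
   N bits of OP0 * invert_mod2n (D, N), with no correction step.  */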
3753
3754 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3755 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3756 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3757 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3758 become signed.
3759
3760 The result is put in TARGET if that is convenient.
3761
3762 MODE is the mode of operation. */
3763
3764 rtx
3765 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3766 rtx op1, rtx target, int unsignedp)
3767 {
3768 rtx tem;
3769 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3770
3771 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3772 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3773 tem = expand_and (mode, tem, op1, NULL_RTX);
3774 adj_operand
3775 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3776 adj_operand);
3777
3778 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3779 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3780 tem = expand_and (mode, tem, op0, NULL_RTX);
3781 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3782 target);
3783
3784 return target;
3785 }
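
/* In C terms the adjustment above implements (sketch, N-bit operands):

     high_signed (a, b) = high_unsigned (a, b)
                          - (a < 0 ? b : 0) - (b < 0 ? a : 0)

   and the same identity with '+' when converting a signed high part to
   an unsigned one.  The arithmetic right shifts by N - 1 produce the
   all-ones / all-zeros masks that stand in for the two conditionals.  */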
3786
3787 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3788
3789 static rtx
3790 extract_high_half (scalar_int_mode mode, rtx op)
3791 {
3792 if (mode == word_mode)
3793 return gen_highpart (mode, op);
3794
3795 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3796
3797 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3798 GET_MODE_BITSIZE (mode), 0, 1);
3799 return convert_modes (mode, wider_mode, op, 0);
3800 }
3801
3802 /* Like expmed_mult_highpart, but only consider using a multiplication
3803 optab. OP1 is an rtx for the constant operand. */
3804
3805 static rtx
3806 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3807 rtx target, int unsignedp, int max_cost)
3808 {
3809 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3810 optab moptab;
3811 rtx tem;
3812 int size;
3813 bool speed = optimize_insn_for_speed_p ();
3814
3815 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3816
3817 size = GET_MODE_BITSIZE (mode);
3818
3819 /* Firstly, try using a multiplication insn that only generates the needed
3820 high part of the product, and in the sign flavor of unsignedp. */
3821 if (mul_highpart_cost (speed, mode) < max_cost)
3822 {
3823 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3824 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3825 unsignedp, OPTAB_DIRECT);
3826 if (tem)
3827 return tem;
3828 }
3829
3830 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3831 Need to adjust the result after the multiplication. */
3832 if (size - 1 < BITS_PER_WORD
3833 && (mul_highpart_cost (speed, mode)
3834 + 2 * shift_cost (speed, mode, size-1)
3835 + 4 * add_cost (speed, mode) < max_cost))
3836 {
3837 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3838 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3839 unsignedp, OPTAB_DIRECT);
3840 if (tem)
3841 /* We used the wrong signedness. Adjust the result. */
3842 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3843 tem, unsignedp);
3844 }
3845
3846 /* Try widening multiplication. */
3847 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3848 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3849 && mul_widen_cost (speed, wider_mode) < max_cost)
3850 {
3851 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3852 unsignedp, OPTAB_WIDEN);
3853 if (tem)
3854 return extract_high_half (mode, tem);
3855 }
3856
3857 /* Try widening the mode and performing a non-widening multiplication. */
3858 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3859 && size - 1 < BITS_PER_WORD
3860 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3861 < max_cost))
3862 {
3863 rtx_insn *insns;
3864 rtx wop0, wop1;
3865
3866 /* We need to widen the operands, for example to ensure the
3867 constant multiplier is correctly sign or zero extended.
3868 Use a sequence to clean up any instructions emitted by
3869 the conversions if things don't work out. */
3870 start_sequence ();
3871 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3872 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3873 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3874 unsignedp, OPTAB_WIDEN);
3875 insns = get_insns ();
3876 end_sequence ();
3877
3878 if (tem)
3879 {
3880 emit_insn (insns);
3881 return extract_high_half (mode, tem);
3882 }
3883 }
3884
3885 /* Try widening multiplication of opposite signedness, and adjust. */
3886 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3887 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3888 && size - 1 < BITS_PER_WORD
3889 && (mul_widen_cost (speed, wider_mode)
3890 + 2 * shift_cost (speed, mode, size-1)
3891 + 4 * add_cost (speed, mode) < max_cost))
3892 {
3893 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3894 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3895 if (tem != 0)
3896 {
3897 tem = extract_high_half (mode, tem);
3898 /* We used the wrong signedness. Adjust the result. */
3899 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3900 target, unsignedp);
3901 }
3902 }
3903
3904 return 0;
3905 }
3906
3907 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3908 putting the high half of the result in TARGET if that is convenient,
3909 and return where the result is. If the operation cannot be performed,
3910 0 is returned.
3911
3912 MODE is the mode of operation and result.
3913
3914 UNSIGNEDP nonzero means unsigned multiply.
3915
3916 MAX_COST is the total allowed cost for the expanded RTL. */
3917
3918 static rtx
3919 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3920 rtx target, int unsignedp, int max_cost)
3921 {
3922 unsigned HOST_WIDE_INT cnst1;
3923 int extra_cost;
3924 bool sign_adjust = false;
3925 enum mult_variant variant;
3926 struct algorithm alg;
3927 rtx tem;
3928 bool speed = optimize_insn_for_speed_p ();
3929
3930 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3931 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3932
3933 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3934
3935 /* We can't optimize modes wider than BITS_PER_WORD.
3936 ??? We might be able to perform double-word arithmetic if
3937 mode == word_mode, however all the cost calculations in
3938 synth_mult etc. assume single-word operations. */
3939 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3940 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3941 return expmed_mult_highpart_optab (mode, op0, op1, target,
3942 unsignedp, max_cost);
3943
3944 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3945
3946 /* Check whether we try to multiply by a negative constant. */
3947 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3948 {
3949 sign_adjust = true;
3950 extra_cost += add_cost (speed, mode);
3951 }
3952
3953 /* See whether shift/add multiplication is cheap enough. */
3954 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3955 max_cost - extra_cost))
3956 {
3957 /* See whether the specialized multiplication optabs are
3958 cheaper than the shift/add version. */
3959 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3960 alg.cost.cost + extra_cost);
3961 if (tem)
3962 return tem;
3963
3964 tem = convert_to_mode (wider_mode, op0, unsignedp);
3965 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3966 tem = extract_high_half (mode, tem);
3967
3968 /* Adjust result for signedness. */
3969 if (sign_adjust)
3970 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3971
3972 return tem;
3973 }
3974 return expmed_mult_highpart_optab (mode, op0, op1, target,
3975 unsignedp, max_cost);
3976 }
3977
3978
3979 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3980
3981 static rtx
3982 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3983 {
3984 rtx result, temp, shift;
3985 rtx_code_label *label;
3986 int logd;
3987 int prec = GET_MODE_PRECISION (mode);
3988
3989 logd = floor_log2 (d);
3990 result = gen_reg_rtx (mode);
3991
3992 /* Avoid conditional branches when they're expensive. */
3993 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3994 && optimize_insn_for_speed_p ())
3995 {
3996 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3997 mode, 0, -1);
3998 if (signmask)
3999 {
4000 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4001 signmask = force_reg (mode, signmask);
4002 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4003
4004 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4005 which instruction sequence to use. If logical right shifts
4006 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4007 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4008
4009 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4010 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4011 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4012 > COSTS_N_INSNS (2)))
4013 {
4014 temp = expand_binop (mode, xor_optab, op0, signmask,
4015 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4016 temp = expand_binop (mode, sub_optab, temp, signmask,
4017 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4018 temp = expand_binop (mode, and_optab, temp,
4019 gen_int_mode (masklow, mode),
4020 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4021 temp = expand_binop (mode, xor_optab, temp, signmask,
4022 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4023 temp = expand_binop (mode, sub_optab, temp, signmask,
4024 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4025 }
4026 else
4027 {
4028 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4029 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4030 signmask = force_reg (mode, signmask);
4031
4032 temp = expand_binop (mode, add_optab, op0, signmask,
4033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4034 temp = expand_binop (mode, and_optab, temp,
4035 gen_int_mode (masklow, mode),
4036 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4037 temp = expand_binop (mode, sub_optab, temp, signmask,
4038 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4039 }
4040 return temp;
4041 }
4042 }
4043
4044 /* Mask contains the mode's signbit and the significant bits of the
4045 modulus. By including the signbit in the operation, many targets
4046 can avoid an explicit compare operation in the following comparison
4047 against zero. */
4048 wide_int mask = wi::mask (logd, false, prec);
4049 mask = wi::set_bit (mask, prec - 1);
4050
4051 temp = expand_binop (mode, and_optab, op0,
4052 immed_wide_int_const (mask, mode),
4053 result, 1, OPTAB_LIB_WIDEN);
4054 if (temp != result)
4055 emit_move_insn (result, temp);
4056
4057 label = gen_label_rtx ();
4058 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4059
4060 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4061 0, OPTAB_LIB_WIDEN);
4062
4063 mask = wi::mask (logd, true, prec);
4064 temp = expand_binop (mode, ior_optab, temp,
4065 immed_wide_int_const (mask, mode),
4066 result, 1, OPTAB_LIB_WIDEN);
4067 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4068 0, OPTAB_LIB_WIDEN);
4069 if (temp != result)
4070 emit_move_insn (result, temp);
4071 emit_label (label);
4072 return result;
4073 }
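
/* Branch-free sketch (illustrative only) of one of the sequences the
   fast path above generates, written as plain C for a 32-bit int with
   an arithmetic right shift of negative values:

     int smod_pow2 (int x, int logd)
     {
       int d = 1 << logd;
       int smask = x >> 31;
       unsigned bias = (unsigned) smask >> (32 - logd);
       return (int) (((unsigned) x + bias) & (d - 1)) - (int) bias;
     }

   SMASK is -1 when X is negative and 0 otherwise, so BIAS is D - 1 for
   negative X and 0 for nonnegative X.  E.g. smod_pow2 (-7, 2) == -3,
   matching C's truncating -7 % 4.  */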
4074
4075 /* Expand signed division of OP0 by a power of two D in mode MODE.
4076 This routine is only called for positive values of D. */
4077
4078 static rtx
4079 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4080 {
4081 rtx temp;
4082 rtx_code_label *label;
4083 int logd;
4084
4085 logd = floor_log2 (d);
4086
4087 if (d == 2
4088 && BRANCH_COST (optimize_insn_for_speed_p (),
4089 false) >= 1)
4090 {
4091 temp = gen_reg_rtx (mode);
4092 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4093 if (temp != NULL_RTX)
4094 {
4095 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4096 0, OPTAB_LIB_WIDEN);
4097 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4098 }
4099 }
4100
4101 if (HAVE_conditional_move
4102 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4103 {
4104 rtx temp2;
4105
4106 start_sequence ();
4107 temp2 = copy_to_mode_reg (mode, op0);
4108 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4109 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4110 temp = force_reg (mode, temp);
4111
4112 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4113 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4114 mode, temp, temp2, mode, 0);
4115 if (temp2)
4116 {
4117 rtx_insn *seq = get_insns ();
4118 end_sequence ();
4119 emit_insn (seq);
4120 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4121 }
4122 end_sequence ();
4123 }
4124
4125 if (BRANCH_COST (optimize_insn_for_speed_p (),
4126 false) >= 2)
4127 {
4128 int ushift = GET_MODE_BITSIZE (mode) - logd;
4129
4130 temp = gen_reg_rtx (mode);
4131 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4132 if (temp != NULL_RTX)
4133 {
4134 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4135 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4136 > COSTS_N_INSNS (1))
4137 temp = expand_binop (mode, and_optab, temp,
4138 gen_int_mode (d - 1, mode),
4139 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4140 else
4141 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4142 ushift, NULL_RTX, 1);
4143 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4144 0, OPTAB_LIB_WIDEN);
4145 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4146 }
4147 }
4148
4149 label = gen_label_rtx ();
4150 temp = copy_to_mode_reg (mode, op0);
4151 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4152 expand_inc (temp, gen_int_mode (d - 1, mode));
4153 emit_label (label);
4154 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4155 }
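
/* Branch-free sketch (illustrative only) of the store-flag path above,
   as plain C for a 32-bit int with an arithmetic right shift:

     int sdiv_pow2 (int x, int logd)
     {
       int bias = (x >> 31) & ((1 << logd) - 1);
       return (x + bias) >> logd;
     }

   BIAS is D - 1 when X is negative and 0 otherwise, which turns the
   flooring arithmetic shift into the truncating division C requires:
   sdiv_pow2 (-7, 2) == -1 and sdiv_pow2 (-8, 2) == -2.  */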
4156 \f
4157 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4158 if that is convenient, and returning where the result is.
4159 You may request either the quotient or the remainder as the result;
4160 specify REM_FLAG nonzero to get the remainder.
4161
4162 CODE is the expression code for which kind of division this is;
4163 it controls how rounding is done. MODE is the machine mode to use.
4164 UNSIGNEDP nonzero means do unsigned division. */
4165
4166 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4167 and then correct it by or'ing in missing high bits
4168 if result of ANDI is nonzero.
4169 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4170 This could optimize to a bfexts instruction.
4171 But C doesn't use these operations, so their optimizations are
4172 left for later. */
4173 /* ??? For modulo, we don't actually need the highpart of the first product,
4174 the low part will do nicely. And for small divisors, the second multiply
4175 can also be a low-part only multiply or even be completely left out.
4176 E.g. to calculate the remainder of a division by 3 with a 32 bit
4177 multiply, multiply with 0x55555556 and extract the upper two bits;
4178 the result is exact for inputs up to 0x1fffffff.
4179 The input range can be reduced by using cross-sum rules.
4180 For odd divisors >= 3, the following table gives right shift counts
4181 so that if a number is shifted by an integer multiple of the given
4182 amount, the remainder stays the same:
4183 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4184 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4185 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4186 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4187 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4188
4189 Cross-sum rules for even numbers can be derived by leaving as many bits
4190 to the right alone as the divisor has zeros to the right.
4191 E.g. if x is an unsigned 32 bit number:
4192 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4193 */
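
/* Sketch of the remainder-by-3 trick mentioned above (illustrative
   only, 32-bit unsigned arithmetic):

     unsigned umod3 (unsigned x)
     {
       return (x * 0x55555556u) >> 30;
     }

   0x55555556 is roughly 2^32 / 3, so the top two bits of the truncated
   32-bit product are approximately 4 * (x mod 3) / 3, which floors to
   the remainder; as stated above this is exact for x <= 0x1fffffff.  */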
4194
4195 rtx
4196 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4197 rtx op0, rtx op1, rtx target, int unsignedp,
4198 enum optab_methods methods)
4199 {
4200 machine_mode compute_mode;
4201 rtx tquotient;
4202 rtx quotient = 0, remainder = 0;
4203 rtx_insn *last;
4204 rtx_insn *insn;
4205 optab optab1, optab2;
4206 int op1_is_constant, op1_is_pow2 = 0;
4207 int max_cost, extra_cost;
4208 static HOST_WIDE_INT last_div_const = 0;
4209 bool speed = optimize_insn_for_speed_p ();
4210
4211 op1_is_constant = CONST_INT_P (op1);
4212 if (op1_is_constant)
4213 {
4214 wide_int ext_op1 = rtx_mode_t (op1, mode);
4215 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4216 || (! unsignedp
4217 && wi::popcount (wi::neg (ext_op1)) == 1));
4218 }
4219
4220 /*
4221 This is the structure of expand_divmod:
4222
4223 First comes code to fix up the operands so we can perform the operations
4224 correctly and efficiently.
4225
4226 Second comes a switch statement with code specific for each rounding mode.
4227 For some special operands this code emits all RTL for the desired
4228 operation, for other cases, it generates only a quotient and stores it in
4229 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4230 to indicate that it has not done anything.
4231
4232 Last comes code that finishes the operation. If QUOTIENT is set and
4233 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4234 QUOTIENT is not set, it is computed using trunc rounding.
4235
4236 We try to generate special code for division and remainder when OP1 is a
4237 constant. If |OP1| = 2**n we can use shifts and some other fast
4238 operations. For other values of OP1, we compute a carefully selected
4239 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4240 by m.
4241
4242 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4243 half of the product. Different strategies for generating the product are
4244 implemented in expmed_mult_highpart.
4245
4246 If what we actually want is the remainder, we generate that by another
4247 by-constant multiplication and a subtraction. */
4248
4249 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4250 code below will malfunction if we are, so check here and handle
4251 the special case if so. */
4252 if (op1 == const1_rtx)
4253 return rem_flag ? const0_rtx : op0;
4254
4255 /* When dividing by -1, we could get an overflow.
4256 negv_optab can handle overflows. */
4257 if (! unsignedp && op1 == constm1_rtx)
4258 {
4259 if (rem_flag)
4260 return const0_rtx;
4261 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4262 ? negv_optab : neg_optab, op0, target, 0);
4263 }
4264
4265 if (target
4266 /* Don't use the function value register as a target
4267 since we have to read it as well as write it,
4268 and function-inlining gets confused by this. */
4269 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4270 /* Don't clobber an operand while doing a multi-step calculation. */
4271 || ((rem_flag || op1_is_constant)
4272 && (reg_mentioned_p (target, op0)
4273 || (MEM_P (op0) && MEM_P (target))))
4274 || reg_mentioned_p (target, op1)
4275 || (MEM_P (op1) && MEM_P (target))))
4276 target = 0;
4277
4278 /* Get the mode in which to perform this computation. Normally it will
4279 be MODE, but sometimes we can't do the desired operation in MODE.
4280 If so, pick a wider mode in which we can do the operation. Convert
4281 to that mode at the start to avoid repeated conversions.
4282
4283 First see what operations we need. These depend on the expression
4284 we are evaluating. (We assume that divxx3 insns exist under the
4285 same conditions as modxx3 insns do, and that these insns don't normally
4286 fail. If these assumptions are not correct, we may generate less
4287 efficient code in some cases.)
4288
4289 Then see if we find a mode in which we can open-code that operation
4290 (either a division, modulus, or shift). Finally, check for the smallest
4291 mode for which we can do the operation with a library call. */
4292
4293 /* We might want to refine this now that we have division-by-constant
4294 optimization. Since expmed_mult_highpart tries so many variants, it is
4295 not straightforward to generalize this. Maybe we should make an array
4296 of possible modes in init_expmed? Save this for GCC 2.7. */
4297
4298 optab1 = (op1_is_pow2
4299 ? (unsignedp ? lshr_optab : ashr_optab)
4300 : (unsignedp ? udiv_optab : sdiv_optab));
4301 optab2 = (op1_is_pow2 ? optab1
4302 : (unsignedp ? udivmod_optab : sdivmod_optab));
4303
4304 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4305 {
4306 FOR_EACH_MODE_FROM (compute_mode, mode)
4307 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4308 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4309 break;
4310
4311 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4312 FOR_EACH_MODE_FROM (compute_mode, mode)
4313 if (optab_libfunc (optab1, compute_mode)
4314 || optab_libfunc (optab2, compute_mode))
4315 break;
4316 }
4317 else
4318 compute_mode = mode;
4319
4320 /* If we still couldn't find a mode, use MODE, but expand_binop will
4321 probably die. */
4322 if (compute_mode == VOIDmode)
4323 compute_mode = mode;
4324
4325 if (target && GET_MODE (target) == compute_mode)
4326 tquotient = target;
4327 else
4328 tquotient = gen_reg_rtx (compute_mode);
4329
4330 #if 0
4331 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4332 (mode), and thereby get better code when OP1 is a constant. Do that
4333 later. It will require going over all usages of SIZE below. */
4334 size = GET_MODE_BITSIZE (mode);
4335 #endif
4336
4337 /* Only deduct something for a REM if the last divide done was
4338 for a different constant. Then set the constant of the last
4339 divide. */
4340 max_cost = (unsignedp
4341 ? udiv_cost (speed, compute_mode)
4342 : sdiv_cost (speed, compute_mode));
4343 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4344 && INTVAL (op1) == last_div_const))
4345 max_cost -= (mul_cost (speed, compute_mode)
4346 + add_cost (speed, compute_mode));
4347
4348 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4349
4350 /* Now convert to the best mode to use. */
4351 if (compute_mode != mode)
4352 {
4353 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4354 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4355
4356 /* convert_modes may have placed op1 into a register, so we
4357 must recompute the following. */
4358 op1_is_constant = CONST_INT_P (op1);
4359 if (op1_is_constant)
4360 {
4361 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4362 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4363 || (! unsignedp
4364 && wi::popcount (wi::neg (ext_op1)) == 1));
4365 }
4366 else
4367 op1_is_pow2 = 0;
4368 }
4369
4370 /* If one of the operands is a volatile MEM, copy it into a register. */
4371
4372 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4373 op0 = force_reg (compute_mode, op0);
4374 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4375 op1 = force_reg (compute_mode, op1);
4376
4377 /* If we need the remainder or if OP1 is constant, we need to
4378 put OP0 in a register in case it has any queued subexpressions. */
4379 if (rem_flag || op1_is_constant)
4380 op0 = force_reg (compute_mode, op0);
4381
4382 last = get_last_insn ();
4383
4384 /* Promote floor rounding to trunc rounding for unsigned operations. */
4385 if (unsignedp)
4386 {
4387 if (code == FLOOR_DIV_EXPR)
4388 code = TRUNC_DIV_EXPR;
4389 if (code == FLOOR_MOD_EXPR)
4390 code = TRUNC_MOD_EXPR;
4391 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4392 code = TRUNC_DIV_EXPR;
4393 }
4394
4395 if (op1 != const0_rtx)
4396 switch (code)
4397 {
4398 case TRUNC_MOD_EXPR:
4399 case TRUNC_DIV_EXPR:
4400 if (op1_is_constant)
4401 {
4402 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4403 int size = GET_MODE_BITSIZE (int_mode);
4404 if (unsignedp)
4405 {
4406 unsigned HOST_WIDE_INT mh, ml;
4407 int pre_shift, post_shift;
4408 int dummy;
4409 wide_int wd = rtx_mode_t (op1, int_mode);
4410 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4411
4412 if (wi::popcount (wd) == 1)
4413 {
4414 pre_shift = floor_log2 (d);
4415 if (rem_flag)
4416 {
4417 unsigned HOST_WIDE_INT mask
4418 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4419 remainder
4420 = expand_binop (int_mode, and_optab, op0,
4421 gen_int_mode (mask, int_mode),
4422 remainder, 1, methods);
4423 if (remainder)
4424 return gen_lowpart (mode, remainder);
4425 }
4426 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4427 pre_shift, tquotient, 1);
4428 }
4429 else if (size <= HOST_BITS_PER_WIDE_INT)
4430 {
4431 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4432 {
4433 /* Most significant bit of divisor is set; emit an scc
4434 insn. */
4435 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4436 int_mode, 1, 1);
4437 }
4438 else
4439 {
4440 /* Find a suitable multiplier and right shift count
4441 instead of multiplying with D. */
4442
4443 mh = choose_multiplier (d, size, size,
4444 &ml, &post_shift, &dummy);
4445
4446 /* If the suggested multiplier is more than SIZE bits,
4447 we can do better for even divisors, using an
4448 initial right shift. */
4449 if (mh != 0 && (d & 1) == 0)
4450 {
4451 pre_shift = ctz_or_zero (d);
4452 mh = choose_multiplier (d >> pre_shift, size,
4453 size - pre_shift,
4454 &ml, &post_shift, &dummy);
4455 gcc_assert (!mh);
4456 }
4457 else
4458 pre_shift = 0;
4459
4460 if (mh != 0)
4461 {
4462 rtx t1, t2, t3, t4;
4463
4464 if (post_shift - 1 >= BITS_PER_WORD)
4465 goto fail1;
4466
4467 extra_cost
4468 = (shift_cost (speed, int_mode, post_shift - 1)
4469 + shift_cost (speed, int_mode, 1)
4470 + 2 * add_cost (speed, int_mode));
4471 t1 = expmed_mult_highpart
4472 (int_mode, op0, gen_int_mode (ml, int_mode),
4473 NULL_RTX, 1, max_cost - extra_cost);
4474 if (t1 == 0)
4475 goto fail1;
4476 t2 = force_operand (gen_rtx_MINUS (int_mode,
4477 op0, t1),
4478 NULL_RTX);
4479 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4480 t2, 1, NULL_RTX, 1);
4481 t4 = force_operand (gen_rtx_PLUS (int_mode,
4482 t1, t3),
4483 NULL_RTX);
4484 quotient = expand_shift
4485 (RSHIFT_EXPR, int_mode, t4,
4486 post_shift - 1, tquotient, 1);
4487 }
4488 else
4489 {
4490 rtx t1, t2;
4491
4492 if (pre_shift >= BITS_PER_WORD
4493 || post_shift >= BITS_PER_WORD)
4494 goto fail1;
4495
4496 t1 = expand_shift
4497 (RSHIFT_EXPR, int_mode, op0,
4498 pre_shift, NULL_RTX, 1);
4499 extra_cost
4500 = (shift_cost (speed, int_mode, pre_shift)
4501 + shift_cost (speed, int_mode, post_shift));
4502 t2 = expmed_mult_highpart
4503 (int_mode, t1,
4504 gen_int_mode (ml, int_mode),
4505 NULL_RTX, 1, max_cost - extra_cost);
4506 if (t2 == 0)
4507 goto fail1;
4508 quotient = expand_shift
4509 (RSHIFT_EXPR, int_mode, t2,
4510 post_shift, tquotient, 1);
4511 }
4512 }
4513 }
4514 else /* Too wide mode to use tricky code */
4515 break;
4516
4517 insn = get_last_insn ();
4518 if (insn != last)
4519 set_dst_reg_note (insn, REG_EQUAL,
4520 gen_rtx_UDIV (int_mode, op0, op1),
4521 quotient);
4522 }
4523 else /* TRUNC_DIV, signed */
4524 {
4525 unsigned HOST_WIDE_INT ml;
4526 int lgup, post_shift;
4527 rtx mlr;
4528 HOST_WIDE_INT d = INTVAL (op1);
4529 unsigned HOST_WIDE_INT abs_d;
4530
4531 /* Not prepared to handle division/remainder by
4532 0xffffffffffffffff8000000000000000 etc. */
4533 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4534 break;
4535
4536 /* Since d might be INT_MIN, we have to cast to
4537 unsigned HOST_WIDE_INT before negating to avoid
4538 undefined signed overflow. */
4539 abs_d = (d >= 0
4540 ? (unsigned HOST_WIDE_INT) d
4541 : - (unsigned HOST_WIDE_INT) d);
4542
4543 /* n rem d = n rem -d */
4544 if (rem_flag && d < 0)
4545 {
4546 d = abs_d;
4547 op1 = gen_int_mode (abs_d, int_mode);
4548 }
4549
4550 if (d == 1)
4551 quotient = op0;
4552 else if (d == -1)
4553 quotient = expand_unop (int_mode, neg_optab, op0,
4554 tquotient, 0);
4555 else if (size <= HOST_BITS_PER_WIDE_INT
4556 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4557 {
4558 /* This case is not handled correctly below. */
4559 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4560 int_mode, 1, 1);
4561 if (quotient == 0)
4562 goto fail1;
4563 }
4564 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4565 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4566 && (rem_flag
4567 ? smod_pow2_cheap (speed, int_mode)
4568 : sdiv_pow2_cheap (speed, int_mode))
4569 /* We assume that the cheap metric is true if the
4570 optab has an expander for this mode. */
4571 && ((optab_handler ((rem_flag ? smod_optab
4572 : sdiv_optab),
4573 int_mode)
4574 != CODE_FOR_nothing)
4575 || (optab_handler (sdivmod_optab, int_mode)
4576 != CODE_FOR_nothing)))
4577 ;
4578 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4579 {
4580 if (rem_flag)
4581 {
4582 remainder = expand_smod_pow2 (int_mode, op0, d);
4583 if (remainder)
4584 return gen_lowpart (mode, remainder);
4585 }
4586
4587 if (sdiv_pow2_cheap (speed, int_mode)
4588 && ((optab_handler (sdiv_optab, int_mode)
4589 != CODE_FOR_nothing)
4590 || (optab_handler (sdivmod_optab, int_mode)
4591 != CODE_FOR_nothing)))
4592 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4593 int_mode, op0,
4594 gen_int_mode (abs_d,
4595 int_mode),
4596 NULL_RTX, 0);
4597 else
4598 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4599
4600 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4601 negate the quotient. */
4602 if (d < 0)
4603 {
4604 insn = get_last_insn ();
4605 if (insn != last
4606 && abs_d < (HOST_WIDE_INT_1U
4607 << (HOST_BITS_PER_WIDE_INT - 1)))
4608 set_dst_reg_note (insn, REG_EQUAL,
4609 gen_rtx_DIV (int_mode, op0,
4610 gen_int_mode
4611 (abs_d,
4612 int_mode)),
4613 quotient);
4614
4615 quotient = expand_unop (int_mode, neg_optab,
4616 quotient, quotient, 0);
4617 }
4618 }
4619 else if (size <= HOST_BITS_PER_WIDE_INT)
4620 {
4621 choose_multiplier (abs_d, size, size - 1,
4622 &ml, &post_shift, &lgup);
4623 if (ml < HOST_WIDE_INT_1U << (size - 1))
4624 {
4625 rtx t1, t2, t3;
4626
4627 if (post_shift >= BITS_PER_WORD
4628 || size - 1 >= BITS_PER_WORD)
4629 goto fail1;
4630
4631 extra_cost = (shift_cost (speed, int_mode, post_shift)
4632 + shift_cost (speed, int_mode, size - 1)
4633 + add_cost (speed, int_mode));
4634 t1 = expmed_mult_highpart
4635 (int_mode, op0, gen_int_mode (ml, int_mode),
4636 NULL_RTX, 0, max_cost - extra_cost);
4637 if (t1 == 0)
4638 goto fail1;
4639 t2 = expand_shift
4640 (RSHIFT_EXPR, int_mode, t1,
4641 post_shift, NULL_RTX, 0);
4642 t3 = expand_shift
4643 (RSHIFT_EXPR, int_mode, op0,
4644 size - 1, NULL_RTX, 0);
4645 if (d < 0)
4646 quotient
4647 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4648 tquotient);
4649 else
4650 quotient
4651 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4652 tquotient);
4653 }
4654 else
4655 {
4656 rtx t1, t2, t3, t4;
4657
4658 if (post_shift >= BITS_PER_WORD
4659 || size - 1 >= BITS_PER_WORD)
4660 goto fail1;
4661
4662 ml |= HOST_WIDE_INT_M1U << (size - 1);
4663 mlr = gen_int_mode (ml, int_mode);
4664 extra_cost = (shift_cost (speed, int_mode, post_shift)
4665 + shift_cost (speed, int_mode, size - 1)
4666 + 2 * add_cost (speed, int_mode));
4667 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4668 NULL_RTX, 0,
4669 max_cost - extra_cost);
4670 if (t1 == 0)
4671 goto fail1;
4672 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4673 NULL_RTX);
4674 t3 = expand_shift
4675 (RSHIFT_EXPR, int_mode, t2,
4676 post_shift, NULL_RTX, 0);
4677 t4 = expand_shift
4678 (RSHIFT_EXPR, int_mode, op0,
4679 size - 1, NULL_RTX, 0);
4680 if (d < 0)
4681 quotient
4682 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4683 tquotient);
4684 else
4685 quotient
4686 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4687 tquotient);
4688 }
4689 }
4690 else /* Too wide mode to use tricky code */
4691 break;
4692
4693 insn = get_last_insn ();
4694 if (insn != last)
4695 set_dst_reg_note (insn, REG_EQUAL,
4696 gen_rtx_DIV (int_mode, op0, op1),
4697 quotient);
4698 }
4699 break;
4700 }
4701 fail1:
4702 delete_insns_since (last);
4703 break;
4704
4705 case FLOOR_DIV_EXPR:
4706 case FLOOR_MOD_EXPR:
4707 /* We will come here only for signed operations. */
4708 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4709 {
4710 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4711 int size = GET_MODE_BITSIZE (int_mode);
4712 unsigned HOST_WIDE_INT mh, ml;
4713 int pre_shift, lgup, post_shift;
4714 HOST_WIDE_INT d = INTVAL (op1);
4715
4716 if (d > 0)
4717 {
4718 /* We could just as easily deal with negative constants here,
4719 but it does not seem worth the trouble for GCC 2.6. */
4720 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4721 {
4722 pre_shift = floor_log2 (d);
4723 if (rem_flag)
4724 {
4725 unsigned HOST_WIDE_INT mask
4726 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4727 remainder = expand_binop
4728 (int_mode, and_optab, op0,
4729 gen_int_mode (mask, int_mode),
4730 remainder, 0, methods);
4731 if (remainder)
4732 return gen_lowpart (mode, remainder);
4733 }
4734 quotient = expand_shift
4735 (RSHIFT_EXPR, int_mode, op0,
4736 pre_shift, tquotient, 0);
4737 }
4738 else
4739 {
4740 rtx t1, t2, t3, t4;
4741
4742 mh = choose_multiplier (d, size, size - 1,
4743 &ml, &post_shift, &lgup);
4744 gcc_assert (!mh);
4745
4746 if (post_shift < BITS_PER_WORD
4747 && size - 1 < BITS_PER_WORD)
4748 {
4749 t1 = expand_shift
4750 (RSHIFT_EXPR, int_mode, op0,
4751 size - 1, NULL_RTX, 0);
4752 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4753 NULL_RTX, 0, OPTAB_WIDEN);
4754 extra_cost = (shift_cost (speed, int_mode, post_shift)
4755 + shift_cost (speed, int_mode, size - 1)
4756 + 2 * add_cost (speed, int_mode));
4757 t3 = expmed_mult_highpart
4758 (int_mode, t2, gen_int_mode (ml, int_mode),
4759 NULL_RTX, 1, max_cost - extra_cost);
4760 if (t3 != 0)
4761 {
4762 t4 = expand_shift
4763 (RSHIFT_EXPR, int_mode, t3,
4764 post_shift, NULL_RTX, 1);
4765 quotient = expand_binop (int_mode, xor_optab,
4766 t4, t1, tquotient, 0,
4767 OPTAB_WIDEN);
4768 }
4769 }
4770 }
4771 }
4772 else
4773 {
4774 rtx nsign, t1, t2, t3, t4;
4775 t1 = force_operand (gen_rtx_PLUS (int_mode,
4776 op0, constm1_rtx), NULL_RTX);
4777 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4778 0, OPTAB_WIDEN);
4779 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4780 size - 1, NULL_RTX, 0);
4781 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4782 NULL_RTX);
4783 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4784 NULL_RTX, 0);
4785 if (t4)
4786 {
4787 rtx t5;
4788 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4789 NULL_RTX, 0);
4790 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4791 tquotient);
4792 }
4793 }
4794 }
4795
4796 if (quotient != 0)
4797 break;
4798 delete_insns_since (last);
4799
4800 /* Try using an instruction that produces both the quotient and
4801 remainder, using truncation. We can easily compensate the quotient
4802 or remainder to get floor rounding, once we have the remainder.
4803 Notice that we compute also the final remainder value here,
4804 and return the result right away. */
4805 if (target == 0 || GET_MODE (target) != compute_mode)
4806 target = gen_reg_rtx (compute_mode);
4807
4808 if (rem_flag)
4809 {
4810 remainder
4811 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4812 quotient = gen_reg_rtx (compute_mode);
4813 }
4814 else
4815 {
4816 quotient
4817 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4818 remainder = gen_reg_rtx (compute_mode);
4819 }
4820
4821 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4822 quotient, remainder, 0))
4823 {
4824 /* This could be computed with a branch-less sequence.
4825 Save that for later. */
4826 rtx tem;
4827 rtx_code_label *label = gen_label_rtx ();
4828 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4829 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4830 NULL_RTX, 0, OPTAB_WIDEN);
4831 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4832 expand_dec (quotient, const1_rtx);
4833 expand_inc (remainder, op1);
4834 emit_label (label);
4835 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4836 }
4837
4838 /* No luck with division elimination or divmod. Have to do it
4839 by conditionally adjusting op0 *and* the result. */
4840 {
4841 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4842 rtx adjusted_op0;
4843 rtx tem;
4844
4845 quotient = gen_reg_rtx (compute_mode);
4846 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4847 label1 = gen_label_rtx ();
4848 label2 = gen_label_rtx ();
4849 label3 = gen_label_rtx ();
4850 label4 = gen_label_rtx ();
4851 label5 = gen_label_rtx ();
4852 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4853 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4854 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4855 quotient, 0, methods);
4856 if (tem != quotient)
4857 emit_move_insn (quotient, tem);
4858 emit_jump_insn (targetm.gen_jump (label5));
4859 emit_barrier ();
4860 emit_label (label1);
4861 expand_inc (adjusted_op0, const1_rtx);
4862 emit_jump_insn (targetm.gen_jump (label4));
4863 emit_barrier ();
4864 emit_label (label2);
4865 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4866 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4867 quotient, 0, methods);
4868 if (tem != quotient)
4869 emit_move_insn (quotient, tem);
4870 emit_jump_insn (targetm.gen_jump (label5));
4871 emit_barrier ();
4872 emit_label (label3);
4873 expand_dec (adjusted_op0, const1_rtx);
4874 emit_label (label4);
4875 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4876 quotient, 0, methods);
4877 if (tem != quotient)
4878 emit_move_insn (quotient, tem);
4879 expand_dec (quotient, const1_rtx);
4880 emit_label (label5);
4881 }
4882 break;
4883
4884 case CEIL_DIV_EXPR:
4885 case CEIL_MOD_EXPR:
4886 if (unsignedp)
4887 {
4888 if (op1_is_constant
4889 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4890 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4891 || INTVAL (op1) >= 0))
4892 {
4893 scalar_int_mode int_mode
4894 = as_a <scalar_int_mode> (compute_mode);
4895 rtx t1, t2, t3;
4896 unsigned HOST_WIDE_INT d = INTVAL (op1);
4897 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4898 floor_log2 (d), tquotient, 1);
4899 t2 = expand_binop (int_mode, and_optab, op0,
4900 gen_int_mode (d - 1, int_mode),
4901 NULL_RTX, 1, methods);
4902 t3 = gen_reg_rtx (int_mode);
4903 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4904 if (t3 == 0)
4905 {
4906 rtx_code_label *lab;
4907 lab = gen_label_rtx ();
4908 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4909 expand_inc (t1, const1_rtx);
4910 emit_label (lab);
4911 quotient = t1;
4912 }
4913 else
4914 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4915 tquotient);
4916 break;
4917 }
4918
4919 /* Try using an instruction that produces both the quotient and
4920 remainder, using truncation. We can easily compensate the
4921 quotient or remainder to get ceiling rounding, once we have the
4922 remainder. Notice that we compute also the final remainder
4923 value here, and return the result right away. */
4924 if (target == 0 || GET_MODE (target) != compute_mode)
4925 target = gen_reg_rtx (compute_mode);
4926
4927 if (rem_flag)
4928 {
4929 remainder = (REG_P (target)
4930 ? target : gen_reg_rtx (compute_mode));
4931 quotient = gen_reg_rtx (compute_mode);
4932 }
4933 else
4934 {
4935 quotient = (REG_P (target)
4936 ? target : gen_reg_rtx (compute_mode));
4937 remainder = gen_reg_rtx (compute_mode);
4938 }
4939
4940 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4941 remainder, 1))
4942 {
4943 /* This could be computed with a branch-less sequence.
4944 Save that for later. */
4945 rtx_code_label *label = gen_label_rtx ();
4946 do_cmp_and_jump (remainder, const0_rtx, EQ,
4947 compute_mode, label);
4948 expand_inc (quotient, const1_rtx);
4949 expand_dec (remainder, op1);
4950 emit_label (label);
4951 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4952 }
4953
4954 /* No luck with division elimination or divmod. Have to do it
4955 by conditionally adjusting op0 *and* the result. */
4956 {
4957 rtx_code_label *label1, *label2;
4958 rtx adjusted_op0, tem;
4959
4960 quotient = gen_reg_rtx (compute_mode);
4961 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4962 label1 = gen_label_rtx ();
4963 label2 = gen_label_rtx ();
4964 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4965 compute_mode, label1);
4966 emit_move_insn (quotient, const0_rtx);
4967 emit_jump_insn (targetm.gen_jump (label2));
4968 emit_barrier ();
4969 emit_label (label1);
4970 expand_dec (adjusted_op0, const1_rtx);
4971 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4972 quotient, 1, methods);
4973 if (tem != quotient)
4974 emit_move_insn (quotient, tem);
4975 expand_inc (quotient, const1_rtx);
4976 emit_label (label2);
4977 }
4978 }
4979 else /* signed */
4980 {
4981 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4982 && INTVAL (op1) >= 0)
4983 {
4984 /* This is extremely similar to the code for the unsigned case
4985 above. For 2.7 we should merge these variants, but for
4986 2.6.1 I don't want to touch the code for unsigned since that
4987 gets used in C. The signed case will only be used by other
4988 languages (Ada). */
4989
4990 rtx t1, t2, t3;
4991 unsigned HOST_WIDE_INT d = INTVAL (op1);
4992 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4993 floor_log2 (d), tquotient, 0);
4994 t2 = expand_binop (compute_mode, and_optab, op0,
4995 gen_int_mode (d - 1, compute_mode),
4996 NULL_RTX, 1, methods);
4997 t3 = gen_reg_rtx (compute_mode);
4998 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4999 compute_mode, 1, 1);
5000 if (t3 == 0)
5001 {
5002 rtx_code_label *lab;
5003 lab = gen_label_rtx ();
5004 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5005 expand_inc (t1, const1_rtx);
5006 emit_label (lab);
5007 quotient = t1;
5008 }
5009 else
5010 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5011 t1, t3),
5012 tquotient);
5013 break;
5014 }
5015
5016 /* Try using an instruction that produces both the quotient and
5017 remainder, using truncation. We can easily compensate the
5018 quotient or remainder to get ceiling rounding, once we have the
5019 remainder. Notice that we also compute the final remainder
5020 value here, and return the result right away. */
5021 if (target == 0 || GET_MODE (target) != compute_mode)
5022 target = gen_reg_rtx (compute_mode);
5023 if (rem_flag)
5024 {
5025 remainder = (REG_P (target)
5026 ? target : gen_reg_rtx (compute_mode));
5027 quotient = gen_reg_rtx (compute_mode);
5028 }
5029 else
5030 {
5031 quotient = (REG_P (target)
5032 ? target : gen_reg_rtx (compute_mode));
5033 remainder = gen_reg_rtx (compute_mode);
5034 }
5035
5036 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5037 remainder, 0))
5038 {
5039 /* This could be computed with a branch-less sequence.
5040 Save that for later. */
5041 rtx tem;
5042 rtx_code_label *label = gen_label_rtx ();
5043 do_cmp_and_jump (remainder, const0_rtx, EQ,
5044 compute_mode, label);
5045 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5046 NULL_RTX, 0, OPTAB_WIDEN);
5047 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5048 expand_inc (quotient, const1_rtx);
5049 expand_dec (remainder, op1);
5050 emit_label (label);
5051 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5052 }
5053
5054 /* No luck with division elimination or divmod. Have to do it
5055 by conditionally adjusting op0 *and* the result. */
5056 {
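/* Case analysis on the signs: when op0 and op1 are both strictly
   positive or both strictly negative, the true quotient is positive,
   so op0 is first moved one unit toward zero (labels 1 and 3), divided,
   and the quotient incremented (label 4); otherwise the truncating
   quotient is non-positive and already equals the ceiling.  */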
5057 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5058 rtx adjusted_op0;
5059 rtx tem;
5060
5061 quotient = gen_reg_rtx (compute_mode);
5062 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5063 label1 = gen_label_rtx ();
5064 label2 = gen_label_rtx ();
5065 label3 = gen_label_rtx ();
5066 label4 = gen_label_rtx ();
5067 label5 = gen_label_rtx ();
5068 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5069 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5070 compute_mode, label1);
5071 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5072 quotient, 0, methods);
5073 if (tem != quotient)
5074 emit_move_insn (quotient, tem);
5075 emit_jump_insn (targetm.gen_jump (label5));
5076 emit_barrier ();
5077 emit_label (label1);
5078 expand_dec (adjusted_op0, const1_rtx);
5079 emit_jump_insn (targetm.gen_jump (label4));
5080 emit_barrier ();
5081 emit_label (label2);
5082 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5083 compute_mode, label3);
5084 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5085 quotient, 0, methods);
5086 if (tem != quotient)
5087 emit_move_insn (quotient, tem);
5088 emit_jump_insn (targetm.gen_jump (label5));
5089 emit_barrier ();
5090 emit_label (label3);
5091 expand_inc (adjusted_op0, const1_rtx);
5092 emit_label (label4);
5093 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5094 quotient, 0, methods);
5095 if (tem != quotient)
5096 emit_move_insn (quotient, tem);
5097 expand_inc (quotient, const1_rtx);
5098 emit_label (label5);
5099 }
5100 }
5101 break;
5102
5103 case EXACT_DIV_EXPR:
5104 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5105 {
5106 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5107 int size = GET_MODE_BITSIZE (int_mode);
5108 HOST_WIDE_INT d = INTVAL (op1);
5109 unsigned HOST_WIDE_INT ml;
5110 int pre_shift;
5111 rtx t1;
5112
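/* An exact division by an odd D can be done as a multiplication by the
   multiplicative inverse of D modulo 2^size; any factor of two in D is
   removed first by the shift. E.g. in a 32-bit mode the inverse of 5 is
   0xCCCCCCCD, and 35 * 0xCCCCCCCD == 7 (mod 2^32).  */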
5113 pre_shift = ctz_or_zero (d);
5114 ml = invert_mod2n (d >> pre_shift, size);
5115 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5116 pre_shift, NULL_RTX, unsignedp);
5117 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5118 NULL_RTX, 1);
5119
5120 insn = get_last_insn ();
5121 set_dst_reg_note (insn, REG_EQUAL,
5122 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5123 int_mode, op0, op1),
5124 quotient);
5125 }
5126 break;
5127
5128 case ROUND_DIV_EXPR:
5129 case ROUND_MOD_EXPR:
5130 if (unsignedp)
5131 {
5132 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5133 rtx tem;
5134 rtx_code_label *label;
5135 label = gen_label_rtx ();
5136 quotient = gen_reg_rtx (int_mode);
5137 remainder = gen_reg_rtx (int_mode);
5138 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5139 {
5140 rtx tem;
5141 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5142 quotient, 1, methods);
5143 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5144 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5145 remainder, 1, methods);
5146 }
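/* Round-to-nearest adjustment: the quotient is incremented (and the
   remainder decreased by op1) when remainder > (op1 - 1) / 2. E.g. 8/3
   gives quotient 2 and remainder 2 > 1, so the result is rounded up to
   3, while 7/3 (remainder 1) stays at 2.  */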
5147 tem = plus_constant (int_mode, op1, -1);
5148 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5149 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5150 expand_inc (quotient, const1_rtx);
5151 expand_dec (remainder, op1);
5152 emit_label (label);
5153 }
5154 else
5155 {
5156 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5157 int size = GET_MODE_BITSIZE (int_mode);
5158 rtx abs_rem, abs_op1, tem, mask;
5159 rtx_code_label *label;
5160 label = gen_label_rtx ();
5161 quotient = gen_reg_rtx (int_mode);
5162 remainder = gen_reg_rtx (int_mode);
5163 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5164 {
5165 rtx tem;
5166 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5167 quotient, 0, methods);
5168 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5169 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5170 remainder, 0, methods);
5171 }
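/* MASK below is all ones when op0 and op1 have opposite signs and zero
   otherwise, so (mask ^ 1) - mask is +1 or -1 and (mask ^ op1) - mask is
   op1 or -op1; when 2 * |remainder| >= |op1| the quotient is moved one
   unit away from zero and the remainder compensated accordingly.  */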
5172 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5173 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5174 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5175 1, NULL_RTX, 1);
5176 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5177 tem = expand_binop (int_mode, xor_optab, op0, op1,
5178 NULL_RTX, 0, OPTAB_WIDEN);
5179 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5180 size - 1, NULL_RTX, 0);
5181 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5182 NULL_RTX, 0, OPTAB_WIDEN);
5183 tem = expand_binop (int_mode, sub_optab, tem, mask,
5184 NULL_RTX, 0, OPTAB_WIDEN);
5185 expand_inc (quotient, tem);
5186 tem = expand_binop (int_mode, xor_optab, mask, op1,
5187 NULL_RTX, 0, OPTAB_WIDEN);
5188 tem = expand_binop (int_mode, sub_optab, tem, mask,
5189 NULL_RTX, 0, OPTAB_WIDEN);
5190 expand_dec (remainder, tem);
5191 emit_label (label);
5192 }
5193 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5194
5195 default:
5196 gcc_unreachable ();
5197 }
5198
5199 if (quotient == 0)
5200 {
5201 if (target && GET_MODE (target) != compute_mode)
5202 target = 0;
5203
5204 if (rem_flag)
5205 {
5206 /* Try to produce the remainder without producing the quotient.
5207 If we seem to have a divmod pattern that does not require widening,
5208 don't try widening here. We should really have a WIDEN argument
5209 to expand_twoval_binop, since what we'd really like to do here is
5210 1) try a mod insn in compute_mode
5211 2) try a divmod insn in compute_mode
5212 3) try a div insn in compute_mode and multiply-subtract to get
5213 remainder
5214 4) try the same things with widening allowed. */
5215 remainder
5216 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5217 op0, op1, target,
5218 unsignedp,
5219 ((optab_handler (optab2, compute_mode)
5220 != CODE_FOR_nothing)
5221 ? OPTAB_DIRECT : OPTAB_WIDEN));
5222 if (remainder == 0)
5223 {
5224 /* No luck there. Can we do remainder and divide at once
5225 without a library call? */
5226 remainder = gen_reg_rtx (compute_mode);
5227 if (! expand_twoval_binop ((unsignedp
5228 ? udivmod_optab
5229 : sdivmod_optab),
5230 op0, op1,
5231 NULL_RTX, remainder, unsignedp))
5232 remainder = 0;
5233 }
5234
5235 if (remainder)
5236 return gen_lowpart (mode, remainder);
5237 }
5238
5239 /* Produce the quotient. Try a quotient insn, but not a library call.
5240 If we have a divmod in this mode, use it in preference to widening
5241 the div (for this test we assume it will not fail). Note that optab2
5242 is set to whichever of the two optabs the call below will use. */
5243 quotient
5244 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5245 op0, op1, rem_flag ? NULL_RTX : target,
5246 unsignedp,
5247 ((optab_handler (optab2, compute_mode)
5248 != CODE_FOR_nothing)
5249 ? OPTAB_DIRECT : OPTAB_WIDEN));
5250
5251 if (quotient == 0)
5252 {
5253 /* No luck there. Try a quotient-and-remainder insn,
5254 keeping the quotient alone. */
5255 quotient = gen_reg_rtx (compute_mode);
5256 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5257 op0, op1,
5258 quotient, NULL_RTX, unsignedp))
5259 {
5260 quotient = 0;
5261 if (! rem_flag)
5262 /* Still no luck. If we are not computing the remainder,
5263 use a library call for the quotient. */
5264 quotient = sign_expand_binop (compute_mode,
5265 udiv_optab, sdiv_optab,
5266 op0, op1, target,
5267 unsignedp, methods);
5268 }
5269 }
5270 }
5271
5272 if (rem_flag)
5273 {
5274 if (target && GET_MODE (target) != compute_mode)
5275 target = 0;
5276
5277 if (quotient == 0)
5278 {
5279 /* No divide instruction either. Use library for remainder. */
5280 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5281 op0, op1, target,
5282 unsignedp, methods);
5283 /* No remainder function. Try a quotient-and-remainder
5284 function, keeping the remainder. */
5285 if (!remainder
5286 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5287 {
5288 remainder = gen_reg_rtx (compute_mode);
5289 if (!expand_twoval_binop_libfunc
5290 (unsignedp ? udivmod_optab : sdivmod_optab,
5291 op0, op1,
5292 NULL_RTX, remainder,
5293 unsignedp ? UMOD : MOD))
5294 remainder = NULL_RTX;
5295 }
5296 }
5297 else
5298 {
5299 /* We divided. Now finish doing X - Y * (X / Y). */
5300 remainder = expand_mult (compute_mode, quotient, op1,
5301 NULL_RTX, unsignedp);
5302 remainder = expand_binop (compute_mode, sub_optab, op0,
5303 remainder, target, unsignedp,
5304 methods);
5305 }
5306 }
5307
5308 if (methods != OPTAB_LIB_WIDEN
5309 && (rem_flag ? remainder : quotient) == NULL_RTX)
5310 return NULL_RTX;
5311
5312 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5313 }
5314 \f
5315 /* Return a tree node with data type TYPE, describing the value of X.
5316 Usually this is a VAR_DECL, if there is no obvious better choice.
5317 X may be an expression; however, we only support those expressions
5318 generated by loop.c. */
5319
5320 tree
5321 make_tree (tree type, rtx x)
5322 {
5323 tree t;
5324
5325 switch (GET_CODE (x))
5326 {
5327 case CONST_INT:
5328 case CONST_WIDE_INT:
5329 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5330 return t;
5331
5332 case CONST_DOUBLE:
5333 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5334 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5335 t = wide_int_to_tree (type,
5336 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5337 HOST_BITS_PER_WIDE_INT * 2));
5338 else
5339 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5340
5341 return t;
5342
5343 case CONST_VECTOR:
5344 {
5345 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5346 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5347 tree itype = TREE_TYPE (type);
5348
5349 /* Build a tree with vector elements. */
5350 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5351 unsigned int count = elts.encoded_nelts ();
5352 for (unsigned int i = 0; i < count; ++i)
5353 {
5354 rtx elt = CONST_VECTOR_ELT (x, i);
5355 elts.quick_push (make_tree (itype, elt));
5356 }
5357
5358 return elts.build ();
5359 }
5360
5361 case PLUS:
5362 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5363 make_tree (type, XEXP (x, 1)));
5364
5365 case MINUS:
5366 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5367 make_tree (type, XEXP (x, 1)));
5368
5369 case NEG:
5370 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5371
5372 case MULT:
5373 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5374 make_tree (type, XEXP (x, 1)));
5375
5376 case ASHIFT:
5377 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5378 make_tree (type, XEXP (x, 1)));
5379
5380 case LSHIFTRT:
5381 t = unsigned_type_for (type);
5382 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5383 make_tree (t, XEXP (x, 0)),
5384 make_tree (type, XEXP (x, 1))));
5385
5386 case ASHIFTRT:
5387 t = signed_type_for (type);
5388 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5389 make_tree (t, XEXP (x, 0)),
5390 make_tree (type, XEXP (x, 1))));
5391
5392 case DIV:
5393 if (TREE_CODE (type) != REAL_TYPE)
5394 t = signed_type_for (type);
5395 else
5396 t = type;
5397
5398 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5399 make_tree (t, XEXP (x, 0)),
5400 make_tree (t, XEXP (x, 1))));
5401 case UDIV:
5402 t = unsigned_type_for (type);
5403 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5404 make_tree (t, XEXP (x, 0)),
5405 make_tree (t, XEXP (x, 1))));
5406
5407 case SIGN_EXTEND:
5408 case ZERO_EXTEND:
5409 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5410 GET_CODE (x) == ZERO_EXTEND);
5411 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5412
5413 case CONST:
5414 return make_tree (type, XEXP (x, 0));
5415
5416 case SYMBOL_REF:
5417 t = SYMBOL_REF_DECL (x);
5418 if (t)
5419 return fold_convert (type, build_fold_addr_expr (t));
5420 /* fall through. */
5421
5422 default:
5423 if (CONST_POLY_INT_P (x))
5424 return wide_int_to_tree (type, const_poly_int_value (x));
5425
5426 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5427
5428 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5429 address mode to pointer mode. */
5430 if (POINTER_TYPE_P (type))
5431 x = convert_memory_address_addr_space
5432 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5433
5434 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5435 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5436 t->decl_with_rtl.rtl = x;
5437
5438 return t;
5439 }
5440 }
5441 \f
5442 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5443 and returning TARGET.
5444
5445 If TARGET is 0, a pseudo-register or constant is returned. */
5446
5447 rtx
5448 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5449 {
5450 rtx tem = 0;
5451
5452 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5453 tem = simplify_binary_operation (AND, mode, op0, op1);
5454 if (tem == 0)
5455 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5456
5457 if (target == 0)
5458 target = tem;
5459 else if (tem != target)
5460 emit_move_insn (target, tem);
5461 return target;
5462 }
5463
5464 /* Helper function for emit_store_flag. */
5465 rtx
5466 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5467 machine_mode mode, machine_mode compare_mode,
5468 int unsignedp, rtx x, rtx y, int normalizep,
5469 machine_mode target_mode)
5470 {
5471 class expand_operand ops[4];
5472 rtx op0, comparison, subtarget;
5473 rtx_insn *last;
5474 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5475 scalar_int_mode int_target_mode;
5476
5477 last = get_last_insn ();
5478 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5479 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5480 if (!x || !y)
5481 {
5482 delete_insns_since (last);
5483 return NULL_RTX;
5484 }
5485
5486 if (target_mode == VOIDmode)
5487 int_target_mode = result_mode;
5488 else
5489 int_target_mode = as_a <scalar_int_mode> (target_mode);
5490 if (!target)
5491 target = gen_reg_rtx (int_target_mode);
5492
5493 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5494
5495 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5496 create_fixed_operand (&ops[1], comparison);
5497 create_fixed_operand (&ops[2], x);
5498 create_fixed_operand (&ops[3], y);
5499 if (!maybe_expand_insn (icode, 4, ops))
5500 {
5501 delete_insns_since (last);
5502 return NULL_RTX;
5503 }
5504 subtarget = ops[0].value;
5505
5506 /* If we are converting to a wider mode, first convert to
5507 INT_TARGET_MODE, then normalize. This produces better combining
5508 opportunities on machines that have a SIGN_EXTRACT when we are
5509 testing a single bit. This mostly benefits the 68k.
5510
5511 If STORE_FLAG_VALUE does not have the sign bit set when
5512 interpreted in MODE, we can do this conversion as unsigned, which
5513 is usually more efficient. */
5514 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5515 {
5516 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5517 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5518
5519 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5520 convert_move (target, subtarget, unsignedp);
5521
5522 op0 = target;
5523 result_mode = int_target_mode;
5524 }
5525 else
5526 op0 = subtarget;
5527
5528 /* If we want to keep subexpressions around, don't reuse our last
5529 target. */
5530 if (optimize)
5531 subtarget = 0;
5532
5533 /* Now normalize to the proper value in MODE. Sometimes we don't
5534 have to do anything. */
5535 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5536 ;
5537 /* STORE_FLAG_VALUE might be the most negative number, so write
5538 the comparison this way to avoid a compile-time warning. */
5539 else if (- normalizep == STORE_FLAG_VALUE)
5540 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5541
5542 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5543 it hard to use a value of just the sign bit due to ANSI integer
5544 constant typing rules. */
5545 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5546 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5547 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5548 normalizep == 1);
5549 else
5550 {
5551 gcc_assert (STORE_FLAG_VALUE & 1);
5552
5553 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5554 if (normalizep == -1)
5555 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5556 }
5557
5558 /* If we were converting to a smaller mode, do the conversion now. */
5559 if (int_target_mode != result_mode)
5560 {
5561 convert_move (target, op0, 0);
5562 return target;
5563 }
5564 else
5565 return op0;
5566 }
5567
5568
5569 /* A subroutine of emit_store_flag only including "tricks" that do not
5570 need a recursive call. These are kept separate to avoid infinite
5571 loops. */
5572
5573 static rtx
5574 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5575 machine_mode mode, int unsignedp, int normalizep,
5576 machine_mode target_mode)
5577 {
5578 rtx subtarget;
5579 enum insn_code icode;
5580 machine_mode compare_mode;
5581 enum mode_class mclass;
5582 enum rtx_code scode;
5583
5584 if (unsignedp)
5585 code = unsigned_condition (code);
5586 scode = swap_condition (code);
5587
5588 /* If one operand is constant, make it the second one. Only do this
5589 if the other operand is not constant as well. */
5590
5591 if (swap_commutative_operands_p (op0, op1))
5592 {
5593 std::swap (op0, op1);
5594 code = swap_condition (code);
5595 }
5596
5597 if (mode == VOIDmode)
5598 mode = GET_MODE (op0);
5599
5600 if (CONST_SCALAR_INT_P (op1))
5601 canonicalize_comparison (mode, &code, &op1);
5602
5603 /* For some comparisons with 1 and -1, we can convert this to
5604 comparisons with zero. This will often produce more opportunities for
5605 store-flag insns. */
5606
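/* E.g. x < 1 becomes x <= 0 and, for unsigned comparisons, x >= 1
   becomes x != 0 while x < 1 becomes x == 0.  */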
5607 switch (code)
5608 {
5609 case LT:
5610 if (op1 == const1_rtx)
5611 op1 = const0_rtx, code = LE;
5612 break;
5613 case LE:
5614 if (op1 == constm1_rtx)
5615 op1 = const0_rtx, code = LT;
5616 break;
5617 case GE:
5618 if (op1 == const1_rtx)
5619 op1 = const0_rtx, code = GT;
5620 break;
5621 case GT:
5622 if (op1 == constm1_rtx)
5623 op1 = const0_rtx, code = GE;
5624 break;
5625 case GEU:
5626 if (op1 == const1_rtx)
5627 op1 = const0_rtx, code = NE;
5628 break;
5629 case LTU:
5630 if (op1 == const1_rtx)
5631 op1 = const0_rtx, code = EQ;
5632 break;
5633 default:
5634 break;
5635 }
5636
5637 /* If we are comparing a double-word integer with zero or -1, we can
5638 convert the comparison into one involving a single word. */
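/* E.g. on a target with 32-bit words, a DImode test against zero ORs the
   two word halves (a test against -1 ANDs them) and compares the single
   word, while a DImode sign test only needs the high word.  */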
5639 scalar_int_mode int_mode;
5640 if (is_int_mode (mode, &int_mode)
5641 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5642 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5643 {
5644 rtx tem;
5645 if ((code == EQ || code == NE)
5646 && (op1 == const0_rtx || op1 == constm1_rtx))
5647 {
5648 rtx op00, op01;
5649
5650 /* Do a logical OR or AND of the two words and compare the
5651 result. */
5652 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5653 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5654 tem = expand_binop (word_mode,
5655 op1 == const0_rtx ? ior_optab : and_optab,
5656 op00, op01, NULL_RTX, unsignedp,
5657 OPTAB_DIRECT);
5658
5659 if (tem != 0)
5660 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5661 unsignedp, normalizep);
5662 }
5663 else if ((code == LT || code == GE) && op1 == const0_rtx)
5664 {
5665 rtx op0h;
5666
5667 /* If testing the sign bit, can just test on high word. */
5668 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5669 subreg_highpart_offset (word_mode,
5670 int_mode));
5671 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5672 unsignedp, normalizep);
5673 }
5674 else
5675 tem = NULL_RTX;
5676
5677 if (tem)
5678 {
5679 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5680 return tem;
5681 if (!target)
5682 target = gen_reg_rtx (target_mode);
5683
5684 convert_move (target, tem,
5685 !val_signbit_known_set_p (word_mode,
5686 (normalizep ? normalizep
5687 : STORE_FLAG_VALUE)));
5688 return target;
5689 }
5690 }
5691
5692 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5693 complement of A (for GE) and shifting the sign bit to the low bit. */
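/* E.g. in a 32-bit mode, A < 0 as a 0/1 value is just a logical right
   shift of the sign bit down to bit 0 ((unsigned) A >> 31); for a 0/-1
   value an arithmetic shift is used instead, and A >= 0 complements A
   first.  */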
5694 if (op1 == const0_rtx && (code == LT || code == GE)
5695 && is_int_mode (mode, &int_mode)
5696 && (normalizep || STORE_FLAG_VALUE == 1
5697 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5698 {
5699 scalar_int_mode int_target_mode;
5700 subtarget = target;
5701
5702 if (!target)
5703 int_target_mode = int_mode;
5704 else
5705 {
5706 /* If the result is to be wider than OP0, it is best to convert it
5707 first. If it is to be narrower, it is *incorrect* to convert it
5708 first. */
5709 int_target_mode = as_a <scalar_int_mode> (target_mode);
5710 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5711 {
5712 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5713 int_mode = int_target_mode;
5714 }
5715 }
5716
5717 if (int_target_mode != int_mode)
5718 subtarget = 0;
5719
5720 if (code == GE)
5721 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5722 ((STORE_FLAG_VALUE == 1 || normalizep)
5723 ? 0 : subtarget), 0);
5724
5725 if (STORE_FLAG_VALUE == 1 || normalizep)
5726 /* If we are supposed to produce a 0/1 value, we want to do
5727 a logical shift from the sign bit to the low-order bit; for
5728 a -1/0 value, we do an arithmetic shift. */
5729 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5730 GET_MODE_BITSIZE (int_mode) - 1,
5731 subtarget, normalizep != -1);
5732
5733 if (int_mode != int_target_mode)
5734 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5735
5736 return op0;
5737 }
5738
5739 mclass = GET_MODE_CLASS (mode);
5740 FOR_EACH_MODE_FROM (compare_mode, mode)
5741 {
5742 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5743 icode = optab_handler (cstore_optab, optab_mode);
5744 if (icode != CODE_FOR_nothing)
5745 {
5746 do_pending_stack_adjust ();
5747 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5748 unsignedp, op0, op1, normalizep, target_mode);
5749 if (tem)
5750 return tem;
5751
5752 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5753 {
5754 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5755 unsignedp, op1, op0, normalizep, target_mode);
5756 if (tem)
5757 return tem;
5758 }
5759 break;
5760 }
5761 }
5762
5763 return 0;
5764 }
5765
5766 /* Subroutine of emit_store_flag that handles cases in which the operands
5767 are scalar integers. SUBTARGET is the target to use for temporary
5768 operations and TRUEVAL is the value to store when the condition is
5769 true. All other arguments are as for emit_store_flag. */
5770
5771 rtx
5772 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5773 rtx op1, scalar_int_mode mode, int unsignedp,
5774 int normalizep, rtx trueval)
5775 {
5776 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5777 rtx_insn *last = get_last_insn ();
5778
5779 /* If this is an equality comparison of integers, we can try to exclusive-or
5780 (or subtract) the two operands and use a recursive call to try the
5781 comparison with zero. Don't do any of these cases if branches are
5782 very cheap. */
5783
5784 if ((code == EQ || code == NE) && op1 != const0_rtx)
5785 {
5786 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5787 OPTAB_WIDEN);
5788
5789 if (tem == 0)
5790 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5791 OPTAB_WIDEN);
5792 if (tem != 0)
5793 tem = emit_store_flag (target, code, tem, const0_rtx,
5794 mode, unsignedp, normalizep);
5795 if (tem != 0)
5796 return tem;
5797
5798 delete_insns_since (last);
5799 }
5800
5801 /* For integer comparisons, try the reverse comparison. However, for
5802 small X, and if we'd have to extend anyway, implementing "X != 0"
5803 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5804 rtx_code rcode = reverse_condition (code);
5805 if (can_compare_p (rcode, mode, ccp_store_flag)
5806 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5807 && code == NE
5808 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5809 && op1 == const0_rtx))
5810 {
5811 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5812 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5813
5814 /* Again, for the reverse comparison, use either an addition or a XOR. */
5815 if (want_add
5816 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5817 optimize_insn_for_speed_p ()) == 0)
5818 {
5819 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5820 STORE_FLAG_VALUE, target_mode);
5821 if (tem != 0)
5822 tem = expand_binop (target_mode, add_optab, tem,
5823 gen_int_mode (normalizep, target_mode),
5824 target, 0, OPTAB_WIDEN);
5825 if (tem != 0)
5826 return tem;
5827 }
5828 else if (!want_add
5829 && rtx_cost (trueval, mode, XOR, 1,
5830 optimize_insn_for_speed_p ()) == 0)
5831 {
5832 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5833 normalizep, target_mode);
5834 if (tem != 0)
5835 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5836 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5837 if (tem != 0)
5838 return tem;
5839 }
5840
5841 delete_insns_since (last);
5842 }
5843
5844 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5845 the constant zero. Reject all other comparisons at this point. Only
5846 do LE and GT if branches are expensive since they are expensive on
5847 2-operand machines. */
5848
5849 if (op1 != const0_rtx
5850 || (code != EQ && code != NE
5851 && (BRANCH_COST (optimize_insn_for_speed_p (),
5852 false) <= 1 || (code != LE && code != GT))))
5853 return 0;
5854
5855 /* Try to put the result of the comparison in the sign bit. Assume we can't
5856 do the necessary operation below. */
5857
5858 rtx tem = 0;
5859
5860 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5861 the sign bit set. */
5862
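/* E.g. A == 0 gives 0 | -1, which is negative; A == 3 gives 3 | 2 == 3,
   which is not; and any negative A keeps its sign bit through the OR, so
   the sign bit of (A | (A - 1)) is exactly the A <= 0 result.  */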
5863 if (code == LE)
5864 {
5865 /* This is destructive, so SUBTARGET can't be OP0. */
5866 if (rtx_equal_p (subtarget, op0))
5867 subtarget = 0;
5868
5869 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5870 OPTAB_WIDEN);
5871 if (tem)
5872 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5873 OPTAB_WIDEN);
5874 }
5875
5876 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5877 number of bits in the mode of OP0, minus one. */
5878
5879 if (code == GT)
5880 {
5881 if (rtx_equal_p (subtarget, op0))
5882 subtarget = 0;
5883
5884 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5885 GET_MODE_BITSIZE (mode) - 1,
5886 subtarget, 0);
5887 if (tem)
5888 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5889 OPTAB_WIDEN);
5890 }
5891
5892 if (code == EQ || code == NE)
5893 {
5894 /* For EQ or NE, one way to do the comparison is to apply an operation
5895 that converts the operand into a positive number if it is nonzero
5896 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5897 for NE we negate. This puts the result in the sign bit. Then we
5898 normalize with a shift, if needed.
5899
5900 Two operations that can do the above actions are ABS and FFS, so try
5901 them. If that doesn't work, and MODE is smaller than a full word,
5902 we can use zero-extension to the wider mode (an unsigned conversion)
5903 as the operation. */
5904
5905 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5906 that is compensated by the subsequent overflow when subtracting
5907 one / negating. */
5908
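/* E.g. abs (A) - 1 is negative only for A == 0, and - abs (A) is negative
   only for A != 0; as noted above, even the INT_MIN overflow in abs leaves
   the sign bit on the correct side in both cases.  */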
5909 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5910 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5911 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5912 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5913 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5914 {
5915 tem = convert_modes (word_mode, mode, op0, 1);
5916 mode = word_mode;
5917 }
5918
5919 if (tem != 0)
5920 {
5921 if (code == EQ)
5922 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5923 0, OPTAB_WIDEN);
5924 else
5925 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5926 }
5927
5928 /* If we couldn't do it that way, for NE we can "or" the two's complement
5929 of the value with itself. For EQ, we take the one's complement of
5930 that "or", which is an extra insn, so we only handle EQ if branches
5931 are expensive. */
5932
5933 if (tem == 0
5934 && (code == NE
5935 || BRANCH_COST (optimize_insn_for_speed_p (),
5936 false) > 1))
5937 {
5938 if (rtx_equal_p (subtarget, op0))
5939 subtarget = 0;
5940
5941 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5942 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5943 OPTAB_WIDEN);
5944
5945 if (tem && code == EQ)
5946 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5947 }
5948 }
5949
5950 if (tem && normalizep)
5951 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5952 GET_MODE_BITSIZE (mode) - 1,
5953 subtarget, normalizep == 1);
5954
5955 if (tem)
5956 {
5957 if (!target)
5958 ;
5959 else if (GET_MODE (tem) != target_mode)
5960 {
5961 convert_move (target, tem, 0);
5962 tem = target;
5963 }
5964 else if (!subtarget)
5965 {
5966 emit_move_insn (target, tem);
5967 tem = target;
5968 }
5969 }
5970 else
5971 delete_insns_since (last);
5972
5973 return tem;
5974 }
5975
5976 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5977 and storing in TARGET. Normally return TARGET.
5978 Return 0 if that cannot be done.
5979
5980 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5981 it is VOIDmode, they cannot both be CONST_INT.
5982
5983 UNSIGNEDP is for the case where we have to widen the operands
5984 to perform the operation. It says to use zero-extension.
5985
5986 NORMALIZEP is 1 if we should convert the result to be either zero
5987 or one. NORMALIZEP is -1 if we should convert the result to be
5988 either zero or -1. If NORMALIZEP is zero, the result will be left
5989 "raw" out of the scc insn. */
5990
5991 rtx
5992 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5993 machine_mode mode, int unsignedp, int normalizep)
5994 {
5995 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5996 enum rtx_code rcode;
5997 rtx subtarget;
5998 rtx tem, trueval;
5999 rtx_insn *last;
6000
6001 /* If we compare constants, we shouldn't use a store-flag operation,
6002 but a constant load. We can get there via the vanilla route that
6003 usually generates a compare-branch sequence, but will in this case
6004 fold the comparison to a constant, and thus elide the branch. */
6005 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6006 return NULL_RTX;
6007
6008 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6009 target_mode);
6010 if (tem)
6011 return tem;
6012
6013 /* If we reached here, we can't do this with a scc insn, however there
6014 are some comparisons that can be done in other ways. Don't do any
6015 of these cases if branches are very cheap. */
6016 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6017 return 0;
6018
6019 /* See what we need to return. We can only return a 1, -1, or the
6020 sign bit. */
6021
6022 if (normalizep == 0)
6023 {
6024 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6025 normalizep = STORE_FLAG_VALUE;
6026
6027 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6028 ;
6029 else
6030 return 0;
6031 }
6032
6033 last = get_last_insn ();
6034
6035 /* If optimizing, use different pseudo registers for each insn, instead
6036 of reusing the same pseudo. This leads to better CSE, but slows
6037 down the compiler, since there are more pseudos. */
6038 subtarget = (!optimize
6039 && (target_mode == mode)) ? target : NULL_RTX;
6040 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6041
6042 /* For floating-point comparisons, try the reverse comparison or try
6043 changing the "orderedness" of the comparison. */
6044 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6045 {
6046 enum rtx_code first_code;
6047 bool and_them;
6048
6049 rcode = reverse_condition_maybe_unordered (code);
6050 if (can_compare_p (rcode, mode, ccp_store_flag)
6051 && (code == ORDERED || code == UNORDERED
6052 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6053 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6054 {
6055 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6056 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6057
6058 /* For the reverse comparison, use either an addition or a XOR. */
6059 if (want_add
6060 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6061 optimize_insn_for_speed_p ()) == 0)
6062 {
6063 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6064 STORE_FLAG_VALUE, target_mode);
6065 if (tem)
6066 return expand_binop (target_mode, add_optab, tem,
6067 gen_int_mode (normalizep, target_mode),
6068 target, 0, OPTAB_WIDEN);
6069 }
6070 else if (!want_add
6071 && rtx_cost (trueval, mode, XOR, 1,
6072 optimize_insn_for_speed_p ()) == 0)
6073 {
6074 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6075 normalizep, target_mode);
6076 if (tem)
6077 return expand_binop (target_mode, xor_optab, tem, trueval,
6078 target, INTVAL (trueval) >= 0,
6079 OPTAB_WIDEN);
6080 }
6081 }
6082
6083 delete_insns_since (last);
6084
6085 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6086 if (code == ORDERED || code == UNORDERED)
6087 return 0;
6088
6089 and_them = split_comparison (code, mode, &first_code, &code);
6090
6091 /* If there are no NaNs, the first comparison should always fall through.
6092 Effectively change the comparison to the other one. */
6093 if (!HONOR_NANS (mode))
6094 {
6095 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6096 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6097 target_mode);
6098 }
6099
6100 if (!HAVE_conditional_move)
6101 return 0;
6102
6103 /* Do not turn a trapping comparison into a non-trapping one. */
6104 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6105 && flag_trapping_math)
6106 return 0;
6107
6108 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6109 conditional move. */
6110 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6111 normalizep, target_mode);
6112 if (tem == 0)
6113 return 0;
6114
6115 if (and_them)
6116 tem = emit_conditional_move (target, code, op0, op1, mode,
6117 tem, const0_rtx, GET_MODE (tem), 0);
6118 else
6119 tem = emit_conditional_move (target, code, op0, op1, mode,
6120 trueval, tem, GET_MODE (tem), 0);
6121
6122 if (tem == 0)
6123 delete_insns_since (last);
6124 return tem;
6125 }
6126
6127 /* The remaining tricks only apply to integer comparisons. */
6128
6129 scalar_int_mode int_mode;
6130 if (is_int_mode (mode, &int_mode))
6131 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6132 unsignedp, normalizep, trueval);
6133
6134 return 0;
6135 }
6136
6137 /* Like emit_store_flag, but always succeeds. */
6138
6139 rtx
6140 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6141 machine_mode mode, int unsignedp, int normalizep)
6142 {
6143 rtx tem;
6144 rtx_code_label *label;
6145 rtx trueval, falseval;
6146
6147 /* First see if emit_store_flag can do the job. */
6148 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6149 if (tem != 0)
6150 return tem;
6151
6152 /* If one operand is constant, make it the second one. Only do this
6153 if the other operand is not constant as well. */
6154 if (swap_commutative_operands_p (op0, op1))
6155 {
6156 std::swap (op0, op1);
6157 code = swap_condition (code);
6158 }
6159
6160 if (mode == VOIDmode)
6161 mode = GET_MODE (op0);
6162
6163 if (!target)
6164 target = gen_reg_rtx (word_mode);
6165
6166 /* If this failed, we have to do this with set/compare/jump/set code.
6167 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6168 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6169 if (code == NE
6170 && GET_MODE_CLASS (mode) == MODE_INT
6171 && REG_P (target)
6172 && op0 == target
6173 && op1 == const0_rtx)
6174 {
6175 label = gen_label_rtx ();
6176 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6177 NULL_RTX, NULL, label,
6178 profile_probability::uninitialized ());
6179 emit_move_insn (target, trueval);
6180 emit_label (label);
6181 return target;
6182 }
6183
6184 if (!REG_P (target)
6185 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6186 target = gen_reg_rtx (GET_MODE (target));
6187
6188 /* Jump in the right direction if the target cannot implement CODE
6189 but can jump on its reverse condition. */
6190 falseval = const0_rtx;
6191 if (! can_compare_p (code, mode, ccp_jump)
6192 && (! FLOAT_MODE_P (mode)
6193 || code == ORDERED || code == UNORDERED
6194 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6195 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6196 {
6197 enum rtx_code rcode;
6198 if (FLOAT_MODE_P (mode))
6199 rcode = reverse_condition_maybe_unordered (code);
6200 else
6201 rcode = reverse_condition (code);
6202
6203 /* Canonicalize to UNORDERED for the libcall. */
6204 if (can_compare_p (rcode, mode, ccp_jump)
6205 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6206 {
6207 falseval = trueval;
6208 trueval = const0_rtx;
6209 code = rcode;
6210 }
6211 }
6212
6213 emit_move_insn (target, trueval);
6214 label = gen_label_rtx ();
6215 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6216 label, profile_probability::uninitialized ());
6217
6218 emit_move_insn (target, falseval);
6219 emit_label (label);
6220
6221 return target;
6222 }
6223
6224 /* Helper function for canonicalize_comparison. Swap between inclusive
6225 and exclusive ranges in order to create an equivalent comparison. See
6226 canonicalize_comparison for the possible cases. */
6227
6228 static enum rtx_code
6229 equivalent_cmp_code (enum rtx_code code)
6230 {
6231 switch (code)
6232 {
6233 case GT:
6234 return GE;
6235 case GE:
6236 return GT;
6237 case LT:
6238 return LE;
6239 case LE:
6240 return LT;
6241 case GTU:
6242 return GEU;
6243 case GEU:
6244 return GTU;
6245 case LTU:
6246 return LEU;
6247 case LEU:
6248 return LTU;
6249
6250 default:
6251 return code;
6252 }
6253 }
6254
6255 /* Choose the more appropriate immediate in scalar integer comparisons. The
6256 purpose of this is to end up with an immediate which can be loaded into a
6257 register in fewer moves, if possible.
6258
6259 For each integer comparison there exists an equivalent choice:
6260 i) a > b or a >= b + 1
6261 ii) a <= b or a < b + 1
6262 iii) a >= b or a > b - 1
6263 iv) a < b or a <= b - 1
6264
6265 MODE is the mode of the first operand.
6266 CODE points to the comparison code.
6267 IMM points to the rtx containing the immediate. *IMM must satisfy
6268 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6269 on exit. */
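/* For example, on a hypothetical target whose compare instructions only
   accept 12-bit immediates, x > 0xFFF can be encoded directly while the
   equivalent x >= 0x1000 would need the constant loaded into a register
   first; the insn_cost comparison below picks whichever form is cheaper.  */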
6270
6271 void
6272 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6273 {
6274 if (!SCALAR_INT_MODE_P (mode))
6275 return;
6276
6277 int to_add = 0;
6278 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6279
6280 /* Extract the immediate value from the rtx. */
6281 wide_int imm_val = rtx_mode_t (*imm, mode);
6282
6283 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6284 to_add = 1;
6285 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6286 to_add = -1;
6287 else
6288 return;
6289
6290 /* Check for overflow/underflow in the case of signed values and
6291 wrapping around in the case of unsigned values. If any occur,
6292 cancel the optimization. */
6293 wi::overflow_type overflow = wi::OVF_NONE;
6294 wide_int imm_modif;
6295
6296 if (to_add == 1)
6297 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6298 else
6299 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6300
6301 if (overflow)
6302 return;
6303
6304 /* The following creates a pseudo; if we cannot do that, bail out. */
6305 if (!can_create_pseudo_p ())
6306 return;
6307
6308 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6309 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6310
6311 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6312 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6313
6314 /* Update the immediate and the code. */
6315 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6316 {
6317 *code = equivalent_cmp_code (*code);
6318 *imm = new_imm;
6319 }
6320 }
6321
6322
6323 \f
6324 /* Perform possibly multi-word comparison and conditional jump to LABEL
6325 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6326 now a thin wrapper around do_compare_rtx_and_jump. */
6327
6328 static void
6329 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6330 rtx_code_label *label)
6331 {
6332 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6333 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6334 NULL, label, profile_probability::uninitialized ());
6335 }