[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2021 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
93
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
100 }
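/* As an illustration, assuming a 32-bit SImode:
     mask_rtx (SImode, 4, 8, false)  -> (const_int 0x00000ff0)
     mask_rtx (SImode, 4, 8, true)   -> (const_int 0xfffff00f)
   i.e. eight ones shifted left by four bits, or the complement of that.  */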
101
 102 /* Test whether a value is zero or a power of two.  */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
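/* For example, 8 & 7 == 0 and 0 & -1 == 0, so 8 and 0 satisfy the test,
   whereas 12 & 11 == 8, so 12 does not.  */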
105
106 struct init_expmed_rtl
107 {
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
126
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
129 };
130
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
134 {
135 int to_size, from_size;
136 rtx which;
137
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
140
 141 /* Most partial integers have a precision less than the "full"
 142 integer they require for storage.  If one does not, reduce its
 143 bit size by one here so that, for comparison purposes, it still
 144 counts as narrower than the corresponding full integer mode.  */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
151
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
154
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
158 /* Restore all->reg's mode. */
159 PUT_MODE (all->reg, to_mode);
160 }
161
162 static void
163 init_expmed_one_mode (struct init_expmed_rtl *all,
164 machine_mode mode, int speed)
165 {
166 int m, n, mode_bitsize;
167 machine_mode mode_from;
168
169 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
170
171 PUT_MODE (all->reg, mode);
172 PUT_MODE (all->plus, mode);
173 PUT_MODE (all->neg, mode);
174 PUT_MODE (all->mult, mode);
175 PUT_MODE (all->sdiv, mode);
176 PUT_MODE (all->udiv, mode);
177 PUT_MODE (all->sdiv_32, mode);
178 PUT_MODE (all->smod_32, mode);
179 PUT_MODE (all->wide_trunc, mode);
180 PUT_MODE (all->shift, mode);
181 PUT_MODE (all->shift_mult, mode);
182 PUT_MODE (all->shift_add, mode);
183 PUT_MODE (all->shift_sub0, mode);
184 PUT_MODE (all->shift_sub1, mode);
185 PUT_MODE (all->zext, mode);
186 PUT_MODE (all->trunc, mode);
187
188 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
189 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
190 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
191 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
192 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
193
194 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
195 <= 2 * add_cost (speed, mode)));
196 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
197 <= 4 * add_cost (speed, mode)));
198
199 set_shift_cost (speed, mode, 0, 0);
200 {
201 int cost = add_cost (speed, mode);
202 set_shiftadd_cost (speed, mode, 0, cost);
203 set_shiftsub0_cost (speed, mode, 0, cost);
204 set_shiftsub1_cost (speed, mode, 0, cost);
205 }
206
207 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
208 for (m = 1; m < n; m++)
209 {
210 XEXP (all->shift, 1) = all->cint[m];
211 XEXP (all->shift_mult, 1) = all->pow2[m];
212
213 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
214 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
215 speed));
216 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
217 speed));
218 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
219 speed));
220 }
221
222 scalar_int_mode int_mode_to;
223 if (is_a <scalar_int_mode> (mode, &int_mode_to))
224 {
225 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
226 mode_from = (machine_mode)(mode_from + 1))
227 init_expmed_one_conv (all, int_mode_to,
228 as_a <scalar_int_mode> (mode_from), speed);
229
230 scalar_int_mode wider_mode;
231 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
232 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
233 {
234 PUT_MODE (all->reg, mode);
235 PUT_MODE (all->zext, wider_mode);
236 PUT_MODE (all->wide_mult, wider_mode);
237 PUT_MODE (all->wide_lshr, wider_mode);
238 XEXP (all->wide_lshr, 1)
239 = gen_int_shift_amount (wider_mode, mode_bitsize);
240
241 set_mul_widen_cost (speed, wider_mode,
242 set_src_cost (all->wide_mult, wider_mode, speed));
243 set_mul_highpart_cost (speed, int_mode_to,
244 set_src_cost (all->wide_trunc,
245 int_mode_to, speed));
246 }
247 }
248 }
249
250 void
251 init_expmed (void)
252 {
253 struct init_expmed_rtl all;
254 machine_mode mode = QImode;
255 int m, speed;
256
257 memset (&all, 0, sizeof all);
258 for (m = 1; m < MAX_BITS_PER_WORD; m++)
259 {
260 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
261 all.cint[m] = GEN_INT (m);
262 }
263
264 /* Avoid using hard regs in ways which may be unsupported. */
265 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
266 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
267 all.neg = gen_rtx_NEG (mode, all.reg);
268 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
269 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
270 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
271 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
272 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
273 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
274 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
275 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
276 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
277 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
278 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
279 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
280 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
281 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
282 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
283
284 for (speed = 0; speed < 2; speed++)
285 {
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
288
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
292
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
297
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
302 }
303
304 if (alg_hash_used_p ())
305 {
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
308 }
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
312
313 ggc_free (all.trunc);
314 ggc_free (all.shift_sub1);
315 ggc_free (all.shift_sub0);
316 ggc_free (all.shift_add);
317 ggc_free (all.shift_mult);
318 ggc_free (all.shift);
319 ggc_free (all.wide_trunc);
320 ggc_free (all.wide_lshr);
321 ggc_free (all.wide_mult);
322 ggc_free (all.zext);
323 ggc_free (all.smod_32);
324 ggc_free (all.sdiv_32);
325 ggc_free (all.udiv);
326 ggc_free (all.sdiv);
327 ggc_free (all.mult);
328 ggc_free (all.neg);
329 ggc_free (all.plus);
330 ggc_free (all.reg);
331 }
332
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
336
337 rtx
338 negate_rtx (machine_mode mode, rtx x)
339 {
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
341
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
344
345 return result;
346 }
347
348 /* Whether reverse storage order is supported on the target. */
349 static int reverse_storage_order_supported = -1;
350
351 /* Check whether reverse storage order is supported on the target. */
352
353 static void
354 check_reverse_storage_order_support (void)
355 {
356 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
357 {
358 reverse_storage_order_supported = 0;
359 sorry ("reverse scalar storage order");
360 }
361 else
362 reverse_storage_order_supported = 1;
363 }
364
365 /* Whether reverse FP storage order is supported on the target. */
366 static int reverse_float_storage_order_supported = -1;
367
368 /* Check whether reverse FP storage order is supported on the target. */
369
370 static void
371 check_reverse_float_storage_order_support (void)
372 {
373 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
374 {
375 reverse_float_storage_order_supported = 0;
376 sorry ("reverse floating-point scalar storage order");
377 }
378 else
379 reverse_float_storage_order_supported = 1;
380 }
381
382 /* Return an rtx representing value of X with reverse storage order.
383 MODE is the intended mode of the result,
384 useful if X is a CONST_INT. */
385
386 rtx
387 flip_storage_order (machine_mode mode, rtx x)
388 {
389 scalar_int_mode int_mode;
390 rtx result;
391
392 if (mode == QImode)
393 return x;
394
395 if (COMPLEX_MODE_P (mode))
396 {
397 rtx real = read_complex_part (x, false);
398 rtx imag = read_complex_part (x, true);
399
400 real = flip_storage_order (GET_MODE_INNER (mode), real);
401 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
402
403 return gen_rtx_CONCAT (mode, real, imag);
404 }
405
406 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
407 check_reverse_storage_order_support ();
408
409 if (!is_a <scalar_int_mode> (mode, &int_mode))
410 {
411 if (FLOAT_MODE_P (mode)
412 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
413 check_reverse_float_storage_order_support ();
414
415 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
416 || !targetm.scalar_mode_supported_p (int_mode))
417 {
418 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
419 return x;
420 }
421 x = gen_lowpart (int_mode, x);
422 }
423
424 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
425 if (result == 0)
426 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
427
428 if (int_mode != mode)
429 result = gen_lowpart (mode, result);
430
431 return result;
432 }
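/* As an illustration, flipping the storage order of an SImode value is a
   plain byte swap: 0x11223344 becomes 0x44332211.  QImode values are
   returned unchanged, and complex values have each part flipped
   separately.  */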
433
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435 first unit of mode MODE that contains a bitfield of size BITSIZE at
436 bit position BITNUM. If MODE is not set, return a BLKmode reference
437 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
438 of the field within the new memory. */
439
440 static rtx
441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
442 unsigned HOST_WIDE_INT bitsize,
443 unsigned HOST_WIDE_INT bitnum,
444 unsigned HOST_WIDE_INT *new_bitnum)
445 {
446 scalar_int_mode imode;
447 if (mode.exists (&imode))
448 {
449 unsigned int unit = GET_MODE_BITSIZE (imode);
450 *new_bitnum = bitnum % unit;
451 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
452 return adjust_bitfield_address (mem, imode, offset);
453 }
454 else
455 {
456 *new_bitnum = bitnum % BITS_PER_UNIT;
457 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
458 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
459 / BITS_PER_UNIT);
460 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
461 }
462 }
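/* As a worked example, with MODE = SImode (32-bit units) and BITNUM = 37,
   the memory is advanced by (37 - 37 % 32) / 8 = 4 bytes and *NEW_BITNUM
   becomes 5.  When no mode is given, the BLKmode reference instead covers
   only the bytes that actually hold the field.  */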
463
464 /* The caller wants to perform insertion or extraction PATTERN on a
465 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466 BITREGION_START and BITREGION_END are as for store_bit_field
467 and FIELDMODE is the natural mode of the field.
468
469 Search for a mode that is compatible with the memory access
470 restrictions and (where applicable) with a register insertion or
471 extraction. Return the new memory on success, storing the adjusted
472 bit position in *NEW_BITNUM. Return null otherwise. */
473
474 static rtx
475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
476 rtx op0, HOST_WIDE_INT bitsize,
477 HOST_WIDE_INT bitnum,
478 poly_uint64 bitregion_start,
479 poly_uint64 bitregion_end,
480 machine_mode fieldmode,
481 unsigned HOST_WIDE_INT *new_bitnum)
482 {
483 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
484 bitregion_end, MEM_ALIGN (op0),
485 MEM_VOLATILE_P (op0));
486 scalar_int_mode best_mode;
487 if (iter.next_mode (&best_mode))
488 {
489 /* We can use a memory in BEST_MODE. See whether this is true for
490 any wider modes. All other things being equal, we prefer to
491 use the widest mode possible because it tends to expose more
492 CSE opportunities. */
493 if (!iter.prefer_smaller_modes ())
494 {
495 /* Limit the search to the mode required by the corresponding
496 register insertion or extraction instruction, if any. */
497 scalar_int_mode limit_mode = word_mode;
498 extraction_insn insn;
499 if (get_best_reg_extraction_insn (&insn, pattern,
500 GET_MODE_BITSIZE (best_mode),
501 fieldmode))
502 limit_mode = insn.field_mode;
503
504 scalar_int_mode wider_mode;
505 while (iter.next_mode (&wider_mode)
506 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
507 best_mode = wider_mode;
508 }
509 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
510 new_bitnum);
511 }
512 return NULL_RTX;
513 }
514
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
517 offset is then BITNUM / BITS_PER_UNIT. */
518
519 static bool
520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
521 machine_mode struct_mode)
522 {
523 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
524 if (BYTES_BIG_ENDIAN)
525 return (multiple_p (bitnum, BITS_PER_UNIT)
526 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
527 || multiple_p (bitnum + bitsize,
528 regsize * BITS_PER_UNIT)));
529 else
530 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
531 }
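/* For example, on a little-endian target whose register-natural size is
   4 bytes, a field starting at bit 32 is a lowpart (subreg offset 4),
   while a field starting at bit 8 is not.  */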
532
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535 Return false if the access would touch memory outside the range
536 BITREGION_START to BITREGION_END for conformance to the C++ memory
537 model. */
538
539 static bool
540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
541 unsigned HOST_WIDE_INT bitnum,
542 scalar_int_mode fieldmode,
543 poly_uint64 bitregion_start,
544 poly_uint64 bitregion_end)
545 {
546 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
547
548 /* -fstrict-volatile-bitfields must be enabled and we must have a
549 volatile MEM. */
550 if (!MEM_P (op0)
551 || !MEM_VOLATILE_P (op0)
552 || flag_strict_volatile_bitfields <= 0)
553 return false;
554
555 /* The bit size must not be larger than the field mode, and
556 the field mode must not be larger than a word. */
557 if (bitsize > modesize || modesize > BITS_PER_WORD)
558 return false;
559
560 /* Check for cases of unaligned fields that must be split. */
561 if (bitnum % modesize + bitsize > modesize)
562 return false;
563
564 /* The memory must be sufficiently aligned for a MODESIZE access.
 565 This condition guarantees that the memory access will not
566 touch anything after the end of the structure. */
567 if (MEM_ALIGN (op0) < modesize)
568 return false;
569
570 /* Check for cases where the C++ memory model applies. */
571 if (maybe_ne (bitregion_end, 0U)
572 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
573 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
574 bitregion_end)))
575 return false;
576
577 return true;
578 }
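/* As a source-level illustration (hypothetical example), given

     struct s { volatile unsigned int f : 8; } *p;

   an access to p->f compiled with -fstrict-volatile-bitfields is done as a
   single access in the declared field mode (typically SImode here), as
   long as the checks above succeed; the function returns false when the
   field would have to be split or the access would touch bits outside the
   C++ memory model's bit region.  */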
579
580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
581 bit number BITNUM can be treated as a simple value of mode MODE.
582 Store the byte offset in *BYTENUM if so. */
583
584 static bool
585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
586 machine_mode mode, poly_uint64 *bytenum)
587 {
588 return (MEM_P (op0)
589 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
590 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
591 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
592 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
593 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
594 }
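/* For example, a 32-bit field that starts on a 32-bit-aligned byte
   boundary within a sufficiently aligned MEM can simply be accessed as an
   SImode MEM at byte offset BITNUM / BITS_PER_UNIT.  */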
595 \f
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
599 are as for store_bit_field. */
600
601 static bool
602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
603 opt_scalar_int_mode op0_mode,
604 unsigned HOST_WIDE_INT bitsize,
605 unsigned HOST_WIDE_INT bitnum,
606 rtx value, scalar_int_mode value_mode)
607 {
608 class expand_operand ops[4];
609 rtx value1;
610 rtx xop0 = op0;
611 rtx_insn *last = get_last_insn ();
612 bool copy_back = false;
613
614 scalar_int_mode op_mode = insv->field_mode;
615 unsigned int unit = GET_MODE_BITSIZE (op_mode);
616 if (bitsize == 0 || bitsize > unit)
617 return false;
618
619 if (MEM_P (xop0))
620 /* Get a reference to the first byte of the field. */
621 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
622 &bitnum);
623 else
624 {
625 /* Convert from counting within OP0 to counting in OP_MODE. */
626 if (BYTES_BIG_ENDIAN)
627 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
628
629 /* If xop0 is a register, we need it in OP_MODE
630 to make it acceptable to the format of insv. */
631 if (GET_CODE (xop0) == SUBREG)
632 /* We can't just change the mode, because this might clobber op0,
633 and we will need the original value of op0 if insv fails. */
634 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
635 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
636 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
637 }
638
639 /* If the destination is a paradoxical subreg such that we need a
640 truncate to the inner mode, perform the insertion on a temporary and
641 truncate the result to the original destination. Note that we can't
642 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
643 X) 0)) is (reg:N X). */
644 if (GET_CODE (xop0) == SUBREG
645 && REG_P (SUBREG_REG (xop0))
646 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
647 op_mode))
648 {
649 rtx tem = gen_reg_rtx (op_mode);
650 emit_move_insn (tem, xop0);
651 xop0 = tem;
652 copy_back = true;
653 }
654
 655 /* There is a similar overflow check at the start of store_bit_field_1,
 656 but it only covers the case where the field lies completely outside
 657 the register.  The field may also lie partially in the register, in
 658 which case we need to adjust BITSIZE for the partial overflow.
 659 Without this fix, pr48335-2.c is broken on big-endian targets whose
 660 architectures provide a bit-insert instruction, such as arm and
 661 aarch64.  */
662 if (bitsize + bitnum > unit && bitnum < unit)
663 {
664 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
665 "destination object, data truncated into %wu-bit",
666 bitsize, unit - bitnum);
667 bitsize = unit - bitnum;
668 }
669
670 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
671 "backwards" from the size of the unit we are inserting into.
672 Otherwise, we count bits from the most significant on a
673 BYTES/BITS_BIG_ENDIAN machine. */
674
675 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
676 bitnum = unit - bitsize - bitnum;
677
678 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
679 value1 = value;
680 if (value_mode != op_mode)
681 {
682 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
683 {
684 rtx tmp;
685 /* Optimization: Don't bother really extending VALUE
686 if it has all the bits we will actually use. However,
687 if we must narrow it, be sure we do it correctly. */
688
689 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
690 {
691 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
692 if (! tmp)
693 tmp = simplify_gen_subreg (op_mode,
694 force_reg (value_mode, value1),
695 value_mode, 0);
696 }
697 else
698 {
699 tmp = gen_lowpart_if_possible (op_mode, value1);
700 if (! tmp)
701 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
702 }
703 value1 = tmp;
704 }
705 else if (CONST_INT_P (value))
706 value1 = gen_int_mode (INTVAL (value), op_mode);
707 else
708 /* Parse phase is supposed to make VALUE's data type
709 match that of the component reference, which is a type
710 at least as wide as the field; so VALUE should have
711 a mode that corresponds to that type. */
712 gcc_assert (CONSTANT_P (value));
713 }
714
715 create_fixed_operand (&ops[0], xop0);
716 create_integer_operand (&ops[1], bitsize);
717 create_integer_operand (&ops[2], bitnum);
718 create_input_operand (&ops[3], value1, op_mode);
719 if (maybe_expand_insn (insv->icode, 4, ops))
720 {
721 if (copy_back)
722 convert_move (op0, xop0, true);
723 return true;
724 }
725 delete_insns_since (last);
726 return false;
727 }
728
729 /* A subroutine of store_bit_field, with the same arguments. Return true
730 if the operation could be implemented.
731
732 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
733 no other way of implementing the operation. If FALLBACK_P is false,
734 return false instead. */
735
736 static bool
737 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
738 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
739 machine_mode fieldmode,
740 rtx value, bool reverse, bool fallback_p)
741 {
742 rtx op0 = str_rtx;
743
744 while (GET_CODE (op0) == SUBREG)
745 {
746 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
747 op0 = SUBREG_REG (op0);
748 }
749
750 /* No action is needed if the target is a register and if the field
751 lies completely outside that register. This can occur if the source
752 code contains an out-of-bounds access to a small array. */
753 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
754 return true;
755
756 /* Use vec_set patterns for inserting parts of vectors whenever
757 available. */
758 machine_mode outermode = GET_MODE (op0);
759 scalar_mode innermode = GET_MODE_INNER (outermode);
760 poly_uint64 pos;
761 if (VECTOR_MODE_P (outermode)
762 && !MEM_P (op0)
763 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
764 && fieldmode == innermode
765 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
766 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
767 {
768 class expand_operand ops[3];
769 enum insn_code icode = optab_handler (vec_set_optab, outermode);
770
771 create_fixed_operand (&ops[0], op0);
772 create_input_operand (&ops[1], value, innermode);
773 create_integer_operand (&ops[2], pos);
774 if (maybe_expand_insn (icode, 3, ops))
775 return true;
776 }
777
778 /* If the target is a register, overwriting the entire object, or storing
779 a full-word or multi-word field can be done with just a SUBREG. */
780 if (!MEM_P (op0)
781 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
782 {
783 /* Use the subreg machinery either to narrow OP0 to the required
784 words or to cope with mode punning between equal-sized modes.
785 In the latter case, use subreg on the rhs side, not lhs. */
786 rtx sub;
787 HOST_WIDE_INT regnum;
788 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
789 if (known_eq (bitnum, 0U)
790 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
791 {
792 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
793 if (sub)
794 {
795 if (reverse)
796 sub = flip_storage_order (GET_MODE (op0), sub);
797 emit_move_insn (op0, sub);
798 return true;
799 }
800 }
801 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
802 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
803 {
804 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
805 regnum * regsize);
806 if (sub)
807 {
808 if (reverse)
809 value = flip_storage_order (fieldmode, value);
810 emit_move_insn (sub, value);
811 return true;
812 }
813 }
814 }
815
816 /* If the target is memory, storing any naturally aligned field can be
817 done with a simple store. For targets that support fast unaligned
818 memory, any naturally sized, unit aligned field can be done directly. */
819 poly_uint64 bytenum;
820 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
821 {
822 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
823 if (reverse)
824 value = flip_storage_order (fieldmode, value);
825 emit_move_insn (op0, value);
826 return true;
827 }
828
829 /* It's possible we'll need to handle other cases here for
830 polynomial bitnum and bitsize. */
831
832 /* From here on we need to be looking at a fixed-size insertion. */
833 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
834 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
835
836 /* Make sure we are playing with integral modes. Pun with subregs
837 if we aren't. This must come after the entire register case above,
838 since that case is valid for any mode. The following cases are only
839 valid for integral modes. */
840 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
841 scalar_int_mode imode;
842 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
843 {
844 if (MEM_P (op0))
845 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
846 0, MEM_SIZE (op0));
847 else if (!op0_mode.exists ())
848 {
849 if (ibitnum == 0
850 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
851 && MEM_P (value)
852 && !reverse)
853 {
854 value = adjust_address (value, GET_MODE (op0), 0);
855 emit_move_insn (op0, value);
856 return true;
857 }
858 if (!fallback_p)
859 return false;
860 rtx temp = assign_stack_temp (GET_MODE (op0),
861 GET_MODE_SIZE (GET_MODE (op0)));
862 emit_move_insn (temp, op0);
863 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
864 reverse, fallback_p);
865 emit_move_insn (op0, temp);
866 return true;
867 }
868 else
869 op0 = gen_lowpart (op0_mode.require (), op0);
870 }
871
872 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
873 bitregion_start, bitregion_end,
874 fieldmode, value, reverse, fallback_p);
875 }
876
877 /* Subroutine of store_bit_field_1, with the same arguments, except
878 that BITSIZE and BITNUM are constant. Handle cases specific to
879 integral modes. If OP0_MODE is defined, it is the mode of OP0,
880 otherwise OP0 is a BLKmode MEM. */
881
882 static bool
883 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
884 unsigned HOST_WIDE_INT bitsize,
885 unsigned HOST_WIDE_INT bitnum,
886 poly_uint64 bitregion_start,
887 poly_uint64 bitregion_end,
888 machine_mode fieldmode,
889 rtx value, bool reverse, bool fallback_p)
890 {
891 /* Storing an lsb-aligned field in a register
892 can be done with a movstrict instruction. */
893
894 if (!MEM_P (op0)
895 && !reverse
896 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
897 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
898 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
899 {
900 class expand_operand ops[2];
901 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
902 rtx arg0 = op0;
903 unsigned HOST_WIDE_INT subreg_off;
904
905 if (GET_CODE (arg0) == SUBREG)
906 {
907 /* Else we've got some float mode source being extracted into
908 a different float mode destination -- this combination of
909 subregs results in Severe Tire Damage. */
910 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
911 || GET_MODE_CLASS (fieldmode) == MODE_INT
912 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
913 arg0 = SUBREG_REG (arg0);
914 }
915
916 subreg_off = bitnum / BITS_PER_UNIT;
917 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
918 {
919 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
920
921 create_fixed_operand (&ops[0], arg0);
922 /* Shrink the source operand to FIELDMODE. */
923 create_convert_operand_to (&ops[1], value, fieldmode, false);
924 if (maybe_expand_insn (icode, 2, ops))
925 return true;
926 }
927 }
928
929 /* Handle fields bigger than a word. */
930
931 if (bitsize > BITS_PER_WORD)
932 {
933 /* Here we transfer the words of the field
934 in the order least significant first.
935 This is because the most significant word is the one which may
936 be less than full.
937 However, only do that if the value is not BLKmode. */
938
939 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
940 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
941 rtx_insn *last;
942
943 /* This is the mode we must force value to, so that there will be enough
944 subwords to extract. Note that fieldmode will often (always?) be
945 VOIDmode, because that is what store_field uses to indicate that this
946 is a bit field, but passing VOIDmode to operand_subword_force
947 is not allowed.
948
949 The mode must be fixed-size, since insertions into variable-sized
950 objects are meant to be handled before calling this function. */
951 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
952 if (value_mode == VOIDmode)
953 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
954
955 last = get_last_insn ();
956 for (int i = 0; i < nwords; i++)
957 {
958 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
959 except maybe for the last iteration. */
960 const unsigned HOST_WIDE_INT new_bitsize
961 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
962 /* Bit offset from the starting bit number in the target. */
963 const unsigned int bit_offset
964 = backwards ^ reverse
965 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
966 : i * BITS_PER_WORD;
967 /* Starting word number in the value. */
968 const unsigned int wordnum
969 = backwards
970 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
971 : i;
972 /* The chunk of the value in word_mode. We use bit-field extraction
973 in BLKmode to handle unaligned memory references and to shift the
974 last chunk right on big-endian machines if need be. */
975 rtx value_word
976 = fieldmode == BLKmode
977 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
978 1, NULL_RTX, word_mode, word_mode, false,
979 NULL)
980 : operand_subword_force (value, wordnum, value_mode);
981
982 if (!store_bit_field_1 (op0, new_bitsize,
983 bitnum + bit_offset,
984 bitregion_start, bitregion_end,
985 word_mode,
986 value_word, reverse, fallback_p))
987 {
988 delete_insns_since (last);
989 return false;
990 }
991 }
992 return true;
993 }
994
995 /* If VALUE has a floating-point or complex mode, access it as an
996 integer of the corresponding size. This can occur on a machine
997 with 64 bit registers that uses SFmode for float. It can also
998 occur for unaligned float or complex fields. */
999 rtx orig_value = value;
1000 scalar_int_mode value_mode;
1001 if (GET_MODE (value) == VOIDmode)
1002 /* By this point we've dealt with values that are bigger than a word,
1003 so word_mode is a conservatively correct choice. */
1004 value_mode = word_mode;
1005 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1006 {
1007 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1008 value = gen_reg_rtx (value_mode);
1009 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1010 }
1011
1012 /* If OP0 is a multi-word register, narrow it to the affected word.
1013 If the region spans two words, defer to store_split_bit_field.
1014 Don't do this if op0 is a single hard register wider than word
1015 such as a float or vector register. */
1016 if (!MEM_P (op0)
1017 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1018 && (!REG_P (op0)
1019 || !HARD_REGISTER_P (op0)
1020 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1021 {
1022 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1023 {
1024 if (!fallback_p)
1025 return false;
1026
1027 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1028 bitregion_start, bitregion_end,
1029 value, value_mode, reverse);
1030 return true;
1031 }
1032 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1033 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1034 gcc_assert (op0);
1035 op0_mode = word_mode;
1036 bitnum %= BITS_PER_WORD;
1037 }
1038
1039 /* From here on we can assume that the field to be stored in fits
1040 within a word. If the destination is a register, it too fits
1041 in a word. */
1042
1043 extraction_insn insv;
1044 if (!MEM_P (op0)
1045 && !reverse
1046 && get_best_reg_extraction_insn (&insv, EP_insv,
1047 GET_MODE_BITSIZE (op0_mode.require ()),
1048 fieldmode)
1049 && store_bit_field_using_insv (&insv, op0, op0_mode,
1050 bitsize, bitnum, value, value_mode))
1051 return true;
1052
1053 /* If OP0 is a memory, try copying it to a register and seeing if a
1054 cheap register alternative is available. */
1055 if (MEM_P (op0) && !reverse)
1056 {
1057 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1058 fieldmode)
1059 && store_bit_field_using_insv (&insv, op0, op0_mode,
1060 bitsize, bitnum, value, value_mode))
1061 return true;
1062
1063 rtx_insn *last = get_last_insn ();
1064
1065 /* Try loading part of OP0 into a register, inserting the bitfield
1066 into that, and then copying the result back to OP0. */
1067 unsigned HOST_WIDE_INT bitpos;
1068 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1069 bitregion_start, bitregion_end,
1070 fieldmode, &bitpos);
1071 if (xop0)
1072 {
1073 rtx tempreg = copy_to_reg (xop0);
1074 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1075 bitregion_start, bitregion_end,
1076 fieldmode, orig_value, reverse, false))
1077 {
1078 emit_move_insn (xop0, tempreg);
1079 return true;
1080 }
1081 delete_insns_since (last);
1082 }
1083 }
1084
1085 if (!fallback_p)
1086 return false;
1087
1088 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1089 bitregion_end, value, value_mode, reverse);
1090 return true;
1091 }
1092
1093 /* Generate code to store value from rtx VALUE
1094 into a bit-field within structure STR_RTX
1095 containing BITSIZE bits starting at bit BITNUM.
1096
1097 BITREGION_START is bitpos of the first bitfield in this region.
1098 BITREGION_END is the bitpos of the ending bitfield in this region.
1099 These two fields are 0, if the C++ memory model does not apply,
1100 or we are not interested in keeping track of bitfield regions.
1101
1102 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1103
1104 If REVERSE is true, the store is to be done in reverse order. */
1105
1106 void
1107 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1108 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1109 machine_mode fieldmode,
1110 rtx value, bool reverse)
1111 {
1112 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1113 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1114 scalar_int_mode int_mode;
1115 if (bitsize.is_constant (&ibitsize)
1116 && bitnum.is_constant (&ibitnum)
1117 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1118 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1119 bitregion_start, bitregion_end))
1120 {
1121 /* Storing of a full word can be done with a simple store.
1122 We know here that the field can be accessed with one single
1123 instruction. For targets that support unaligned memory,
1124 an unaligned access may be necessary. */
1125 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1126 {
1127 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1128 ibitnum / BITS_PER_UNIT);
1129 if (reverse)
1130 value = flip_storage_order (int_mode, value);
1131 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1132 emit_move_insn (str_rtx, value);
1133 }
1134 else
1135 {
1136 rtx temp;
1137
1138 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1139 ibitnum, &ibitnum);
1140 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1141 temp = copy_to_reg (str_rtx);
1142 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1143 int_mode, value, reverse, true))
1144 gcc_unreachable ();
1145
1146 emit_move_insn (str_rtx, temp);
1147 }
1148
1149 return;
1150 }
1151
1152 /* Under the C++0x memory model, we must not touch bits outside the
1153 bit region. Adjust the address to start at the beginning of the
1154 bit region. */
1155 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1156 {
1157 scalar_int_mode best_mode;
1158 machine_mode addr_mode = VOIDmode;
1159
1160 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1161 bitnum -= bitregion_start;
1162 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1163 bitregion_end -= bitregion_start;
1164 bitregion_start = 0;
1165 if (bitsize.is_constant (&ibitsize)
1166 && bitnum.is_constant (&ibitnum)
1167 && get_best_mode (ibitsize, ibitnum,
1168 bitregion_start, bitregion_end,
1169 MEM_ALIGN (str_rtx), INT_MAX,
1170 MEM_VOLATILE_P (str_rtx), &best_mode))
1171 addr_mode = best_mode;
1172 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1173 offset, size);
1174 }
1175
1176 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1177 bitregion_start, bitregion_end,
1178 fieldmode, value, reverse, true))
1179 gcc_unreachable ();
1180 }
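/* An illustrative call (hypothetical operands): storing the 8-bit VALUE at
   bit offset 5 of MEM, with no active bit region and no storage-order
   reversal, is written as

     store_bit_field (mem, 8, 5, 0, 0, QImode, value, false);  */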
1181 \f
1182 /* Use shifts and boolean operations to store VALUE into a bit field of
1183 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1184 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1185 the mode of VALUE.
1186
1187 If REVERSE is true, the store is to be done in reverse order. */
1188
1189 static void
1190 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1191 unsigned HOST_WIDE_INT bitsize,
1192 unsigned HOST_WIDE_INT bitnum,
1193 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1194 rtx value, scalar_int_mode value_mode, bool reverse)
1195 {
1196 /* There is a case not handled here:
1197 a structure with a known alignment of just a halfword
1198 and a field split across two aligned halfwords within the structure.
1199 Or likewise a structure with a known alignment of just a byte
1200 and a field split across two bytes.
1201 Such cases are not supposed to be able to occur. */
1202
1203 scalar_int_mode best_mode;
1204 if (MEM_P (op0))
1205 {
1206 unsigned int max_bitsize = BITS_PER_WORD;
1207 scalar_int_mode imode;
1208 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1209 max_bitsize = GET_MODE_BITSIZE (imode);
1210
1211 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1212 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1213 &best_mode))
1214 {
1215 /* The only way this should occur is if the field spans word
1216 boundaries. */
1217 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1218 bitregion_start, bitregion_end,
1219 value, value_mode, reverse);
1220 return;
1221 }
1222
1223 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1224 }
1225 else
1226 best_mode = op0_mode.require ();
1227
1228 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1229 value, value_mode, reverse);
1230 }
1231
1232 /* Helper function for store_fixed_bit_field, stores
1233 the bit field always using MODE, which is the mode of OP0. The other
1234 arguments are as for store_fixed_bit_field. */
1235
1236 static void
1237 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1238 unsigned HOST_WIDE_INT bitsize,
1239 unsigned HOST_WIDE_INT bitnum,
1240 rtx value, scalar_int_mode value_mode, bool reverse)
1241 {
1242 rtx temp;
1243 int all_zero = 0;
1244 int all_one = 0;
1245
1246 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1247 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1248
1249 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1250 /* BITNUM is the distance between our msb
1251 and that of the containing datum.
1252 Convert it to the distance from the lsb. */
1253 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1254
1255 /* Now BITNUM is always the distance between our lsb
1256 and that of OP0. */
1257
1258 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1259 we must first convert its mode to MODE. */
1260
1261 if (CONST_INT_P (value))
1262 {
1263 unsigned HOST_WIDE_INT v = UINTVAL (value);
1264
1265 if (bitsize < HOST_BITS_PER_WIDE_INT)
1266 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1267
1268 if (v == 0)
1269 all_zero = 1;
1270 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1271 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1272 || (bitsize == HOST_BITS_PER_WIDE_INT
1273 && v == HOST_WIDE_INT_M1U))
1274 all_one = 1;
1275
1276 value = lshift_value (mode, v, bitnum);
1277 }
1278 else
1279 {
1280 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1281 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1282
1283 if (value_mode != mode)
1284 value = convert_to_mode (mode, value, 1);
1285
1286 if (must_and)
1287 value = expand_binop (mode, and_optab, value,
1288 mask_rtx (mode, 0, bitsize, 0),
1289 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1290 if (bitnum > 0)
1291 value = expand_shift (LSHIFT_EXPR, mode, value,
1292 bitnum, NULL_RTX, 1);
1293 }
1294
1295 if (reverse)
1296 value = flip_storage_order (mode, value);
1297
1298 /* Now clear the chosen bits in OP0,
1299 except that if VALUE is -1 we need not bother. */
1300 /* We keep the intermediates in registers to allow CSE to combine
1301 consecutive bitfield assignments. */
1302
1303 temp = force_reg (mode, op0);
1304
1305 if (! all_one)
1306 {
1307 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1308 if (reverse)
1309 mask = flip_storage_order (mode, mask);
1310 temp = expand_binop (mode, and_optab, temp, mask,
1311 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1312 temp = force_reg (mode, temp);
1313 }
1314
1315 /* Now logical-or VALUE into OP0, unless it is zero. */
1316
1317 if (! all_zero)
1318 {
1319 temp = expand_binop (mode, ior_optab, temp, value,
1320 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1321 temp = force_reg (mode, temp);
1322 }
1323
1324 if (op0 != temp)
1325 {
1326 op0 = copy_rtx (op0);
1327 emit_move_insn (op0, temp);
1328 }
1329 }
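/* The RTL generated above amounts to the following C-level sketch, for a
   little-endian bit numbering and BITSIZE narrower than the word (the AND
   is skipped when VALUE is all ones, the IOR when it is zero):

     mask = ((HOST_WIDE_INT_1U << bitsize) - 1) << bitnum;
     op0  = (op0 & ~mask) | ((value << bitnum) & mask);  */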
1330 \f
1331 /* Store a bit field that is split across multiple accessible memory objects.
1332
1333 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1334 BITSIZE is the field width; BITPOS the position of its first bit
1335 (within the word).
1336 VALUE is the value to store, which has mode VALUE_MODE.
1337 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1338 a BLKmode MEM.
1339
1340 If REVERSE is true, the store is to be done in reverse order.
1341
1342 This does not yet handle fields wider than BITS_PER_WORD. */
1343
1344 static void
1345 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1346 unsigned HOST_WIDE_INT bitsize,
1347 unsigned HOST_WIDE_INT bitpos,
1348 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1349 rtx value, scalar_int_mode value_mode, bool reverse)
1350 {
1351 unsigned int unit, total_bits, bitsdone = 0;
1352
 1353 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1354 much at a time. */
1355 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1356 unit = BITS_PER_WORD;
1357 else
1358 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1359
1360 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1361 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1362 again, and we will mutually recurse forever. */
1363 if (MEM_P (op0) && op0_mode.exists ())
1364 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1365
1366 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1367 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1368 that VALUE might be a floating-point constant. */
1369 if (CONSTANT_P (value) && !CONST_INT_P (value))
1370 {
1371 rtx word = gen_lowpart_common (word_mode, value);
1372
1373 if (word && (value != word))
1374 value = word;
1375 else
1376 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1377 value_mode = word_mode;
1378 }
1379
1380 total_bits = GET_MODE_BITSIZE (value_mode);
1381
1382 while (bitsdone < bitsize)
1383 {
1384 unsigned HOST_WIDE_INT thissize;
1385 unsigned HOST_WIDE_INT thispos;
1386 unsigned HOST_WIDE_INT offset;
1387 rtx part;
1388
1389 offset = (bitpos + bitsdone) / unit;
1390 thispos = (bitpos + bitsdone) % unit;
1391
1392 /* When region of bytes we can touch is restricted, decrease
1393 UNIT close to the end of the region as needed. If op0 is a REG
1394 or SUBREG of REG, don't do this, as there can't be data races
1395 on a register and we can expand shorter code in some cases. */
1396 if (maybe_ne (bitregion_end, 0U)
1397 && unit > BITS_PER_UNIT
1398 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1399 && !REG_P (op0)
1400 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1401 {
1402 unit = unit / 2;
1403 continue;
1404 }
1405
1406 /* THISSIZE must not overrun a word boundary. Otherwise,
1407 store_fixed_bit_field will call us again, and we will mutually
1408 recurse forever. */
1409 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1410 thissize = MIN (thissize, unit - thispos);
1411
1412 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1413 {
1414 /* Fetch successively less significant portions. */
1415 if (CONST_INT_P (value))
1416 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1417 >> (bitsize - bitsdone - thissize))
1418 & ((HOST_WIDE_INT_1 << thissize) - 1));
1419 /* Likewise, but the source is little-endian. */
1420 else if (reverse)
1421 part = extract_fixed_bit_field (word_mode, value, value_mode,
1422 thissize,
1423 bitsize - bitsdone - thissize,
1424 NULL_RTX, 1, false);
1425 else
1426 /* The args are chosen so that the last part includes the
1427 lsb. Give extract_bit_field the value it needs (with
1428 endianness compensation) to fetch the piece we want. */
1429 part = extract_fixed_bit_field (word_mode, value, value_mode,
1430 thissize,
1431 total_bits - bitsize + bitsdone,
1432 NULL_RTX, 1, false);
1433 }
1434 else
1435 {
1436 /* Fetch successively more significant portions. */
1437 if (CONST_INT_P (value))
1438 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1439 >> bitsdone)
1440 & ((HOST_WIDE_INT_1 << thissize) - 1));
1441 /* Likewise, but the source is big-endian. */
1442 else if (reverse)
1443 part = extract_fixed_bit_field (word_mode, value, value_mode,
1444 thissize,
1445 total_bits - bitsdone - thissize,
1446 NULL_RTX, 1, false);
1447 else
1448 part = extract_fixed_bit_field (word_mode, value, value_mode,
1449 thissize, bitsdone, NULL_RTX,
1450 1, false);
1451 }
1452
1453 /* If OP0 is a register, then handle OFFSET here. */
1454 rtx op0_piece = op0;
1455 opt_scalar_int_mode op0_piece_mode = op0_mode;
1456 if (SUBREG_P (op0) || REG_P (op0))
1457 {
1458 scalar_int_mode imode;
1459 if (op0_mode.exists (&imode)
1460 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1461 {
1462 if (offset)
1463 op0_piece = const0_rtx;
1464 }
1465 else
1466 {
1467 op0_piece = operand_subword_force (op0,
1468 offset * unit / BITS_PER_WORD,
1469 GET_MODE (op0));
1470 op0_piece_mode = word_mode;
1471 }
1472 offset &= BITS_PER_WORD / unit - 1;
1473 }
1474
1475 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1476 it is just an out-of-bounds access. Ignore it. */
1477 if (op0_piece != const0_rtx)
1478 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1479 offset * unit + thispos, bitregion_start,
1480 bitregion_end, part, word_mode, reverse);
1481 bitsdone += thissize;
1482 }
1483 }
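/* For example, with 32-bit words and UNIT == BITS_PER_WORD, storing a
   20-bit field at bit position 28 is done in two pieces: a 4-bit piece at
   bits 28..31 of the first word, then a 16-bit piece at bits 0..15 of the
   second word, each piece going through store_fixed_bit_field.  */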
1484 \f
1485 /* A subroutine of extract_bit_field_1 that converts return value X
1486 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1487 to extract_bit_field. */
1488
1489 static rtx
1490 convert_extracted_bit_field (rtx x, machine_mode mode,
1491 machine_mode tmode, bool unsignedp)
1492 {
1493 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1494 return x;
1495
1496 /* If the x mode is not a scalar integral, first convert to the
1497 integer mode of that size and then access it as a floating-point
1498 value via a SUBREG. */
1499 if (!SCALAR_INT_MODE_P (tmode))
1500 {
1501 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1502 x = convert_to_mode (int_mode, x, unsignedp);
1503 x = force_reg (int_mode, x);
1504 return gen_lowpart (tmode, x);
1505 }
1506
1507 return convert_to_mode (tmode, x, unsignedp);
1508 }
1509
1510 /* Try to use an ext(z)v pattern to extract a field from OP0.
1511 Return the extracted value on success, otherwise return null.
1512 EXTV describes the extraction instruction to use. If OP0_MODE
1513 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1514 The other arguments are as for extract_bit_field. */
1515
1516 static rtx
1517 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1518 opt_scalar_int_mode op0_mode,
1519 unsigned HOST_WIDE_INT bitsize,
1520 unsigned HOST_WIDE_INT bitnum,
1521 int unsignedp, rtx target,
1522 machine_mode mode, machine_mode tmode)
1523 {
1524 class expand_operand ops[4];
1525 rtx spec_target = target;
1526 rtx spec_target_subreg = 0;
1527 scalar_int_mode ext_mode = extv->field_mode;
1528 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1529
1530 if (bitsize == 0 || unit < bitsize)
1531 return NULL_RTX;
1532
1533 if (MEM_P (op0))
1534 /* Get a reference to the first byte of the field. */
1535 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1536 &bitnum);
1537 else
1538 {
1539 /* Convert from counting within OP0 to counting in EXT_MODE. */
1540 if (BYTES_BIG_ENDIAN)
1541 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1542
1543 /* If op0 is a register, we need it in EXT_MODE to make it
1544 acceptable to the format of ext(z)v. */
1545 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1546 return NULL_RTX;
1547 if (REG_P (op0) && op0_mode.require () != ext_mode)
1548 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1549 }
1550
1551 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1552 "backwards" from the size of the unit we are extracting from.
1553 Otherwise, we count bits from the most significant on a
1554 BYTES/BITS_BIG_ENDIAN machine. */
1555
1556 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1557 bitnum = unit - bitsize - bitnum;
1558
1559 if (target == 0)
1560 target = spec_target = gen_reg_rtx (tmode);
1561
1562 if (GET_MODE (target) != ext_mode)
1563 {
1564 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1565 between the mode of the extraction (word_mode) and the target
1566 mode. Instead, create a temporary and use convert_move to set
1567 the target. */
1568 if (REG_P (target)
1569 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1570 {
1571 target = gen_lowpart (ext_mode, target);
1572 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1573 spec_target_subreg = target;
1574 }
1575 else
1576 target = gen_reg_rtx (ext_mode);
1577 }
1578
1579 create_output_operand (&ops[0], target, ext_mode);
1580 create_fixed_operand (&ops[1], op0);
1581 create_integer_operand (&ops[2], bitsize);
1582 create_integer_operand (&ops[3], bitnum);
1583 if (maybe_expand_insn (extv->icode, 4, ops))
1584 {
1585 target = ops[0].value;
1586 if (target == spec_target)
1587 return target;
1588 if (target == spec_target_subreg)
1589 return spec_target;
1590 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1591 }
1592 return NULL_RTX;
1593 }
1594
1595 /* See whether it would be valid to extract the part of OP0 described
1596 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1597 operation. Return the subreg if so, otherwise return null. */
1598
1599 static rtx
1600 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1601 poly_uint64 bitsize, poly_uint64 bitnum)
1602 {
1603 poly_uint64 bytenum;
1604 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1605 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1606 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1607 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1608 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1609 return NULL_RTX;
1610 }
1611
1612 /* A subroutine of extract_bit_field, with the same arguments.
1613 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1614 if we can find no other means of implementing the operation.
 1615 If FALLBACK_P is false, return NULL instead. */
1616
1617 static rtx
1618 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1619 int unsignedp, rtx target, machine_mode mode,
1620 machine_mode tmode, bool reverse, bool fallback_p,
1621 rtx *alt_rtl)
1622 {
1623 rtx op0 = str_rtx;
1624 machine_mode mode1;
1625
1626 if (tmode == VOIDmode)
1627 tmode = mode;
1628
1629 while (GET_CODE (op0) == SUBREG)
1630 {
1631 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1632 op0 = SUBREG_REG (op0);
1633 }
1634
1635 /* If we have an out-of-bounds access to a register, just return an
1636 uninitialized register of the required mode. This can occur if the
1637 source code contains an out-of-bounds access to a small array. */
1638 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1639 return gen_reg_rtx (tmode);
1640
1641 if (REG_P (op0)
1642 && mode == GET_MODE (op0)
1643 && known_eq (bitnum, 0U)
1644 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1645 {
1646 if (reverse)
1647 op0 = flip_storage_order (mode, op0);
1648 /* We're trying to extract a full register from itself. */
1649 return op0;
1650 }
1651
1652 /* First try to check for vector from vector extractions. */
1653 if (VECTOR_MODE_P (GET_MODE (op0))
1654 && !MEM_P (op0)
1655 && VECTOR_MODE_P (tmode)
1656 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1657 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1658 {
1659 machine_mode new_mode = GET_MODE (op0);
1660 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1661 {
1662 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1663 poly_uint64 nunits;
1664 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1665 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1666 || !related_vector_mode (tmode, inner_mode,
1667 nunits).exists (&new_mode)
1668 || maybe_ne (GET_MODE_SIZE (new_mode),
1669 GET_MODE_SIZE (GET_MODE (op0))))
1670 new_mode = VOIDmode;
1671 }
1672 poly_uint64 pos;
1673 if (new_mode != VOIDmode
1674 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1675 != CODE_FOR_nothing)
1676 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1677 {
1678 class expand_operand ops[3];
1679 machine_mode outermode = new_mode;
1680 machine_mode innermode = tmode;
1681 enum insn_code icode
1682 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1683
1684 if (new_mode != GET_MODE (op0))
1685 op0 = gen_lowpart (new_mode, op0);
1686 create_output_operand (&ops[0], target, innermode);
1687 ops[0].target = 1;
1688 create_input_operand (&ops[1], op0, outermode);
1689 create_integer_operand (&ops[2], pos);
1690 if (maybe_expand_insn (icode, 3, ops))
1691 {
1692 if (alt_rtl && ops[0].target)
1693 *alt_rtl = target;
1694 target = ops[0].value;
1695 if (GET_MODE (target) != mode)
1696 return gen_lowpart (tmode, target);
1697 return target;
1698 }
1699 }
1700 }
1701
1702 /* See if we can get a better vector mode before extracting. */
1703 if (VECTOR_MODE_P (GET_MODE (op0))
1704 && !MEM_P (op0)
1705 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1706 {
1707 machine_mode new_mode;
1708
1709 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1710 new_mode = MIN_MODE_VECTOR_FLOAT;
1711 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1712 new_mode = MIN_MODE_VECTOR_FRACT;
1713 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1714 new_mode = MIN_MODE_VECTOR_UFRACT;
1715 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1716 new_mode = MIN_MODE_VECTOR_ACCUM;
1717 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1718 new_mode = MIN_MODE_VECTOR_UACCUM;
1719 else
1720 new_mode = MIN_MODE_VECTOR_INT;
1721
1722 FOR_EACH_MODE_FROM (new_mode, new_mode)
1723 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1724 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1725 && targetm.vector_mode_supported_p (new_mode))
1726 break;
1727 if (new_mode != VOIDmode)
1728 op0 = gen_lowpart (new_mode, op0);
1729 }
1730
1731 /* Use vec_extract patterns for extracting parts of vectors whenever
1732 available. If that fails, see whether the current modes and bitregion
1733 give a natural subreg. */
1734 machine_mode outermode = GET_MODE (op0);
1735 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1736 {
1737 scalar_mode innermode = GET_MODE_INNER (outermode);
1738 enum insn_code icode
1739 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1740 poly_uint64 pos;
1741 if (icode != CODE_FOR_nothing
1742 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1743 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1744 {
1745 class expand_operand ops[3];
1746
1747 create_output_operand (&ops[0], target, innermode);
1748 ops[0].target = 1;
1749 create_input_operand (&ops[1], op0, outermode);
1750 create_integer_operand (&ops[2], pos);
1751 if (maybe_expand_insn (icode, 3, ops))
1752 {
1753 if (alt_rtl && ops[0].target)
1754 *alt_rtl = target;
1755 target = ops[0].value;
1756 if (GET_MODE (target) != mode)
1757 return gen_lowpart (tmode, target);
1758 return target;
1759 }
1760 }
1761 /* Using subregs is useful if we're extracting one register vector
1762 from a multi-register vector. extract_bit_field_as_subreg checks
1763 for valid bitsize and bitnum, so we don't need to do that here. */
1764 if (VECTOR_MODE_P (mode))
1765 {
1766 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1767 if (sub)
1768 return sub;
1769 }
1770 }
1771
1772 /* Make sure we are playing with integral modes. Pun with subregs
1773 if we aren't. */
1774 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1775 scalar_int_mode imode;
1776 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1777 {
1778 if (MEM_P (op0))
1779 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1780 0, MEM_SIZE (op0));
1781 else if (op0_mode.exists (&imode))
1782 {
1783 op0 = gen_lowpart (imode, op0);
1784
1785 /* If we got a SUBREG, force it into a register since we
1786 aren't going to be able to do another SUBREG on it. */
1787 if (GET_CODE (op0) == SUBREG)
1788 op0 = force_reg (imode, op0);
1789 }
1790 else
1791 {
1792 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1793 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1794 emit_move_insn (mem, op0);
1795 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1796 }
1797 }
1798
1799 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1800 If that's wrong, the solution is to test for it and set TARGET to 0
1801 if needed. */
1802
1803 /* Get the mode of the field to use for atomic access or subreg
1804 conversion. */
1805 if (!SCALAR_INT_MODE_P (tmode)
1806 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1807 mode1 = mode;
1808 gcc_assert (mode1 != BLKmode);
1809
1810 /* Extraction of a full MODE1 value can be done with a subreg as long
1811 as the least significant bit of the value is the least significant
1812 bit of either OP0 or a word of OP0. */
1813 if (!MEM_P (op0) && !reverse)
1814 {
1815 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1816 if (sub)
1817 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1818 }
1819
1820 /* Extraction of a full MODE1 value can be done with a load as long as
1821 the field is on a byte boundary and is sufficiently aligned. */
1822 poly_uint64 bytenum;
1823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1824 {
1825 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1826 if (reverse)
1827 op0 = flip_storage_order (mode1, op0);
1828 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1829 }
1830
1831 /* If we have a memory source and a non-constant bit offset, restrict
1832 the memory to the referenced bytes. This is a worst-case fallback
1833 but is useful for things like vector booleans. */
1834 if (MEM_P (op0) && !bitnum.is_constant ())
1835 {
1836 bytenum = bits_to_bytes_round_down (bitnum);
1837 bitnum = num_trailing_bits (bitnum);
1838 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1839 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1840 op0_mode = opt_scalar_int_mode ();
1841 }
1842
1843 /* It's possible we'll need to handle other cases here for
1844 polynomial bitnum and bitsize. */
1845
1846 /* From here on we need to be looking at a fixed-size extraction. */
1847 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1848 bitnum.to_constant (), unsignedp,
1849 target, mode, tmode, reverse, fallback_p);
1850 }
1851
1852 /* Subroutine of extract_bit_field_1, with the same arguments, except
1853 that BITSIZE and BITNUM are constant. Handle cases specific to
1854 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1855 otherwise OP0 is a BLKmode MEM. */
1856
1857 static rtx
1858 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1859 unsigned HOST_WIDE_INT bitsize,
1860 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1861 rtx target, machine_mode mode, machine_mode tmode,
1862 bool reverse, bool fallback_p)
1863 {
1864 /* Handle fields bigger than a word. */
1865
1866 if (bitsize > BITS_PER_WORD)
1867 {
1868 /* Here we transfer the words of the field
1869 in the order least significant first.
1870 This is because the most significant word is the one which may
1871 be less than full. */
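      /* For example, a 40-bit field on a target with 32-bit words needs
	 nwords == 2: a full low-order word is moved first, then the
	 remaining 8-bit high-order part.  */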
1872
1873 const bool backwards = WORDS_BIG_ENDIAN;
1874 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1875 unsigned int i;
1876 rtx_insn *last;
1877
1878 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1879 target = gen_reg_rtx (mode);
1880
1881 /* In case we're about to clobber a base register or something
1882 (see gcc.c-torture/execute/20040625-1.c). */
1883 if (reg_mentioned_p (target, op0))
1884 target = gen_reg_rtx (mode);
1885
1886 /* Indicate for flow that the entire target reg is being set. */
1887 emit_clobber (target);
1888
1889 /* The mode must be fixed-size, since extract_bit_field_1 handles
1890 extractions from variable-sized objects before calling this
1891 function. */
1892 unsigned int target_size
1893 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1894 last = get_last_insn ();
1895 for (i = 0; i < nwords; i++)
1896 {
1897 /* If I is 0, use the low-order word in both field and target;
1898 if I is 1, use the next to lowest word; and so on. */
1899 /* Word number in TARGET to use. */
1900 unsigned int wordnum
1901 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1902 /* Offset from start of field in OP0. */
1903 unsigned int bit_offset = (backwards ^ reverse
1904 ? MAX ((int) bitsize - ((int) i + 1)
1905 * BITS_PER_WORD,
1906 0)
1907 : (int) i * BITS_PER_WORD);
1908 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1909 rtx result_part
1910 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1911 bitsize - i * BITS_PER_WORD),
1912 bitnum + bit_offset, 1, target_part,
1913 mode, word_mode, reverse, fallback_p, NULL);
1914
1915 gcc_assert (target_part);
1916 if (!result_part)
1917 {
1918 delete_insns_since (last);
1919 return NULL;
1920 }
1921
1922 if (result_part != target_part)
1923 emit_move_insn (target_part, result_part);
1924 }
1925
1926 if (unsignedp)
1927 {
1928 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1929 need to be zeroed out. */
1930 if (target_size > nwords * UNITS_PER_WORD)
1931 {
1932 unsigned int i, total_words;
1933
1934 total_words = target_size / UNITS_PER_WORD;
1935 for (i = nwords; i < total_words; i++)
1936 emit_move_insn
1937 (operand_subword (target,
1938 backwards ? total_words - i - 1 : i,
1939 1, VOIDmode),
1940 const0_rtx);
1941 }
1942 return target;
1943 }
1944
1945 /* Signed bit field: sign-extend with two arithmetic shifts. */
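      /* For example, an 8-bit signed field sitting in the low bits of a
	 32-bit MODE is shifted left by 24 so that its sign bit becomes the
	 msb, then arithmetic-shifted right by 24, which replicates that
	 sign bit through the upper 24 bits.  */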
1946 target = expand_shift (LSHIFT_EXPR, mode, target,
1947 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1948 return expand_shift (RSHIFT_EXPR, mode, target,
1949 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1950 }
1951
1952 /* If OP0 is a multi-word register, narrow it to the affected word.
1953 If the region spans two words, defer to extract_split_bit_field. */
1954 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1955 {
1956 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1957 {
1958 if (!fallback_p)
1959 return NULL_RTX;
1960 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1961 unsignedp, reverse);
1962 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1963 }
1964 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1965 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1966 op0_mode = word_mode;
1967 bitnum %= BITS_PER_WORD;
1968 }
1969
1970 /* From here on we know the desired field is smaller than a word.
1971 If OP0 is a register, it too fits within a word. */
1972 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1973 extraction_insn extv;
1974 if (!MEM_P (op0)
1975 && !reverse
1976 /* ??? We could limit the structure size to the part of OP0 that
1977 contains the field, with appropriate checks for endianness
1978 and TARGET_TRULY_NOOP_TRUNCATION. */
1979 && get_best_reg_extraction_insn (&extv, pattern,
1980 GET_MODE_BITSIZE (op0_mode.require ()),
1981 tmode))
1982 {
1983 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1984 bitsize, bitnum,
1985 unsignedp, target, mode,
1986 tmode);
1987 if (result)
1988 return result;
1989 }
1990
1991 /* If OP0 is a memory, try copying it to a register and seeing if a
1992 cheap register alternative is available. */
1993 if (MEM_P (op0) && !reverse)
1994 {
1995 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1996 tmode))
1997 {
1998 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1999 bitsize, bitnum,
2000 unsignedp, target, mode,
2001 tmode);
2002 if (result)
2003 return result;
2004 }
2005
2006 rtx_insn *last = get_last_insn ();
2007
2008 /* Try loading part of OP0 into a register and extracting the
2009 bitfield from that. */
2010 unsigned HOST_WIDE_INT bitpos;
2011 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2012 0, 0, tmode, &bitpos);
2013 if (xop0)
2014 {
2015 xop0 = copy_to_reg (xop0);
2016 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2017 unsignedp, target,
2018 mode, tmode, reverse, false, NULL);
2019 if (result)
2020 return result;
2021 delete_insns_since (last);
2022 }
2023 }
2024
2025 if (!fallback_p)
2026 return NULL;
2027
2028 /* Find a correspondingly-sized integer field, so we can apply
2029 shifts and masks to it. */
2030 scalar_int_mode int_mode;
2031 if (!int_mode_for_mode (tmode).exists (&int_mode))
2032 /* If this fails, we should probably push op0 out to memory and then
2033 do a load. */
2034 int_mode = int_mode_for_mode (mode).require ();
2035
2036 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2037 bitnum, target, unsignedp, reverse);
2038
2039 /* Complex values must be reversed piecewise, so we need to undo the global
2040 reversal, convert to the complex mode and reverse again. */
2041 if (reverse && COMPLEX_MODE_P (tmode))
2042 {
2043 target = flip_storage_order (int_mode, target);
2044 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2045 target = flip_storage_order (tmode, target);
2046 }
2047 else
2048 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2049
2050 return target;
2051 }
2052
2053 /* Generate code to extract a bit-field from STR_RTX
2054 containing BITSIZE bits, starting at BITNUM,
2055 and put it in TARGET if possible (if TARGET is nonzero).
2056 Regardless of TARGET, we return the rtx for where the value is placed.
2057
2058 STR_RTX is the structure containing the byte (a REG or MEM).
2059 UNSIGNEDP is nonzero if this is an unsigned bit field.
2060 MODE is the natural mode of the field value once extracted.
2061 TMODE is the mode the caller would like the value to have;
2062 but the value may be returned with type MODE instead.
2063
2064 If REVERSE is true, the extraction is to be done in reverse order.
2065
2066 If a TARGET is specified and we can store in it at no extra cost,
2067 we do so, and return TARGET.
2068 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2069 if they are equally easy.
2070
2071 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2072 then *ALT_RTL is set to TARGET (before legitimization). */
2073
2074 rtx
2075 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2076 int unsignedp, rtx target, machine_mode mode,
2077 machine_mode tmode, bool reverse, rtx *alt_rtl)
2078 {
2079 machine_mode mode1;
2080
2081 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2082 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2083 mode1 = GET_MODE (str_rtx);
2084 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2085 mode1 = GET_MODE (target);
2086 else
2087 mode1 = tmode;
2088
2089 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2090 scalar_int_mode int_mode;
2091 if (bitsize.is_constant (&ibitsize)
2092 && bitnum.is_constant (&ibitnum)
2093 && is_a <scalar_int_mode> (mode1, &int_mode)
2094 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2095 int_mode, 0, 0))
2096 {
2097 /* Extraction of a full INT_MODE value can be done with a simple load.
2098 We know here that the field can be accessed with one single
2099 instruction. For targets that support unaligned memory,
2100 an unaligned access may be necessary. */
2101 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2102 {
2103 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2104 ibitnum / BITS_PER_UNIT);
2105 if (reverse)
2106 result = flip_storage_order (int_mode, result);
2107 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2108 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2109 }
2110
2111 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2112 &ibitnum);
2113 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2114 str_rtx = copy_to_reg (str_rtx);
2115 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2116 target, mode, tmode, reverse, true, alt_rtl);
2117 }
2118
2119 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2120 target, mode, tmode, reverse, true, alt_rtl);
2121 }
2122 \f
2123 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2124 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2125 otherwise OP0 is a BLKmode MEM.
2126
2127 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2128 If REVERSE is true, the extraction is to be done in reverse order.
2129
2130 If TARGET is nonzero, attempts to store the value there
2131 and return TARGET, but this is not guaranteed.
2132 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2133
2134 static rtx
2135 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2136 opt_scalar_int_mode op0_mode,
2137 unsigned HOST_WIDE_INT bitsize,
2138 unsigned HOST_WIDE_INT bitnum, rtx target,
2139 int unsignedp, bool reverse)
2140 {
2141 scalar_int_mode mode;
2142 if (MEM_P (op0))
2143 {
2144 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2145 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2146 /* The only way this should occur is if the field spans word
2147 boundaries. */
2148 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2149 unsignedp, reverse);
2150
2151 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2152 }
2153 else
2154 mode = op0_mode.require ();
2155
2156 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2157 target, unsignedp, reverse);
2158 }
2159
2160 /* Helper function for extract_fixed_bit_field, extracts
2161 the bit field always using MODE, which is the mode of OP0.
2162 The other arguments are as for extract_fixed_bit_field. */
2163
2164 static rtx
2165 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2166 unsigned HOST_WIDE_INT bitsize,
2167 unsigned HOST_WIDE_INT bitnum, rtx target,
2168 int unsignedp, bool reverse)
2169 {
2170 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2171 for invalid input, such as extract equivalent of f5 from
2172 gcc.dg/pr48335-2.c. */
2173
2174 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2175 /* BITNUM is the distance between our msb and that of OP0.
2176 Convert it to the distance from the lsb. */
2177 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
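    /* For example, with a 32-bit MODE, BITSIZE == 8 and a big-endian
       BITNUM of 4, the field's lsb ends up at bit 32 - 8 - 4 == 20 when
       counted from OP0's lsb.  */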
2178
2179 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2180 We have reduced the big-endian case to the little-endian case. */
2181 if (reverse)
2182 op0 = flip_storage_order (mode, op0);
2183
2184 if (unsignedp)
2185 {
2186 if (bitnum)
2187 {
2188 /* If the field does not already start at the lsb,
2189 shift it so it does. */
2190 /* Maybe propagate the target for the shift. */
2191 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2192 if (tmode != mode)
2193 subtarget = 0;
2194 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2195 }
2196 /* Convert the value to the desired mode. TMODE must also be a
2197 scalar integer for this conversion to make sense, since we
2198 shouldn't reinterpret the bits. */
2199 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2200 if (mode != new_mode)
2201 op0 = convert_to_mode (new_mode, op0, 1);
2202
2203 /* Unless the msb of the field used to be the msb when we shifted,
2204 mask out the upper bits. */
2205
2206 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2207 return expand_binop (new_mode, and_optab, op0,
2208 mask_rtx (new_mode, 0, bitsize, 0),
2209 target, 1, OPTAB_LIB_WIDEN);
2210 return op0;
2211 }
2212
2213 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2214 then arithmetic-shift its lsb to the lsb of the word. */
2215 op0 = force_reg (mode, op0);
2216
2217 /* Find the narrowest integer mode that contains the field. */
2218
2219 opt_scalar_int_mode mode_iter;
2220 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2221 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2222 break;
2223
2224 mode = mode_iter.require ();
2225 op0 = convert_to_mode (mode, op0, 0);
2226
2227 if (mode != tmode)
2228 target = 0;
2229
2230 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2231 {
2232 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2233 /* Maybe propagate the target for the shift. */
2234 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2235 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2236 }
2237
2238 return expand_shift (RSHIFT_EXPR, mode, op0,
2239 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2240 }
2241
2242 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2243 VALUE << BITPOS. */
2244
2245 static rtx
2246 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2247 int bitpos)
2248 {
2249 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2250 }
2251 \f
2252 /* Extract a bit field that is split across two words
2253 and return an RTX for the result.
2254
2255 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2256 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2257 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2258 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2259 a BLKmode MEM.
2260
2261 If REVERSE is true, the extraction is to be done in reverse order. */
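/* Worked example (illustrative only): with 32-bit words, a 16-bit field
   starting at bit 24 of a REG is read below as an 8-bit piece from word 0
   and an 8-bit piece from word 1, each shifted into position and combined
   with IOR.  */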
2262
2263 static rtx
2264 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2265 unsigned HOST_WIDE_INT bitsize,
2266 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2267 bool reverse)
2268 {
2269 unsigned int unit;
2270 unsigned int bitsdone = 0;
2271 rtx result = NULL_RTX;
2272 int first = 1;
2273
2274 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2275 much at a time. */
2276 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2277 unit = BITS_PER_WORD;
2278 else
2279 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2280
2281 while (bitsdone < bitsize)
2282 {
2283 unsigned HOST_WIDE_INT thissize;
2284 rtx part;
2285 unsigned HOST_WIDE_INT thispos;
2286 unsigned HOST_WIDE_INT offset;
2287
2288 offset = (bitpos + bitsdone) / unit;
2289 thispos = (bitpos + bitsdone) % unit;
2290
2291 /* THISSIZE must not overrun a word boundary. Otherwise,
2292 extract_fixed_bit_field will call us again, and we will mutually
2293 recurse forever. */
2294 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2295 thissize = MIN (thissize, unit - thispos);
2296
2297 /* If OP0 is a register, then handle OFFSET here. */
2298 rtx op0_piece = op0;
2299 opt_scalar_int_mode op0_piece_mode = op0_mode;
2300 if (SUBREG_P (op0) || REG_P (op0))
2301 {
2302 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2303 op0_piece_mode = word_mode;
2304 offset = 0;
2305 }
2306
2307 /* Extract the parts in bit-counting order,
2308 whose meaning is determined by BYTES_BIG_ENDIAN.
2309 OFFSET is in UNITs, and UNIT is in bits. */
2310 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2311 thissize, offset * unit + thispos,
2312 0, 1, reverse);
2313 bitsdone += thissize;
2314
2315 /* Shift this part into place for the result. */
2316 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2317 {
2318 if (bitsize != bitsdone)
2319 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2320 bitsize - bitsdone, 0, 1);
2321 }
2322 else
2323 {
2324 if (bitsdone != thissize)
2325 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2326 bitsdone - thissize, 0, 1);
2327 }
2328
2329 if (first)
2330 result = part;
2331 else
2332 /* Combine the parts with bitwise or. This works
2333 because we extracted each part as an unsigned bit field. */
2334 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2335 OPTAB_LIB_WIDEN);
2336
2337 first = 0;
2338 }
2339
2340 /* Unsigned bit field: we are done. */
2341 if (unsignedp)
2342 return result;
2343 /* Signed bit field: sign-extend with two arithmetic shifts. */
2344 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2345 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2346 return expand_shift (RSHIFT_EXPR, word_mode, result,
2347 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2348 }
2349 \f
2350 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2351 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2352 MODE, fill the upper bits with zeros. Fail if the layout of either
2353 mode is unknown (as for CC modes) or if the extraction would involve
2354 unprofitable mode punning. Return the value on success, otherwise
2355 return null.
2356
2357 This is different from gen_lowpart* in these respects:
2358
2359 - the returned value must always be considered an rvalue
2360
2361 - when MODE is wider than SRC_MODE, the extraction involves
2362 a zero extension
2363
2364 - when MODE is smaller than SRC_MODE, the extraction involves
2365 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2366
2367 In other words, this routine performs a computation, whereas the
2368 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2369 operations. */
2370
2371 rtx
2372 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2373 {
2374 scalar_int_mode int_mode, src_int_mode;
2375
2376 if (mode == src_mode)
2377 return src;
2378
2379 if (CONSTANT_P (src))
2380 {
2381 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2382 fails, it will happily create (subreg (symbol_ref)) or similar
2383 invalid SUBREGs. */
2384 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2385 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2386 if (ret)
2387 return ret;
2388
2389 if (GET_MODE (src) == VOIDmode
2390 || !validate_subreg (mode, src_mode, src, byte))
2391 return NULL_RTX;
2392
2393 src = force_reg (GET_MODE (src), src);
2394 return gen_rtx_SUBREG (mode, src, byte);
2395 }
2396
2397 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2398 return NULL_RTX;
2399
2400 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2401 && targetm.modes_tieable_p (mode, src_mode))
2402 {
2403 rtx x = gen_lowpart_common (mode, src);
2404 if (x)
2405 return x;
2406 }
2407
2408 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2409 || !int_mode_for_mode (mode).exists (&int_mode))
2410 return NULL_RTX;
2411
2412 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2413 return NULL_RTX;
2414 if (!targetm.modes_tieable_p (int_mode, mode))
2415 return NULL_RTX;
2416
2417 src = gen_lowpart (src_int_mode, src);
2418 if (!validate_subreg (int_mode, src_int_mode, src,
2419 subreg_lowpart_offset (int_mode, src_int_mode)))
2420 return NULL_RTX;
2421
2422 src = convert_modes (int_mode, src_int_mode, src, true);
2423 src = gen_lowpart (mode, src);
2424 return src;
2425 }
2426 \f
2427 /* Add INC into TARGET. */
2428
2429 void
2430 expand_inc (rtx target, rtx inc)
2431 {
2432 rtx value = expand_binop (GET_MODE (target), add_optab,
2433 target, inc,
2434 target, 0, OPTAB_LIB_WIDEN);
2435 if (value != target)
2436 emit_move_insn (target, value);
2437 }
2438
2439 /* Subtract DEC from TARGET. */
2440
2441 void
2442 expand_dec (rtx target, rtx dec)
2443 {
2444 rtx value = expand_binop (GET_MODE (target), sub_optab,
2445 target, dec,
2446 target, 0, OPTAB_LIB_WIDEN);
2447 if (value != target)
2448 emit_move_insn (target, value);
2449 }
2450 \f
2451 /* Output a shift instruction for expression code CODE,
2452 with SHIFTED being the rtx for the value to shift,
2453 and AMOUNT the rtx for the amount to shift by.
2454 Store the result in the rtx TARGET, if that is convenient.
2455 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2456 Return the rtx for where the value is.
2457 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2458 in which case 0 is returned. */
2459
2460 static rtx
2461 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2462 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2463 {
2464 rtx op1, temp = 0;
2465 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2466 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2467 optab lshift_optab = ashl_optab;
2468 optab rshift_arith_optab = ashr_optab;
2469 optab rshift_uns_optab = lshr_optab;
2470 optab lrotate_optab = rotl_optab;
2471 optab rrotate_optab = rotr_optab;
2472 machine_mode op1_mode;
2473 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2474 int attempt;
2475 bool speed = optimize_insn_for_speed_p ();
2476
2477 op1 = amount;
2478 op1_mode = GET_MODE (op1);
2479
2480 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2481 shift amount is a vector, use the vector/vector shift patterns. */
2482 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2483 {
2484 lshift_optab = vashl_optab;
2485 rshift_arith_optab = vashr_optab;
2486 rshift_uns_optab = vlshr_optab;
2487 lrotate_optab = vrotl_optab;
2488 rrotate_optab = vrotr_optab;
2489 }
2490
2491 /* Previously detected shift-counts computed by NEGATE_EXPR
2492 and shifted in the other direction; but that does not work
2493 on all machines. */
2494
2495 if (SHIFT_COUNT_TRUNCATED)
2496 {
2497 if (CONST_INT_P (op1)
2498 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2499 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2500 op1 = gen_int_shift_amount (mode,
2501 (unsigned HOST_WIDE_INT) INTVAL (op1)
2502 % GET_MODE_BITSIZE (scalar_mode));
2503 else if (GET_CODE (op1) == SUBREG
2504 && subreg_lowpart_p (op1)
2505 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2506 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2507 op1 = SUBREG_REG (op1);
2508 }
2509
2510 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2511 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2512 bitsize - 1, use the other rotate direction with an amount of
2513 1 .. bitsize / 2 - 1 instead. */
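  /* For example, a 32-bit rotate by 24 in one direction becomes a rotate
     by 32 - 24 == 8 in the other direction.  */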
2514 if (rotate
2515 && CONST_INT_P (op1)
2516 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2517 GET_MODE_BITSIZE (scalar_mode) - 1))
2518 {
2519 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2520 - INTVAL (op1)));
2521 left = !left;
2522 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2523 }
2524
2525 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2526 Note that this is not the case for bigger values. For instance a rotation
2527 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2528 0x04030201 (bswapsi). */
2529 if (rotate
2530 && CONST_INT_P (op1)
2531 && INTVAL (op1) == BITS_PER_UNIT
2532 && GET_MODE_SIZE (scalar_mode) == 2
2533 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2534 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2535
2536 if (op1 == const0_rtx)
2537 return shifted;
2538
2539 /* Check whether it's cheaper to implement a left shift by a constant
2540 bit count by a sequence of additions. */
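  /* For example, x << 2 can be computed as two successive additions of the
     value to itself (x + x, then that result added to itself) when the two
     adds are cheaper than the shift.  */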
2541 if (code == LSHIFT_EXPR
2542 && CONST_INT_P (op1)
2543 && INTVAL (op1) > 0
2544 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2545 && INTVAL (op1) < MAX_BITS_PER_WORD
2546 && (shift_cost (speed, mode, INTVAL (op1))
2547 > INTVAL (op1) * add_cost (speed, mode))
2548 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2549 {
2550 int i;
2551 for (i = 0; i < INTVAL (op1); i++)
2552 {
2553 temp = force_reg (mode, shifted);
2554 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2555 unsignedp, OPTAB_LIB_WIDEN);
2556 }
2557 return shifted;
2558 }
2559
2560 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2561 {
2562 enum optab_methods methods;
2563
2564 if (attempt == 0)
2565 methods = OPTAB_DIRECT;
2566 else if (attempt == 1)
2567 methods = OPTAB_WIDEN;
2568 else
2569 methods = OPTAB_LIB_WIDEN;
2570
2571 if (rotate)
2572 {
2573 /* Widening does not work for rotation. */
2574 if (methods == OPTAB_WIDEN)
2575 continue;
2576 else if (methods == OPTAB_LIB_WIDEN)
2577 {
2578 /* If we have been unable to open-code this by a rotation,
2579 do it as the IOR of two shifts. I.e., to rotate A
2580 by N bits, compute
2581 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2582 where C is the bitsize of A.
2583
2584 It is theoretically possible that the target machine might
2585 not be able to perform either shift and hence we would
2586 be making two libcalls rather than just the one for the
2587 shift (similarly if IOR could not be done). We will allow
2588 this extremely unlikely lossage to avoid complicating the
2589 code below. */
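	      /* For example, rotating a 32-bit value A left by 8 becomes
		 (A << 8) | ((unsigned) A >> 24); the (-N) & (C - 1) mask
		 keeps the right-shift amount in range when N is a
		 variable.  */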
2590
2591 rtx subtarget = target == shifted ? 0 : target;
2592 rtx new_amount, other_amount;
2593 rtx temp1;
2594
2595 new_amount = op1;
2596 if (op1 == const0_rtx)
2597 return shifted;
2598 else if (CONST_INT_P (op1))
2599 other_amount = gen_int_shift_amount
2600 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2601 else
2602 {
2603 other_amount
2604 = simplify_gen_unary (NEG, GET_MODE (op1),
2605 op1, GET_MODE (op1));
2606 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2607 other_amount
2608 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2609 gen_int_mode (mask, GET_MODE (op1)));
2610 }
2611
2612 shifted = force_reg (mode, shifted);
2613
2614 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2615 mode, shifted, new_amount, 0, 1);
2616 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2617 mode, shifted, other_amount,
2618 subtarget, 1);
2619 return expand_binop (mode, ior_optab, temp, temp1, target,
2620 unsignedp, methods);
2621 }
2622
2623 temp = expand_binop (mode,
2624 left ? lrotate_optab : rrotate_optab,
2625 shifted, op1, target, unsignedp, methods);
2626 }
2627 else if (unsignedp)
2628 temp = expand_binop (mode,
2629 left ? lshift_optab : rshift_uns_optab,
2630 shifted, op1, target, unsignedp, methods);
2631
2632 /* Do arithmetic shifts.
2633 Also, if we are going to widen the operand, we can just as well
2634 use an arithmetic right-shift instead of a logical one. */
2635 if (temp == 0 && ! rotate
2636 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2637 {
2638 enum optab_methods methods1 = methods;
2639
2640 /* If trying to widen a log shift to an arithmetic shift,
2641 don't accept an arithmetic shift of the same size. */
2642 if (unsignedp)
2643 methods1 = OPTAB_MUST_WIDEN;
2644
2645 /* Arithmetic shift */
2646
2647 temp = expand_binop (mode,
2648 left ? lshift_optab : rshift_arith_optab,
2649 shifted, op1, target, unsignedp, methods1);
2650 }
2651
2652 /* We used to try extzv here for logical right shifts, but that was
2653 only useful for one machine, the VAX, and caused poor code
2654 generation there for lshrdi3, so the code was deleted and a
2655 define_expand for lshrsi3 was added to vax.md. */
2656 }
2657
2658 gcc_assert (temp != NULL_RTX || may_fail);
2659 return temp;
2660 }
2661
2662 /* Output a shift instruction for expression code CODE,
2663 with SHIFTED being the rtx for the value to shift,
2664 and AMOUNT the amount to shift by.
2665 Store the result in the rtx TARGET, if that is convenient.
2666 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2667 Return the rtx for where the value is. */
2668
2669 rtx
2670 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2671 poly_int64 amount, rtx target, int unsignedp)
2672 {
2673 return expand_shift_1 (code, mode, shifted,
2674 gen_int_shift_amount (mode, amount),
2675 target, unsignedp);
2676 }
2677
2678 /* Likewise, but return 0 if that cannot be done. */
2679
2680 static rtx
2681 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2682 int amount, rtx target, int unsignedp)
2683 {
2684 return expand_shift_1 (code, mode,
2685 shifted, GEN_INT (amount), target, unsignedp, true);
2686 }
2687
2688 /* Output a shift instruction for expression code CODE,
2689 with SHIFTED being the rtx for the value to shift,
2690 and AMOUNT the tree for the amount to shift by.
2691 Store the result in the rtx TARGET, if that is convenient.
2692 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2693 Return the rtx for where the value is. */
2694
2695 rtx
2696 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2697 tree amount, rtx target, int unsignedp)
2698 {
2699 return expand_shift_1 (code, mode,
2700 shifted, expand_normal (amount), target, unsignedp);
2701 }
2702
2703 \f
2704 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2705 const struct mult_cost *, machine_mode mode);
2706 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2707 const struct algorithm *, enum mult_variant);
2708 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2709 static rtx extract_high_half (scalar_int_mode, rtx);
2710 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2711 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2712 int, int);
2713 /* Compute and return the best algorithm for multiplying by T.
2714 The algorithm must cost less than cost_limit.
2715 If retval.cost >= COST_LIMIT, no algorithm was found and all
2716 other fields of the returned struct are undefined.
2717 MODE is the machine mode of the multiplication. */
2718
2719 static void
2720 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2721 const struct mult_cost *cost_limit, machine_mode mode)
2722 {
2723 int m;
2724 struct algorithm *alg_in, *best_alg;
2725 struct mult_cost best_cost;
2726 struct mult_cost new_limit;
2727 int op_cost, op_latency;
2728 unsigned HOST_WIDE_INT orig_t = t;
2729 unsigned HOST_WIDE_INT q;
2730 int maxm, hash_index;
2731 bool cache_hit = false;
2732 enum alg_code cache_alg = alg_zero;
2733 bool speed = optimize_insn_for_speed_p ();
2734 scalar_int_mode imode;
2735 struct alg_hash_entry *entry_ptr;
2736
2737 /* Indicate that no algorithm is yet found. If no algorithm
2738 is found, this value will be returned and indicate failure. */
2739 alg_out->cost.cost = cost_limit->cost + 1;
2740 alg_out->cost.latency = cost_limit->latency + 1;
2741
2742 if (cost_limit->cost < 0
2743 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2744 return;
2745
2746 /* Be prepared for vector modes. */
2747 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2748
2749 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2750
2751 /* Restrict the bits of "t" to the multiplication's mode. */
2752 t &= GET_MODE_MASK (imode);
2753
2754 /* t == 1 can be done in zero cost. */
2755 if (t == 1)
2756 {
2757 alg_out->ops = 1;
2758 alg_out->cost.cost = 0;
2759 alg_out->cost.latency = 0;
2760 alg_out->op[0] = alg_m;
2761 return;
2762 }
2763
2764 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2765 fail now. */
2766 if (t == 0)
2767 {
2768 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2769 return;
2770 else
2771 {
2772 alg_out->ops = 1;
2773 alg_out->cost.cost = zero_cost (speed);
2774 alg_out->cost.latency = zero_cost (speed);
2775 alg_out->op[0] = alg_zero;
2776 return;
2777 }
2778 }
2779
2780 /* We'll be needing a couple extra algorithm structures now. */
2781
2782 alg_in = XALLOCA (struct algorithm);
2783 best_alg = XALLOCA (struct algorithm);
2784 best_cost = *cost_limit;
2785
2786 /* Compute the hash index. */
2787 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2788
2789 /* See if we already know what to do for T. */
2790 entry_ptr = alg_hash_entry_ptr (hash_index);
2791 if (entry_ptr->t == t
2792 && entry_ptr->mode == mode
2793 && entry_ptr->speed == speed
2794 && entry_ptr->alg != alg_unknown)
2795 {
2796 cache_alg = entry_ptr->alg;
2797
2798 if (cache_alg == alg_impossible)
2799 {
2800 /* The cache tells us that it's impossible to synthesize
2801 multiplication by T within entry_ptr->cost. */
2802 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2803 /* COST_LIMIT is at least as restrictive as the one
2804 recorded in the hash table, in which case we have no
2805 hope of synthesizing a multiplication. Just
2806 return. */
2807 return;
2808
2809 /* If we get here, COST_LIMIT is less restrictive than the
2810 one recorded in the hash table, so we may be able to
2811 synthesize a multiplication. Proceed as if we didn't
2812 have the cache entry. */
2813 }
2814 else
2815 {
2816 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2817 /* The cached algorithm shows that this multiplication
2818 requires more cost than COST_LIMIT. Just return. This
2819 way, we don't clobber this cache entry with
2820 alg_impossible but retain useful information. */
2821 return;
2822
2823 cache_hit = true;
2824
2825 switch (cache_alg)
2826 {
2827 case alg_shift:
2828 goto do_alg_shift;
2829
2830 case alg_add_t_m2:
2831 case alg_sub_t_m2:
2832 goto do_alg_addsub_t_m2;
2833
2834 case alg_add_factor:
2835 case alg_sub_factor:
2836 goto do_alg_addsub_factor;
2837
2838 case alg_add_t2_m:
2839 goto do_alg_add_t2_m;
2840
2841 case alg_sub_t2_m:
2842 goto do_alg_sub_t2_m;
2843
2844 default:
2845 gcc_unreachable ();
2846 }
2847 }
2848 }
2849
2850 /* If we have a group of zero bits at the low-order part of T, try
2851 multiplying by the remaining bits and then doing a shift. */
2852
2853 if ((t & 1) == 0)
2854 {
2855 do_alg_shift:
2856 m = ctz_or_zero (t); /* m = number of low zero bits */
2857 if (m < maxm)
2858 {
2859 q = t >> m;
2860 /* The function expand_shift will choose between a shift and
2861 a sequence of additions, so the observed cost is given as
2862 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2863 op_cost = m * add_cost (speed, mode);
2864 if (shift_cost (speed, mode, m) < op_cost)
2865 op_cost = shift_cost (speed, mode, m);
2866 new_limit.cost = best_cost.cost - op_cost;
2867 new_limit.latency = best_cost.latency - op_cost;
2868 synth_mult (alg_in, q, &new_limit, mode);
2869
2870 alg_in->cost.cost += op_cost;
2871 alg_in->cost.latency += op_cost;
2872 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2873 {
2874 best_cost = alg_in->cost;
2875 std::swap (alg_in, best_alg);
2876 best_alg->log[best_alg->ops] = m;
2877 best_alg->op[best_alg->ops] = alg_shift;
2878 }
2879
2880 /* See if treating ORIG_T as a signed number yields a better
2881 sequence. Try this sequence only for a negative ORIG_T
2882 as it would be useless for a non-negative ORIG_T. */
2883 if ((HOST_WIDE_INT) orig_t < 0)
2884 {
2885 /* Shift ORIG_T as follows because a right shift of a
2886 negative-valued signed type is implementation
2887 defined. */
2888 q = ~(~orig_t >> m);
2889 /* The function expand_shift will choose between a shift
2890 and a sequence of additions, so the observed cost is
2891 given as MIN (m * add_cost(speed, mode),
2892 shift_cost(speed, mode, m)). */
2893 op_cost = m * add_cost (speed, mode);
2894 if (shift_cost (speed, mode, m) < op_cost)
2895 op_cost = shift_cost (speed, mode, m);
2896 new_limit.cost = best_cost.cost - op_cost;
2897 new_limit.latency = best_cost.latency - op_cost;
2898 synth_mult (alg_in, q, &new_limit, mode);
2899
2900 alg_in->cost.cost += op_cost;
2901 alg_in->cost.latency += op_cost;
2902 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2903 {
2904 best_cost = alg_in->cost;
2905 std::swap (alg_in, best_alg);
2906 best_alg->log[best_alg->ops] = m;
2907 best_alg->op[best_alg->ops] = alg_shift;
2908 }
2909 }
2910 }
2911 if (cache_hit)
2912 goto done;
2913 }
2914
2915 /* If we have an odd number, add or subtract one. */
2916 if ((t & 1) != 0)
2917 {
2918 unsigned HOST_WIDE_INT w;
2919
2920 do_alg_addsub_t_m2:
2921 for (w = 1; (w & t) != 0; w <<= 1)
2922 ;
2923 /* If T was -1, then W will be zero after the loop. This is another
2924 case where T ends with ...111. Handling it by synthesizing (T + 1)
2925 and then subtracting produces slightly better code and makes
2926 algorithm selection much faster than treating it like the
2927 ...0111 case below. */
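      /* For example, for T == 7 (binary ...111) this tries synthesizing 8
	 (a single shift) followed by one subtraction of the multiplicand:
	 8*x - x == 7*x.  */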
2928 if (w == 0
2929 || (w > 2
2930 /* Reject the case where t is 3.
2931 Thus we prefer addition in that case. */
2932 && t != 3))
2933 {
2934 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2935
2936 op_cost = add_cost (speed, mode);
2937 new_limit.cost = best_cost.cost - op_cost;
2938 new_limit.latency = best_cost.latency - op_cost;
2939 synth_mult (alg_in, t + 1, &new_limit, mode);
2940
2941 alg_in->cost.cost += op_cost;
2942 alg_in->cost.latency += op_cost;
2943 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2944 {
2945 best_cost = alg_in->cost;
2946 std::swap (alg_in, best_alg);
2947 best_alg->log[best_alg->ops] = 0;
2948 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2949 }
2950 }
2951 else
2952 {
2953 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2954
2955 op_cost = add_cost (speed, mode);
2956 new_limit.cost = best_cost.cost - op_cost;
2957 new_limit.latency = best_cost.latency - op_cost;
2958 synth_mult (alg_in, t - 1, &new_limit, mode);
2959
2960 alg_in->cost.cost += op_cost;
2961 alg_in->cost.latency += op_cost;
2962 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2963 {
2964 best_cost = alg_in->cost;
2965 std::swap (alg_in, best_alg);
2966 best_alg->log[best_alg->ops] = 0;
2967 best_alg->op[best_alg->ops] = alg_add_t_m2;
2968 }
2969 }
2970
2971 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2972 quickly with a - a * n for some appropriate constant n. */
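      /* For example, a * -7 can be computed as a - (a << 3), since
	 -7 == 1 - 8.  */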
2973 m = exact_log2 (-orig_t + 1);
2974 if (m >= 0 && m < maxm)
2975 {
2976 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2977 /* If the target has a cheap shift-and-subtract insn use
2978 that in preference to a shift insn followed by a sub insn.
2979 Assume that the shift-and-sub is "atomic" with a latency
2980 equal to its cost, otherwise assume that on superscalar
2981 hardware the shift may be executed concurrently with the
2982 earlier steps in the algorithm. */
2983 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2984 {
2985 op_cost = shiftsub1_cost (speed, mode, m);
2986 op_latency = op_cost;
2987 }
2988 else
2989 op_latency = add_cost (speed, mode);
2990
2991 new_limit.cost = best_cost.cost - op_cost;
2992 new_limit.latency = best_cost.latency - op_latency;
2993 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2994 &new_limit, mode);
2995
2996 alg_in->cost.cost += op_cost;
2997 alg_in->cost.latency += op_latency;
2998 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2999 {
3000 best_cost = alg_in->cost;
3001 std::swap (alg_in, best_alg);
3002 best_alg->log[best_alg->ops] = m;
3003 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3004 }
3005 }
3006
3007 if (cache_hit)
3008 goto done;
3009 }
3010
3011 /* Look for factors of t of the form
3012 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3013 If we find such a factor, we can multiply by t using an algorithm that
3014 multiplies by q, shift the result by m and add/subtract it to itself.
3015
3016 We search for large factors first and loop down, even if large factors
3017 are less probable than small; if we find a large factor we will find a
3018 good sequence quickly, and therefore be able to prune (by decreasing
3019 COST_LIMIT) the search. */
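  /* For example, t == 45 factors as 5 * (2**3 + 1), so 45*x can be
     computed as y = 5*x followed by y + (y << 3).  */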
3020
3021 do_alg_addsub_factor:
3022 for (m = floor_log2 (t - 1); m >= 2; m--)
3023 {
3024 unsigned HOST_WIDE_INT d;
3025
3026 d = (HOST_WIDE_INT_1U << m) + 1;
3027 if (t % d == 0 && t > d && m < maxm
3028 && (!cache_hit || cache_alg == alg_add_factor))
3029 {
3030 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3031 if (shiftadd_cost (speed, mode, m) <= op_cost)
3032 op_cost = shiftadd_cost (speed, mode, m);
3033
3034 op_latency = op_cost;
3035
3036
3037 new_limit.cost = best_cost.cost - op_cost;
3038 new_limit.latency = best_cost.latency - op_latency;
3039 synth_mult (alg_in, t / d, &new_limit, mode);
3040
3041 alg_in->cost.cost += op_cost;
3042 alg_in->cost.latency += op_latency;
3043 if (alg_in->cost.latency < op_cost)
3044 alg_in->cost.latency = op_cost;
3045 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3046 {
3047 best_cost = alg_in->cost;
3048 std::swap (alg_in, best_alg);
3049 best_alg->log[best_alg->ops] = m;
3050 best_alg->op[best_alg->ops] = alg_add_factor;
3051 }
3052 /* Other factors will have been taken care of in the recursion. */
3053 break;
3054 }
3055
3056 d = (HOST_WIDE_INT_1U << m) - 1;
3057 if (t % d == 0 && t > d && m < maxm
3058 && (!cache_hit || cache_alg == alg_sub_factor))
3059 {
3060 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3061 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3062 op_cost = shiftsub0_cost (speed, mode, m);
3063
3064 op_latency = op_cost;
3065
3066 new_limit.cost = best_cost.cost - op_cost;
3067 new_limit.latency = best_cost.latency - op_latency;
3068 synth_mult (alg_in, t / d, &new_limit, mode);
3069
3070 alg_in->cost.cost += op_cost;
3071 alg_in->cost.latency += op_latency;
3072 if (alg_in->cost.latency < op_cost)
3073 alg_in->cost.latency = op_cost;
3074 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3075 {
3076 best_cost = alg_in->cost;
3077 std::swap (alg_in, best_alg);
3078 best_alg->log[best_alg->ops] = m;
3079 best_alg->op[best_alg->ops] = alg_sub_factor;
3080 }
3081 break;
3082 }
3083 }
3084 if (cache_hit)
3085 goto done;
3086
3087 /* Try shift-and-add (load effective address) instructions,
3088 i.e. do a*3, a*5, a*9. */
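  /* For example, t == 9 gives q == 8 and m == 3, so 9*x can be done as
     (x << 3) + x, a single shift-and-add.  */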
3089 if ((t & 1) != 0)
3090 {
3091 do_alg_add_t2_m:
3092 q = t - 1;
3093 m = ctz_hwi (q);
3094 if (q && m < maxm)
3095 {
3096 op_cost = shiftadd_cost (speed, mode, m);
3097 new_limit.cost = best_cost.cost - op_cost;
3098 new_limit.latency = best_cost.latency - op_cost;
3099 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3100
3101 alg_in->cost.cost += op_cost;
3102 alg_in->cost.latency += op_cost;
3103 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3104 {
3105 best_cost = alg_in->cost;
3106 std::swap (alg_in, best_alg);
3107 best_alg->log[best_alg->ops] = m;
3108 best_alg->op[best_alg->ops] = alg_add_t2_m;
3109 }
3110 }
3111 if (cache_hit)
3112 goto done;
3113
3114 do_alg_sub_t2_m:
3115 q = t + 1;
3116 m = ctz_hwi (q);
3117 if (q && m < maxm)
3118 {
3119 op_cost = shiftsub0_cost (speed, mode, m);
3120 new_limit.cost = best_cost.cost - op_cost;
3121 new_limit.latency = best_cost.latency - op_cost;
3122 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3123
3124 alg_in->cost.cost += op_cost;
3125 alg_in->cost.latency += op_cost;
3126 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3127 {
3128 best_cost = alg_in->cost;
3129 std::swap (alg_in, best_alg);
3130 best_alg->log[best_alg->ops] = m;
3131 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3132 }
3133 }
3134 if (cache_hit)
3135 goto done;
3136 }
3137
3138 done:
3139 /* If best_cost has not decreased, we have not found any algorithm. */
3140 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3141 {
3142 /* We failed to find an algorithm. Record alg_impossible for
3143 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3144 we are asked to find an algorithm for T within the same or
3145 lower COST_LIMIT, we can immediately return to the
3146 caller. */
3147 entry_ptr->t = t;
3148 entry_ptr->mode = mode;
3149 entry_ptr->speed = speed;
3150 entry_ptr->alg = alg_impossible;
3151 entry_ptr->cost = *cost_limit;
3152 return;
3153 }
3154
3155 /* Cache the result. */
3156 if (!cache_hit)
3157 {
3158 entry_ptr->t = t;
3159 entry_ptr->mode = mode;
3160 entry_ptr->speed = speed;
3161 entry_ptr->alg = best_alg->op[best_alg->ops];
3162 entry_ptr->cost.cost = best_cost.cost;
3163 entry_ptr->cost.latency = best_cost.latency;
3164 }
3165
3166 /* If we are getting a too long sequence for `struct algorithm'
3167 to record, make this search fail. */
3168 if (best_alg->ops == MAX_BITS_PER_WORD)
3169 return;
3170
3171 /* Copy the algorithm from temporary space to the space at alg_out.
3172 We avoid using structure assignment because the majority of
3173 best_alg is normally undefined, and this is a critical function. */
3174 alg_out->ops = best_alg->ops + 1;
3175 alg_out->cost = best_cost;
3176 memcpy (alg_out->op, best_alg->op,
3177 alg_out->ops * sizeof *alg_out->op);
3178 memcpy (alg_out->log, best_alg->log,
3179 alg_out->ops * sizeof *alg_out->log);
3180 }
3181 \f
3182 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3183 Try three variations:
3184
3185 - a shift/add sequence based on VAL itself
3186 - a shift/add sequence based on -VAL, followed by a negation
3187 - a shift/add sequence based on VAL - 1, followed by an addition.
3188
3189 Return true if the cheapest of these cost less than MULT_COST,
3190 describing the algorithm in *ALG and final fixup in *VARIANT. */
3191
3192 bool
3193 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3194 struct algorithm *alg, enum mult_variant *variant,
3195 int mult_cost)
3196 {
3197 struct algorithm alg2;
3198 struct mult_cost limit;
3199 int op_cost;
3200 bool speed = optimize_insn_for_speed_p ();
3201
3202 /* Fail quickly for impossible bounds. */
3203 if (mult_cost < 0)
3204 return false;
3205
3206 /* Ensure that mult_cost provides a reasonable upper bound.
3207 Any constant multiplication can be performed with less
3208 than 2 * bits additions. */
3209 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3210 if (mult_cost > op_cost)
3211 mult_cost = op_cost;
3212
3213 *variant = basic_variant;
3214 limit.cost = mult_cost;
3215 limit.latency = mult_cost;
3216 synth_mult (alg, val, &limit, mode);
3217
3218 /* This works only if the inverted value actually fits in an
3219 `unsigned int'. */
3220 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3221 {
3222 op_cost = neg_cost (speed, mode);
3223 if (MULT_COST_LESS (&alg->cost, mult_cost))
3224 {
3225 limit.cost = alg->cost.cost - op_cost;
3226 limit.latency = alg->cost.latency - op_cost;
3227 }
3228 else
3229 {
3230 limit.cost = mult_cost - op_cost;
3231 limit.latency = mult_cost - op_cost;
3232 }
3233
3234 synth_mult (&alg2, -val, &limit, mode);
3235 alg2.cost.cost += op_cost;
3236 alg2.cost.latency += op_cost;
3237 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3238 *alg = alg2, *variant = negate_variant;
3239 }
3240
3241 /* This proves very useful for division-by-constant. */
3242 op_cost = add_cost (speed, mode);
3243 if (MULT_COST_LESS (&alg->cost, mult_cost))
3244 {
3245 limit.cost = alg->cost.cost - op_cost;
3246 limit.latency = alg->cost.latency - op_cost;
3247 }
3248 else
3249 {
3250 limit.cost = mult_cost - op_cost;
3251 limit.latency = mult_cost - op_cost;
3252 }
3253
3254 synth_mult (&alg2, val - 1, &limit, mode);
3255 alg2.cost.cost += op_cost;
3256 alg2.cost.latency += op_cost;
3257 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3258 *alg = alg2, *variant = add_variant;
3259
3260 return MULT_COST_LESS (&alg->cost, mult_cost);
3261 }
3262
3263 /* A subroutine of expand_mult, used for constant multiplications.
3264 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3265 convenient. Use the shift/add sequence described by ALG and apply
3266 the final fixup specified by VARIANT. */
3267
3268 static rtx
3269 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3270 rtx target, const struct algorithm *alg,
3271 enum mult_variant variant)
3272 {
3273 unsigned HOST_WIDE_INT val_so_far;
3274 rtx_insn *insn;
3275 rtx accum, tem;
3276 int opno;
3277 machine_mode nmode;
3278
3279 /* Avoid referencing memory over and over and invalid sharing
3280 on SUBREGs. */
3281 op0 = force_reg (mode, op0);
3282
3283 /* ACCUM starts out either as OP0 or as a zero, depending on
3284 the first operation. */
3285
3286 if (alg->op[0] == alg_zero)
3287 {
3288 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3289 val_so_far = 0;
3290 }
3291 else if (alg->op[0] == alg_m)
3292 {
3293 accum = copy_to_mode_reg (mode, op0);
3294 val_so_far = 1;
3295 }
3296 else
3297 gcc_unreachable ();
3298
3299 for (opno = 1; opno < alg->ops; opno++)
3300 {
3301 int log = alg->log[opno];
3302 rtx shift_subtarget = optimize ? 0 : accum;
3303 rtx add_target
3304 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3305 && !optimize)
3306 ? target : 0;
3307 rtx accum_target = optimize ? 0 : accum;
3308 rtx accum_inner;
3309
3310 switch (alg->op[opno])
3311 {
3312 case alg_shift:
3313 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3314 /* REG_EQUAL note will be attached to the following insn. */
3315 emit_move_insn (accum, tem);
3316 val_so_far <<= log;
3317 break;
3318
3319 case alg_add_t_m2:
3320 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3321 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3322 add_target ? add_target : accum_target);
3323 val_so_far += HOST_WIDE_INT_1U << log;
3324 break;
3325
3326 case alg_sub_t_m2:
3327 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3328 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3329 add_target ? add_target : accum_target);
3330 val_so_far -= HOST_WIDE_INT_1U << log;
3331 break;
3332
3333 case alg_add_t2_m:
3334 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3335 log, shift_subtarget, 0);
3336 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3337 add_target ? add_target : accum_target);
3338 val_so_far = (val_so_far << log) + 1;
3339 break;
3340
3341 case alg_sub_t2_m:
3342 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3343 log, shift_subtarget, 0);
3344 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3345 add_target ? add_target : accum_target);
3346 val_so_far = (val_so_far << log) - 1;
3347 break;
3348
3349 case alg_add_factor:
3350 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3351 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3352 add_target ? add_target : accum_target);
3353 val_so_far += val_so_far << log;
3354 break;
3355
3356 case alg_sub_factor:
3357 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3358 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3359 (add_target
3360 ? add_target : (optimize ? 0 : tem)));
3361 val_so_far = (val_so_far << log) - val_so_far;
3362 break;
3363
3364 default:
3365 gcc_unreachable ();
3366 }
3367
3368 if (SCALAR_INT_MODE_P (mode))
3369 {
3370 /* Write a REG_EQUAL note on the last insn so that we can cse
3371 multiplication sequences. Note that if ACCUM is a SUBREG,
3372 we've set the inner register and must properly indicate that. */
3373 tem = op0, nmode = mode;
3374 accum_inner = accum;
3375 if (GET_CODE (accum) == SUBREG)
3376 {
3377 accum_inner = SUBREG_REG (accum);
3378 nmode = GET_MODE (accum_inner);
3379 tem = gen_lowpart (nmode, op0);
3380 }
3381
3382 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3383 In that case, only the low bits of accum would be guaranteed to
3384 be equal to the content of the REG_EQUAL note, the upper bits
3385 can be anything. */
3386 if (!paradoxical_subreg_p (tem))
3387 {
3388 insn = get_last_insn ();
3389 wide_int wval_so_far
3390 = wi::uhwi (val_so_far,
3391 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3392 rtx c = immed_wide_int_const (wval_so_far, nmode);
3393 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3394 accum_inner);
3395 }
3396 }
3397 }
3398
3399 if (variant == negate_variant)
3400 {
3401 val_so_far = -val_so_far;
3402 accum = expand_unop (mode, neg_optab, accum, target, 0);
3403 }
3404 else if (variant == add_variant)
3405 {
3406 val_so_far = val_so_far + 1;
3407 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3408 }
3409
3410 /* Compare only the bits of val and val_so_far that are significant
3411 in the result mode, to avoid sign-/zero-extension confusion. */
3412 nmode = GET_MODE_INNER (mode);
3413 val &= GET_MODE_MASK (nmode);
3414 val_so_far &= GET_MODE_MASK (nmode);
3415 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3416
3417 return accum;
3418 }
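
/* Worked example (illustrative only): multiplying by 10 with the
   algorithm { alg_m, alg_add_t_m2 (log 2), alg_shift (log 1) } proceeds
   as

     accum = x;                  val_so_far = 1
     accum = accum + (x << 2);   val_so_far = 1 + 4  = 5
     accum = accum << 1;         val_so_far = 5 << 1 = 10

   so the assertion above merely checks that the synthesized sequence
   really computed the constant that was asked for.  */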
3419
3420 /* Perform a multiplication and return an rtx for the result.
3421 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3422 TARGET is a suggestion for where to store the result (an rtx).
3423
3424 We check specially for a constant integer as OP1.
3425 If you want this check for OP0 as well, then before calling
3426 you should swap the two operands if OP0 would be constant. */
3427
3428 rtx
3429 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3430 int unsignedp, bool no_libcall)
3431 {
3432 enum mult_variant variant;
3433 struct algorithm algorithm;
3434 rtx scalar_op1;
3435 int max_cost;
3436 bool speed = optimize_insn_for_speed_p ();
3437 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3438
3439 if (CONSTANT_P (op0))
3440 std::swap (op0, op1);
3441
3442 /* For vectors, there are several simplifications that can be made if
3443 all elements of the vector constant are identical. */
3444 scalar_op1 = unwrap_const_vec_duplicate (op1);
3445
3446 if (INTEGRAL_MODE_P (mode))
3447 {
3448 rtx fake_reg;
3449 HOST_WIDE_INT coeff;
3450 bool is_neg;
3451 int mode_bitsize;
3452
3453 if (op1 == CONST0_RTX (mode))
3454 return op1;
3455 if (op1 == CONST1_RTX (mode))
3456 return op0;
3457 if (op1 == CONSTM1_RTX (mode))
3458 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3459 op0, target, 0);
3460
3461 if (do_trapv)
3462 goto skip_synth;
3463
3464 /* If mode is an integer vector mode, check if the backend supports
3465 vector lshift (by scalar or vector) at all. If not, we can't use
3466 synthesized multiply. */
3467 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3468 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3469 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3470 goto skip_synth;
3471
3472 /* These are the operations that are potentially turned into
3473 a sequence of shifts and additions. */
3474 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3475
3476 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3477 less than or equal in size to `unsigned int' this doesn't matter.
3478 If the mode is larger than `unsigned int', then synth_mult works
3479 only if the constant value exactly fits in an `unsigned int' without
3480 any truncation. This means that multiplying by negative values does
3481 not work; results are off by 2^32 on a 32 bit machine. */
3482 if (CONST_INT_P (scalar_op1))
3483 {
3484 coeff = INTVAL (scalar_op1);
3485 is_neg = coeff < 0;
3486 }
3487 #if TARGET_SUPPORTS_WIDE_INT
3488 else if (CONST_WIDE_INT_P (scalar_op1))
3489 #else
3490 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3491 #endif
3492 {
3493 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3494 /* Perfect power of 2 (other than 1, which is handled above). */
3495 if (shift > 0)
3496 return expand_shift (LSHIFT_EXPR, mode, op0,
3497 shift, target, unsignedp);
3498 else
3499 goto skip_synth;
3500 }
3501 else
3502 goto skip_synth;
3503
3504 /* We used to test optimize here, on the grounds that it's better to
3505 produce a smaller program when -O is not used. But this causes
3506 such a terrible slowdown sometimes that it seems better to always
3507 use synth_mult. */
3508
3509 /* Special case powers of two. */
3510 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3511 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3512 return expand_shift (LSHIFT_EXPR, mode, op0,
3513 floor_log2 (coeff), target, unsignedp);
3514
3515 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3516
3517 /* Attempt to handle multiplication of DImode values by negative
3518 coefficients, by performing the multiplication by a positive
3519 multiplier and then inverting the result. */
3520 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3521 {
3522 /* It's safe to use -coeff even for INT_MIN, as the
3523 result is interpreted as an unsigned coefficient.
3524 Exclude cost of op0 from max_cost to match the cost
3525 calculation of the synth_mult. */
3526 coeff = -(unsigned HOST_WIDE_INT) coeff;
3527 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3528 mode, speed)
3529 - neg_cost (speed, mode));
3530 if (max_cost <= 0)
3531 goto skip_synth;
3532
3533 /* Special case powers of two. */
3534 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3535 {
3536 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3537 floor_log2 (coeff), target, unsignedp);
3538 return expand_unop (mode, neg_optab, temp, target, 0);
3539 }
3540
3541 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3542 max_cost))
3543 {
3544 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3545 &algorithm, variant);
3546 return expand_unop (mode, neg_optab, temp, target, 0);
3547 }
3548 goto skip_synth;
3549 }
3550
3551 /* Exclude cost of op0 from max_cost to match the cost
3552 calculation of the synth_mult. */
3553 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3554 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3555 return expand_mult_const (mode, op0, coeff, target,
3556 &algorithm, variant);
3557 }
3558 skip_synth:
3559
3560 /* Expand x*2.0 as x+x. */
3561 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3562 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3563 {
3564 op0 = force_reg (GET_MODE (op0), op0);
3565 return expand_binop (mode, add_optab, op0, op0,
3566 target, unsignedp,
3567 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3568 }
3569
3570 /* This used to use umul_optab if unsigned, but for non-widening multiply
3571 there is no difference between signed and unsigned. */
3572 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3573 op0, op1, target, unsignedp,
3574 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3575 gcc_assert (op0 || no_libcall);
3576 return op0;
3577 }
3578
3579 /* Return a cost estimate for multiplying a register by the given
3580 COEFFicient in the given MODE and SPEED. */
3581
3582 int
3583 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3584 {
3585 int max_cost;
3586 struct algorithm algorithm;
3587 enum mult_variant variant;
3588
3589 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3590 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3591 mode, speed);
3592 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3593 return algorithm.cost.cost;
3594 else
3595 return max_cost;
3596 }
3597
3598 /* Perform a widening multiplication and return an rtx for the result.
3599 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3600 TARGET is a suggestion for where to store the result (an rtx).
3601 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3602 or smul_widen_optab.
3603
3604 We check specially for a constant integer as OP1, comparing the
3605 cost of a widening multiply against the cost of a sequence of shifts
3606 and adds. */
3607
3608 rtx
3609 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3610 int unsignedp, optab this_optab)
3611 {
3612 bool speed = optimize_insn_for_speed_p ();
3613 rtx cop1;
3614
3615 if (CONST_INT_P (op1)
3616 && GET_MODE (op0) != VOIDmode
3617 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3618 this_optab == umul_widen_optab))
3619 && CONST_INT_P (cop1)
3620 && (INTVAL (cop1) >= 0
3621 || HWI_COMPUTABLE_MODE_P (mode)))
3622 {
3623 HOST_WIDE_INT coeff = INTVAL (cop1);
3624 int max_cost;
3625 enum mult_variant variant;
3626 struct algorithm algorithm;
3627
3628 if (coeff == 0)
3629 return CONST0_RTX (mode);
3630
3631 /* Special case powers of two. */
3632 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3633 {
3634 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3635 return expand_shift (LSHIFT_EXPR, mode, op0,
3636 floor_log2 (coeff), target, unsignedp);
3637 }
3638
3639 /* Exclude cost of op0 from max_cost to match the cost
3640 calculation of the synth_mult. */
3641 max_cost = mul_widen_cost (speed, mode);
3642 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3643 max_cost))
3644 {
3645 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3646 return expand_mult_const (mode, op0, coeff, target,
3647 &algorithm, variant);
3648 }
3649 }
3650 return expand_binop (mode, this_optab, op0, op1, target,
3651 unsignedp, OPTAB_LIB_WIDEN);
3652 }
3653 \f
3654 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3655 replace division by D, and put the least significant N bits of the result
3656 in *MULTIPLIER_PTR and return the most significant bit.
3657
3658 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3659 needed precision is in PRECISION (should be <= N).
3660
3661 PRECISION should be as small as possible so this function can choose
3662 multiplier more freely.
3663
3664 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3665 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3666
3667 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3668 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3669
3670 unsigned HOST_WIDE_INT
3671 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3672 unsigned HOST_WIDE_INT *multiplier_ptr,
3673 int *post_shift_ptr, int *lgup_ptr)
3674 {
3675 int lgup, post_shift;
3676 int pow, pow2;
3677
3678 /* lgup = ceil(log2(divisor)); */
3679 lgup = ceil_log2 (d);
3680
3681 gcc_assert (lgup <= n);
3682
3683 pow = n + lgup;
3684 pow2 = n + lgup - precision;
3685
3686 /* mlow = 2^(N + lgup)/d */
3687 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3688 wide_int mlow = wi::udiv_trunc (val, d);
3689
3690 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3691 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3692 wide_int mhigh = wi::udiv_trunc (val, d);
3693
3694 /* If precision == N, then mlow, mhigh exceed 2^N
3695 (but they do not exceed 2^(N+1)). */
3696
3697 /* Reduce to lowest terms. */
3698 for (post_shift = lgup; post_shift > 0; post_shift--)
3699 {
3700 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3701 HOST_BITS_PER_WIDE_INT);
3702 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3703 HOST_BITS_PER_WIDE_INT);
3704 if (ml_lo >= mh_lo)
3705 break;
3706
3707 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3708 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3709 }
3710
3711 *post_shift_ptr = post_shift;
3712 *lgup_ptr = lgup;
3713 if (n < HOST_BITS_PER_WIDE_INT)
3714 {
3715 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3716 *multiplier_ptr = mhigh.to_uhwi () & mask;
3717 return mhigh.to_uhwi () > mask;
3718 }
3719 else
3720 {
3721 *multiplier_ptr = mhigh.to_uhwi ();
3722 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3723 }
3724 }
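
/* Example (illustrative, assuming N == 32 and a 64-bit host type): for
   D == 3 this returns *MULTIPLIER_PTR == 0xAAAAAAAB, *POST_SHIFT_PTR == 1
   and a zero most significant bit, so for any 32-bit x

     x / 3 == (unsigned int) (((unsigned long long) x * 0xAAAAAAABu)
                              >> 32 >> 1)

   When the returned most significant bit is nonzero the full N + 1 bit
   multiplier no longer fits in N bits, and the callers in expand_divmod
   compensate with an extra subtract/shift/add step (the mh != 0 paths
   below).  */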
3725
3726 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3727 congruent to 1 (mod 2**N). */
3728
3729 static unsigned HOST_WIDE_INT
3730 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3731 {
3732 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3733
3734 /* The algorithm notes that the choice y = x satisfies
3735 x*y == 1 mod 2^3, since x is assumed odd.
3736 Each iteration doubles the number of bits of significance in y. */
3737
3738 unsigned HOST_WIDE_INT mask;
3739 unsigned HOST_WIDE_INT y = x;
3740 int nbit = 3;
3741
3742 mask = (n == HOST_BITS_PER_WIDE_INT
3743 ? HOST_WIDE_INT_M1U
3744 : (HOST_WIDE_INT_1U << n) - 1);
3745
3746 while (nbit < n)
3747 {
3748 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3749 nbit *= 2;
3750 }
3751 return y;
3752 }
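
/* Example (illustrative): each round doubles the number of correct
   low-order bits in Y, e.g. with N == 8

     invert_mod2n (3, 8) == 0xAB,   and 3 * 0xAB == 0x201 == 1 (mod 256).

   expand_divmod uses this in its EXACT_DIV_EXPR handling: when OP0 is
   known to be a multiple of an odd constant D, OP0 / D is just the low
   N bits of OP0 * invert_mod2n (D, N), with no correction step.  */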
3753
3754 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3755 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3756 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3757 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3758 become signed.
3759
3760 The result is put in TARGET if that is convenient.
3761
3762 MODE is the mode of operation. */
3763
3764 rtx
3765 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3766 rtx op1, rtx target, int unsignedp)
3767 {
3768 rtx tem;
3769 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3770
3771 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3772 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3773 tem = expand_and (mode, tem, op1, NULL_RTX);
3774 adj_operand
3775 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3776 adj_operand);
3777
3778 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3779 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3780 tem = expand_and (mode, tem, op0, NULL_RTX);
3781 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3782 target);
3783
3784 return target;
3785 }
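
/* In C terms the adjustment above implements (sketch, N-bit operands):

     high_signed (a, b) = high_unsigned (a, b)
                          - (a < 0 ? b : 0) - (b < 0 ? a : 0)

   and the same identity with '+' when converting a signed high part to
   an unsigned one.  The arithmetic right shifts by N - 1 produce the
   all-ones / all-zeros masks that stand in for the two conditionals.  */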
3786
3787 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3788
3789 static rtx
3790 extract_high_half (scalar_int_mode mode, rtx op)
3791 {
3792 if (mode == word_mode)
3793 return gen_highpart (mode, op);
3794
3795 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3796
3797 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3798 GET_MODE_BITSIZE (mode), 0, 1);
3799 return convert_modes (mode, wider_mode, op, 0);
3800 }
3801
3802 /* Like expmed_mult_highpart, but only consider using a multiplication
3803 optab. OP1 is an rtx for the constant operand. */
3804
3805 static rtx
3806 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3807 rtx target, int unsignedp, int max_cost)
3808 {
3809 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3810 optab moptab;
3811 rtx tem;
3812 int size;
3813 bool speed = optimize_insn_for_speed_p ();
3814
3815 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3816
3817 size = GET_MODE_BITSIZE (mode);
3818
3819 /* Firstly, try using a multiplication insn that only generates the needed
3820 high part of the product, and in the sign flavor of unsignedp. */
3821 if (mul_highpart_cost (speed, mode) < max_cost)
3822 {
3823 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3824 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3825 unsignedp, OPTAB_DIRECT);
3826 if (tem)
3827 return tem;
3828 }
3829
3830 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3831 Need to adjust the result after the multiplication. */
3832 if (size - 1 < BITS_PER_WORD
3833 && (mul_highpart_cost (speed, mode)
3834 + 2 * shift_cost (speed, mode, size-1)
3835 + 4 * add_cost (speed, mode) < max_cost))
3836 {
3837 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3838 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3839 unsignedp, OPTAB_DIRECT);
3840 if (tem)
3841 /* We used the wrong signedness. Adjust the result. */
3842 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3843 tem, unsignedp);
3844 }
3845
3846 /* Try widening multiplication. */
3847 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3848 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3849 && mul_widen_cost (speed, wider_mode) < max_cost)
3850 {
3851 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3852 unsignedp, OPTAB_WIDEN);
3853 if (tem)
3854 return extract_high_half (mode, tem);
3855 }
3856
3857 /* Try widening the mode and performing a non-widening multiplication. */
3858 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3859 && size - 1 < BITS_PER_WORD
3860 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3861 < max_cost))
3862 {
3863 rtx_insn *insns;
3864 rtx wop0, wop1;
3865
3866 /* We need to widen the operands, for example to ensure the
3867 constant multiplier is correctly sign or zero extended.
3868 Use a sequence to clean up any instructions emitted by
3869 the conversions if things don't work out. */
3870 start_sequence ();
3871 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3872 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3873 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3874 unsignedp, OPTAB_WIDEN);
3875 insns = get_insns ();
3876 end_sequence ();
3877
3878 if (tem)
3879 {
3880 emit_insn (insns);
3881 return extract_high_half (mode, tem);
3882 }
3883 }
3884
3885 /* Try widening multiplication of opposite signedness, and adjust. */
3886 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3887 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3888 && size - 1 < BITS_PER_WORD
3889 && (mul_widen_cost (speed, wider_mode)
3890 + 2 * shift_cost (speed, mode, size-1)
3891 + 4 * add_cost (speed, mode) < max_cost))
3892 {
3893 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3894 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3895 if (tem != 0)
3896 {
3897 tem = extract_high_half (mode, tem);
3898 /* We used the wrong signedness. Adjust the result. */
3899 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3900 target, unsignedp);
3901 }
3902 }
3903
3904 return 0;
3905 }
3906
3907 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3908 putting the high half of the result in TARGET if that is convenient,
3909 and return where the result is. If the operation cannot be performed,
3910 0 is returned.
3911
3912 MODE is the mode of operation and result.
3913
3914 UNSIGNEDP nonzero means unsigned multiply.
3915
3916 MAX_COST is the total allowed cost for the expanded RTL. */
3917
3918 static rtx
3919 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3920 rtx target, int unsignedp, int max_cost)
3921 {
3922 unsigned HOST_WIDE_INT cnst1;
3923 int extra_cost;
3924 bool sign_adjust = false;
3925 enum mult_variant variant;
3926 struct algorithm alg;
3927 rtx tem;
3928 bool speed = optimize_insn_for_speed_p ();
3929
3930 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3931 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3932
3933 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3934
3935 /* We can't optimize modes wider than BITS_PER_WORD.
3936 ??? We might be able to perform double-word arithmetic if
3937 mode == word_mode, however all the cost calculations in
3938 synth_mult etc. assume single-word operations. */
3939 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3940 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3941 return expmed_mult_highpart_optab (mode, op0, op1, target,
3942 unsignedp, max_cost);
3943
3944 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3945
3946 /* Check whether we try to multiply by a negative constant. */
3947 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3948 {
3949 sign_adjust = true;
3950 extra_cost += add_cost (speed, mode);
3951 }
3952
3953 /* See whether shift/add multiplication is cheap enough. */
3954 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3955 max_cost - extra_cost))
3956 {
3957 /* See whether the specialized multiplication optabs are
3958 cheaper than the shift/add version. */
3959 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3960 alg.cost.cost + extra_cost);
3961 if (tem)
3962 return tem;
3963
3964 tem = convert_to_mode (wider_mode, op0, unsignedp);
3965 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3966 tem = extract_high_half (mode, tem);
3967
3968 /* Adjust result for signedness. */
3969 if (sign_adjust)
3970 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3971
3972 return tem;
3973 }
3974 return expmed_mult_highpart_optab (mode, op0, op1, target,
3975 unsignedp, max_cost);
3976 }
3977
3978
3979 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3980
3981 static rtx
3982 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3983 {
3984 rtx result, temp, shift;
3985 rtx_code_label *label;
3986 int logd;
3987 int prec = GET_MODE_PRECISION (mode);
3988
3989 logd = floor_log2 (d);
3990 result = gen_reg_rtx (mode);
3991
3992 /* Avoid conditional branches when they're expensive. */
3993 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3994 && optimize_insn_for_speed_p ())
3995 {
3996 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3997 mode, 0, -1);
3998 if (signmask)
3999 {
4000 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4001 signmask = force_reg (mode, signmask);
4002 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4003
4004 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4005 which instruction sequence to use. If logical right shifts
4006 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4007 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4008
4009 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4010 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4011 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4012 > COSTS_N_INSNS (2)))
4013 {
4014 temp = expand_binop (mode, xor_optab, op0, signmask,
4015 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4016 temp = expand_binop (mode, sub_optab, temp, signmask,
4017 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4018 temp = expand_binop (mode, and_optab, temp,
4019 gen_int_mode (masklow, mode),
4020 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4021 temp = expand_binop (mode, xor_optab, temp, signmask,
4022 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4023 temp = expand_binop (mode, sub_optab, temp, signmask,
4024 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4025 }
4026 else
4027 {
4028 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4029 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4030 signmask = force_reg (mode, signmask);
4031
4032 temp = expand_binop (mode, add_optab, op0, signmask,
4033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4034 temp = expand_binop (mode, and_optab, temp,
4035 gen_int_mode (masklow, mode),
4036 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4037 temp = expand_binop (mode, sub_optab, temp, signmask,
4038 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4039 }
4040 return temp;
4041 }
4042 }
4043
4044 /* Mask contains the mode's signbit and the significant bits of the
4045 modulus. By including the signbit in the operation, many targets
4046 can avoid an explicit compare operation in the following comparison
4047 against zero. */
4048 wide_int mask = wi::mask (logd, false, prec);
4049 mask = wi::set_bit (mask, prec - 1);
4050
4051 temp = expand_binop (mode, and_optab, op0,
4052 immed_wide_int_const (mask, mode),
4053 result, 1, OPTAB_LIB_WIDEN);
4054 if (temp != result)
4055 emit_move_insn (result, temp);
4056
4057 label = gen_label_rtx ();
4058 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4059
4060 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4061 0, OPTAB_LIB_WIDEN);
4062
4063 mask = wi::mask (logd, true, prec);
4064 temp = expand_binop (mode, ior_optab, temp,
4065 immed_wide_int_const (mask, mode),
4066 result, 1, OPTAB_LIB_WIDEN);
4067 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4068 0, OPTAB_LIB_WIDEN);
4069 if (temp != result)
4070 emit_move_insn (result, temp);
4071 emit_label (label);
4072 return result;
4073 }
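
/* Branch-free sketch (illustrative only) of one of the sequences the
   fast path above generates, written as plain C for a 32-bit int with
   an arithmetic right shift of negative values:

     int smod_pow2 (int x, int logd)
     {
       int d = 1 << logd;
       int smask = x >> 31;
       unsigned bias = (unsigned) smask >> (32 - logd);
       return (int) (((unsigned) x + bias) & (d - 1)) - (int) bias;
     }

   SMASK is -1 when X is negative and 0 otherwise, so BIAS is D - 1 for
   negative X and 0 for nonnegative X.  E.g. smod_pow2 (-7, 2) == -3,
   matching C's truncating -7 % 4.  */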
4074
4075 /* Expand signed division of OP0 by a power of two D in mode MODE.
4076 This routine is only called for positive values of D. */
4077
4078 static rtx
4079 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4080 {
4081 rtx temp;
4082 rtx_code_label *label;
4083 int logd;
4084
4085 logd = floor_log2 (d);
4086
4087 if (d == 2
4088 && BRANCH_COST (optimize_insn_for_speed_p (),
4089 false) >= 1)
4090 {
4091 temp = gen_reg_rtx (mode);
4092 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4093 if (temp != NULL_RTX)
4094 {
4095 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4096 0, OPTAB_LIB_WIDEN);
4097 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4098 }
4099 }
4100
4101 if (HAVE_conditional_move
4102 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4103 {
4104 rtx temp2;
4105
4106 start_sequence ();
4107 temp2 = copy_to_mode_reg (mode, op0);
4108 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4109 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4110 temp = force_reg (mode, temp);
4111
4112 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4113 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4114 mode, temp, temp2, mode, 0);
4115 if (temp2)
4116 {
4117 rtx_insn *seq = get_insns ();
4118 end_sequence ();
4119 emit_insn (seq);
4120 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4121 }
4122 end_sequence ();
4123 }
4124
4125 if (BRANCH_COST (optimize_insn_for_speed_p (),
4126 false) >= 2)
4127 {
4128 int ushift = GET_MODE_BITSIZE (mode) - logd;
4129
4130 temp = gen_reg_rtx (mode);
4131 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4132 if (temp != NULL_RTX)
4133 {
4134 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4135 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4136 > COSTS_N_INSNS (1))
4137 temp = expand_binop (mode, and_optab, temp,
4138 gen_int_mode (d - 1, mode),
4139 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4140 else
4141 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4142 ushift, NULL_RTX, 1);
4143 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4144 0, OPTAB_LIB_WIDEN);
4145 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4146 }
4147 }
4148
4149 label = gen_label_rtx ();
4150 temp = copy_to_mode_reg (mode, op0);
4151 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4152 expand_inc (temp, gen_int_mode (d - 1, mode));
4153 emit_label (label);
4154 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4155 }
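
/* Branch-free sketch (illustrative only) of the store-flag path above,
   as plain C for a 32-bit int with an arithmetic right shift:

     int sdiv_pow2 (int x, int logd)
     {
       int bias = (x >> 31) & ((1 << logd) - 1);
       return (x + bias) >> logd;
     }

   BIAS is D - 1 when X is negative and 0 otherwise, which turns the
   flooring arithmetic shift into the truncating division C requires:
   sdiv_pow2 (-7, 2) == -1 and sdiv_pow2 (-8, 2) == -2.  */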
4156 \f
4157 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4158 if that is convenient, and returning where the result is.
4159 You may request either the quotient or the remainder as the result;
4160 specify REM_FLAG nonzero to get the remainder.
4161
4162 CODE is the expression code for which kind of division this is;
4163 it controls how rounding is done. MODE is the machine mode to use.
4164 UNSIGNEDP nonzero means do unsigned division. */
4165
4166 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4167 and then correct it by or'ing in missing high bits
4168 if result of ANDI is nonzero.
4169 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4170 This could optimize to a bfexts instruction.
4171 But C doesn't use these operations, so their optimizations are
4172 left for later. */
4173 /* ??? For modulo, we don't actually need the highpart of the first product,
4174 the low part will do nicely. And for small divisors, the second multiply
4175 can also be a low-part only multiply or even be completely left out.
4176 E.g. to calculate the remainder of a division by 3 with a 32 bit
4177 multiply, multiply with 0x55555556 and extract the upper two bits;
4178 the result is exact for inputs up to 0x1fffffff.
4179 The input range can be reduced by using cross-sum rules.
4180 For odd divisors >= 3, the following table gives right shift counts
4181 so that if a number is shifted by an integer multiple of the given
4182 amount, the remainder stays the same:
4183 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4184 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4185 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4186 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4187 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4188
4189 Cross-sum rules for even numbers can be derived by leaving as many bits
4190 to the right alone as the divisor has zeros to the right.
4191 E.g. if x is an unsigned 32 bit number:
4192 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4193 */
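
/* Sketch of the remainder-by-3 trick mentioned above (illustrative
   only, 32-bit unsigned arithmetic):

     unsigned umod3 (unsigned x)
     {
       return (x * 0x55555556u) >> 30;
     }

   0x55555556 is roughly 2^32 / 3, so the top two bits of the truncated
   32-bit product are approximately 4 * (x mod 3) / 3, which floors to
   the remainder; as stated above this is exact for x <= 0x1fffffff.  */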
4194
4195 rtx
4196 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4197 rtx op0, rtx op1, rtx target, int unsignedp,
4198 enum optab_methods methods)
4199 {
4200 machine_mode compute_mode;
4201 rtx tquotient;
4202 rtx quotient = 0, remainder = 0;
4203 rtx_insn *last;
4204 rtx_insn *insn;
4205 optab optab1, optab2;
4206 int op1_is_constant, op1_is_pow2 = 0;
4207 int max_cost, extra_cost;
4208 static HOST_WIDE_INT last_div_const = 0;
4209 bool speed = optimize_insn_for_speed_p ();
4210
4211 op1_is_constant = CONST_INT_P (op1);
4212 if (op1_is_constant)
4213 {
4214 wide_int ext_op1 = rtx_mode_t (op1, mode);
4215 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4216 || (! unsignedp
4217 && wi::popcount (wi::neg (ext_op1)) == 1));
4218 }
4219
4220 /*
4221 This is the structure of expand_divmod:
4222
4223 First comes code to fix up the operands so we can perform the operations
4224 correctly and efficiently.
4225
4226 Second comes a switch statement with code specific for each rounding mode.
4227 For some special operands this code emits all RTL for the desired
4228 operation, for other cases, it generates only a quotient and stores it in
4229 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4230 to indicate that it has not done anything.
4231
4232 Last comes code that finishes the operation. If QUOTIENT is set and
4233 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4234 QUOTIENT is not set, it is computed using trunc rounding.
4235
4236 We try to generate special code for division and remainder when OP1 is a
4237 constant. If |OP1| = 2**n we can use shifts and some other fast
4238 operations. For other values of OP1, we compute a carefully selected
4239 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4240 by m.
4241
4242 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4243 half of the product. Different strategies for generating the product are
4244 implemented in expmed_mult_highpart.
4245
4246 If what we actually want is the remainder, we generate that by another
4247 by-constant multiplication and a subtraction. */
4248
4249 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4250 code below will malfunction if we are, so check here and handle
4251 the special case if so. */
4252 if (op1 == const1_rtx)
4253 return rem_flag ? const0_rtx : op0;
4254
4255 /* When dividing by -1, we could get an overflow.
4256 negv_optab can handle overflows. */
4257 if (! unsignedp && op1 == constm1_rtx)
4258 {
4259 if (rem_flag)
4260 return const0_rtx;
4261 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4262 ? negv_optab : neg_optab, op0, target, 0);
4263 }
4264
4265 if (target
4266 /* Don't use the function value register as a target
4267 since we have to read it as well as write it,
4268 and function-inlining gets confused by this. */
4269 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4270 /* Don't clobber an operand while doing a multi-step calculation. */
4271 || ((rem_flag || op1_is_constant)
4272 && (reg_mentioned_p (target, op0)
4273 || (MEM_P (op0) && MEM_P (target))))
4274 || reg_mentioned_p (target, op1)
4275 || (MEM_P (op1) && MEM_P (target))))
4276 target = 0;
4277
4278 /* Get the mode in which to perform this computation. Normally it will
4279 be MODE, but sometimes we can't do the desired operation in MODE.
4280 If so, pick a wider mode in which we can do the operation. Convert
4281 to that mode at the start to avoid repeated conversions.
4282
4283 First see what operations we need. These depend on the expression
4284 we are evaluating. (We assume that divxx3 insns exist under the
4285 same conditions as modxx3 insns do, and that these insns don't normally
4286 fail. If these assumptions are not correct, we may generate less
4287 efficient code in some cases.)
4288
4289 Then see if we find a mode in which we can open-code that operation
4290 (either a division, modulus, or shift). Finally, check for the smallest
4291 mode for which we can do the operation with a library call. */
4292
4293 /* We might want to refine this now that we have division-by-constant
4294 optimization. Since expmed_mult_highpart tries so many variants, it is
4295 not straightforward to generalize this. Maybe we should make an array
4296 of possible modes in init_expmed? Save this for GCC 2.7. */
4297
4298 optab1 = (op1_is_pow2
4299 ? (unsignedp ? lshr_optab : ashr_optab)
4300 : (unsignedp ? udiv_optab : sdiv_optab));
4301 optab2 = (op1_is_pow2 ? optab1
4302 : (unsignedp ? udivmod_optab : sdivmod_optab));
4303
4304 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4305 {
4306 FOR_EACH_MODE_FROM (compute_mode, mode)
4307 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4308 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4309 break;
4310
4311 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4312 FOR_EACH_MODE_FROM (compute_mode, mode)
4313 if (optab_libfunc (optab1, compute_mode)
4314 || optab_libfunc (optab2, compute_mode))
4315 break;
4316 }
4317 else
4318 compute_mode = mode;
4319
4320 /* If we still couldn't find a mode, use MODE, but expand_binop will
4321 probably die. */
4322 if (compute_mode == VOIDmode)
4323 compute_mode = mode;
4324
4325 if (target && GET_MODE (target) == compute_mode)
4326 tquotient = target;
4327 else
4328 tquotient = gen_reg_rtx (compute_mode);
4329
4330 #if 0
4331 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4332 (mode), and thereby get better code when OP1 is a constant. Do that
4333 later. It will require going over all usages of SIZE below. */
4334 size = GET_MODE_BITSIZE (mode);
4335 #endif
4336
4337 /* Only deduct something for a REM if the last divide done was
4338 for a different constant. Then set the constant of the last
4339 divide. */
4340 max_cost = (unsignedp
4341 ? udiv_cost (speed, compute_mode)
4342 : sdiv_cost (speed, compute_mode));
4343 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4344 && INTVAL (op1) == last_div_const))
4345 max_cost -= (mul_cost (speed, compute_mode)
4346 + add_cost (speed, compute_mode));
4347
4348 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4349
4350 /* Now convert to the best mode to use. */
4351 if (compute_mode != mode)
4352 {
4353 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4354 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4355
4356 /* convert_modes may have placed op1 into a register, so we
4357 must recompute the following. */
4358 op1_is_constant = CONST_INT_P (op1);
4359 if (op1_is_constant)
4360 {
4361 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4362 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4363 || (! unsignedp
4364 && wi::popcount (wi::neg (ext_op1)) == 1));
4365 }
4366 else
4367 op1_is_pow2 = 0;
4368 }
4369
4370 /* If one of the operands is a volatile MEM, copy it into a register. */
4371
4372 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4373 op0 = force_reg (compute_mode, op0);
4374 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4375 op1 = force_reg (compute_mode, op1);
4376
4377 /* If we need the remainder or if OP1 is constant, we need to
4378 put OP0 in a register in case it has any queued subexpressions. */
4379 if (rem_flag || op1_is_constant)
4380 op0 = force_reg (compute_mode, op0);
4381
4382 last = get_last_insn ();
4383
4384 /* Promote floor rounding to trunc rounding for unsigned operations. */
4385 if (unsignedp)
4386 {
4387 if (code == FLOOR_DIV_EXPR)
4388 code = TRUNC_DIV_EXPR;
4389 if (code == FLOOR_MOD_EXPR)
4390 code = TRUNC_MOD_EXPR;
4391 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4392 code = TRUNC_DIV_EXPR;
4393 }
4394
4395 if (op1 != const0_rtx)
4396 switch (code)
4397 {
4398 case TRUNC_MOD_EXPR:
4399 case TRUNC_DIV_EXPR:
4400 if (op1_is_constant)
4401 {
4402 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4403 int size = GET_MODE_BITSIZE (int_mode);
4404 if (unsignedp)
4405 {
4406 unsigned HOST_WIDE_INT mh, ml;
4407 int pre_shift, post_shift;
4408 int dummy;
4409 wide_int wd = rtx_mode_t (op1, int_mode);
4410 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4411
4412 if (wi::popcount (wd) == 1)
4413 {
4414 pre_shift = floor_log2 (d);
4415 if (rem_flag)
4416 {
4417 unsigned HOST_WIDE_INT mask
4418 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4419 remainder
4420 = expand_binop (int_mode, and_optab, op0,
4421 gen_int_mode (mask, int_mode),
4422 remainder, 1, methods);
4423 if (remainder)
4424 return gen_lowpart (mode, remainder);
4425 }
4426 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4427 pre_shift, tquotient, 1);
4428 }
4429 else if (size <= HOST_BITS_PER_WIDE_INT)
4430 {
4431 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4432 {
4433 /* Most significant bit of divisor is set; emit an scc
4434 insn. */
4435 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4436 int_mode, 1, 1);
4437 }
4438 else
4439 {
4440 /* Find a suitable multiplier and right shift count
4441 instead of multiplying with D. */
4442
4443 mh = choose_multiplier (d, size, size,
4444 &ml, &post_shift, &dummy);
4445
4446 /* If the suggested multiplier is more than SIZE bits,
4447 we can do better for even divisors, using an
4448 initial right shift. */
4449 if (mh != 0 && (d & 1) == 0)
4450 {
4451 pre_shift = ctz_or_zero (d);
4452 mh = choose_multiplier (d >> pre_shift, size,
4453 size - pre_shift,
4454 &ml, &post_shift, &dummy);
4455 gcc_assert (!mh);
4456 }
4457 else
4458 pre_shift = 0;
4459
4460 if (mh != 0)
4461 {
4462 rtx t1, t2, t3, t4;
4463
4464 if (post_shift - 1 >= BITS_PER_WORD)
4465 goto fail1;
4466
4467 extra_cost
4468 = (shift_cost (speed, int_mode, post_shift - 1)
4469 + shift_cost (speed, int_mode, 1)
4470 + 2 * add_cost (speed, int_mode));
4471 t1 = expmed_mult_highpart
4472 (int_mode, op0, gen_int_mode (ml, int_mode),
4473 NULL_RTX, 1, max_cost - extra_cost);
4474 if (t1 == 0)
4475 goto fail1;
4476 t2 = force_operand (gen_rtx_MINUS (int_mode,
4477 op0, t1),
4478 NULL_RTX);
4479 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4480 t2, 1, NULL_RTX, 1);
4481 t4 = force_operand (gen_rtx_PLUS (int_mode,
4482 t1, t3),
4483 NULL_RTX);
4484 quotient = expand_shift
4485 (RSHIFT_EXPR, int_mode, t4,
4486 post_shift - 1, tquotient, 1);
4487 }
4488 else
4489 {
4490 rtx t1, t2;
4491
4492 if (pre_shift >= BITS_PER_WORD
4493 || post_shift >= BITS_PER_WORD)
4494 goto fail1;
4495
4496 t1 = expand_shift
4497 (RSHIFT_EXPR, int_mode, op0,
4498 pre_shift, NULL_RTX, 1);
4499 extra_cost
4500 = (shift_cost (speed, int_mode, pre_shift)
4501 + shift_cost (speed, int_mode, post_shift));
4502 t2 = expmed_mult_highpart
4503 (int_mode, t1,
4504 gen_int_mode (ml, int_mode),
4505 NULL_RTX, 1, max_cost - extra_cost);
4506 if (t2 == 0)
4507 goto fail1;
4508 quotient = expand_shift
4509 (RSHIFT_EXPR, int_mode, t2,
4510 post_shift, tquotient, 1);
4511 }
4512 }
4513 }
4514 else /* Too wide mode to use tricky code */
4515 break;
4516
4517 insn = get_last_insn ();
4518 if (insn != last)
4519 set_dst_reg_note (insn, REG_EQUAL,
4520 gen_rtx_UDIV (int_mode, op0, op1),
4521 quotient);
4522 }
4523 else /* TRUNC_DIV, signed */
4524 {
4525 unsigned HOST_WIDE_INT ml;
4526 int lgup, post_shift;
4527 rtx mlr;
4528 HOST_WIDE_INT d = INTVAL (op1);
4529 unsigned HOST_WIDE_INT abs_d;
4530
4531 /* Not prepared to handle division/remainder by
4532 0xffffffffffffffff8000000000000000 etc. */
4533 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4534 break;
4535
4536 /* Since d might be INT_MIN, we have to cast to
4537 unsigned HOST_WIDE_INT before negating to avoid
4538 undefined signed overflow. */
4539 abs_d = (d >= 0
4540 ? (unsigned HOST_WIDE_INT) d
4541 : - (unsigned HOST_WIDE_INT) d);
4542
4543 /* n rem d = n rem -d */
4544 if (rem_flag && d < 0)
4545 {
4546 d = abs_d;
4547 op1 = gen_int_mode (abs_d, int_mode);
4548 }
4549
4550 if (d == 1)
4551 quotient = op0;
4552 else if (d == -1)
4553 quotient = expand_unop (int_mode, neg_optab, op0,
4554 tquotient, 0);
4555 else if (size <= HOST_BITS_PER_WIDE_INT
4556 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4557 {
4558 /* This case is not handled correctly below. */
4559 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4560 int_mode, 1, 1);
4561 if (quotient == 0)
4562 goto fail1;
4563 }
4564 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4565 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4566 && (rem_flag
4567 ? smod_pow2_cheap (speed, int_mode)
4568 : sdiv_pow2_cheap (speed, int_mode))
4569 /* We assume that the cheap metric is true if the
4570 optab has an expander for this mode. */
4571 && ((optab_handler ((rem_flag ? smod_optab
4572 : sdiv_optab),
4573 int_mode)
4574 != CODE_FOR_nothing)
4575 || (optab_handler (sdivmod_optab, int_mode)
4576 != CODE_FOR_nothing)))
4577 ;
4578 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4579 {
4580 if (rem_flag)
4581 {
4582 remainder = expand_smod_pow2 (int_mode, op0, d);
4583 if (remainder)
4584 return gen_lowpart (mode, remainder);
4585 }
4586
4587 if (sdiv_pow2_cheap (speed, int_mode)
4588 && ((optab_handler (sdiv_optab, int_mode)
4589 != CODE_FOR_nothing)
4590 || (optab_handler (sdivmod_optab, int_mode)
4591 != CODE_FOR_nothing)))
4592 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4593 int_mode, op0,
4594 gen_int_mode (abs_d,
4595 int_mode),
4596 NULL_RTX, 0);
4597 else
4598 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4599
4600 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4601 negate the quotient. */
4602 if (d < 0)
4603 {
4604 insn = get_last_insn ();
4605 if (insn != last
4606 && abs_d < (HOST_WIDE_INT_1U
4607 << (HOST_BITS_PER_WIDE_INT - 1)))
4608 set_dst_reg_note (insn, REG_EQUAL,
4609 gen_rtx_DIV (int_mode, op0,
4610 gen_int_mode
4611 (abs_d,
4612 int_mode)),
4613 quotient);
4614
4615 quotient = expand_unop (int_mode, neg_optab,
4616 quotient, quotient, 0);
4617 }
4618 }
4619 else if (size <= HOST_BITS_PER_WIDE_INT)
4620 {
4621 choose_multiplier (abs_d, size, size - 1,
4622 &ml, &post_shift, &lgup);
4623 if (ml < HOST_WIDE_INT_1U << (size - 1))
4624 {
4625 rtx t1, t2, t3;
4626
4627 if (post_shift >= BITS_PER_WORD
4628 || size - 1 >= BITS_PER_WORD)
4629 goto fail1;
4630
4631 extra_cost = (shift_cost (speed, int_mode, post_shift)
4632 + shift_cost (speed, int_mode, size - 1)
4633 + add_cost (speed, int_mode));
4634 t1 = expmed_mult_highpart
4635 (int_mode, op0, gen_int_mode (ml, int_mode),
4636 NULL_RTX, 0, max_cost - extra_cost);
4637 if (t1 == 0)
4638 goto fail1;
4639 t2 = expand_shift
4640 (RSHIFT_EXPR, int_mode, t1,
4641 post_shift, NULL_RTX, 0);
4642 t3 = expand_shift
4643 (RSHIFT_EXPR, int_mode, op0,
4644 size - 1, NULL_RTX, 0);
4645 if (d < 0)
4646 quotient
4647 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4648 tquotient);
4649 else
4650 quotient
4651 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4652 tquotient);
4653 }
4654 else
4655 {
4656 rtx t1, t2, t3, t4;
4657
4658 if (post_shift >= BITS_PER_WORD
4659 || size - 1 >= BITS_PER_WORD)
4660 goto fail1;
4661
4662 ml |= HOST_WIDE_INT_M1U << (size - 1);
4663 mlr = gen_int_mode (ml, int_mode);
4664 extra_cost = (shift_cost (speed, int_mode, post_shift)
4665 + shift_cost (speed, int_mode, size - 1)
4666 + 2 * add_cost (speed, int_mode));
4667 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4668 NULL_RTX, 0,
4669 max_cost - extra_cost);
4670 if (t1 == 0)
4671 goto fail1;
4672 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4673 NULL_RTX);
4674 t3 = expand_shift
4675 (RSHIFT_EXPR, int_mode, t2,
4676 post_shift, NULL_RTX, 0);
4677 t4 = expand_shift
4678 (RSHIFT_EXPR, int_mode, op0,
4679 size - 1, NULL_RTX, 0);
4680 if (d < 0)
4681 quotient
4682 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4683 tquotient);
4684 else
4685 quotient
4686 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4687 tquotient);
4688 }
4689 }
4690 else /* Too wide mode to use tricky code */
4691 break;
4692
4693 insn = get_last_insn ();
4694 if (insn != last)
4695 set_dst_reg_note (insn, REG_EQUAL,
4696 gen_rtx_DIV (int_mode, op0, op1),
4697 quotient);
4698 }
4699 break;
4700 }
4701 fail1:
4702 delete_insns_since (last);
4703 break;
4704
4705 case FLOOR_DIV_EXPR:
4706 case FLOOR_MOD_EXPR:
4707 /* We will come here only for signed operations. */
4708 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4709 {
4710 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4711 int size = GET_MODE_BITSIZE (int_mode);
4712 unsigned HOST_WIDE_INT mh, ml;
4713 int pre_shift, lgup, post_shift;
4714 HOST_WIDE_INT d = INTVAL (op1);
4715
4716 if (d > 0)
4717 {
4718 /* We could just as easily deal with negative constants here,
4719 but it does not seem worth the trouble for GCC 2.6. */
4720 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4721 {
4722 pre_shift = floor_log2 (d);
4723 if (rem_flag)
4724 {
4725 unsigned HOST_WIDE_INT mask
4726 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4727 remainder = expand_binop
4728 (int_mode, and_optab, op0,
4729 gen_int_mode (mask, int_mode),
4730 remainder, 0, methods);
4731 if (remainder)
4732 return gen_lowpart (mode, remainder);
4733 }
4734 quotient = expand_shift
4735 (RSHIFT_EXPR, int_mode, op0,
4736 pre_shift, tquotient, 0);
4737 }
4738 else
4739 {
4740 rtx t1, t2, t3, t4;
4741
4742 mh = choose_multiplier (d, size, size - 1,
4743 &ml, &post_shift, &lgup);
4744 gcc_assert (!mh);
4745
4746 if (post_shift < BITS_PER_WORD
4747 && size - 1 < BITS_PER_WORD)
4748 {
4749 t1 = expand_shift
4750 (RSHIFT_EXPR, int_mode, op0,
4751 size - 1, NULL_RTX, 0);
4752 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4753 NULL_RTX, 0, OPTAB_WIDEN);
4754 extra_cost = (shift_cost (speed, int_mode, post_shift)
4755 + shift_cost (speed, int_mode, size - 1)
4756 + 2 * add_cost (speed, int_mode));
4757 t3 = expmed_mult_highpart
4758 (int_mode, t2, gen_int_mode (ml, int_mode),
4759 NULL_RTX, 1, max_cost - extra_cost);
4760 if (t3 != 0)
4761 {
4762 t4 = expand_shift
4763 (RSHIFT_EXPR, int_mode, t3,
4764 post_shift, NULL_RTX, 1);
4765 quotient = expand_binop (int_mode, xor_optab,
4766 t4, t1, tquotient, 0,
4767 OPTAB_WIDEN);
4768 }
4769 }
4770 }
4771 }
4772 else
4773 {
4774 rtx nsign, t1, t2, t3, t4;
4775 t1 = force_operand (gen_rtx_PLUS (int_mode,
4776 op0, constm1_rtx), NULL_RTX);
4777 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4778 0, OPTAB_WIDEN);
4779 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4780 size - 1, NULL_RTX, 0);
4781 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4782 NULL_RTX);
4783 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4784 NULL_RTX, 0);
4785 if (t4)
4786 {
4787 rtx t5;
4788 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4789 NULL_RTX, 0);
4790 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4791 tquotient);
4792 }
4793 }
4794 }
4795
4796 if (quotient != 0)
4797 break;
4798 delete_insns_since (last);
4799
4800 /* Try using an instruction that produces both the quotient and
4801 remainder, using truncation. We can easily compensate the quotient
4802 or remainder to get floor rounding, once we have the remainder.
4803 Notice that we compute also the final remainder value here,
4804 and return the result right away. */
4805 if (target == 0 || GET_MODE (target) != compute_mode)
4806 target = gen_reg_rtx (compute_mode);
4807
4808 if (rem_flag)
4809 {
4810 remainder
4811 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4812 quotient = gen_reg_rtx (compute_mode);
4813 }
4814 else
4815 {
4816 quotient
4817 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4818 remainder = gen_reg_rtx (compute_mode);
4819 }
4820
4821 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4822 quotient, remainder, 0))
4823 {
4824 /* This could be computed with a branch-less sequence.
4825 Save that for later. */
4826 rtx tem;
4827 rtx_code_label *label = gen_label_rtx ();
4828 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4829 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4830 NULL_RTX, 0, OPTAB_WIDEN);
4831 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4832 expand_dec (quotient, const1_rtx);
4833 expand_inc (remainder, op1);
4834 emit_label (label);
4835 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4836 }
4837
4838 /* No luck with division elimination or divmod. Have to do it
4839 by conditionally adjusting op0 *and* the result. */
4840 {
4841 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4842 rtx adjusted_op0;
4843 rtx tem;
4844
4845 quotient = gen_reg_rtx (compute_mode);
4846 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4847 label1 = gen_label_rtx ();
4848 label2 = gen_label_rtx ();
4849 label3 = gen_label_rtx ();
4850 label4 = gen_label_rtx ();
4851 label5 = gen_label_rtx ();
4852 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4853 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4854 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4855 quotient, 0, methods);
4856 if (tem != quotient)
4857 emit_move_insn (quotient, tem);
4858 emit_jump_insn (targetm.gen_jump (label5));
4859 emit_barrier ();
4860 emit_label (label1);
4861 expand_inc (adjusted_op0, const1_rtx);
4862 emit_jump_insn (targetm.gen_jump (label4));
4863 emit_barrier ();
4864 emit_label (label2);
4865 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4866 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4867 quotient, 0, methods);
4868 if (tem != quotient)
4869 emit_move_insn (quotient, tem);
4870 emit_jump_insn (targetm.gen_jump (label5));
4871 emit_barrier ();
4872 emit_label (label3);
4873 expand_dec (adjusted_op0, const1_rtx);
4874 emit_label (label4);
4875 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4876 quotient, 0, methods);
4877 if (tem != quotient)
4878 emit_move_insn (quotient, tem);
4879 expand_dec (quotient, const1_rtx);
4880 emit_label (label5);
4881 }
4882 break;
4883
4884 case CEIL_DIV_EXPR:
4885 case CEIL_MOD_EXPR:
4886 if (unsignedp)
4887 {
4888 if (op1_is_constant
4889 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4890 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4891 || INTVAL (op1) >= 0))
4892 {
4893 scalar_int_mode int_mode
4894 = as_a <scalar_int_mode> (compute_mode);
4895 rtx t1, t2, t3;
4896 unsigned HOST_WIDE_INT d = INTVAL (op1);
4897 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4898 floor_log2 (d), tquotient, 1);
4899 t2 = expand_binop (int_mode, and_optab, op0,
4900 gen_int_mode (d - 1, int_mode),
4901 NULL_RTX, 1, methods);
4902 t3 = gen_reg_rtx (int_mode);
4903 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4904 if (t3 == 0)
4905 {
4906 rtx_code_label *lab;
4907 lab = gen_label_rtx ();
4908 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4909 expand_inc (t1, const1_rtx);
4910 emit_label (lab);
4911 quotient = t1;
4912 }
4913 else
4914 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4915 tquotient);
4916 break;
4917 }
4918
4919 /* Try using an instruction that produces both the quotient and
4920 remainder, using truncation. We can easily compensate the
4921 quotient or remainder to get ceiling rounding, once we have the
4922 remainder. Notice that we compute also the final remainder
4923 value here, and return the result right away. */
4924 if (target == 0 || GET_MODE (target) != compute_mode)
4925 target = gen_reg_rtx (compute_mode);
4926
4927 if (rem_flag)
4928 {
4929 remainder = (REG_P (target)
4930 ? target : gen_reg_rtx (compute_mode));
4931 quotient = gen_reg_rtx (compute_mode);
4932 }
4933 else
4934 {
4935 quotient = (REG_P (target)
4936 ? target : gen_reg_rtx (compute_mode));
4937 remainder = gen_reg_rtx (compute_mode);
4938 }
4939
4940 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4941 remainder, 1))
4942 {
4943 /* This could be computed with a branch-less sequence.
4944 Save that for later. */
4945 rtx_code_label *label = gen_label_rtx ();
4946 do_cmp_and_jump (remainder, const0_rtx, EQ,
4947 compute_mode, label);
4948 expand_inc (quotient, const1_rtx);
4949 expand_dec (remainder, op1);
4950 emit_label (label);
4951 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4952 }
4953
4954 /* No luck with division elimination or divmod. Have to do it
4955 by conditionally adjusting op0 *and* the result. */
4956 {
4957 rtx_code_label *label1, *label2;
4958 rtx adjusted_op0, tem;
4959
4960 quotient = gen_reg_rtx (compute_mode);
4961 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4962 label1 = gen_label_rtx ();
4963 label2 = gen_label_rtx ();
4964 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4965 compute_mode, label1);
4966 emit_move_insn (quotient, const0_rtx);
4967 emit_jump_insn (targetm.gen_jump (label2));
4968 emit_barrier ();
4969 emit_label (label1);
4970 expand_dec (adjusted_op0, const1_rtx);
4971 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4972 quotient, 1, methods);
4973 if (tem != quotient)
4974 emit_move_insn (quotient, tem);
4975 expand_inc (quotient, const1_rtx);
4976 emit_label (label2);
4977 }
4978 }
4979 else /* signed */
4980 {
4981 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4982 && INTVAL (op1) >= 0)
4983 {
4984 /* This is extremely similar to the code for the unsigned case
4985 above. For 2.7 we should merge these variants, but for
4986 2.6.1 I don't want to touch the code for unsigned since that
4987 gets used in C. The signed case will only be used by other
4988 languages (Ada). */
4989
4990 rtx t1, t2, t3;
4991 unsigned HOST_WIDE_INT d = INTVAL (op1);
4992 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4993 floor_log2 (d), tquotient, 0);
4994 t2 = expand_binop (compute_mode, and_optab, op0,
4995 gen_int_mode (d - 1, compute_mode),
4996 NULL_RTX, 1, methods);
4997 t3 = gen_reg_rtx (compute_mode);
4998 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4999 compute_mode, 1, 1);
5000 if (t3 == 0)
5001 {
5002 rtx_code_label *lab;
5003 lab = gen_label_rtx ();
5004 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5005 expand_inc (t1, const1_rtx);
5006 emit_label (lab);
5007 quotient = t1;
5008 }
5009 else
5010 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5011 t1, t3),
5012 tquotient);
5013 break;
5014 }
5015
5016 /* Try using an instruction that produces both the quotient and
5017 remainder, using truncation. We can easily compensate the
5018 quotient or remainder to get ceiling rounding, once we have the
5019 remainder. Notice that we also compute the final remainder
5020 value here, and return the result right away. */
5021 if (target == 0 || GET_MODE (target) != compute_mode)
5022 target = gen_reg_rtx (compute_mode);
5023 if (rem_flag)
5024 {
5025 remainder = (REG_P (target)
5026 ? target : gen_reg_rtx (compute_mode));
5027 quotient = gen_reg_rtx (compute_mode);
5028 }
5029 else
5030 {
5031 quotient = (REG_P (target)
5032 ? target : gen_reg_rtx (compute_mode));
5033 remainder = gen_reg_rtx (compute_mode);
5034 }
5035
5036 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5037 remainder, 0))
5038 {
5039 /* This could be computed with a branch-less sequence.
5040 Save that for later. */
5041 rtx tem;
5042 rtx_code_label *label = gen_label_rtx ();
5043 do_cmp_and_jump (remainder, const0_rtx, EQ,
5044 compute_mode, label);
5045 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5046 NULL_RTX, 0, OPTAB_WIDEN);
5047 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5048 expand_inc (quotient, const1_rtx);
5049 expand_dec (remainder, op1);
5050 emit_label (label);
5051 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5052 }
5053
5054 /* No luck with division elimination or divmod. Have to do it
5055 by conditionally adjusting op0 *and* the result. */
5056 {
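/* Case analysis on the signs: when op0 and op1 are both strictly
   positive or both strictly negative, the true quotient is positive,
   so op0 is first moved one unit toward zero (labels 1 and 3), divided,
   and the quotient incremented (label 4); otherwise the truncating
   quotient is non-positive and already equals the ceiling.  */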
5057 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5058 rtx adjusted_op0;
5059 rtx tem;
5060
5061 quotient = gen_reg_rtx (compute_mode);
5062 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5063 label1 = gen_label_rtx ();
5064 label2 = gen_label_rtx ();
5065 label3 = gen_label_rtx ();
5066 label4 = gen_label_rtx ();
5067 label5 = gen_label_rtx ();
5068 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5069 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5070 compute_mode, label1);
5071 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5072 quotient, 0, methods);
5073 if (tem != quotient)
5074 emit_move_insn (quotient, tem);
5075 emit_jump_insn (targetm.gen_jump (label5));
5076 emit_barrier ();
5077 emit_label (label1);
5078 expand_dec (adjusted_op0, const1_rtx);
5079 emit_jump_insn (targetm.gen_jump (label4));
5080 emit_barrier ();
5081 emit_label (label2);
5082 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5083 compute_mode, label3);
5084 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5085 quotient, 0, methods);
5086 if (tem != quotient)
5087 emit_move_insn (quotient, tem);
5088 emit_jump_insn (targetm.gen_jump (label5));
5089 emit_barrier ();
5090 emit_label (label3);
5091 expand_inc (adjusted_op0, const1_rtx);
5092 emit_label (label4);
5093 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5094 quotient, 0, methods);
5095 if (tem != quotient)
5096 emit_move_insn (quotient, tem);
5097 expand_inc (quotient, const1_rtx);
5098 emit_label (label5);
5099 }
5100 }
5101 break;
5102
5103 case EXACT_DIV_EXPR:
5104 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5105 {
5106 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5107 int size = GET_MODE_BITSIZE (int_mode);
5108 HOST_WIDE_INT d = INTVAL (op1);
5109 unsigned HOST_WIDE_INT ml;
5110 int pre_shift;
5111 rtx t1;
5112
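/* An exact division by an odd D can be done as a multiplication by the
   multiplicative inverse of D modulo 2^size; any factor of two in D is
   removed first by the shift. E.g. in a 32-bit mode the inverse of 5 is
   0xCCCCCCCD, and 35 * 0xCCCCCCCD == 7 (mod 2^32).  */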
5113 pre_shift = ctz_or_zero (d);
5114 ml = invert_mod2n (d >> pre_shift, size);
5115 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5116 pre_shift, NULL_RTX, unsignedp);
5117 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5118 NULL_RTX, 1);
5119
5120 insn = get_last_insn ();
5121 set_dst_reg_note (insn, REG_EQUAL,
5122 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5123 int_mode, op0, op1),
5124 quotient);
5125 }
5126 break;
5127
5128 case ROUND_DIV_EXPR:
5129 case ROUND_MOD_EXPR:
5130 if (unsignedp)
5131 {
5132 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5133 rtx tem;
5134 rtx_code_label *label;
5135 label = gen_label_rtx ();
5136 quotient = gen_reg_rtx (int_mode);
5137 remainder = gen_reg_rtx (int_mode);
5138 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5139 {
5140 rtx tem;
5141 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5142 quotient, 1, methods);
5143 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5144 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5145 remainder, 1, methods);
5146 }
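/* Round-to-nearest adjustment: the quotient is incremented (and the
   remainder decreased by op1) when remainder > (op1 - 1) / 2. E.g. 8/3
   gives quotient 2 and remainder 2 > 1, so the result is rounded up to
   3, while 7/3 (remainder 1) stays at 2.  */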
5147 tem = plus_constant (int_mode, op1, -1);
5148 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5149 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5150 expand_inc (quotient, const1_rtx);
5151 expand_dec (remainder, op1);
5152 emit_label (label);
5153 }
5154 else
5155 {
5156 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5157 int size = GET_MODE_BITSIZE (int_mode);
5158 rtx abs_rem, abs_op1, tem, mask;
5159 rtx_code_label *label;
5160 label = gen_label_rtx ();
5161 quotient = gen_reg_rtx (int_mode);
5162 remainder = gen_reg_rtx (int_mode);
5163 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5164 {
5165 rtx tem;
5166 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5167 quotient, 0, methods);
5168 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5169 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5170 remainder, 0, methods);
5171 }
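/* MASK below is all ones when op0 and op1 have opposite signs and zero
   otherwise, so (mask ^ 1) - mask is +1 or -1 and (mask ^ op1) - mask is
   op1 or -op1; when 2 * |remainder| >= |op1| the quotient is moved one
   unit away from zero and the remainder compensated accordingly.  */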
5172 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5173 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5174 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5175 1, NULL_RTX, 1);
5176 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5177 tem = expand_binop (int_mode, xor_optab, op0, op1,
5178 NULL_RTX, 0, OPTAB_WIDEN);
5179 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5180 size - 1, NULL_RTX, 0);
5181 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5182 NULL_RTX, 0, OPTAB_WIDEN);
5183 tem = expand_binop (int_mode, sub_optab, tem, mask,
5184 NULL_RTX, 0, OPTAB_WIDEN);
5185 expand_inc (quotient, tem);
5186 tem = expand_binop (int_mode, xor_optab, mask, op1,
5187 NULL_RTX, 0, OPTAB_WIDEN);
5188 tem = expand_binop (int_mode, sub_optab, tem, mask,
5189 NULL_RTX, 0, OPTAB_WIDEN);
5190 expand_dec (remainder, tem);
5191 emit_label (label);
5192 }
5193 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5194
5195 default:
5196 gcc_unreachable ();
5197 }
5198
5199 if (quotient == 0)
5200 {
5201 if (target && GET_MODE (target) != compute_mode)
5202 target = 0;
5203
5204 if (rem_flag)
5205 {
5206 /* Try to produce the remainder without producing the quotient.
5207 If we seem to have a divmod pattern that does not require widening,
5208 don't try widening here. We should really have a WIDEN argument
5209 to expand_twoval_binop, since what we'd really like to do here is
5210 1) try a mod insn in compute_mode
5211 2) try a divmod insn in compute_mode
5212 3) try a div insn in compute_mode and multiply-subtract to get
5213 remainder
5214 4) try the same things with widening allowed. */
5215 remainder
5216 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5217 op0, op1, target,
5218 unsignedp,
5219 ((optab_handler (optab2, compute_mode)
5220 != CODE_FOR_nothing)
5221 ? OPTAB_DIRECT : OPTAB_WIDEN));
5222 if (remainder == 0)
5223 {
5224 /* No luck there. Can we do remainder and divide at once
5225 without a library call? */
5226 remainder = gen_reg_rtx (compute_mode);
5227 if (! expand_twoval_binop ((unsignedp
5228 ? udivmod_optab
5229 : sdivmod_optab),
5230 op0, op1,
5231 NULL_RTX, remainder, unsignedp))
5232 remainder = 0;
5233 }
5234
5235 if (remainder)
5236 return gen_lowpart (mode, remainder);
5237 }
5238
5239 /* Produce the quotient. Try a quotient insn, but not a library call.
5240 If we have a divmod in this mode, use it in preference to widening
5241 the div (for this test we assume it will not fail). Note that optab2
5242 is set to whichever of the two optabs the call below will use. */
5243 quotient
5244 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5245 op0, op1, rem_flag ? NULL_RTX : target,
5246 unsignedp,
5247 ((optab_handler (optab2, compute_mode)
5248 != CODE_FOR_nothing)
5249 ? OPTAB_DIRECT : OPTAB_WIDEN));
5250
5251 if (quotient == 0)
5252 {
5253 /* No luck there. Try a quotient-and-remainder insn,
5254 keeping the quotient alone. */
5255 quotient = gen_reg_rtx (compute_mode);
5256 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5257 op0, op1,
5258 quotient, NULL_RTX, unsignedp))
5259 {
5260 quotient = 0;
5261 if (! rem_flag)
5262 /* Still no luck. If we are not computing the remainder,
5263 use a library call for the quotient. */
5264 quotient = sign_expand_binop (compute_mode,
5265 udiv_optab, sdiv_optab,
5266 op0, op1, target,
5267 unsignedp, methods);
5268 }
5269 }
5270 }
5271
5272 if (rem_flag)
5273 {
5274 if (target && GET_MODE (target) != compute_mode)
5275 target = 0;
5276
5277 if (quotient == 0)
5278 {
5279 /* No divide instruction either. Use library for remainder. */
5280 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5281 op0, op1, target,
5282 unsignedp, methods);
5283 /* No remainder function. Try a quotient-and-remainder
5284 function, keeping the remainder. */
5285 if (!remainder
5286 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5287 {
5288 remainder = gen_reg_rtx (compute_mode);
5289 if (!expand_twoval_binop_libfunc
5290 (unsignedp ? udivmod_optab : sdivmod_optab,
5291 op0, op1,
5292 NULL_RTX, remainder,
5293 unsignedp ? UMOD : MOD))
5294 remainder = NULL_RTX;
5295 }
5296 }
5297 else
5298 {
5299 /* We divided. Now finish doing X - Y * (X / Y). */
5300 remainder = expand_mult (compute_mode, quotient, op1,
5301 NULL_RTX, unsignedp);
5302 remainder = expand_binop (compute_mode, sub_optab, op0,
5303 remainder, target, unsignedp,
5304 methods);
5305 }
5306 }
5307
5308 if (methods != OPTAB_LIB_WIDEN
5309 && (rem_flag ? remainder : quotient) == NULL_RTX)
5310 return NULL_RTX;
5311
5312 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5313 }
5314 \f
5315 /* Return a tree node with data type TYPE, describing the value of X.
5316 Usually this is a VAR_DECL, if there is no obvious better choice.
5317 X may be an expression; however, we only support those expressions
5318 generated by loop.c. */
5319
5320 tree
5321 make_tree (tree type, rtx x)
5322 {
5323 tree t;
5324
5325 switch (GET_CODE (x))
5326 {
5327 case CONST_INT:
5328 case CONST_WIDE_INT:
5329 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5330 return t;
5331
5332 case CONST_DOUBLE:
5333 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5334 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5335 t = wide_int_to_tree (type,
5336 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5337 HOST_BITS_PER_WIDE_INT * 2));
5338 else
5339 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5340
5341 return t;
5342
5343 case CONST_VECTOR:
5344 {
5345 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5346 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5347 tree itype = TREE_TYPE (type);
5348
5349 /* Build a tree with vector elements. */
5350 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5351 unsigned int count = elts.encoded_nelts ();
5352 for (unsigned int i = 0; i < count; ++i)
5353 {
5354 rtx elt = CONST_VECTOR_ELT (x, i);
5355 elts.quick_push (make_tree (itype, elt));
5356 }
5357
5358 return elts.build ();
5359 }
5360
5361 case PLUS:
5362 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5363 make_tree (type, XEXP (x, 1)));
5364
5365 case MINUS:
5366 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5367 make_tree (type, XEXP (x, 1)));
5368
5369 case NEG:
5370 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5371
5372 case MULT:
5373 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5374 make_tree (type, XEXP (x, 1)));
5375
5376 case ASHIFT:
5377 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5378 make_tree (type, XEXP (x, 1)));
5379
5380 case LSHIFTRT:
5381 t = unsigned_type_for (type);
5382 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5383 make_tree (t, XEXP (x, 0)),
5384 make_tree (type, XEXP (x, 1))));
5385
5386 case ASHIFTRT:
5387 t = signed_type_for (type);
5388 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5389 make_tree (t, XEXP (x, 0)),
5390 make_tree (type, XEXP (x, 1))));
5391
5392 case DIV:
5393 if (TREE_CODE (type) != REAL_TYPE)
5394 t = signed_type_for (type);
5395 else
5396 t = type;
5397
5398 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5399 make_tree (t, XEXP (x, 0)),
5400 make_tree (t, XEXP (x, 1))));
5401 case UDIV:
5402 t = unsigned_type_for (type);
5403 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5404 make_tree (t, XEXP (x, 0)),
5405 make_tree (t, XEXP (x, 1))));
5406
5407 case SIGN_EXTEND:
5408 case ZERO_EXTEND:
5409 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5410 GET_CODE (x) == ZERO_EXTEND);
5411 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5412
5413 case CONST:
5414 return make_tree (type, XEXP (x, 0));
5415
5416 case SYMBOL_REF:
5417 t = SYMBOL_REF_DECL (x);
5418 if (t)
5419 return fold_convert (type, build_fold_addr_expr (t));
5420 /* fall through. */
5421
5422 default:
5423 if (CONST_POLY_INT_P (x))
5424 return wide_int_to_tree (type, const_poly_int_value (x));
5425
5426 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5427
5428 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5429 address mode to pointer mode. */
5430 if (POINTER_TYPE_P (type))
5431 x = convert_memory_address_addr_space
5432 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5433
5434 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5435 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5436 t->decl_with_rtl.rtl = x;
5437
5438 return t;
5439 }
5440 }
5441 \f
5442 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5443 and returning TARGET.
5444
5445 If TARGET is 0, a pseudo-register or constant is returned. */
5446
5447 rtx
5448 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5449 {
5450 rtx tem = 0;
5451
5452 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5453 tem = simplify_binary_operation (AND, mode, op0, op1);
5454 if (tem == 0)
5455 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5456
5457 if (target == 0)
5458 target = tem;
5459 else if (tem != target)
5460 emit_move_insn (target, tem);
5461 return target;
5462 }
5463
5464 /* Helper function for emit_store_flag. */
5465 rtx
5466 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5467 machine_mode mode, machine_mode compare_mode,
5468 int unsignedp, rtx x, rtx y, int normalizep,
5469 machine_mode target_mode)
5470 {
5471 class expand_operand ops[4];
5472 rtx op0, comparison, subtarget;
5473 rtx_insn *last;
5474 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5475 scalar_int_mode int_target_mode;
5476
5477 last = get_last_insn ();
5478 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5479 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5480 if (!x || !y)
5481 {
5482 delete_insns_since (last);
5483 return NULL_RTX;
5484 }
5485
5486 if (target_mode == VOIDmode)
5487 int_target_mode = result_mode;
5488 else
5489 int_target_mode = as_a <scalar_int_mode> (target_mode);
5490 if (!target)
5491 target = gen_reg_rtx (int_target_mode);
5492
5493 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5494
5495 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5496 create_fixed_operand (&ops[1], comparison);
5497 create_fixed_operand (&ops[2], x);
5498 create_fixed_operand (&ops[3], y);
5499 if (!maybe_expand_insn (icode, 4, ops))
5500 {
5501 delete_insns_since (last);
5502 return NULL_RTX;
5503 }
5504 subtarget = ops[0].value;
5505
5506 /* If we are converting to a wider mode, first convert to
5507 INT_TARGET_MODE, then normalize. This produces better combining
5508 opportunities on machines that have a SIGN_EXTRACT when we are
5509 testing a single bit. This mostly benefits the 68k.
5510
5511 If STORE_FLAG_VALUE does not have the sign bit set when
5512 interpreted in MODE, we can do this conversion as unsigned, which
5513 is usually more efficient. */
5514 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5515 {
5516 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5517 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5518
5519 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5520 convert_move (target, subtarget, unsignedp);
5521
5522 op0 = target;
5523 result_mode = int_target_mode;
5524 }
5525 else
5526 op0 = subtarget;
5527
5528 /* If we want to keep subexpressions around, don't reuse our last
5529 target. */
5530 if (optimize)
5531 subtarget = 0;
5532
5533 /* Now normalize to the proper value in MODE. Sometimes we don't
5534 have to do anything. */
5535 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5536 ;
5537 /* STORE_FLAG_VALUE might be the most negative number, so write
5538 the comparison this way to avoid a compile-time warning. */
5539 else if (- normalizep == STORE_FLAG_VALUE)
5540 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5541
5542 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5543 it hard to use a value of just the sign bit due to ANSI integer
5544 constant typing rules. */
5545 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5546 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5547 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5548 normalizep == 1);
5549 else
5550 {
5551 gcc_assert (STORE_FLAG_VALUE & 1);
5552
5553 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5554 if (normalizep == -1)
5555 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5556 }
5557
5558 /* If we were converting to a smaller mode, do the conversion now. */
5559 if (int_target_mode != result_mode)
5560 {
5561 convert_move (target, op0, 0);
5562 return target;
5563 }
5564 else
5565 return op0;
5566 }
5567
5568
5569 /* A subroutine of emit_store_flag only including "tricks" that do not
5570 need a recursive call. These are kept separate to avoid infinite
5571 loops. */
5572
5573 static rtx
5574 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5575 machine_mode mode, int unsignedp, int normalizep,
5576 machine_mode target_mode)
5577 {
5578 rtx subtarget;
5579 enum insn_code icode;
5580 machine_mode compare_mode;
5581 enum mode_class mclass;
5582 enum rtx_code scode;
5583
5584 if (unsignedp)
5585 code = unsigned_condition (code);
5586 scode = swap_condition (code);
5587
5588 /* If one operand is constant, make it the second one. Only do this
5589 if the other operand is not constant as well. */
5590
5591 if (swap_commutative_operands_p (op0, op1))
5592 {
5593 std::swap (op0, op1);
5594 code = swap_condition (code);
5595 }
5596
5597 if (mode == VOIDmode)
5598 mode = GET_MODE (op0);
5599
5600 if (CONST_SCALAR_INT_P (op1))
5601 canonicalize_comparison (mode, &code, &op1);
5602
5603 /* For some comparisons with 1 and -1, we can convert this to
5604 comparisons with zero. This will often produce more opportunities for
5605 store-flag insns. */
5606
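/* E.g. x < 1 becomes x <= 0 and, for unsigned comparisons, x >= 1
   becomes x != 0 while x < 1 becomes x == 0.  */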
5607 switch (code)
5608 {
5609 case LT:
5610 if (op1 == const1_rtx)
5611 op1 = const0_rtx, code = LE;
5612 break;
5613 case LE:
5614 if (op1 == constm1_rtx)
5615 op1 = const0_rtx, code = LT;
5616 break;
5617 case GE:
5618 if (op1 == const1_rtx)
5619 op1 = const0_rtx, code = GT;
5620 break;
5621 case GT:
5622 if (op1 == constm1_rtx)
5623 op1 = const0_rtx, code = GE;
5624 break;
5625 case GEU:
5626 if (op1 == const1_rtx)
5627 op1 = const0_rtx, code = NE;
5628 break;
5629 case LTU:
5630 if (op1 == const1_rtx)
5631 op1 = const0_rtx, code = EQ;
5632 break;
5633 default:
5634 break;
5635 }
5636
5637 /* If we are comparing a double-word integer with zero or -1, we can
5638 convert the comparison into one involving a single word. */
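/* E.g. on a target with 32-bit words, a DImode test against zero ORs the
   two word halves (a test against -1 ANDs them) and compares the single
   word, while a DImode sign test only needs the high word.  */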
5639 scalar_int_mode int_mode;
5640 if (is_int_mode (mode, &int_mode)
5641 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5642 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5643 {
5644 rtx tem;
5645 if ((code == EQ || code == NE)
5646 && (op1 == const0_rtx || op1 == constm1_rtx))
5647 {
5648 rtx op00, op01;
5649
5650 /* Do a logical OR or AND of the two words and compare the
5651 result. */
5652 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5653 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5654 tem = expand_binop (word_mode,
5655 op1 == const0_rtx ? ior_optab : and_optab,
5656 op00, op01, NULL_RTX, unsignedp,
5657 OPTAB_DIRECT);
5658
5659 if (tem != 0)
5660 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5661 unsignedp, normalizep);
5662 }
5663 else if ((code == LT || code == GE) && op1 == const0_rtx)
5664 {
5665 rtx op0h;
5666
5667 /* If testing the sign bit, can just test on high word. */
5668 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5669 subreg_highpart_offset (word_mode,
5670 int_mode));
5671 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5672 unsignedp, normalizep);
5673 }
5674 else
5675 tem = NULL_RTX;
5676
5677 if (tem)
5678 {
5679 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5680 return tem;
5681 if (!target)
5682 target = gen_reg_rtx (target_mode);
5683
5684 convert_move (target, tem,
5685 !val_signbit_known_set_p (word_mode,
5686 (normalizep ? normalizep
5687 : STORE_FLAG_VALUE)));
5688 return target;
5689 }
5690 }
5691
5692 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5693 complement of A (for GE) and shifting the sign bit to the low bit. */
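/* E.g. in a 32-bit mode, A < 0 as a 0/1 value is just a logical right
   shift of the sign bit down to bit 0 ((unsigned) A >> 31); for a 0/-1
   value an arithmetic shift is used instead, and A >= 0 complements A
   first.  */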
5694 if (op1 == const0_rtx && (code == LT || code == GE)
5695 && is_int_mode (mode, &int_mode)
5696 && (normalizep || STORE_FLAG_VALUE == 1
5697 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5698 {
5699 scalar_int_mode int_target_mode;
5700 subtarget = target;
5701
5702 if (!target)
5703 int_target_mode = int_mode;
5704 else
5705 {
5706 /* If the result is to be wider than OP0, it is best to convert it
5707 first. If it is to be narrower, it is *incorrect* to convert it
5708 first. */
5709 int_target_mode = as_a <scalar_int_mode> (target_mode);
5710 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5711 {
5712 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5713 int_mode = int_target_mode;
5714 }
5715 }
5716
5717 if (int_target_mode != int_mode)
5718 subtarget = 0;
5719
5720 if (code == GE)
5721 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5722 ((STORE_FLAG_VALUE == 1 || normalizep)
5723 ? 0 : subtarget), 0);
5724
5725 if (STORE_FLAG_VALUE == 1 || normalizep)
5726 /* If we are supposed to produce a 0/1 value, we want to do
5727 a logical shift from the sign bit to the low-order bit; for
5728 a -1/0 value, we do an arithmetic shift. */
5729 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5730 GET_MODE_BITSIZE (int_mode) - 1,
5731 subtarget, normalizep != -1);
5732
5733 if (int_mode != int_target_mode)
5734 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5735
5736 return op0;
5737 }
5738
5739 mclass = GET_MODE_CLASS (mode);
5740 FOR_EACH_MODE_FROM (compare_mode, mode)
5741 {
5742 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5743 icode = optab_handler (cstore_optab, optab_mode);
5744 if (icode != CODE_FOR_nothing)
5745 {
5746 do_pending_stack_adjust ();
5747 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5748 unsignedp, op0, op1, normalizep, target_mode);
5749 if (tem)
5750 return tem;
5751
5752 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5753 {
5754 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5755 unsignedp, op1, op0, normalizep, target_mode);
5756 if (tem)
5757 return tem;
5758 }
5759 break;
5760 }
5761 }
5762
5763 return 0;
5764 }
5765
5766 /* Subroutine of emit_store_flag that handles cases in which the operands
5767 are scalar integers. SUBTARGET is the target to use for temporary
5768 operations and TRUEVAL is the value to store when the condition is
5769 true. All other arguments are as for emit_store_flag. */
5770
5771 rtx
5772 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5773 rtx op1, scalar_int_mode mode, int unsignedp,
5774 int normalizep, rtx trueval)
5775 {
5776 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5777 rtx_insn *last = get_last_insn ();
5778
5779 /* If this is an equality comparison of integers, we can try to exclusive-or
5780 (or subtract) the two operands and use a recursive call to try the
5781 comparison with zero. Don't do any of these cases if branches are
5782 very cheap. */
5783
5784 if ((code == EQ || code == NE) && op1 != const0_rtx)
5785 {
5786 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5787 OPTAB_WIDEN);
5788
5789 if (tem == 0)
5790 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5791 OPTAB_WIDEN);
5792 if (tem != 0)
5793 tem = emit_store_flag (target, code, tem, const0_rtx,
5794 mode, unsignedp, normalizep);
5795 if (tem != 0)
5796 return tem;
5797
5798 delete_insns_since (last);
5799 }
5800
5801 /* For integer comparisons, try the reverse comparison. However, for
5802 small X, and if we'd have to extend anyway, implementing "X != 0"
5803 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5804 rtx_code rcode = reverse_condition (code);
5805 if (can_compare_p (rcode, mode, ccp_store_flag)
5806 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5807 && code == NE
5808 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5809 && op1 == const0_rtx))
5810 {
5811 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5812 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5813
5814 /* Again, for the reverse comparison, use either an addition or a XOR. */
5815 if (want_add
5816 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5817 optimize_insn_for_speed_p ()) == 0)
5818 {
5819 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5820 STORE_FLAG_VALUE, target_mode);
5821 if (tem != 0)
5822 tem = expand_binop (target_mode, add_optab, tem,
5823 gen_int_mode (normalizep, target_mode),
5824 target, 0, OPTAB_WIDEN);
5825 if (tem != 0)
5826 return tem;
5827 }
5828 else if (!want_add
5829 && rtx_cost (trueval, mode, XOR, 1,
5830 optimize_insn_for_speed_p ()) == 0)
5831 {
5832 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5833 normalizep, target_mode);
5834 if (tem != 0)
5835 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5836 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5837 if (tem != 0)
5838 return tem;
5839 }
5840
5841 delete_insns_since (last);
5842 }
5843
5844 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5845 the constant zero. Reject all other comparisons at this point. Only
5846 do LE and GT if branches are expensive since they are expensive on
5847 2-operand machines. */
5848
5849 if (op1 != const0_rtx
5850 || (code != EQ && code != NE
5851 && (BRANCH_COST (optimize_insn_for_speed_p (),
5852 false) <= 1 || (code != LE && code != GT))))
5853 return 0;
5854
5855 /* Try to put the result of the comparison in the sign bit. Assume we can't
5856 do the necessary operation below. */
5857
5858 rtx tem = 0;
5859
5860 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5861 the sign bit set. */
5862
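/* E.g. A == 0 gives 0 | -1, which is negative; A == 3 gives 3 | 2 == 3,
   which is not; and any negative A keeps its sign bit through the OR, so
   the sign bit of (A | (A - 1)) is exactly the A <= 0 result.  */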
5863 if (code == LE)
5864 {
5865 /* This is destructive, so SUBTARGET can't be OP0. */
5866 if (rtx_equal_p (subtarget, op0))
5867 subtarget = 0;
5868
5869 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5870 OPTAB_WIDEN);
5871 if (tem)
5872 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5873 OPTAB_WIDEN);
5874 }
5875
5876 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5877 number of bits in the mode of OP0, minus one. */
5878
5879 if (code == GT)
5880 {
5881 if (rtx_equal_p (subtarget, op0))
5882 subtarget = 0;
5883
5884 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5885 GET_MODE_BITSIZE (mode) - 1,
5886 subtarget, 0);
5887 if (tem)
5888 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5889 OPTAB_WIDEN);
5890 }
5891
5892 if (code == EQ || code == NE)
5893 {
5894 /* For EQ or NE, one way to do the comparison is to apply an operation
5895 that converts the operand into a positive number if it is nonzero
5896 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5897 for NE we negate. This puts the result in the sign bit. Then we
5898 normalize with a shift, if needed.
5899
5900 Two operations that can do the above actions are ABS and FFS, so try
5901 them. If that doesn't work, and MODE is smaller than a full word,
5902 we can use zero-extension to the wider mode (an unsigned conversion)
5903 as the operation. */
5904
5905 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5906 that is compensated by the subsequent overflow when subtracting
5907 one / negating. */
5908
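/* E.g. abs (A) - 1 is negative only for A == 0, and - abs (A) is negative
   only for A != 0; as noted above, even the INT_MIN overflow in abs leaves
   the sign bit on the correct side in both cases.  */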
5909 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5910 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5911 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5912 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5913 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5914 {
5915 tem = convert_modes (word_mode, mode, op0, 1);
5916 mode = word_mode;
5917 }
5918
5919 if (tem != 0)
5920 {
5921 if (code == EQ)
5922 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5923 0, OPTAB_WIDEN);
5924 else
5925 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5926 }
5927
5928 /* If we couldn't do it that way, for NE we can "or" the two's complement
5929 of the value with itself. For EQ, we take the one's complement of
5930 that "or", which is an extra insn, so we only handle EQ if branches
5931 are expensive. */
5932
5933 if (tem == 0
5934 && (code == NE
5935 || BRANCH_COST (optimize_insn_for_speed_p (),
5936 false) > 1))
5937 {
5938 if (rtx_equal_p (subtarget, op0))
5939 subtarget = 0;
5940
5941 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5942 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5943 OPTAB_WIDEN);
5944
5945 if (tem && code == EQ)
5946 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5947 }
5948 }
5949
5950 if (tem && normalizep)
5951 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5952 GET_MODE_BITSIZE (mode) - 1,
5953 subtarget, normalizep == 1);
5954
5955 if (tem)
5956 {
5957 if (!target)
5958 ;
5959 else if (GET_MODE (tem) != target_mode)
5960 {
5961 convert_move (target, tem, 0);
5962 tem = target;
5963 }
5964 else if (!subtarget)
5965 {
5966 emit_move_insn (target, tem);
5967 tem = target;
5968 }
5969 }
5970 else
5971 delete_insns_since (last);
5972
5973 return tem;
5974 }
5975
5976 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5977 and storing in TARGET. Normally return TARGET.
5978 Return 0 if that cannot be done.
5979
5980 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5981 it is VOIDmode, they cannot both be CONST_INT.
5982
5983 UNSIGNEDP is for the case where we have to widen the operands
5984 to perform the operation. It says to use zero-extension.
5985
5986 NORMALIZEP is 1 if we should convert the result to be either zero
5987 or one. NORMALIZEP is -1 if we should convert the result to be
5988 either zero or -1. If NORMALIZEP is zero, the result will be left
5989 "raw" out of the scc insn. */
5990
5991 rtx
5992 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5993 machine_mode mode, int unsignedp, int normalizep)
5994 {
5995 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5996 enum rtx_code rcode;
5997 rtx subtarget;
5998 rtx tem, trueval;
5999 rtx_insn *last;
6000
6001 /* If we compare constants, we shouldn't use a store-flag operation,
6002 but a constant load. We can get there via the vanilla route that
6003 usually generates a compare-branch sequence, but will in this case
6004 fold the comparison to a constant, and thus elide the branch. */
6005 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6006 return NULL_RTX;
6007
6008 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6009 target_mode);
6010 if (tem)
6011 return tem;
6012
6013 /* If we reached here, we can't do this with a scc insn, however there
6014 are some comparisons that can be done in other ways. Don't do any
6015 of these cases if branches are very cheap. */
6016 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6017 return 0;
6018
6019 /* See what we need to return. We can only return a 1, -1, or the
6020 sign bit. */
6021
6022 if (normalizep == 0)
6023 {
6024 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6025 normalizep = STORE_FLAG_VALUE;
6026
6027 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6028 ;
6029 else
6030 return 0;
6031 }
6032
6033 last = get_last_insn ();
6034
6035 /* If optimizing, use different pseudo registers for each insn, instead
6036 of reusing the same pseudo. This leads to better CSE, but slows
6037 down the compiler, since there are more pseudos. */
6038 subtarget = (!optimize
6039 && (target_mode == mode)) ? target : NULL_RTX;
6040 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6041
6042 /* For floating-point comparisons, try the reverse comparison or try
6043 changing the "orderedness" of the comparison. */
6044 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6045 {
6046 enum rtx_code first_code;
6047 bool and_them;
6048
6049 rcode = reverse_condition_maybe_unordered (code);
6050 if (can_compare_p (rcode, mode, ccp_store_flag)
6051 && (code == ORDERED || code == UNORDERED
6052 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6053 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6054 {
6055 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6056 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6057
6058 /* For the reverse comparison, use either an addition or a XOR. */
6059 if (want_add
6060 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6061 optimize_insn_for_speed_p ()) == 0)
6062 {
6063 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6064 STORE_FLAG_VALUE, target_mode);
6065 if (tem)
6066 return expand_binop (target_mode, add_optab, tem,
6067 gen_int_mode (normalizep, target_mode),
6068 target, 0, OPTAB_WIDEN);
6069 }
6070 else if (!want_add
6071 && rtx_cost (trueval, mode, XOR, 1,
6072 optimize_insn_for_speed_p ()) == 0)
6073 {
6074 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6075 normalizep, target_mode);
6076 if (tem)
6077 return expand_binop (target_mode, xor_optab, tem, trueval,
6078 target, INTVAL (trueval) >= 0,
6079 OPTAB_WIDEN);
6080 }
6081 }
6082
6083 delete_insns_since (last);
6084
6085 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6086 if (code == ORDERED || code == UNORDERED)
6087 return 0;
6088
6089 and_them = split_comparison (code, mode, &first_code, &code);
6090
6091 /* If there are no NaNs, the first comparison should always fall through.
6092 Effectively change the comparison to the other one. */
6093 if (!HONOR_NANS (mode))
6094 {
6095 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6096 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6097 target_mode);
6098 }
6099
6100 if (!HAVE_conditional_move)
6101 return 0;
6102
6103 /* Do not turn a trapping comparison into a non-trapping one. */
6104 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6105 && flag_trapping_math)
6106 return 0;
6107
6108 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6109 conditional move. */
6110 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6111 normalizep, target_mode);
6112 if (tem == 0)
6113 return 0;
6114
6115 if (and_them)
6116 tem = emit_conditional_move (target, code, op0, op1, mode,
6117 tem, const0_rtx, GET_MODE (tem), 0);
6118 else
6119 tem = emit_conditional_move (target, code, op0, op1, mode,
6120 trueval, tem, GET_MODE (tem), 0);
6121
6122 if (tem == 0)
6123 delete_insns_since (last);
6124 return tem;
6125 }
6126
6127 /* The remaining tricks only apply to integer comparisons. */
6128
6129 scalar_int_mode int_mode;
6130 if (is_int_mode (mode, &int_mode))
6131 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6132 unsignedp, normalizep, trueval);
6133
6134 return 0;
6135 }
6136
6137 /* Like emit_store_flag, but always succeeds. */
6138
6139 rtx
6140 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6141 machine_mode mode, int unsignedp, int normalizep)
6142 {
6143 rtx tem;
6144 rtx_code_label *label;
6145 rtx trueval, falseval;
6146
6147 /* First see if emit_store_flag can do the job. */
6148 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6149 if (tem != 0)
6150 return tem;
6151
6152 /* If one operand is constant, make it the second one. Only do this
6153 if the other operand is not constant as well. */
6154 if (swap_commutative_operands_p (op0, op1))
6155 {
6156 std::swap (op0, op1);
6157 code = swap_condition (code);
6158 }
6159
6160 if (mode == VOIDmode)
6161 mode = GET_MODE (op0);
6162
6163 if (!target)
6164 target = gen_reg_rtx (word_mode);
6165
6166 /* If this failed, we have to do this with set/compare/jump/set code.
6167 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6168 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6169 if (code == NE
6170 && GET_MODE_CLASS (mode) == MODE_INT
6171 && REG_P (target)
6172 && op0 == target
6173 && op1 == const0_rtx)
6174 {
6175 label = gen_label_rtx ();
6176 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6177 NULL_RTX, NULL, label,
6178 profile_probability::uninitialized ());
6179 emit_move_insn (target, trueval);
6180 emit_label (label);
6181 return target;
6182 }
6183
6184 if (!REG_P (target)
6185 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6186 target = gen_reg_rtx (GET_MODE (target));
6187
6188 /* Jump in the right direction if the target cannot implement CODE
6189 but can jump on its reverse condition. */
6190 falseval = const0_rtx;
6191 if (! can_compare_p (code, mode, ccp_jump)
6192 && (! FLOAT_MODE_P (mode)
6193 || code == ORDERED || code == UNORDERED
6194 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6195 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6196 {
6197 enum rtx_code rcode;
6198 if (FLOAT_MODE_P (mode))
6199 rcode = reverse_condition_maybe_unordered (code);
6200 else
6201 rcode = reverse_condition (code);
6202
6203 /* Canonicalize to UNORDERED for the libcall. */
6204 if (can_compare_p (rcode, mode, ccp_jump)
6205 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6206 {
6207 falseval = trueval;
6208 trueval = const0_rtx;
6209 code = rcode;
6210 }
6211 }
6212
6213 emit_move_insn (target, trueval);
6214 label = gen_label_rtx ();
6215 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6216 label, profile_probability::uninitialized ());
6217
6218 emit_move_insn (target, falseval);
6219 emit_label (label);
6220
6221 return target;
6222 }
6223
6224 /* Helper function for canonicalize_comparison. Swap between inclusive
6225 and exclusive ranges in order to create an equivalent comparison. See
6226 canonicalize_comparison for the possible cases. */
6227
6228 static enum rtx_code
6229 equivalent_cmp_code (enum rtx_code code)
6230 {
6231 switch (code)
6232 {
6233 case GT:
6234 return GE;
6235 case GE:
6236 return GT;
6237 case LT:
6238 return LE;
6239 case LE:
6240 return LT;
6241 case GTU:
6242 return GEU;
6243 case GEU:
6244 return GTU;
6245 case LTU:
6246 return LEU;
6247 case LEU:
6248 return LTU;
6249
6250 default:
6251 return code;
6252 }
6253 }
6254
6255 /* Choose the more appropriate immediate in scalar integer comparisons. The
6256 purpose of this is to end up with an immediate which can be loaded into a
6257 register in fewer moves, if possible.
6258
6259 For each integer comparison there exists an equivalent choice:
6260 i) a > b or a >= b + 1
6261 ii) a <= b or a < b + 1
6262 iii) a >= b or a > b - 1
6263 iv) a < b or a <= b - 1
6264
6265 MODE is the mode of the first operand.
6266 CODE points to the comparison code.
6267 IMM points to the rtx containing the immediate. *IMM must satisfy
6268 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6269 on exit. */
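/* For example, on a hypothetical target whose compare instructions only
   accept 12-bit immediates, x > 0xFFF can be encoded directly while the
   equivalent x >= 0x1000 would need the constant loaded into a register
   first; the insn_cost comparison below picks whichever form is cheaper.  */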
6270
6271 void
6272 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6273 {
6274 if (!SCALAR_INT_MODE_P (mode))
6275 return;
6276
6277 int to_add = 0;
6278 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6279
6280 /* Extract the immediate value from the rtx. */
6281 wide_int imm_val = rtx_mode_t (*imm, mode);
6282
6283 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6284 to_add = 1;
6285 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6286 to_add = -1;
6287 else
6288 return;
6289
6290 /* Check for overflow/underflow in the case of signed values and
6291 wrapping around in the case of unsigned values. If any occur,
6292 cancel the optimization. */
6293 wi::overflow_type overflow = wi::OVF_NONE;
6294 wide_int imm_modif;
6295
6296 if (to_add == 1)
6297 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6298 else
6299 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6300
6301 if (overflow)
6302 return;
6303
6304 /* The following creates a pseudo; if we cannot do that, bail out. */
6305 if (!can_create_pseudo_p ())
6306 return;
6307
6308 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6309 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6310
6311 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6312 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6313
6314 /* Update the immediate and the code. */
6315 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6316 {
6317 *code = equivalent_cmp_code (*code);
6318 *imm = new_imm;
6319 }
6320 }
6321
6322
6323 \f
6324 /* Perform possibly multi-word comparison and conditional jump to LABEL
6325 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6326 now a thin wrapper around do_compare_rtx_and_jump. */
6327
6328 static void
6329 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6330 rtx_code_label *label)
6331 {
6332 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6333 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6334 NULL, label, profile_probability::uninitialized ());
6335 }