gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2021 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
  23         (match_operand:VALL_F16MOV 1 "general_operand"))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand")
  43         (match_operand:VALL 1 "general_operand"))]
  44   "TARGET_SIMD && !STRICT_ALIGNMENT"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
 105   [(set (match_operand:VDMOV 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VDMOV 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
 133   [(set (match_operand:VQMOV 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQMOV 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %z1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %z0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %z1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %z0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQMOV 0 "register_operand" "")
 238       (match_operand:VQMOV 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQMOV 0 "register_operand" "")
 250         (match_operand:VQMOV 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "@aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQMOV 0)
 262         (match_operand:VQMOV 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285         emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
 286         emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
 287       }
 288     DONE;
 289   }
 290 )
 291
 292 (define_expand "aarch64_get_half<mode>"
 293   [(set (match_operand:<VHALF> 0 "register_operand")
 294         (vec_select:<VHALF>
 295           (match_operand:VQMOV 1 "register_operand")
 296           (match_operand 2 "ascending_int_parallel")))]
 297   "TARGET_SIMD"
 298 )
 299
 300 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
 301   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
 302         (vec_select:<VHALF>
 303           (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
 304           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
 305   "TARGET_SIMD"
 306   "@
 307    #
 308    umov\t%0, %1.d[0]"
 309   "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
 310   [(set (match_dup 0) (match_dup 1))]
 311   {
 312     operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
 313   }
 314   [(set_attr "type" "mov_reg,neon_to_gp<q>")
 315    (set_attr "length" "4")]
 316 )
 317
 318 (define_insn "aarch64_simd_mov_from_<mode>high"
 319   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
 320         (vec_select:<VHALF>
 321           (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
 322           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
 323   "TARGET_SIMD"
 324   "@
 325    dup\\t%d0, %1.d[1]
 326    umov\t%0, %1.d[1]"
 327   [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
 328    (set_attr "length" "4")]
 329 )
 330
 331 (define_insn "orn<mode>3"
 332  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 333        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 334                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 335  "TARGET_SIMD"
 336  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 337   [(set_attr "type" "neon_logic<q>")]
 338 )
 339
 340 (define_insn "bic<mode>3"
 341  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 342        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 343                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 344  "TARGET_SIMD"
 345  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 346   [(set_attr "type" "neon_logic<q>")]
 347 )
 348
 349 (define_insn "add<mode>3"
 350   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 351         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 352                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 353   "TARGET_SIMD"
 354   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 355   [(set_attr "type" "neon_add<q>")]
 356 )
 357
 358 (define_insn "sub<mode>3"
 359   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 360         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 361                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 362   "TARGET_SIMD"
 363   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 364   [(set_attr "type" "neon_sub<q>")]
 365 )
 366
 367 (define_insn "mul<mode>3"
 368   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 369         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 370                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 371   "TARGET_SIMD"
 372   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 373   [(set_attr "type" "neon_mul_<Vetype><q>")]
 374 )
 375
 376 (define_insn "bswap<mode>2"
 377   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 378         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 379   "TARGET_SIMD"
 380   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 381   [(set_attr "type" "neon_rev<q>")]
 382 )
 383
 384 (define_insn "aarch64_rbit<mode>"
 385   [(set (match_operand:VB 0 "register_operand" "=w")
 386         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 387                    UNSPEC_RBIT))]
 388   "TARGET_SIMD"
 389   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 390   [(set_attr "type" "neon_rbit")]
 391 )
 392
 393 (define_expand "ctz<mode>2"
 394   [(set (match_operand:VS 0 "register_operand")
 395         (ctz:VS (match_operand:VS 1 "register_operand")))]
 396   "TARGET_SIMD"
 397   {
 398      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 399      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 400                                              <MODE>mode, 0);
 401      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 402      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 403      DONE;
 404   }
 405 )
 406
 407 (define_expand "xorsign<mode>3"
 408   [(match_operand:VHSDF 0 "register_operand")
 409    (match_operand:VHSDF 1 "register_operand")
 410    (match_operand:VHSDF 2 "register_operand")]
 411   "TARGET_SIMD"
 412 {
 413
 414   machine_mode imode = <V_INT_EQUIV>mode;
 415   rtx v_bitmask = gen_reg_rtx (imode);
 416   rtx op1x = gen_reg_rtx (imode);
 417   rtx op2x = gen_reg_rtx (imode);
 418
 419   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 420   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 421
 422   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 423
 424   emit_move_insn (v_bitmask,
 425                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 426                                                      HOST_WIDE_INT_M1U << bits));
 427
 428   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 429   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 430   emit_move_insn (operands[0],
 431                   lowpart_subreg (<MODE>mode, op1x, imode));
 432   DONE;
 433 }
 434 )
 435
 436 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
 437 ;; fact that their usage need to guarantee that the source vectors are
 438 ;; contiguous.  It would be wrong to describe the operation without being able
 439 ;; to describe the permute that is also required, but even if that is done
 440 ;; the permute would have been created as a LOAD_LANES which means the values
 441 ;; in the registers are in the wrong order.
 442 (define_insn "aarch64_fcadd<rot><mode>"
 443   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 444         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 445                        (match_operand:VHSDF 2 "register_operand" "w")]
 446                        FCADD))]
 447   "TARGET_COMPLEX"
 448   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
 449   [(set_attr "type" "neon_fcadd")]
 450 )
 451
 452 (define_expand "cadd<rot><mode>3"
 453   [(set (match_operand:VHSDF 0 "register_operand")
 454         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
 455                        (match_operand:VHSDF 2 "register_operand")]
 456                        FCADD))]
 457   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 458 )
 459
 460 (define_insn "aarch64_fcmla<rot><mode>"
 461   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 462         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 463                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 464                                    (match_operand:VHSDF 3 "register_operand" "w")]
 465                                    FCMLA)))]
 466   "TARGET_COMPLEX"
 467   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
 468   [(set_attr "type" "neon_fcmla")]
 469 )
 470
 471
 472 (define_insn "aarch64_fcmla_lane<rot><mode>"
 473   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 474         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 475                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 476                                    (match_operand:VHSDF 3 "register_operand" "w")
 477                                    (match_operand:SI 4 "const_int_operand" "n")]
 478                                    FCMLA)))]
 479   "TARGET_COMPLEX"
 480 {
 481   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
 482   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 483 }
 484   [(set_attr "type" "neon_fcmla")]
 485 )
 486
 487 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
 488   [(set (match_operand:V4HF 0 "register_operand" "=w")
 489         (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
 490                    (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
 491                                  (match_operand:V8HF 3 "register_operand" "w")
 492                                  (match_operand:SI 4 "const_int_operand" "n")]
 493                                  FCMLA)))]
 494   "TARGET_COMPLEX"
 495 {
 496   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
 497   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
 498 }
 499   [(set_attr "type" "neon_fcmla")]
 500 )
 501
 502 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
 503   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
 504         (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
 505                      (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
 506                                      (match_operand:<VHALF> 3 "register_operand" "w")
 507                                      (match_operand:SI 4 "const_int_operand" "n")]
 508                                      FCMLA)))]
 509   "TARGET_COMPLEX"
 510 {
 511   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
 512   operands[4]
 513     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
 514   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 515 }
 516   [(set_attr "type" "neon_fcmla")]
 517 )
 518
 519 ;; The complex mla/mls operations always need to expand to two instructions.
 520 ;; The first operation does half the computation and the second does the
 521 ;; remainder.  Because of this, expand early.
 522 (define_expand "cml<fcmac1><conj_op><mode>4"
 523   [(set (match_operand:VHSDF 0 "register_operand")
 524         (plus:VHSDF (match_operand:VHSDF 1 "register_operand")
 525                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
 526                                    (match_operand:VHSDF 3 "register_operand")]
 527                                    FCMLA_OP)))]
 528   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 529 {
 530   rtx tmp = gen_reg_rtx (<MODE>mode);
 531   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1],
 532                                                  operands[3], operands[2]));
 533   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
 534                                                  operands[3], operands[2]));
 535   DONE;
 536 })
 537
 538 ;; The complex mul operations always need to expand to two instructions.
 539 ;; The first operation does half the computation and the second does the
 540 ;; remainder.  Because of this, expand early.
 541 (define_expand "cmul<conj_op><mode>3"
 542   [(set (match_operand:VHSDF 0 "register_operand")
 543         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
 544                        (match_operand:VHSDF 2 "register_operand")]
 545                        FCMUL_OP))]
 546   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 547 {
 548   rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
 549   rtx res1 = gen_reg_rtx (<MODE>mode);
 550   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
 551                                                  operands[2], operands[1]));
 552   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
 553                                                  operands[2], operands[1]));
 554   DONE;
 555 })
 556
 557 ;; These instructions map to the __builtins for the Dot Product operations.
 558 (define_insn "aarch64_<sur>dot<vsi2qi>"
 559   [(set (match_operand:VS 0 "register_operand" "=w")
 560         (plus:VS (match_operand:VS 1 "register_operand" "0")
 561                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 562                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 563                 DOTPROD)))]
 564   "TARGET_DOTPROD"
 565   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 566   [(set_attr "type" "neon_dot<q>")]
 567 )
 568
 569 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot
 570 ;; (vector) Dot Product operation.
 571 (define_insn "aarch64_usdot<vsi2qi>"
 572   [(set (match_operand:VS 0 "register_operand" "=w")
 573         (plus:VS
 574           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 575                       (match_operand:<VSI2QI> 3 "register_operand" "w")]
 576           UNSPEC_USDOT)
 577           (match_operand:VS 1 "register_operand" "0")))]
 578   "TARGET_I8MM"
 579   "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 580   [(set_attr "type" "neon_dot<q>")]
 581 )
 582
 583 ;; These expands map to the Dot Product optab the vectorizer checks for.
 584 ;; The auto-vectorizer expects a dot product builtin that also does an
 585 ;; accumulation into the provided register.
 586 ;; Given the following pattern
 587 ;;
 588 ;; for (i=0; i<len; i++) {
 589 ;;     c = a[i] * b[i];
 590 ;;     r += c;
 591 ;; }
 592 ;; return result;
 593 ;;
 594 ;; This can be auto-vectorized to
 595 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 596 ;;
 597 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 598 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 599 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 600 ;; ...
 601 ;;
 602 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 603 (define_expand "<sur>dot_prod<vsi2qi>"
 604   [(set (match_operand:VS 0 "register_operand")
 605         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 606                             (match_operand:<VSI2QI> 2 "register_operand")]
 607                  DOTPROD)
 608                 (match_operand:VS 3 "register_operand")))]
 609   "TARGET_DOTPROD"
 610 {
 611   emit_insn (
 612     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 613                                     operands[2]));
 614   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 615   DONE;
 616 })
 617
 618 ;; These instructions map to the __builtins for the Dot Product
 619 ;; indexed operations.
 620 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 621   [(set (match_operand:VS 0 "register_operand" "=w")
 622         (plus:VS (match_operand:VS 1 "register_operand" "0")
 623                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 624                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 625                             (match_operand:SI 4 "immediate_operand" "i")]
 626                 DOTPROD)))]
 627   "TARGET_DOTPROD"
 628   {
 629     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 630     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 631   }
 632   [(set_attr "type" "neon_dot<q>")]
 633 )
 634
 635 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 636   [(set (match_operand:VS 0 "register_operand" "=w")
 637         (plus:VS (match_operand:VS 1 "register_operand" "0")
 638                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 639                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 640                             (match_operand:SI 4 "immediate_operand" "i")]
 641                 DOTPROD)))]
 642   "TARGET_DOTPROD"
 643   {
 644     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 645     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 646   }
 647   [(set_attr "type" "neon_dot<q>")]
 648 )
 649
 650 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
 651 ;; (by element) Dot Product operations.
 652 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
 653   [(set (match_operand:VS 0 "register_operand" "=w")
 654         (plus:VS
 655           (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
 656                       (match_operand:VB 3 "register_operand" "w")
 657                       (match_operand:SI 4 "immediate_operand" "i")]
 658           DOTPROD_I8MM)
 659           (match_operand:VS 1 "register_operand" "0")))]
 660   "TARGET_I8MM"
 661   {
 662     int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
 663     int lane = INTVAL (operands[4]);
 664     operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
 665     return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
 666   }
 667   [(set_attr "type" "neon_dot<VS:q>")]
 668 )
 669
 670 (define_expand "copysign<mode>3"
 671   [(match_operand:VHSDF 0 "register_operand")
 672    (match_operand:VHSDF 1 "register_operand")
 673    (match_operand:VHSDF 2 "register_operand")]
 674   "TARGET_FLOAT && TARGET_SIMD"
 675 {
 676   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 677   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 678
 679   emit_move_insn (v_bitmask,
 680                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 681                                                      HOST_WIDE_INT_M1U << bits));
 682   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 683                                          operands[2], operands[1]));
 684   DONE;
 685 }
 686 )
 687
 688 (define_insn "*aarch64_mul3_elt<mode>"
 689  [(set (match_operand:VMUL 0 "register_operand" "=w")
 690     (mult:VMUL
 691       (vec_duplicate:VMUL
 692           (vec_select:<VEL>
 693             (match_operand:VMUL 1 "register_operand" "<h_con>")
 694             (parallel [(match_operand:SI 2 "immediate_operand")])))
 695       (match_operand:VMUL 3 "register_operand" "w")))]
 696   "TARGET_SIMD"
 697   {
 698     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 699     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 700   }
 701   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 702 )
 703
 704 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 705   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 706      (mult:VMUL_CHANGE_NLANES
 707        (vec_duplicate:VMUL_CHANGE_NLANES
 708           (vec_select:<VEL>
 709             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 710             (parallel [(match_operand:SI 2 "immediate_operand")])))
 711       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 712   "TARGET_SIMD"
 713   {
 714     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 715     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 716   }
 717   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 718 )
 719
 720 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 721  [(set (match_operand:VMUL 0 "register_operand" "=w")
 722     (mult:VMUL
 723       (vec_duplicate:VMUL
 724             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 725       (match_operand:VMUL 2 "register_operand" "w")))]
 726   "TARGET_SIMD"
 727   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 728   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 729 )
 730
 731 (define_insn "@aarch64_rsqrte<mode>"
 732   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 733         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 734                      UNSPEC_RSQRTE))]
 735   "TARGET_SIMD"
 736   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 737   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 738
 739 (define_insn "@aarch64_rsqrts<mode>"
 740   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 741         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 742                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 743          UNSPEC_RSQRTS))]
 744   "TARGET_SIMD"
 745   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 746   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 747
 748 (define_expand "rsqrt<mode>2"
 749   [(set (match_operand:VALLF 0 "register_operand")
 750         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
 751                      UNSPEC_RSQRT))]
 752   "TARGET_SIMD"
 753 {
 754   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 755   DONE;
 756 })
 757
 758 (define_insn "*aarch64_mul3_elt_to_64v2df"
 759   [(set (match_operand:DF 0 "register_operand" "=w")
 760      (mult:DF
 761        (vec_select:DF
 762          (match_operand:V2DF 1 "register_operand" "w")
 763          (parallel [(match_operand:SI 2 "immediate_operand")]))
 764        (match_operand:DF 3 "register_operand" "w")))]
 765   "TARGET_SIMD"
 766   {
 767     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 768     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 769   }
 770   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 771 )
 772
 773 (define_insn "neg<mode>2"
 774   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 775         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 776   "TARGET_SIMD"
 777   "neg\t%0.<Vtype>, %1.<Vtype>"
 778   [(set_attr "type" "neon_neg<q>")]
 779 )
 780
 781 (define_insn "abs<mode>2"
 782   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 783         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 784   "TARGET_SIMD"
 785   "abs\t%0.<Vtype>, %1.<Vtype>"
 786   [(set_attr "type" "neon_abs<q>")]
 787 )
 788
 789 ;; The intrinsic version of integer ABS must not be allowed to
 790 ;; combine with any operation with an integerated ABS step, such
 791 ;; as SABD.
 792 (define_insn "aarch64_abs<mode>"
 793   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 794           (unspec:VSDQ_I_DI
 795             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 796            UNSPEC_ABS))]
 797   "TARGET_SIMD"
 798   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 799   [(set_attr "type" "neon_abs<q>")]
 800 )
 801
 802 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
 803 ;; This isn't accurate as ABS treats always its input as a signed value.
 804 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 805 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 806 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
 807 (define_insn "aarch64_<su>abd<mode>"
 808   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 809         (minus:VDQ_BHSI
 810           (USMAX:VDQ_BHSI
 811             (match_operand:VDQ_BHSI 1 "register_operand" "w")
 812             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
 813           (<max_opp>:VDQ_BHSI
 814             (match_dup 1)
 815             (match_dup 2))))]
 816   "TARGET_SIMD"
 817   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 818   [(set_attr "type" "neon_abd<q>")]
 819 )
 820
 821 (define_insn "aarch64_<sur>abdl2<mode>_3"
 822   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 823         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 824                           (match_operand:VDQV_S 2 "register_operand" "w")]
 825         ABDL2))]
 826   "TARGET_SIMD"
 827   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 828   [(set_attr "type" "neon_abd<q>")]
 829 )
 830
 831 (define_insn "aarch64_<sur>abal<mode>_4"
 832   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 833         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 834                           (match_operand:VDQV_S 2 "register_operand" "w")
 835                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 836         ABAL))]
 837   "TARGET_SIMD"
 838   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 839   [(set_attr "type" "neon_arith_acc<q>")]
 840 )
 841
 842 (define_insn "aarch64_<sur>adalp<mode>"
 843   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 844         (unspec:<VDBLW> [(match_operand:VDQV_S 2 "register_operand" "w")
 845                           (match_operand:<VDBLW> 1 "register_operand" "0")]
 846         ADALP))]
 847   "TARGET_SIMD"
 848   "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
 849   [(set_attr "type" "neon_reduc_add<q>")]
 850 )
 851
 852 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 853 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 854 ;; reduction of the difference into a V4SI vector and accumulate that into
 855 ;; operand 3 before copying that into the result operand 0.
 856 ;; Perform that with a sequence of:
 857 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 858 ;; UABAL        tmp.8h, op1.16b, op2.16b
 859 ;; UADALP       op3.4s, tmp.8h
 860 ;; MOV          op0, op3 // should be eliminated in later passes.
 861 ;;
 862 ;; For TARGET_DOTPROD we do:
 863 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
 864 ;; UABD tmp2.16b, op1.16b, op2.16b
 865 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
 866 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
 867 ;;
 868 ;; The signed version just uses the signed variants of the above instructions
 869 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
 870 ;; unsigned.
 871
 872 (define_expand "<sur>sadv16qi"
 873   [(use (match_operand:V4SI 0 "register_operand"))
 874    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 875                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 876    (use (match_operand:V4SI 3 "register_operand"))]
 877   "TARGET_SIMD"
 878   {
 879     if (TARGET_DOTPROD)
 880       {
 881         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
 882         rtx abd = gen_reg_rtx (V16QImode);
 883         emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
 884         emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
 885                                           abd, ones));
 886         DONE;
 887       }
 888     rtx reduc = gen_reg_rtx (V8HImode);
 889     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 890                                                operands[2]));
 891     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 892                                               operands[2], reduc));
 893     emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
 894     emit_move_insn (operands[0], operands[3]);
 895     DONE;
 896   }
 897 )
 898
 899 (define_insn "aarch64_<su>aba<mode>"
 900   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 901         (plus:VDQ_BHSI (minus:VDQ_BHSI
 902                          (USMAX:VDQ_BHSI
 903                            (match_operand:VDQ_BHSI 2 "register_operand" "w")
 904                            (match_operand:VDQ_BHSI 3 "register_operand" "w"))
 905                          (<max_opp>:VDQ_BHSI
 906                            (match_dup 2)
 907                            (match_dup 3)))
 908                        (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 909   "TARGET_SIMD"
 910   "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
 911   [(set_attr "type" "neon_arith_acc<q>")]
 912 )
 913
 914 (define_insn "fabd<mode>3"
 915   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 916         (abs:VHSDF_HSDF
 917           (minus:VHSDF_HSDF
 918             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 919             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 920   "TARGET_SIMD"
 921   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 922   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 923 )
 924
 925 ;; For AND (vector, register) and BIC (vector, immediate)
 926 (define_insn "and<mode>3"
 927   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 928         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 929                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 930   "TARGET_SIMD"
 931   {
 932     switch (which_alternative)
 933       {
 934       case 0:
 935         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 936       case 1:
 937         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 938                                                   AARCH64_CHECK_BIC);
 939       default:
 940         gcc_unreachable ();
 941       }
 942   }
 943   [(set_attr "type" "neon_logic<q>")]
 944 )
 945
 946 ;; For ORR (vector, register) and ORR (vector, immediate)
 947 (define_insn "ior<mode>3"
 948   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 949         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 950                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 951   "TARGET_SIMD"
 952   {
 953     switch (which_alternative)
 954       {
 955       case 0:
 956         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 957       case 1:
 958         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 959                                                   AARCH64_CHECK_ORR);
 960       default:
 961         gcc_unreachable ();
 962       }
 963   }
 964   [(set_attr "type" "neon_logic<q>")]
 965 )
 966
 967 (define_insn "xor<mode>3"
 968   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 969         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 970                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 971   "TARGET_SIMD"
 972   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 973   [(set_attr "type" "neon_logic<q>")]
 974 )
 975
 976 (define_insn "one_cmpl<mode>2"
 977   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 978         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 979   "TARGET_SIMD"
 980   "not\t%0.<Vbtype>, %1.<Vbtype>"
 981   [(set_attr "type" "neon_logic<q>")]
 982 )
 983
 984 (define_insn "aarch64_simd_vec_set<mode>"
 985   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 986         (vec_merge:VALL_F16
 987             (vec_duplicate:VALL_F16
 988                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 989             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 990             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 991   "TARGET_SIMD"
 992   {
 993    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 994    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 995    switch (which_alternative)
 996      {
 997      case 0:
 998         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 999      case 1:
1000         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1001      case 2:
1002         return "ld1\\t{%0.<Vetype>}[%p2], %1";
1003      default:
1004         gcc_unreachable ();
1005      }
1006   }
1007   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
1008 )
1009
1010 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1011   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1012         (vec_merge:VALL_F16
1013             (vec_duplicate:VALL_F16
1014               (vec_select:<VEL>
1015                 (match_operand:VALL_F16 3 "register_operand" "w")
1016                 (parallel
1017                   [(match_operand:SI 4 "immediate_operand" "i")])))
1018             (match_operand:VALL_F16 1 "register_operand" "0")
1019             (match_operand:SI 2 "immediate_operand" "i")))]
1020   "TARGET_SIMD"
1021   {
1022     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1023     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1024     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1025
1026     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1027   }
1028   [(set_attr "type" "neon_ins<q>")]
1029 )
1030
1031 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1032   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1033         (vec_merge:VALL_F16_NO_V2Q
1034             (vec_duplicate:VALL_F16_NO_V2Q
1035               (vec_select:<VEL>
1036                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1037                 (parallel
1038                   [(match_operand:SI 4 "immediate_operand" "i")])))
1039             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1040             (match_operand:SI 2 "immediate_operand" "i")))]
1041   "TARGET_SIMD"
1042   {
1043     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1044     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1045     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1046                                            INTVAL (operands[4]));
1047
1048     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1049   }
1050   [(set_attr "type" "neon_ins<q>")]
1051 )
1052
1053 (define_expand "signbit<mode>2"
1054   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1055    (use (match_operand:VDQSF 1 "register_operand"))]
1056   "TARGET_SIMD"
1057 {
1058   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1059   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1060                                                         shift_amount);
1061   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1062
1063   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1064                                                  shift_vector));
1065   DONE;
1066 })
1067
1068 (define_insn "aarch64_simd_lshr<mode>"
1069  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1070        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1071                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1072  "TARGET_SIMD"
1073  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1074   [(set_attr "type" "neon_shift_imm<q>")]
1075 )
1076
1077 (define_insn "aarch64_simd_ashr<mode>"
1078  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1079        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1080                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1081  "TARGET_SIMD"
1082  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1083   [(set_attr "type" "neon_shift_imm<q>")]
1084 )
1085
1086 (define_insn "*aarch64_simd_sra<mode>"
1087  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1088         (plus:VDQ_I
1089            (SHIFTRT:VDQ_I
1090                 (match_operand:VDQ_I 1 "register_operand" "w")
1091                 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1092            (match_operand:VDQ_I 3 "register_operand" "0")))]
1093   "TARGET_SIMD"
1094   "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1095   [(set_attr "type" "neon_shift_acc<q>")]
1096 )
1097
1098 (define_insn "aarch64_simd_imm_shl<mode>"
1099  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1100        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1101                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
1102  "TARGET_SIMD"
1103   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1104   [(set_attr "type" "neon_shift_imm<q>")]
1105 )
1106
1107 (define_insn "aarch64_simd_reg_sshl<mode>"
1108  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1109        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1110                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1111  "TARGET_SIMD"
1112  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1113   [(set_attr "type" "neon_shift_reg<q>")]
1114 )
1115
1116 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1117  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1118        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1119                     (match_operand:VDQ_I 2 "register_operand" "w")]
1120                    UNSPEC_ASHIFT_UNSIGNED))]
1121  "TARGET_SIMD"
1122  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1123   [(set_attr "type" "neon_shift_reg<q>")]
1124 )
1125
1126 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1127  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1128        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1129                     (match_operand:VDQ_I 2 "register_operand" "w")]
1130                    UNSPEC_ASHIFT_SIGNED))]
1131  "TARGET_SIMD"
1132  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1133   [(set_attr "type" "neon_shift_reg<q>")]
1134 )
1135
1136 (define_expand "ashl<mode>3"
1137   [(match_operand:VDQ_I 0 "register_operand")
1138    (match_operand:VDQ_I 1 "register_operand")
1139    (match_operand:SI  2 "general_operand")]
1140  "TARGET_SIMD"
1141 {
1142   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1143   int shift_amount;
1144
1145   if (CONST_INT_P (operands[2]))
1146     {
1147       shift_amount = INTVAL (operands[2]);
1148       if (shift_amount >= 0 && shift_amount < bit_width)
1149         {
1150           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1151                                                        shift_amount);
1152           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1153                                                      operands[1],
1154                                                      tmp));
1155           DONE;
1156         }
1157     }
1158
1159   operands[2] = force_reg (SImode, operands[2]);
1160
1161   rtx tmp = gen_reg_rtx (<MODE>mode);
1162   emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1163                                                                operands[2],
1164                                                                0)));
1165   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1166   DONE;
1167 })
1168
1169 (define_expand "lshr<mode>3"
1170   [(match_operand:VDQ_I 0 "register_operand")
1171    (match_operand:VDQ_I 1 "register_operand")
1172    (match_operand:SI  2 "general_operand")]
1173  "TARGET_SIMD"
1174 {
1175   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1176   int shift_amount;
1177
1178   if (CONST_INT_P (operands[2]))
1179     {
1180       shift_amount = INTVAL (operands[2]);
1181       if (shift_amount > 0 && shift_amount <= bit_width)
1182         {
1183           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1184                                                        shift_amount);
1185           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1186                                                   operands[1],
1187                                                   tmp));
1188           DONE;
1189         }
1190     }
1191
1192   operands[2] = force_reg (SImode, operands[2]);
1193
1194   rtx tmp = gen_reg_rtx (SImode);
1195   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1196   emit_insn (gen_negsi2 (tmp, operands[2]));
1197   emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1198                                          convert_to_mode (<VEL>mode, tmp, 0)));
1199   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1200                                                       tmp1));
1201   DONE;
1202 })
1203
1204 (define_expand "ashr<mode>3"
1205   [(match_operand:VDQ_I 0 "register_operand")
1206    (match_operand:VDQ_I 1 "register_operand")
1207    (match_operand:SI  2 "general_operand")]
1208  "TARGET_SIMD"
1209 {
1210   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1211   int shift_amount;
1212
1213   if (CONST_INT_P (operands[2]))
1214     {
1215       shift_amount = INTVAL (operands[2]);
1216       if (shift_amount > 0 && shift_amount <= bit_width)
1217         {
1218           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1219                                                        shift_amount);
1220           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1221                                                   operands[1],
1222                                                   tmp));
1223           DONE;
1224         }
1225     }
1226
1227   operands[2] = force_reg (SImode, operands[2]);
1228
1229   rtx tmp = gen_reg_rtx (SImode);
1230   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1231   emit_insn (gen_negsi2 (tmp, operands[2]));
1232   emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1233                                                                 tmp, 0)));
1234   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1235                                                     tmp1));
1236   DONE;
1237 })
1238
1239 (define_expand "vashl<mode>3"
1240  [(match_operand:VDQ_I 0 "register_operand")
1241   (match_operand:VDQ_I 1 "register_operand")
1242   (match_operand:VDQ_I 2 "register_operand")]
1243  "TARGET_SIMD"
1244 {
1245   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1246                                               operands[2]));
1247   DONE;
1248 })
1249
1250 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1251 ;; Negating individual lanes most certainly offsets the
1252 ;; gain from vectorization.
1253 (define_expand "vashr<mode>3"
1254  [(match_operand:VDQ_BHSI 0 "register_operand")
1255   (match_operand:VDQ_BHSI 1 "register_operand")
1256   (match_operand:VDQ_BHSI 2 "register_operand")]
1257  "TARGET_SIMD"
1258 {
1259   rtx neg = gen_reg_rtx (<MODE>mode);
1260   emit (gen_neg<mode>2 (neg, operands[2]));
1261   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1262                                                     neg));
1263   DONE;
1264 })
1265
1266 ;; DI vector shift
1267 (define_expand "aarch64_ashr_simddi"
1268   [(match_operand:DI 0 "register_operand")
1269    (match_operand:DI 1 "register_operand")
1270    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1271   "TARGET_SIMD"
1272   {
1273     /* An arithmetic shift right by 64 fills the result with copies of the sign
1274        bit, just like asr by 63 - however the standard pattern does not handle
1275        a shift by 64.  */
1276     if (INTVAL (operands[2]) == 64)
1277       operands[2] = GEN_INT (63);
1278     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1279     DONE;
1280   }
1281 )
1282
1283 (define_expand "vlshr<mode>3"
1284  [(match_operand:VDQ_BHSI 0 "register_operand")
1285   (match_operand:VDQ_BHSI 1 "register_operand")
1286   (match_operand:VDQ_BHSI 2 "register_operand")]
1287  "TARGET_SIMD"
1288 {
1289   rtx neg = gen_reg_rtx (<MODE>mode);
1290   emit (gen_neg<mode>2 (neg, operands[2]));
1291   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1292                                                       neg));
1293   DONE;
1294 })
1295
1296 (define_expand "aarch64_lshr_simddi"
1297   [(match_operand:DI 0 "register_operand")
1298    (match_operand:DI 1 "register_operand")
1299    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1300   "TARGET_SIMD"
1301   {
1302     if (INTVAL (operands[2]) == 64)
1303       emit_move_insn (operands[0], const0_rtx);
1304     else
1305       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1306     DONE;
1307   }
1308 )
1309
1310 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1311 (define_insn "vec_shr_<mode>"
1312   [(set (match_operand:VD 0 "register_operand" "=w")
1313         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1314                     (match_operand:SI 2 "immediate_operand" "i")]
1315                    UNSPEC_VEC_SHR))]
1316   "TARGET_SIMD"
1317   {
1318     if (BYTES_BIG_ENDIAN)
1319       return "shl %d0, %d1, %2";
1320     else
1321       return "ushr %d0, %d1, %2";
1322   }
1323   [(set_attr "type" "neon_shift_imm")]
1324 )
1325
1326 (define_expand "vec_set<mode>"
1327   [(match_operand:VALL_F16 0 "register_operand")
1328    (match_operand:<VEL> 1 "register_operand")
1329    (match_operand:SI 2 "immediate_operand")]
1330   "TARGET_SIMD"
1331   {
1332     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1333     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1334                                           GEN_INT (elem), operands[0]));
1335     DONE;
1336   }
1337 )
1338
1339
1340 (define_insn "aarch64_mla<mode>"
1341  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1342        (plus:VDQ_BHSI (mult:VDQ_BHSI
1343                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1344                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1345                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1346  "TARGET_SIMD"
1347  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1348   [(set_attr "type" "neon_mla_<Vetype><q>")]
1349 )
1350
1351 (define_insn "*aarch64_mla_elt<mode>"
1352  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1353        (plus:VDQHS
1354          (mult:VDQHS
1355            (vec_duplicate:VDQHS
1356               (vec_select:<VEL>
1357                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1358                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1359            (match_operand:VDQHS 3 "register_operand" "w"))
1360          (match_operand:VDQHS 4 "register_operand" "0")))]
1361  "TARGET_SIMD"
1362   {
1363     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1364     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1365   }
1366   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1367 )
1368
1369 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1370  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1371        (plus:VDQHS
1372          (mult:VDQHS
1373            (vec_duplicate:VDQHS
1374               (vec_select:<VEL>
1375                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1376                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1377            (match_operand:VDQHS 3 "register_operand" "w"))
1378          (match_operand:VDQHS 4 "register_operand" "0")))]
1379  "TARGET_SIMD"
1380   {
1381     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1382     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1383   }
1384   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1385 )
1386
1387 (define_insn "*aarch64_mla_elt_merge<mode>"
1388   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1389         (plus:VDQHS
1390           (mult:VDQHS (vec_duplicate:VDQHS
1391                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1392                 (match_operand:VDQHS 2 "register_operand" "w"))
1393           (match_operand:VDQHS 3 "register_operand" "0")))]
1394  "TARGET_SIMD"
1395  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1396   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1397 )
1398
1399 (define_insn "aarch64_mls<mode>"
1400  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1401        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1402                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1403                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1404  "TARGET_SIMD"
1405  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1406   [(set_attr "type" "neon_mla_<Vetype><q>")]
1407 )
1408
1409 (define_insn "*aarch64_mls_elt<mode>"
1410  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1411        (minus:VDQHS
1412          (match_operand:VDQHS 4 "register_operand" "0")
1413          (mult:VDQHS
1414            (vec_duplicate:VDQHS
1415               (vec_select:<VEL>
1416                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1417                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1418            (match_operand:VDQHS 3 "register_operand" "w"))))]
1419  "TARGET_SIMD"
1420   {
1421     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1422     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1423   }
1424   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1425 )
1426
1427 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1428  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1429        (minus:VDQHS
1430          (match_operand:VDQHS 4 "register_operand" "0")
1431          (mult:VDQHS
1432            (vec_duplicate:VDQHS
1433               (vec_select:<VEL>
1434                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1435                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1436            (match_operand:VDQHS 3 "register_operand" "w"))))]
1437  "TARGET_SIMD"
1438   {
1439     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1440     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1441   }
1442   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1443 )
1444
1445 (define_insn "*aarch64_mls_elt_merge<mode>"
1446   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1447         (minus:VDQHS
1448           (match_operand:VDQHS 1 "register_operand" "0")
1449           (mult:VDQHS (vec_duplicate:VDQHS
1450                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1451                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1452   "TARGET_SIMD"
1453   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1454   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1455 )
1456
1457 ;; Max/Min operations.
1458 (define_insn "<su><maxmin><mode>3"
1459  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1460        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1461                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1462  "TARGET_SIMD"
1463  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1464   [(set_attr "type" "neon_minmax<q>")]
1465 )
1466
1467 (define_expand "<su><maxmin>v2di3"
1468  [(set (match_operand:V2DI 0 "register_operand")
1469        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1470                     (match_operand:V2DI 2 "register_operand")))]
1471  "TARGET_SIMD"
1472 {
1473   enum rtx_code cmp_operator;
1474   rtx cmp_fmt;
1475
1476   switch (<CODE>)
1477     {
1478     case UMIN:
1479       cmp_operator = LTU;
1480       break;
1481     case SMIN:
1482       cmp_operator = LT;
1483       break;
1484     case UMAX:
1485       cmp_operator = GTU;
1486       break;
1487     case SMAX:
1488       cmp_operator = GT;
1489       break;
1490     default:
1491       gcc_unreachable ();
1492     }
1493
1494   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1495   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1496               operands[2], cmp_fmt, operands[1], operands[2]));
1497   DONE;
1498 })
1499
1500 ;; Pairwise Integer Max/Min operations.
1501 (define_insn "aarch64_<maxmin_uns>p<mode>"
1502  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1503        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1504                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1505                         MAXMINV))]
1506  "TARGET_SIMD"
1507  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1508   [(set_attr "type" "neon_minmax<q>")]
1509 )
1510
1511 ;; Pairwise FP Max/Min operations.
1512 (define_insn "aarch64_<maxmin_uns>p<mode>"
1513  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1514        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1515                       (match_operand:VHSDF 2 "register_operand" "w")]
1516                       FMAXMINV))]
1517  "TARGET_SIMD"
1518  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1519   [(set_attr "type" "neon_minmax<q>")]
1520 )
1521
1522 ;; vec_concat gives a new vector with the low elements from operand 1, and
1523 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1524 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1525 ;; What that means, is that the RTL descriptions of the below patterns
1526 ;; need to change depending on endianness.
1527
1528 ;; Move to the low architectural bits of the register.
1529 ;; On little-endian this is { operand, zeroes }
1530 ;; On big-endian this is { zeroes, operand }
1531
1532 (define_insn "move_lo_quad_internal_<mode>"
1533   [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1534         (vec_concat:VQMOV_NO2E
1535           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1536           (vec_duplicate:<VHALF> (const_int 0))))]
1537   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1538   "@
1539    dup\\t%d0, %1.d[0]
1540    fmov\\t%d0, %1
1541    dup\\t%d0, %1"
1542   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1543    (set_attr "length" "4")
1544    (set_attr "arch" "simd,fp,simd")]
1545 )
1546
1547 (define_insn "move_lo_quad_internal_<mode>"
1548   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1549         (vec_concat:VQ_2E
1550           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1551           (const_int 0)))]
1552   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1553   "@
1554    dup\\t%d0, %1.d[0]
1555    fmov\\t%d0, %1
1556    dup\\t%d0, %1"
1557   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1558    (set_attr "length" "4")
1559    (set_attr "arch" "simd,fp,simd")]
1560 )
1561
1562 (define_insn "move_lo_quad_internal_be_<mode>"
1563   [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1564         (vec_concat:VQMOV_NO2E
1565           (vec_duplicate:<VHALF> (const_int 0))
1566           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1567   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1568   "@
1569    dup\\t%d0, %1.d[0]
1570    fmov\\t%d0, %1
1571    dup\\t%d0, %1"
1572   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1573    (set_attr "length" "4")
1574    (set_attr "arch" "simd,fp,simd")]
1575 )
1576
1577 (define_insn "move_lo_quad_internal_be_<mode>"
1578   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1579         (vec_concat:VQ_2E
1580           (const_int 0)
1581           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1582   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1583   "@
1584    dup\\t%d0, %1.d[0]
1585    fmov\\t%d0, %1
1586    dup\\t%d0, %1"
1587   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1588    (set_attr "length" "4")
1589    (set_attr "arch" "simd,fp,simd")]
1590 )
1591
1592 (define_expand "move_lo_quad_<mode>"
1593   [(match_operand:VQMOV 0 "register_operand")
1594    (match_operand:VQMOV 1 "register_operand")]
1595   "TARGET_SIMD"
1596 {
1597   if (BYTES_BIG_ENDIAN)
1598     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1599   else
1600     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1601   DONE;
1602 }
1603 )
1604
1605 ;; Move operand1 to the high architectural bits of the register, keeping
1606 ;; the low architectural bits of operand2.
1607 ;; For little-endian this is { operand2, operand1 }
1608 ;; For big-endian this is { operand1, operand2 }
1609
1610 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1611   [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1612         (vec_concat:VQMOV
1613           (vec_select:<VHALF>
1614                 (match_dup 0)
1615                 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1616           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1617   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1618   "@
1619    ins\\t%0.d[1], %1.d[0]
1620    ins\\t%0.d[1], %1"
1621   [(set_attr "type" "neon_ins")]
1622 )
1623
1624 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1625   [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1626         (vec_concat:VQMOV
1627           (match_operand:<VHALF> 1 "register_operand" "w,r")
1628           (vec_select:<VHALF>
1629                 (match_dup 0)
1630                 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1631   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1632   "@
1633    ins\\t%0.d[1], %1.d[0]
1634    ins\\t%0.d[1], %1"
1635   [(set_attr "type" "neon_ins")]
1636 )
1637
1638 (define_expand "move_hi_quad_<mode>"
1639  [(match_operand:VQMOV 0 "register_operand")
1640   (match_operand:<VHALF> 1 "register_operand")]
1641  "TARGET_SIMD"
1642 {
1643   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1644   if (BYTES_BIG_ENDIAN)
1645     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1646                     operands[1], p));
1647   else
1648     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1649                     operands[1], p));
1650   DONE;
1651 })
1652
1653 ;; Narrowing operations.
1654
1655 ;; For doubles.
1656 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1657  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1658        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1659  "TARGET_SIMD"
1660  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1661   [(set_attr "type" "neon_shift_imm_narrow_q")]
1662 )
1663
1664 (define_expand "vec_pack_trunc_<mode>"
1665  [(match_operand:<VNARROWD> 0 "register_operand")
1666   (match_operand:VDN 1 "register_operand")
1667   (match_operand:VDN 2 "register_operand")]
1668  "TARGET_SIMD"
1669 {
1670   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1671   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1672   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1673
1674   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1675   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1676   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1677   DONE;
1678 })
1679
1680 ;; For quads.
1681
1682 (define_insn "vec_pack_trunc_<mode>"
1683  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1684        (vec_concat:<VNARROWQ2>
1685          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1686          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1687  "TARGET_SIMD"
1688  {
1689    if (BYTES_BIG_ENDIAN)
1690      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1691    else
1692      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1693  }
1694   [(set_attr "type" "multiple")
1695    (set_attr "length" "8")]
1696 )
1697
1698 ;; Widening operations.
1699
1700 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1701   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1702         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1703                                (match_operand:VQW 1 "register_operand" "w")
1704                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1705                             )))]
1706   "TARGET_SIMD"
1707   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1708   [(set_attr "type" "neon_shift_imm_long")]
1709 )
1710
1711 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1712   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1713         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1714                                (match_operand:VQW 1 "register_operand" "w")
1715                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1716                             )))]
1717   "TARGET_SIMD"
1718   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1719   [(set_attr "type" "neon_shift_imm_long")]
1720 )
1721
1722 (define_expand "vec_unpack<su>_hi_<mode>"
1723   [(match_operand:<VWIDE> 0 "register_operand")
1724    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1725   "TARGET_SIMD"
1726   {
1727     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1728     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1729                                                           operands[1], p));
1730     DONE;
1731   }
1732 )
1733
1734 (define_expand "vec_unpack<su>_lo_<mode>"
1735   [(match_operand:<VWIDE> 0 "register_operand")
1736    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1737   "TARGET_SIMD"
1738   {
1739     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1740     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1741                                                           operands[1], p));
1742     DONE;
1743   }
1744 )
1745
1746 ;; Widening arithmetic.
1747
1748 (define_insn "*aarch64_<su>mlal_lo<mode>"
1749   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1750         (plus:<VWIDE>
1751           (mult:<VWIDE>
1752               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1753                  (match_operand:VQW 2 "register_operand" "w")
1754                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1755               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1756                  (match_operand:VQW 4 "register_operand" "w")
1757                  (match_dup 3))))
1758           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1759   "TARGET_SIMD"
1760   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1761   [(set_attr "type" "neon_mla_<Vetype>_long")]
1762 )
1763
1764 (define_insn "*aarch64_<su>mlal_hi<mode>"
1765   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1766         (plus:<VWIDE>
1767           (mult:<VWIDE>
1768               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1769                  (match_operand:VQW 2 "register_operand" "w")
1770                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1771               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772                  (match_operand:VQW 4 "register_operand" "w")
1773                  (match_dup 3))))
1774           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1775   "TARGET_SIMD"
1776   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1777   [(set_attr "type" "neon_mla_<Vetype>_long")]
1778 )
1779
1780 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1781   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1782         (minus:<VWIDE>
1783           (match_operand:<VWIDE> 1 "register_operand" "0")
1784           (mult:<VWIDE>
1785               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1786                  (match_operand:VQW 2 "register_operand" "w")
1787                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1788               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1789                  (match_operand:VQW 4 "register_operand" "w")
1790                  (match_dup 3))))))]
1791   "TARGET_SIMD"
1792   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1793   [(set_attr "type" "neon_mla_<Vetype>_long")]
1794 )
1795
1796 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
1797   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1798         (minus:<VWIDE>
1799           (match_operand:<VWIDE> 1 "register_operand" "0")
1800           (mult:<VWIDE>
1801               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802                  (match_operand:VQW 2 "register_operand" "w")
1803                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1804               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1805                  (match_operand:VQW 4 "register_operand" "w")
1806                  (match_dup 3))))))]
1807   "TARGET_SIMD"
1808   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1809   [(set_attr "type" "neon_mla_<Vetype>_long")]
1810 )
1811
1812 (define_expand "aarch64_<su>mlsl_hi<mode>"
1813   [(match_operand:<VWIDE> 0 "register_operand")
1814    (match_operand:<VWIDE> 1 "register_operand")
1815    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
1816    (match_operand:VQW 3 "register_operand")]
1817   "TARGET_SIMD"
1818 {
1819   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1820   emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
1821                                                  operands[2], p, operands[3]));
1822   DONE;
1823 }
1824 )
1825
1826 (define_insn "*aarch64_<su>mlal<mode>"
1827   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1828         (plus:<VWIDE>
1829           (mult:<VWIDE>
1830             (ANY_EXTEND:<VWIDE>
1831               (match_operand:VD_BHSI 1 "register_operand" "w"))
1832             (ANY_EXTEND:<VWIDE>
1833               (match_operand:VD_BHSI 2 "register_operand" "w")))
1834           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1835   "TARGET_SIMD"
1836   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1837   [(set_attr "type" "neon_mla_<Vetype>_long")]
1838 )
1839
1840 (define_insn "aarch64_<su>mlsl<mode>"
1841   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1842         (minus:<VWIDE>
1843           (match_operand:<VWIDE> 1 "register_operand" "0")
1844           (mult:<VWIDE>
1845             (ANY_EXTEND:<VWIDE>
1846               (match_operand:VD_BHSI 2 "register_operand" "w"))
1847             (ANY_EXTEND:<VWIDE>
1848               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1849   "TARGET_SIMD"
1850   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1851   [(set_attr "type" "neon_mla_<Vetype>_long")]
1852 )
1853
1854 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1855  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1856        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1857                            (match_operand:VQW 1 "register_operand" "w")
1858                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1859                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1860                            (match_operand:VQW 2 "register_operand" "w")
1861                            (match_dup 3)))))]
1862   "TARGET_SIMD"
1863   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1864   [(set_attr "type" "neon_mul_<Vetype>_long")]
1865 )
1866
1867 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
1868   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1869         (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
1870                          (match_operand:VD_BHSI 1 "register_operand" "w"))
1871                       (ANY_EXTEND:<VWIDE>
1872                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
1873   "TARGET_SIMD"
1874   "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1875   [(set_attr "type" "neon_mul_<Vetype>_long")]
1876 )
1877
1878 (define_expand "vec_widen_<su>mult_lo_<mode>"
1879   [(match_operand:<VWIDE> 0 "register_operand")
1880    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1881    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1882  "TARGET_SIMD"
1883  {
1884    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1885    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1886                                                        operands[1],
1887                                                        operands[2], p));
1888    DONE;
1889  }
1890 )
1891
1892 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1893  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1894       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1895                             (match_operand:VQW 1 "register_operand" "w")
1896                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1897                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1898                             (match_operand:VQW 2 "register_operand" "w")
1899                             (match_dup 3)))))]
1900   "TARGET_SIMD"
1901   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1902   [(set_attr "type" "neon_mul_<Vetype>_long")]
1903 )
1904
1905 (define_expand "vec_widen_<su>mult_hi_<mode>"
1906   [(match_operand:<VWIDE> 0 "register_operand")
1907    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1908    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1909  "TARGET_SIMD"
1910  {
1911    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1912    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1913                                                        operands[1],
1914                                                        operands[2], p));
1915    DONE;
1916
1917  }
1918 )
1919
1920 ;; vmull_lane_s16 intrinsics
1921 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
1922   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1923         (mult:<VWIDE>
1924           (ANY_EXTEND:<VWIDE>
1925             (match_operand:<VCOND> 1 "register_operand" "w"))
1926           (ANY_EXTEND:<VWIDE>
1927             (vec_duplicate:<VCOND>
1928               (vec_select:<VEL>
1929                 (match_operand:VDQHS 2 "register_operand" "<vwx>")
1930                 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
1931   "TARGET_SIMD"
1932   {
1933     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
1934     return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
1935   }
1936   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
1937 )
1938
1939 ;; vmlal_lane_s16 intrinsics
1940 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
1941   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1942         (plus:<VWIDE>
1943           (mult:<VWIDE>
1944             (ANY_EXTEND:<VWIDE>
1945               (match_operand:<VCOND> 2 "register_operand" "w"))
1946             (ANY_EXTEND:<VWIDE>
1947               (vec_duplicate:<VCOND>
1948                 (vec_select:<VEL>
1949                   (match_operand:VDQHS 3 "register_operand" "<vwx>")
1950                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
1951           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1952   "TARGET_SIMD"
1953   {
1954     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1955     return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
1956   }
1957   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
1958 )
1959
1960 ;; FP vector operations.
1961 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1962 ;; double-precision (64-bit) floating-point data types and arithmetic as
1963 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1964 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1965 ;;
1966 ;; Floating-point operations can raise an exception.  Vectorizing such
1967 ;; operations are safe because of reasons explained below.
1968 ;;
1969 ;; ARMv8 permits an extension to enable trapped floating-point
1970 ;; exception handling, however this is an optional feature.  In the
1971 ;; event of a floating-point exception being raised by vectorised
1972 ;; code then:
1973 ;; 1.  If trapped floating-point exceptions are available, then a trap
1974 ;;     will be taken when any lane raises an enabled exception.  A trap
1975 ;;     handler may determine which lane raised the exception.
1976 ;; 2.  Alternatively a sticky exception flag is set in the
1977 ;;     floating-point status register (FPSR).  Software may explicitly
1978 ;;     test the exception flags, in which case the tests will either
1979 ;;     prevent vectorisation, allowing precise identification of the
1980 ;;     failing operation, or if tested outside of vectorisable regions
1981 ;;     then the specific operation and lane are not of interest.
1982
1983 ;; FP arithmetic operations.
1984
1985 (define_insn "add<mode>3"
1986  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1987        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1988                    (match_operand:VHSDF 2 "register_operand" "w")))]
1989  "TARGET_SIMD"
1990  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1991   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1992 )
1993
1994 (define_insn "sub<mode>3"
1995  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1996        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1997                     (match_operand:VHSDF 2 "register_operand" "w")))]
1998  "TARGET_SIMD"
1999  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2000   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2001 )
2002
2003 (define_insn "mul<mode>3"
2004  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2005        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2006                    (match_operand:VHSDF 2 "register_operand" "w")))]
2007  "TARGET_SIMD"
2008  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2009   [(set_attr "type" "neon_fp_mul_<stype><q>")]
2010 )
2011
2012 (define_expand "div<mode>3"
2013  [(set (match_operand:VHSDF 0 "register_operand")
2014        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2015                   (match_operand:VHSDF 2 "register_operand")))]
2016  "TARGET_SIMD"
2017 {
2018   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2019     DONE;
2020
2021   operands[1] = force_reg (<MODE>mode, operands[1]);
2022 })
2023
2024 (define_insn "*div<mode>3"
2025  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2026        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2027                  (match_operand:VHSDF 2 "register_operand" "w")))]
2028  "TARGET_SIMD"
2029  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2030   [(set_attr "type" "neon_fp_div_<stype><q>")]
2031 )
2032
2033 (define_insn "neg<mode>2"
2034  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2035        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2036  "TARGET_SIMD"
2037  "fneg\\t%0.<Vtype>, %1.<Vtype>"
2038   [(set_attr "type" "neon_fp_neg_<stype><q>")]
2039 )
2040
2041 (define_insn "abs<mode>2"
2042  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2043        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2044  "TARGET_SIMD"
2045  "fabs\\t%0.<Vtype>, %1.<Vtype>"
2046   [(set_attr "type" "neon_fp_abs_<stype><q>")]
2047 )
2048
2049 (define_insn "fma<mode>4"
2050   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2051        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2052                   (match_operand:VHSDF 2 "register_operand" "w")
2053                   (match_operand:VHSDF 3 "register_operand" "0")))]
2054   "TARGET_SIMD"
2055  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2056   [(set_attr "type" "neon_fp_mla_<stype><q>")]
2057 )
2058
2059 (define_insn "*aarch64_fma4_elt<mode>"
2060   [(set (match_operand:VDQF 0 "register_operand" "=w")
2061     (fma:VDQF
2062       (vec_duplicate:VDQF
2063         (vec_select:<VEL>
2064           (match_operand:VDQF 1 "register_operand" "<h_con>")
2065           (parallel [(match_operand:SI 2 "immediate_operand")])))
2066       (match_operand:VDQF 3 "register_operand" "w")
2067       (match_operand:VDQF 4 "register_operand" "0")))]
2068   "TARGET_SIMD"
2069   {
2070     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2071     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2072   }
2073   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2074 )
2075
2076 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2077   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2078     (fma:VDQSF
2079       (vec_duplicate:VDQSF
2080         (vec_select:<VEL>
2081           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2082           (parallel [(match_operand:SI 2 "immediate_operand")])))
2083       (match_operand:VDQSF 3 "register_operand" "w")
2084       (match_operand:VDQSF 4 "register_operand" "0")))]
2085   "TARGET_SIMD"
2086   {
2087     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2088     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2089   }
2090   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2091 )
2092
2093 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
2094   [(set (match_operand:VMUL 0 "register_operand" "=w")
2095     (fma:VMUL
2096       (vec_duplicate:VMUL
2097           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2098       (match_operand:VMUL 2 "register_operand" "w")
2099       (match_operand:VMUL 3 "register_operand" "0")))]
2100   "TARGET_SIMD"
2101   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2102   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2103 )
2104
2105 (define_insn "*aarch64_fma4_elt_to_64v2df"
2106   [(set (match_operand:DF 0 "register_operand" "=w")
2107     (fma:DF
2108         (vec_select:DF
2109           (match_operand:V2DF 1 "register_operand" "w")
2110           (parallel [(match_operand:SI 2 "immediate_operand")]))
2111       (match_operand:DF 3 "register_operand" "w")
2112       (match_operand:DF 4 "register_operand" "0")))]
2113   "TARGET_SIMD"
2114   {
2115     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2116     return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2117   }
2118   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2119 )
2120
2121 (define_insn "fnma<mode>4"
2122   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2123         (fma:VHSDF
2124           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2125           (match_operand:VHSDF 2 "register_operand" "w")
2126           (match_operand:VHSDF 3 "register_operand" "0")))]
2127   "TARGET_SIMD"
2128   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2129   [(set_attr "type" "neon_fp_mla_<stype><q>")]
2130 )
2131
2132 (define_insn "*aarch64_fnma4_elt<mode>"
2133   [(set (match_operand:VDQF 0 "register_operand" "=w")
2134     (fma:VDQF
2135       (neg:VDQF
2136         (match_operand:VDQF 3 "register_operand" "w"))
2137       (vec_duplicate:VDQF
2138         (vec_select:<VEL>
2139           (match_operand:VDQF 1 "register_operand" "<h_con>")
2140           (parallel [(match_operand:SI 2 "immediate_operand")])))
2141       (match_operand:VDQF 4 "register_operand" "0")))]
2142   "TARGET_SIMD"
2143   {
2144     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2145     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2146   }
2147   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2148 )
2149
2150 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2151   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2152     (fma:VDQSF
2153       (neg:VDQSF
2154         (match_operand:VDQSF 3 "register_operand" "w"))
2155       (vec_duplicate:VDQSF
2156         (vec_select:<VEL>
2157           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2158           (parallel [(match_operand:SI 2 "immediate_operand")])))
2159       (match_operand:VDQSF 4 "register_operand" "0")))]
2160   "TARGET_SIMD"
2161   {
2162     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2163     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2164   }
2165   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2166 )
2167
2168 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2169   [(set (match_operand:VMUL 0 "register_operand" "=w")
2170     (fma:VMUL
2171       (neg:VMUL
2172         (match_operand:VMUL 2 "register_operand" "w"))
2173       (vec_duplicate:VMUL
2174         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2175       (match_operand:VMUL 3 "register_operand" "0")))]
2176   "TARGET_SIMD"
2177   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2178   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2179 )
2180
2181 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2182   [(set (match_operand:DF 0 "register_operand" "=w")
2183     (fma:DF
2184       (vec_select:DF
2185         (match_operand:V2DF 1 "register_operand" "w")
2186         (parallel [(match_operand:SI 2 "immediate_operand")]))
2187       (neg:DF
2188         (match_operand:DF 3 "register_operand" "w"))
2189       (match_operand:DF 4 "register_operand" "0")))]
2190   "TARGET_SIMD"
2191   {
2192     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2193     return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2194   }
2195   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2196 )
2197
2198 ;; Vector versions of the floating-point frint patterns.
2199 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2200 (define_insn "<frint_pattern><mode>2"
2201   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2202         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2203                        FRINT))]
2204   "TARGET_SIMD"
2205   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2206   [(set_attr "type" "neon_fp_round_<stype><q>")]
2207 )
2208
2209 ;; Vector versions of the fcvt standard patterns.
2210 ;; Expands to lbtrunc, lround, lceil, lfloor
2211 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2212   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2213         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2214                                [(match_operand:VHSDF 1 "register_operand" "w")]
2215                                FCVT)))]
2216   "TARGET_SIMD"
2217   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2218   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2219 )
2220
2221 ;; HF Scalar variants of related SIMD instructions.
2222 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2223   [(set (match_operand:HI 0 "register_operand" "=w")
2224         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2225                       FCVT)))]
2226   "TARGET_SIMD_F16INST"
2227   "fcvt<frint_suffix><su>\t%h0, %h1"
2228   [(set_attr "type" "neon_fp_to_int_s")]
2229 )
2230
2231 (define_insn "<optab>_trunchfhi2"
2232   [(set (match_operand:HI 0 "register_operand" "=w")
2233         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2234   "TARGET_SIMD_F16INST"
2235   "fcvtz<su>\t%h0, %h1"
2236   [(set_attr "type" "neon_fp_to_int_s")]
2237 )
2238
2239 (define_insn "<optab>hihf2"
2240   [(set (match_operand:HF 0 "register_operand" "=w")
2241         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2242   "TARGET_SIMD_F16INST"
2243   "<su_optab>cvtf\t%h0, %h1"
2244   [(set_attr "type" "neon_int_to_fp_s")]
2245 )
2246
2247 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2248   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2249         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2250                                [(mult:VDQF
2251          (match_operand:VDQF 1 "register_operand" "w")
2252          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2253                                UNSPEC_FRINTZ)))]
2254   "TARGET_SIMD
2255    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2256                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2257   {
2258     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2259     char buf[64];
2260     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2261     output_asm_insn (buf, operands);
2262     return "";
2263   }
2264   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2265 )
2266
2267 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2268   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2269         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2270                                [(match_operand:VHSDF 1 "register_operand")]
2271                                 UNSPEC_FRINTZ)))]
2272   "TARGET_SIMD"
2273   {})
2274
2275 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2276   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2277         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2278                                [(match_operand:VHSDF 1 "register_operand")]
2279                                 UNSPEC_FRINTZ)))]
2280   "TARGET_SIMD"
2281   {})
2282
2283 (define_expand "ftrunc<VHSDF:mode>2"
2284   [(set (match_operand:VHSDF 0 "register_operand")
2285         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2286                        UNSPEC_FRINTZ))]
2287   "TARGET_SIMD"
2288   {})
2289
2290 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2291   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2292         (FLOATUORS:VHSDF
2293           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2294   "TARGET_SIMD"
2295   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2296   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2297 )
2298
2299 ;; Conversions between vectors of floats and doubles.
2300 ;; Contains a mix of patterns to match standard pattern names
2301 ;; and those for intrinsics.
2302
2303 ;; Float widening operations.
2304
2305 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2306   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2307         (float_extend:<VWIDE> (vec_select:<VHALF>
2308                                (match_operand:VQ_HSF 1 "register_operand" "w")
2309                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2310                             )))]
2311   "TARGET_SIMD"
2312   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2313   [(set_attr "type" "neon_fp_cvt_widen_s")]
2314 )
2315
2316 ;; Convert between fixed-point and floating-point (vector modes)
2317
2318 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2319   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2320         (unspec:<VHSDF:FCVT_TARGET>
2321           [(match_operand:VHSDF 1 "register_operand" "w")
2322            (match_operand:SI 2 "immediate_operand" "i")]
2323          FCVT_F2FIXED))]
2324   "TARGET_SIMD"
2325   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2326   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2327 )
2328
2329 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2330   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2331         (unspec:<VDQ_HSDI:FCVT_TARGET>
2332           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2333            (match_operand:SI 2 "immediate_operand" "i")]
2334          FCVT_FIXED2F))]
2335   "TARGET_SIMD"
2336   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2337   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2338 )
2339
2340 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2341 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2342 ;; the meaning of HI and LO changes depending on the target endianness.
2343 ;; While elsewhere we map the higher numbered elements of a vector to
2344 ;; the lower architectural lanes of the vector, for these patterns we want
2345 ;; to always treat "hi" as referring to the higher architectural lanes.
2346 ;; Consequently, while the patterns below look inconsistent with our
2347 ;; other big-endian patterns their behavior is as required.
2348
2349 (define_expand "vec_unpacks_lo_<mode>"
2350   [(match_operand:<VWIDE> 0 "register_operand")
2351    (match_operand:VQ_HSF 1 "register_operand")]
2352   "TARGET_SIMD"
2353   {
2354     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2355     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2356                                                        operands[1], p));
2357     DONE;
2358   }
2359 )
2360
2361 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2362   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2363         (float_extend:<VWIDE> (vec_select:<VHALF>
2364                                (match_operand:VQ_HSF 1 "register_operand" "w")
2365                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2366                             )))]
2367   "TARGET_SIMD"
2368   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2369   [(set_attr "type" "neon_fp_cvt_widen_s")]
2370 )
2371
2372 (define_expand "vec_unpacks_hi_<mode>"
2373   [(match_operand:<VWIDE> 0 "register_operand")
2374    (match_operand:VQ_HSF 1 "register_operand")]
2375   "TARGET_SIMD"
2376   {
2377     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2378     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2379                                                        operands[1], p));
2380     DONE;
2381   }
2382 )
2383 (define_insn "aarch64_float_extend_lo_<Vwide>"
2384   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2385         (float_extend:<VWIDE>
2386           (match_operand:VDF 1 "register_operand" "w")))]
2387   "TARGET_SIMD"
2388   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2389   [(set_attr "type" "neon_fp_cvt_widen_s")]
2390 )
2391
2392 ;; Float narrowing operations.
2393
2394 (define_insn "aarch64_float_truncate_lo_<mode>"
2395   [(set (match_operand:VDF 0 "register_operand" "=w")
2396       (float_truncate:VDF
2397         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2398   "TARGET_SIMD"
2399   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2400   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2401 )
2402
2403 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2404   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2405     (vec_concat:<VDBL>
2406       (match_operand:VDF 1 "register_operand" "0")
2407       (float_truncate:VDF
2408         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2409   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2410   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2411   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2412 )
2413
2414 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2415   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2416     (vec_concat:<VDBL>
2417       (float_truncate:VDF
2418         (match_operand:<VWIDE> 2 "register_operand" "w"))
2419       (match_operand:VDF 1 "register_operand" "0")))]
2420   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2421   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2422   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2423 )
2424
2425 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2426   [(match_operand:<VDBL> 0 "register_operand")
2427    (match_operand:VDF 1 "register_operand")
2428    (match_operand:<VWIDE> 2 "register_operand")]
2429   "TARGET_SIMD"
2430 {
2431   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2432                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2433                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2434   emit_insn (gen (operands[0], operands[1], operands[2]));
2435   DONE;
2436 }
2437 )
2438
2439 (define_expand "vec_pack_trunc_v2df"
2440   [(set (match_operand:V4SF 0 "register_operand")
2441       (vec_concat:V4SF
2442         (float_truncate:V2SF
2443             (match_operand:V2DF 1 "register_operand"))
2444         (float_truncate:V2SF
2445             (match_operand:V2DF 2 "register_operand"))
2446           ))]
2447   "TARGET_SIMD"
2448   {
2449     rtx tmp = gen_reg_rtx (V2SFmode);
2450     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2451     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2452
2453     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2454     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2455                                                    tmp, operands[hi]));
2456     DONE;
2457   }
2458 )
2459
2460 (define_expand "vec_pack_trunc_df"
2461   [(set (match_operand:V2SF 0 "register_operand")
2462       (vec_concat:V2SF
2463         (float_truncate:SF
2464             (match_operand:DF 1 "register_operand"))
2465         (float_truncate:SF
2466             (match_operand:DF 2 "register_operand"))
2467           ))]
2468   "TARGET_SIMD"
2469   {
2470     rtx tmp = gen_reg_rtx (V2SFmode);
2471     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2472     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2473
2474     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2475     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2476     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2477     DONE;
2478   }
2479 )
2480
2481 ;; FP Max/Min
2482 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2483 ;; expression like:
2484 ;;      a = (b < c) ? b : c;
2485 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2486 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2487 ;; -ffast-math.
2488 ;;
2489 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2490 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2491 ;; operand will be returned when both operands are zero (i.e. they may not
2492 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2493 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2494 ;; NaNs.
2495
2496 (define_insn "<su><maxmin><mode>3"
2497   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2498         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2499                        (match_operand:VHSDF 2 "register_operand" "w")))]
2500   "TARGET_SIMD"
2501   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2502   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2503 )
2504
2505 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2506 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2507 ;; which implement the IEEE fmax ()/fmin () functions.
2508 (define_insn "<maxmin_uns><mode>3"
2509   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2510        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2511                       (match_operand:VHSDF 2 "register_operand" "w")]
2512                       FMAXMIN_UNS))]
2513   "TARGET_SIMD"
2514   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2515   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2516 )
2517
2518 ;; 'across lanes' add.
2519
2520 (define_expand "reduc_plus_scal_<mode>"
2521   [(match_operand:<VEL> 0 "register_operand")
2522    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2523                UNSPEC_ADDV)]
2524   "TARGET_SIMD"
2525   {
2526     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2527     rtx scratch = gen_reg_rtx (<MODE>mode);
2528     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2529     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2530     DONE;
2531   }
2532 )
2533
2534 (define_insn "aarch64_faddp<mode>"
2535  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2536        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2537                       (match_operand:VHSDF 2 "register_operand" "w")]
2538         UNSPEC_FADDV))]
2539  "TARGET_SIMD"
2540  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2541   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2542 )
2543
2544 (define_insn "aarch64_reduc_plus_internal<mode>"
2545  [(set (match_operand:VDQV 0 "register_operand" "=w")
2546        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2547                     UNSPEC_ADDV))]
2548  "TARGET_SIMD"
2549  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2550   [(set_attr "type" "neon_reduc_add<q>")]
2551 )
2552
2553 ;; ADDV with result zero-extended to SI/DImode (for popcount).
2554 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
2555  [(set (match_operand:GPI 0 "register_operand" "=w")
2556        (zero_extend:GPI
2557         (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
2558                              UNSPEC_ADDV)))]
2559  "TARGET_SIMD"
2560  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
2561   [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
2562 )
2563
2564 (define_insn "aarch64_reduc_plus_internalv2si"
2565  [(set (match_operand:V2SI 0 "register_operand" "=w")
2566        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2567                     UNSPEC_ADDV))]
2568  "TARGET_SIMD"
2569  "addp\\t%0.2s, %1.2s, %1.2s"
2570   [(set_attr "type" "neon_reduc_add")]
2571 )
2572
2573 (define_insn "reduc_plus_scal_<mode>"
2574  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2575        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2576                    UNSPEC_FADDV))]
2577  "TARGET_SIMD"
2578  "faddp\\t%<Vetype>0, %1.<Vtype>"
2579   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2580 )
2581
2582 (define_expand "reduc_plus_scal_v4sf"
2583  [(set (match_operand:SF 0 "register_operand")
2584        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2585                     UNSPEC_FADDV))]
2586  "TARGET_SIMD"
2587 {
2588   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2589   rtx scratch = gen_reg_rtx (V4SFmode);
2590   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2591   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2592   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2593   DONE;
2594 })
2595
2596 (define_insn "clrsb<mode>2"
2597   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2598         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2599   "TARGET_SIMD"
2600   "cls\\t%0.<Vtype>, %1.<Vtype>"
2601   [(set_attr "type" "neon_cls<q>")]
2602 )
2603
2604 (define_insn "clz<mode>2"
2605  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2606        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2607  "TARGET_SIMD"
2608  "clz\\t%0.<Vtype>, %1.<Vtype>"
2609   [(set_attr "type" "neon_cls<q>")]
2610 )
2611
2612 (define_insn "popcount<mode>2"
2613   [(set (match_operand:VB 0 "register_operand" "=w")
2614         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2615   "TARGET_SIMD"
2616   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2617   [(set_attr "type" "neon_cnt<q>")]
2618 )
2619
2620 ;; 'across lanes' max and min ops.
2621
2622 ;; Template for outputting a scalar, so we can create __builtins which can be
2623 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2624 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2625   [(match_operand:<VEL> 0 "register_operand")
2626    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2627                   FMAXMINV)]
2628   "TARGET_SIMD"
2629   {
2630     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2631     rtx scratch = gen_reg_rtx (<MODE>mode);
2632     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2633                                                               operands[1]));
2634     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2635     DONE;
2636   }
2637 )
2638
2639 ;; Likewise for integer cases, signed and unsigned.
2640 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2641   [(match_operand:<VEL> 0 "register_operand")
2642    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2643                     MAXMINV)]
2644   "TARGET_SIMD"
2645   {
2646     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2647     rtx scratch = gen_reg_rtx (<MODE>mode);
2648     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2649                                                               operands[1]));
2650     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2651     DONE;
2652   }
2653 )
2654
2655 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2656  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2657        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2658                     MAXMINV))]
2659  "TARGET_SIMD"
2660  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2661   [(set_attr "type" "neon_reduc_minmax<q>")]
2662 )
2663
2664 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2665  [(set (match_operand:V2SI 0 "register_operand" "=w")
2666        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2667                     MAXMINV))]
2668  "TARGET_SIMD"
2669  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2670   [(set_attr "type" "neon_reduc_minmax")]
2671 )
2672
2673 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2674  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2675        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2676                       FMAXMINV))]
2677  "TARGET_SIMD"
2678  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2679   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2680 )
2681
2682 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2683 ;; allocation.
2684 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2685 ;; to select.
2686 ;;
2687 ;; Thus our BSL is of the form:
2688 ;;   op0 = bsl (mask, op2, op3)
2689 ;; We can use any of:
2690 ;;
2691 ;;   if (op0 = mask)
2692 ;;     bsl mask, op1, op2
2693 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2694 ;;     bit op0, op2, mask
2695 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2696 ;;     bif op0, op1, mask
2697 ;;
2698 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2699 ;; Some forms of straight-line code may generate the equivalent form
2700 ;; in *aarch64_simd_bsl<mode>_alt.
2701
2702 (define_insn "aarch64_simd_bsl<mode>_internal"
2703   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2704         (xor:VDQ_I
2705            (and:VDQ_I
2706              (xor:VDQ_I
2707                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2708                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2709              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2710           (match_dup:<V_INT_EQUIV> 3)
2711         ))]
2712   "TARGET_SIMD"
2713   "@
2714   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2715   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2716   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2717   [(set_attr "type" "neon_bsl<q>")]
2718 )
2719
2720 ;; We need this form in addition to the above pattern to match the case
2721 ;; when combine tries merging three insns such that the second operand of
2722 ;; the outer XOR matches the second operand of the inner XOR rather than
2723 ;; the first.  The two are equivalent but since recog doesn't try all
2724 ;; permutations of commutative operations, we have to have a separate pattern.
2725
2726 (define_insn "*aarch64_simd_bsl<mode>_alt"
2727   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2728         (xor:VDQ_I
2729            (and:VDQ_I
2730              (xor:VDQ_I
2731                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2732                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2733               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2734           (match_dup:<V_INT_EQUIV> 2)))]
2735   "TARGET_SIMD"
2736   "@
2737   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2738   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2739   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2740   [(set_attr "type" "neon_bsl<q>")]
2741 )
2742
2743 ;; DImode is special, we want to avoid computing operations which are
2744 ;; more naturally computed in general purpose registers in the vector
2745 ;; registers.  If we do that, we need to move all three operands from general
2746 ;; purpose registers to vector registers, then back again.  However, we
2747 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2748 ;; optimizations based on the component operations of a BSL.
2749 ;;
2750 ;; That means we need a splitter back to the individual operations, if they
2751 ;; would be better calculated on the integer side.
2752
2753 (define_insn_and_split "aarch64_simd_bsldi_internal"
2754   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2755         (xor:DI
2756            (and:DI
2757              (xor:DI
2758                (match_operand:DI 3 "register_operand" "w,0,w,r")
2759                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2760              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2761           (match_dup:DI 3)
2762         ))]
2763   "TARGET_SIMD"
2764   "@
2765   bsl\\t%0.8b, %2.8b, %3.8b
2766   bit\\t%0.8b, %2.8b, %1.8b
2767   bif\\t%0.8b, %3.8b, %1.8b
2768   #"
2769   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2770   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2771 {
2772   /* Split back to individual operations.  If we're before reload, and
2773      able to create a temporary register, do so.  If we're after reload,
2774      we've got an early-clobber destination register, so use that.
2775      Otherwise, we can't create pseudos and we can't yet guarantee that
2776      operands[0] is safe to write, so FAIL to split.  */
2777
2778   rtx scratch;
2779   if (reload_completed)
2780     scratch = operands[0];
2781   else if (can_create_pseudo_p ())
2782     scratch = gen_reg_rtx (DImode);
2783   else
2784     FAIL;
2785
2786   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2787   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2788   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2789   DONE;
2790 }
2791   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2792    (set_attr "length" "4,4,4,12")]
2793 )
2794
2795 (define_insn_and_split "aarch64_simd_bsldi_alt"
2796   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2797         (xor:DI
2798            (and:DI
2799              (xor:DI
2800                (match_operand:DI 3 "register_operand" "w,w,0,r")
2801                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2802              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2803           (match_dup:DI 2)
2804         ))]
2805   "TARGET_SIMD"
2806   "@
2807   bsl\\t%0.8b, %3.8b, %2.8b
2808   bit\\t%0.8b, %3.8b, %1.8b
2809   bif\\t%0.8b, %2.8b, %1.8b
2810   #"
2811   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2812   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2813 {
2814   /* Split back to individual operations.  If we're before reload, and
2815      able to create a temporary register, do so.  If we're after reload,
2816      we've got an early-clobber destination register, so use that.
2817      Otherwise, we can't create pseudos and we can't yet guarantee that
2818      operands[0] is safe to write, so FAIL to split.  */
2819
2820   rtx scratch;
2821   if (reload_completed)
2822     scratch = operands[0];
2823   else if (can_create_pseudo_p ())
2824     scratch = gen_reg_rtx (DImode);
2825   else
2826     FAIL;
2827
2828   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2829   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2830   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2831   DONE;
2832 }
2833   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2834    (set_attr "length" "4,4,4,12")]
2835 )
2836
2837 (define_expand "aarch64_simd_bsl<mode>"
2838   [(match_operand:VALLDIF 0 "register_operand")
2839    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2840    (match_operand:VALLDIF 2 "register_operand")
2841    (match_operand:VALLDIF 3 "register_operand")]
2842  "TARGET_SIMD"
2843 {
2844   /* We can't alias operands together if they have different modes.  */
2845   rtx tmp = operands[0];
2846   if (FLOAT_MODE_P (<MODE>mode))
2847     {
2848       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2849       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2850       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2851     }
2852   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2853   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2854                                                          operands[1],
2855                                                          operands[2],
2856                                                          operands[3]));
2857   if (tmp != operands[0])
2858     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2859
2860   DONE;
2861 })
2862
2863 (define_expand "vcond_mask_<mode><v_int_equiv>"
2864   [(match_operand:VALLDI 0 "register_operand")
2865    (match_operand:VALLDI 1 "nonmemory_operand")
2866    (match_operand:VALLDI 2 "nonmemory_operand")
2867    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2868   "TARGET_SIMD"
2869 {
2870   /* If we have (a = (P) ? -1 : 0);
2871      Then we can simply move the generated mask (result must be int).  */
2872   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2873       && operands[2] == CONST0_RTX (<MODE>mode))
2874     emit_move_insn (operands[0], operands[3]);
2875   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2876   else if (operands[1] == CONST0_RTX (<MODE>mode)
2877            && operands[2] == CONSTM1_RTX (<MODE>mode))
2878     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2879   else
2880     {
2881       if (!REG_P (operands[1]))
2882         operands[1] = force_reg (<MODE>mode, operands[1]);
2883       if (!REG_P (operands[2]))
2884         operands[2] = force_reg (<MODE>mode, operands[2]);
2885       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2886                                              operands[1], operands[2]));
2887     }
2888
2889   DONE;
2890 })
2891
2892 ;; Patterns comparing two vectors to produce a mask.
2893
2894 (define_expand "vec_cmp<mode><mode>"
2895   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2896           (match_operator 1 "comparison_operator"
2897             [(match_operand:VSDQ_I_DI 2 "register_operand")
2898              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2899   "TARGET_SIMD"
2900 {
2901   rtx mask = operands[0];
2902   enum rtx_code code = GET_CODE (operands[1]);
2903
2904   switch (code)
2905     {
2906     case NE:
2907     case LE:
2908     case LT:
2909     case GE:
2910     case GT:
2911     case EQ:
2912       if (operands[3] == CONST0_RTX (<MODE>mode))
2913         break;
2914
2915       /* Fall through.  */
2916     default:
2917       if (!REG_P (operands[3]))
2918         operands[3] = force_reg (<MODE>mode, operands[3]);
2919
2920       break;
2921     }
2922
2923   switch (code)
2924     {
2925     case LT:
2926       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2927       break;
2928
2929     case GE:
2930       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2931       break;
2932
2933     case LE:
2934       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2935       break;
2936
2937     case GT:
2938       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2939       break;
2940
2941     case LTU:
2942       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2943       break;
2944
2945     case GEU:
2946       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2947       break;
2948
2949     case LEU:
2950       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2951       break;
2952
2953     case GTU:
2954       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2955       break;
2956
2957     case NE:
2958       /* Handle NE as !EQ.  */
2959       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2960       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2961       break;
2962
2963     case EQ:
2964       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2965       break;
2966
2967     default:
2968       gcc_unreachable ();
2969     }
2970
2971   DONE;
2972 })
2973
2974 (define_expand "vec_cmp<mode><v_int_equiv>"
2975   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2976         (match_operator 1 "comparison_operator"
2977             [(match_operand:VDQF 2 "register_operand")
2978              (match_operand:VDQF 3 "nonmemory_operand")]))]
2979   "TARGET_SIMD"
2980 {
2981   int use_zero_form = 0;
2982   enum rtx_code code = GET_CODE (operands[1]);
2983   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2984
2985   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2986
2987   switch (code)
2988     {
2989     case LE:
2990     case LT:
2991     case GE:
2992     case GT:
2993     case EQ:
2994       if (operands[3] == CONST0_RTX (<MODE>mode))
2995         {
2996           use_zero_form = 1;
2997           break;
2998         }
2999       /* Fall through.  */
3000     default:
3001       if (!REG_P (operands[3]))
3002         operands[3] = force_reg (<MODE>mode, operands[3]);
3003
3004       break;
3005     }
3006
3007   switch (code)
3008     {
3009     case LT:
3010       if (use_zero_form)
3011         {
3012           comparison = gen_aarch64_cmlt<mode>;
3013           break;
3014         }
3015       /* Fall through.  */
3016     case UNLT:
3017       std::swap (operands[2], operands[3]);
3018       /* Fall through.  */
3019     case UNGT:
3020     case GT:
3021       comparison = gen_aarch64_cmgt<mode>;
3022       break;
3023     case LE:
3024       if (use_zero_form)
3025         {
3026           comparison = gen_aarch64_cmle<mode>;
3027           break;
3028         }
3029       /* Fall through.  */
3030     case UNLE:
3031       std::swap (operands[2], operands[3]);
3032       /* Fall through.  */
3033     case UNGE:
3034     case GE:
3035       comparison = gen_aarch64_cmge<mode>;
3036       break;
3037     case NE:
3038     case EQ:
3039       comparison = gen_aarch64_cmeq<mode>;
3040       break;
3041     case UNEQ:
3042     case ORDERED:
3043     case UNORDERED:
3044     case LTGT:
3045       break;
3046     default:
3047       gcc_unreachable ();
3048     }
3049
3050   switch (code)
3051     {
3052     case UNGE:
3053     case UNGT:
3054     case UNLE:
3055     case UNLT:
3056       {
3057         /* All of the above must not raise any FP exceptions.  Thus we first
3058            check each operand for NaNs and force any elements containing NaN to
3059            zero before using them in the compare.
3060            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
3061                                      (cm<cc> (isnan (a) ? 0.0 : a,
3062                                               isnan (b) ? 0.0 : b))
3063            We use the following transformations for doing the comparisions:
3064            a UNGE b -> a GE b
3065            a UNGT b -> a GT b
3066            a UNLE b -> b GE a
3067            a UNLT b -> b GT a.  */
3068
3069         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
3070         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
3071         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
3072         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
3073         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
3074         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
3075         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
3076                                           lowpart_subreg (<V_INT_EQUIV>mode,
3077                                                           operands[2],
3078                                                           <MODE>mode)));
3079         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
3080                                           lowpart_subreg (<V_INT_EQUIV>mode,
3081                                                           operands[3],
3082                                                           <MODE>mode)));
3083         gcc_assert (comparison != NULL);
3084         emit_insn (comparison (operands[0],
3085                                lowpart_subreg (<MODE>mode,
3086                                                tmp0, <V_INT_EQUIV>mode),
3087                                lowpart_subreg (<MODE>mode,
3088                                                tmp1, <V_INT_EQUIV>mode)));
3089         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
3090       }
3091       break;
3092
3093     case LT:
3094     case LE:
3095     case GT:
3096     case GE:
3097     case EQ:
3098     case NE:
3099       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
3100          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
3101          a GE b -> a GE b
3102          a GT b -> a GT b
3103          a LE b -> b GE a
3104          a LT b -> b GT a
3105          a EQ b -> a EQ b
3106          a NE b -> ~(a EQ b)  */
3107       gcc_assert (comparison != NULL);
3108       emit_insn (comparison (operands[0], operands[2], operands[3]));
3109       if (code == NE)
3110         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3111       break;
3112
3113     case LTGT:
3114       /* LTGT is not guranteed to not generate a FP exception.  So let's
3115          go the faster way : ((a > b) || (b > a)).  */
3116       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
3117                                          operands[2], operands[3]));
3118       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
3119       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
3120       break;
3121
3122     case ORDERED:
3123     case UNORDERED:
3124     case UNEQ:
3125       /* cmeq (a, a) & cmeq (b, b).  */
3126       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
3127                                          operands[2], operands[2]));
3128       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
3129       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
3130
3131       if (code == UNORDERED)
3132         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3133       else if (code == UNEQ)
3134         {
3135           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3136           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3137         }
3138       break;
3139
3140     default:
3141       gcc_unreachable ();
3142     }
3143
3144   DONE;
3145 })
3146
3147 (define_expand "vec_cmpu<mode><mode>"
3148   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3149           (match_operator 1 "comparison_operator"
3150             [(match_operand:VSDQ_I_DI 2 "register_operand")
3151              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3152   "TARGET_SIMD"
3153 {
3154   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3155                                       operands[2], operands[3]));
3156   DONE;
3157 })
3158
3159 (define_expand "vcond<mode><mode>"
3160   [(set (match_operand:VALLDI 0 "register_operand")
3161         (if_then_else:VALLDI
3162           (match_operator 3 "comparison_operator"
3163             [(match_operand:VALLDI 4 "register_operand")
3164              (match_operand:VALLDI 5 "nonmemory_operand")])
3165           (match_operand:VALLDI 1 "nonmemory_operand")
3166           (match_operand:VALLDI 2 "nonmemory_operand")))]
3167   "TARGET_SIMD"
3168 {
3169   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3170   enum rtx_code code = GET_CODE (operands[3]);
3171
3172   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3173      it as well as switch operands 1/2 in order to avoid the additional
3174      NOT instruction.  */
3175   if (code == NE)
3176     {
3177       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3178                                     operands[4], operands[5]);
3179       std::swap (operands[1], operands[2]);
3180     }
3181   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3182                                              operands[4], operands[5]));
3183   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3184                                                  operands[2], mask));
3185
3186   DONE;
3187 })
3188
3189 (define_expand "vcond<v_cmp_mixed><mode>"
3190   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3191         (if_then_else:<V_cmp_mixed>
3192           (match_operator 3 "comparison_operator"
3193             [(match_operand:VDQF_COND 4 "register_operand")
3194              (match_operand:VDQF_COND 5 "nonmemory_operand")])
3195           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3196           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3197   "TARGET_SIMD"
3198 {
3199   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3200   enum rtx_code code = GET_CODE (operands[3]);
3201
3202   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3203      it as well as switch operands 1/2 in order to avoid the additional
3204      NOT instruction.  */
3205   if (code == NE)
3206     {
3207       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3208                                     operands[4], operands[5]);
3209       std::swap (operands[1], operands[2]);
3210     }
3211   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3212                                              operands[4], operands[5]));
3213   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3214                                                 operands[0], operands[1],
3215                                                 operands[2], mask));
3216
3217   DONE;
3218 })
3219
3220 (define_expand "vcondu<mode><mode>"
3221   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3222         (if_then_else:VSDQ_I_DI
3223           (match_operator 3 "comparison_operator"
3224             [(match_operand:VSDQ_I_DI 4 "register_operand")
3225              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3226           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3227           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3228   "TARGET_SIMD"
3229 {
3230   rtx mask = gen_reg_rtx (<MODE>mode);
3231   enum rtx_code code = GET_CODE (operands[3]);
3232
3233   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3234      it as well as switch operands 1/2 in order to avoid the additional
3235      NOT instruction.  */
3236   if (code == NE)
3237     {
3238       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3239                                     operands[4], operands[5]);
3240       std::swap (operands[1], operands[2]);
3241     }
3242   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3243                                       operands[4], operands[5]));
3244   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3245                                                  operands[2], mask));
3246   DONE;
3247 })
3248
3249 (define_expand "vcondu<mode><v_cmp_mixed>"
3250   [(set (match_operand:VDQF 0 "register_operand")
3251         (if_then_else:VDQF
3252           (match_operator 3 "comparison_operator"
3253             [(match_operand:<V_cmp_mixed> 4 "register_operand")
3254              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3255           (match_operand:VDQF 1 "nonmemory_operand")
3256           (match_operand:VDQF 2 "nonmemory_operand")))]
3257   "TARGET_SIMD"
3258 {
3259   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3260   enum rtx_code code = GET_CODE (operands[3]);
3261
3262   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3263      it as well as switch operands 1/2 in order to avoid the additional
3264      NOT instruction.  */
3265   if (code == NE)
3266     {
3267       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3268                                     operands[4], operands[5]);
3269       std::swap (operands[1], operands[2]);
3270     }
3271   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3272                                                   mask, operands[3],
3273                                                   operands[4], operands[5]));
3274   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3275                                                  operands[2], mask));
3276   DONE;
3277 })
3278
3279 ;; Patterns for AArch64 SIMD Intrinsics.
3280
3281 ;; Lane extraction with sign extension to general purpose register.
3282 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3283   [(set (match_operand:GPI 0 "register_operand" "=r")
3284         (sign_extend:GPI
3285           (vec_select:<VDQQH:VEL>
3286             (match_operand:VDQQH 1 "register_operand" "w")
3287             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3288   "TARGET_SIMD"
3289   {
3290     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3291                                            INTVAL (operands[2]));
3292     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3293   }
3294   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3295 )
3296
3297 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3298   [(set (match_operand:GPI 0 "register_operand" "=r")
3299         (zero_extend:GPI
3300           (vec_select:<VDQQH:VEL>
3301             (match_operand:VDQQH 1 "register_operand" "w")
3302             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3303   "TARGET_SIMD"
3304   {
3305     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3306                                            INTVAL (operands[2]));
3307     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3308   }
3309   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3310 )
3311
3312 ;; Lane extraction of a value, neither sign nor zero extension
3313 ;; is guaranteed so upper bits should be considered undefined.
3314 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3315 ;; Extracting lane zero is split into a simple move when it is between SIMD
3316 ;; registers or a store.
3317 (define_insn_and_split "aarch64_get_lane<mode>"
3318   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3319         (vec_select:<VEL>
3320           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3321           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3322   "TARGET_SIMD"
3323   {
3324     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3325     switch (which_alternative)
3326       {
3327         case 0:
3328           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3329         case 1:
3330           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3331         case 2:
3332           return "st1\\t{%1.<Vetype>}[%2], %0";
3333         default:
3334           gcc_unreachable ();
3335       }
3336   }
3337  "&& reload_completed
3338   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
3339  [(set (match_dup 0) (match_dup 1))]
3340  {
3341    operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
3342  }
3343   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3344 )
3345
3346 (define_insn "load_pair_lanes<mode>"
3347   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3348         (vec_concat:<VDBL>
3349            (match_operand:VDC 1 "memory_operand" "Utq")
3350            (match_operand:VDC 2 "memory_operand" "m")))]
3351   "TARGET_SIMD && !STRICT_ALIGNMENT
3352    && rtx_equal_p (XEXP (operands[2], 0),
3353                    plus_constant (Pmode,
3354                                   XEXP (operands[1], 0),
3355                                   GET_MODE_SIZE (<MODE>mode)))"
3356   "ldr\\t%q0, %1"
3357   [(set_attr "type" "neon_load1_1reg_q")]
3358 )
3359
3360 (define_insn "store_pair_lanes<mode>"
3361   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3362         (vec_concat:<VDBL>
3363            (match_operand:VDC 1 "register_operand" "w, r")
3364            (match_operand:VDC 2 "register_operand" "w, r")))]
3365   "TARGET_SIMD"
3366   "@
3367    stp\\t%d1, %d2, %y0
3368    stp\\t%x1, %x2, %y0"
3369   [(set_attr "type" "neon_stp, store_16")]
3370 )
3371
3372 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3373 ;; dest vector.
3374
3375 (define_insn "@aarch64_combinez<mode>"
3376   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3377         (vec_concat:<VDBL>
3378           (match_operand:VDC 1 "general_operand" "w,?r,m")
3379           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3380   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3381   "@
3382    mov\\t%0.8b, %1.8b
3383    fmov\t%d0, %1
3384    ldr\\t%d0, %1"
3385   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3386    (set_attr "arch" "simd,fp,simd")]
3387 )
3388
3389 (define_insn "@aarch64_combinez_be<mode>"
3390   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3391         (vec_concat:<VDBL>
3392           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3393           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3394   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3395   "@
3396    mov\\t%0.8b, %1.8b
3397    fmov\t%d0, %1
3398    ldr\\t%d0, %1"
3399   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3400    (set_attr "arch" "simd,fp,simd")]
3401 )
3402
3403 (define_expand "aarch64_combine<mode>"
3404   [(match_operand:<VDBL> 0 "register_operand")
3405    (match_operand:VDC 1 "register_operand")
3406    (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
3407   "TARGET_SIMD"
3408 {
3409   if (operands[2] == CONST0_RTX (<MODE>mode))
3410     {
3411       if (BYTES_BIG_ENDIAN)
3412         emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
3413                                                   operands[2]));
3414       else
3415         emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
3416                                                operands[2]));
3417     }
3418   else
3419     aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3420   DONE;
3421 }
3422 )
3423
3424 (define_expand "@aarch64_simd_combine<mode>"
3425   [(match_operand:<VDBL> 0 "register_operand")
3426    (match_operand:VDC 1 "register_operand")
3427    (match_operand:VDC 2 "register_operand")]
3428   "TARGET_SIMD"
3429   {
3430     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3431     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3432     DONE;
3433   }
3434 [(set_attr "type" "multiple")]
3435 )
3436
3437 ;; <su><addsub>l<q>.
3438
3439 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3440  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3441        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3442                            (match_operand:VQW 1 "register_operand" "w")
3443                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3444                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3445                            (match_operand:VQW 2 "register_operand" "w")
3446                            (match_dup 3)))))]
3447   "TARGET_SIMD"
3448   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3449   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3450 )
3451
3452 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3453  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3454        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3455                            (match_operand:VQW 1 "register_operand" "w")
3456                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3457                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3458                            (match_operand:VQW 2 "register_operand" "w")
3459                            (match_dup 3)))))]
3460   "TARGET_SIMD"
3461   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3462   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3463 )
3464
3465 (define_expand "vec_widen_<su>addl_lo_<mode>"
3466   [(match_operand:<VWIDE> 0 "register_operand")
3467    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3468    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3469   "TARGET_SIMD"
3470 {
3471   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3472   emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
3473                                                      operands[2], p));
3474   DONE;
3475 })
3476
3477 (define_expand "vec_widen_<su>addl_hi_<mode>"
3478   [(match_operand:<VWIDE> 0 "register_operand")
3479    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3480    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3481   "TARGET_SIMD"
3482 {
3483   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3484   emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
3485                                                      operands[2], p));
3486   DONE;
3487 })
3488
3489 (define_expand "vec_widen_<su>subl_lo_<mode>"
3490   [(match_operand:<VWIDE> 0 "register_operand")
3491    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3492    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3493   "TARGET_SIMD"
3494 {
3495   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3496   emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
3497                                                      operands[2], p));
3498   DONE;
3499 })
3500
3501 (define_expand "vec_widen_<su>subl_hi_<mode>"
3502   [(match_operand:<VWIDE> 0 "register_operand")
3503    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3504    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3505   "TARGET_SIMD"
3506 {
3507   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508   emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
3509                                                      operands[2], p));
3510   DONE;
3511 })
3512
3513 (define_expand "aarch64_saddl2<mode>"
3514   [(match_operand:<VWIDE> 0 "register_operand")
3515    (match_operand:VQW 1 "register_operand")
3516    (match_operand:VQW 2 "register_operand")]
3517   "TARGET_SIMD"
3518 {
3519   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3521                                                   operands[2], p));
3522   DONE;
3523 })
3524
3525 (define_expand "aarch64_uaddl2<mode>"
3526   [(match_operand:<VWIDE> 0 "register_operand")
3527    (match_operand:VQW 1 "register_operand")
3528    (match_operand:VQW 2 "register_operand")]
3529   "TARGET_SIMD"
3530 {
3531   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3532   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3533                                                   operands[2], p));
3534   DONE;
3535 })
3536
3537 (define_expand "aarch64_ssubl2<mode>"
3538   [(match_operand:<VWIDE> 0 "register_operand")
3539    (match_operand:VQW 1 "register_operand")
3540    (match_operand:VQW 2 "register_operand")]
3541   "TARGET_SIMD"
3542 {
3543   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3544   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3545                                                 operands[2], p));
3546   DONE;
3547 })
3548
3549 (define_expand "aarch64_usubl2<mode>"
3550   [(match_operand:<VWIDE> 0 "register_operand")
3551    (match_operand:VQW 1 "register_operand")
3552    (match_operand:VQW 2 "register_operand")]
3553   "TARGET_SIMD"
3554 {
3555   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3556   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3557                                                 operands[2], p));
3558   DONE;
3559 })
3560
3561 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3562  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3563        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3564                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3565                        (ANY_EXTEND:<VWIDE>
3566                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3567   "TARGET_SIMD"
3568   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3569   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3570 )
3571
3572 ;; <su><addsub>w<q>.
3573
3574 (define_expand "widen_ssum<mode>3"
3575   [(set (match_operand:<VDBLW> 0 "register_operand")
3576         (plus:<VDBLW> (sign_extend:<VDBLW>
3577                         (match_operand:VQW 1 "register_operand"))
3578                       (match_operand:<VDBLW> 2 "register_operand")))]
3579   "TARGET_SIMD"
3580   {
3581     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3582     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3583
3584     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3585                                                 operands[1], p));
3586     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3587     DONE;
3588   }
3589 )
3590
3591 (define_expand "widen_ssum<mode>3"
3592   [(set (match_operand:<VWIDE> 0 "register_operand")
3593         (plus:<VWIDE> (sign_extend:<VWIDE>
3594                         (match_operand:VD_BHSI 1 "register_operand"))
3595                       (match_operand:<VWIDE> 2 "register_operand")))]
3596   "TARGET_SIMD"
3597 {
3598   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3599   DONE;
3600 })
3601
3602 (define_expand "widen_usum<mode>3"
3603   [(set (match_operand:<VDBLW> 0 "register_operand")
3604         (plus:<VDBLW> (zero_extend:<VDBLW>
3605                         (match_operand:VQW 1 "register_operand"))
3606                       (match_operand:<VDBLW> 2 "register_operand")))]
3607   "TARGET_SIMD"
3608   {
3609     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3610     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3611
3612     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3613                                                  operands[1], p));
3614     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3615     DONE;
3616   }
3617 )
3618
3619 (define_expand "widen_usum<mode>3"
3620   [(set (match_operand:<VWIDE> 0 "register_operand")
3621         (plus:<VWIDE> (zero_extend:<VWIDE>
3622                         (match_operand:VD_BHSI 1 "register_operand"))
3623                       (match_operand:<VWIDE> 2 "register_operand")))]
3624   "TARGET_SIMD"
3625 {
3626   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3627   DONE;
3628 })
3629
3630 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3631   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3632         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3633           (ANY_EXTEND:<VWIDE>
3634             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3635   "TARGET_SIMD"
3636   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3637   [(set_attr "type" "neon_sub_widen")]
3638 )
3639
3640 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3641   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3642         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3643           (ANY_EXTEND:<VWIDE>
3644             (vec_select:<VHALF>
3645               (match_operand:VQW 2 "register_operand" "w")
3646               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3647   "TARGET_SIMD"
3648   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3649   [(set_attr "type" "neon_sub_widen")]
3650 )
3651
3652 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3653   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3654         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3655           (ANY_EXTEND:<VWIDE>
3656             (vec_select:<VHALF>
3657               (match_operand:VQW 2 "register_operand" "w")
3658               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3659   "TARGET_SIMD"
3660   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3661   [(set_attr "type" "neon_sub_widen")]
3662 )
3663
3664 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3665   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3666         (plus:<VWIDE>
3667           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3668           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3669   "TARGET_SIMD"
3670   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3671   [(set_attr "type" "neon_add_widen")]
3672 )
3673
3674 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3675   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3676         (plus:<VWIDE>
3677           (ANY_EXTEND:<VWIDE>
3678             (vec_select:<VHALF>
3679               (match_operand:VQW 2 "register_operand" "w")
3680               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3681           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3682   "TARGET_SIMD"
3683   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3684   [(set_attr "type" "neon_add_widen")]
3685 )
3686
3687 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3688   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3689         (plus:<VWIDE>
3690           (ANY_EXTEND:<VWIDE>
3691             (vec_select:<VHALF>
3692               (match_operand:VQW 2 "register_operand" "w")
3693               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3694           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3695   "TARGET_SIMD"
3696   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3697   [(set_attr "type" "neon_add_widen")]
3698 )
3699
3700 (define_expand "aarch64_saddw2<mode>"
3701   [(match_operand:<VWIDE> 0 "register_operand")
3702    (match_operand:<VWIDE> 1 "register_operand")
3703    (match_operand:VQW 2 "register_operand")]
3704   "TARGET_SIMD"
3705 {
3706   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3707   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3708                                                 operands[2], p));
3709   DONE;
3710 })
3711
3712 (define_expand "aarch64_uaddw2<mode>"
3713   [(match_operand:<VWIDE> 0 "register_operand")
3714    (match_operand:<VWIDE> 1 "register_operand")
3715    (match_operand:VQW 2 "register_operand")]
3716   "TARGET_SIMD"
3717 {
3718   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3719   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3720                                                 operands[2], p));
3721   DONE;
3722 })
3723
3724
3725 (define_expand "aarch64_ssubw2<mode>"
3726   [(match_operand:<VWIDE> 0 "register_operand")
3727    (match_operand:<VWIDE> 1 "register_operand")
3728    (match_operand:VQW 2 "register_operand")]
3729   "TARGET_SIMD"
3730 {
3731   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3732   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3733                                                 operands[2], p));
3734   DONE;
3735 })
3736
3737 (define_expand "aarch64_usubw2<mode>"
3738   [(match_operand:<VWIDE> 0 "register_operand")
3739    (match_operand:<VWIDE> 1 "register_operand")
3740    (match_operand:VQW 2 "register_operand")]
3741   "TARGET_SIMD"
3742 {
3743   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3744   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3745                                                 operands[2], p));
3746   DONE;
3747 })
3748
3749 ;; <su><r>h<addsub>.
3750
3751 (define_expand "<u>avg<mode>3_floor"
3752   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3753         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3754                           (match_operand:VDQ_BHSI 2 "register_operand")]
3755                          HADD))]
3756   "TARGET_SIMD"
3757 )
3758
3759 (define_expand "<u>avg<mode>3_ceil"
3760   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3761         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3762                           (match_operand:VDQ_BHSI 2 "register_operand")]
3763                          RHADD))]
3764   "TARGET_SIMD"
3765 )
3766
3767 (define_insn "aarch64_<sur>h<addsub><mode>"
3768   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3769         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3770                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3771                      HADDSUB))]
3772   "TARGET_SIMD"
3773   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3774   [(set_attr "type" "neon_<addsub>_halve<q>")]
3775 )
3776
3777 ;; <r><addsub>hn<q>.
3778
3779 (define_insn "aarch64_<sur><addsub>hn<mode>"
3780   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3781         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3782                             (match_operand:VQN 2 "register_operand" "w")]
3783                            ADDSUBHN))]
3784   "TARGET_SIMD"
3785   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3786   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3787 )
3788
3789 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3790   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3791         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3792                              (match_operand:VQN 2 "register_operand" "w")
3793                              (match_operand:VQN 3 "register_operand" "w")]
3794                             ADDSUBHN2))]
3795   "TARGET_SIMD"
3796   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3797   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3798 )
3799
3800 ;; pmul.
3801
3802 (define_insn "aarch64_pmul<mode>"
3803   [(set (match_operand:VB 0 "register_operand" "=w")
3804         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3805                     (match_operand:VB 2 "register_operand" "w")]
3806                    UNSPEC_PMUL))]
3807  "TARGET_SIMD"
3808  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3809   [(set_attr "type" "neon_mul_<Vetype><q>")]
3810 )
3811
3812 ;; fmulx.
3813
3814 (define_insn "aarch64_fmulx<mode>"
3815   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3816         (unspec:VHSDF_HSDF
3817           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3818            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3819            UNSPEC_FMULX))]
3820  "TARGET_SIMD"
3821  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3822  [(set_attr "type" "neon_fp_mul_<stype>")]
3823 )
3824
3825 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3826
3827 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3828   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3829         (unspec:VDQSF
3830          [(match_operand:VDQSF 1 "register_operand" "w")
3831           (vec_duplicate:VDQSF
3832            (vec_select:<VEL>
3833             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3834             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3835          UNSPEC_FMULX))]
3836   "TARGET_SIMD"
3837   {
3838     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3839     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3840   }
3841   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3842 )
3843
3844 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3845
3846 (define_insn "*aarch64_mulx_elt<mode>"
3847   [(set (match_operand:VDQF 0 "register_operand" "=w")
3848         (unspec:VDQF
3849          [(match_operand:VDQF 1 "register_operand" "w")
3850           (vec_duplicate:VDQF
3851            (vec_select:<VEL>
3852             (match_operand:VDQF 2 "register_operand" "w")
3853             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3854          UNSPEC_FMULX))]
3855   "TARGET_SIMD"
3856   {
3857     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3858     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3859   }
3860   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3861 )
3862
3863 ;; vmulxq_lane
3864
3865 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3866   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3867         (unspec:VHSDF
3868          [(match_operand:VHSDF 1 "register_operand" "w")
3869           (vec_duplicate:VHSDF
3870             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3871          UNSPEC_FMULX))]
3872   "TARGET_SIMD"
3873   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3874   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3875 )
3876
3877 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3878 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3879 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3880
3881 (define_insn "*aarch64_vgetfmulx<mode>"
3882   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3883         (unspec:<VEL>
3884          [(match_operand:<VEL> 1 "register_operand" "w")
3885           (vec_select:<VEL>
3886            (match_operand:VDQF 2 "register_operand" "w")
3887             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3888          UNSPEC_FMULX))]
3889   "TARGET_SIMD"
3890   {
3891     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3892     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3893   }
3894   [(set_attr "type" "fmul<Vetype>")]
3895 )
3896 ;; <su>q<addsub>
3897
3898 (define_insn "aarch64_<su_optab>q<addsub><mode>"
3899   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3900         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3901                         (match_operand:VSDQ_I 2 "register_operand" "w")))]
3902   "TARGET_SIMD"
3903   "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3904   [(set_attr "type" "neon_q<addsub><q>")]
3905 )
3906
3907 ;; suqadd and usqadd
3908
3909 (define_insn "aarch64_<sur>qadd<mode>"
3910   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3911         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3912                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3913                        USSUQADD))]
3914   "TARGET_SIMD"
3915   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3916   [(set_attr "type" "neon_qadd<q>")]
3917 )
3918
3919 ;; sqmovun
3920
3921 (define_insn "aarch64_sqmovun<mode>"
3922   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3923         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3924                             UNSPEC_SQXTUN))]
3925    "TARGET_SIMD"
3926    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3927    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3928 )
3929
3930 ;; sqmovn and uqmovn
3931
3932 (define_insn "aarch64_<sur>qmovn<mode>"
3933   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3934         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3935                             SUQMOVN))]
3936   "TARGET_SIMD"
3937   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3938    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3939 )
3940
3941 (define_insn "aarch64_<su>qxtn2<mode>_le"
3942   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3943         (vec_concat:<VNARROWQ2>
3944           (match_operand:<VNARROWQ> 1 "register_operand" "0")
3945           (SAT_TRUNC:<VNARROWQ>
3946             (match_operand:VQN 2 "register_operand" "w"))))]
3947   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3948   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
3949    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3950 )
3951
3952 (define_insn "aarch64_<su>qxtn2<mode>_be"
3953   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3954         (vec_concat:<VNARROWQ2>
3955           (SAT_TRUNC:<VNARROWQ>
3956             (match_operand:VQN 2 "register_operand" "w"))
3957           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
3958   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3959   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
3960    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3961 )
3962
3963 (define_expand "aarch64_<su>qxtn2<mode>"
3964   [(match_operand:<VNARROWQ2> 0 "register_operand")
3965    (match_operand:<VNARROWQ> 1 "register_operand")
3966    (SAT_TRUNC:<VNARROWQ>
3967      (match_operand:VQN 2 "register_operand"))]
3968   "TARGET_SIMD"
3969   {
3970     if (BYTES_BIG_ENDIAN)
3971       emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
3972                                                  operands[2]));
3973     else
3974       emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
3975                                                  operands[2]));
3976     DONE;
3977   }
3978 )
3979
3980 ;; <su>q<absneg>
3981
3982 (define_insn "aarch64_s<optab><mode>"
3983   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3984         (UNQOPS:VSDQ_I
3985           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3986   "TARGET_SIMD"
3987   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3988   [(set_attr "type" "neon_<optab><q>")]
3989 )
3990
3991 ;; sq<r>dmulh.
3992
3993 (define_insn "aarch64_sq<r>dmulh<mode>"
3994   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3995         (unspec:VSDQ_HSI
3996           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3997            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3998          VQDMULH))]
3999   "TARGET_SIMD"
4000   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4001   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
4002 )
4003
4004 ;; sq<r>dmulh_lane
4005
4006 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
4007   [(set (match_operand:VDQHS 0 "register_operand" "=w")
4008         (unspec:VDQHS
4009           [(match_operand:VDQHS 1 "register_operand" "w")
4010            (vec_select:<VEL>
4011              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4012              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4013          VQDMULH))]
4014   "TARGET_SIMD"
4015   "*
4016    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4017    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
4018   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4019 )
4020
4021 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
4022   [(set (match_operand:VDQHS 0 "register_operand" "=w")
4023         (unspec:VDQHS
4024           [(match_operand:VDQHS 1 "register_operand" "w")
4025            (vec_select:<VEL>
4026              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4027              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4028          VQDMULH))]
4029   "TARGET_SIMD"
4030   "*
4031    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4032    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
4033   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4034 )
4035
4036 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
4037   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4038         (unspec:SD_HSI
4039           [(match_operand:SD_HSI 1 "register_operand" "w")
4040            (vec_select:<VEL>
4041              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4042              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4043          VQDMULH))]
4044   "TARGET_SIMD"
4045   "*
4046    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4047    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
4048   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4049 )
4050
4051 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
4052   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4053         (unspec:SD_HSI
4054           [(match_operand:SD_HSI 1 "register_operand" "w")
4055            (vec_select:<VEL>
4056              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4057              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4058          VQDMULH))]
4059   "TARGET_SIMD"
4060   "*
4061    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4062    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
4063   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4064 )
4065
4066 ;; sqrdml[as]h.
4067
4068 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
4069   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
4070         (unspec:VSDQ_HSI
4071           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
4072            (match_operand:VSDQ_HSI 2 "register_operand" "w")
4073            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
4074           SQRDMLH_AS))]
4075    "TARGET_SIMD_RDMA"
4076    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4077    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4078 )
4079
4080 ;; sqrdml[as]h_lane.
4081
4082 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
4083   [(set (match_operand:VDQHS 0 "register_operand" "=w")
4084         (unspec:VDQHS
4085           [(match_operand:VDQHS 1 "register_operand" "0")
4086            (match_operand:VDQHS 2 "register_operand" "w")
4087            (vec_select:<VEL>
4088              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4089              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4090           SQRDMLH_AS))]
4091    "TARGET_SIMD_RDMA"
4092    {
4093      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4094      return
4095       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4096    }
4097    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4098 )
4099
4100 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
4101   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4102         (unspec:SD_HSI
4103           [(match_operand:SD_HSI 1 "register_operand" "0")
4104            (match_operand:SD_HSI 2 "register_operand" "w")
4105            (vec_select:<VEL>
4106              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4107              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4108           SQRDMLH_AS))]
4109    "TARGET_SIMD_RDMA"
4110    {
4111      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4112      return
4113       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
4114    }
4115    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4116 )
4117
4118 ;; sqrdml[as]h_laneq.
4119
4120 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4121   [(set (match_operand:VDQHS 0 "register_operand" "=w")
4122         (unspec:VDQHS
4123           [(match_operand:VDQHS 1 "register_operand" "0")
4124            (match_operand:VDQHS 2 "register_operand" "w")
4125            (vec_select:<VEL>
4126              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4127              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4128           SQRDMLH_AS))]
4129    "TARGET_SIMD_RDMA"
4130    {
4131      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4132      return
4133       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4134    }
4135    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4136 )
4137
4138 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4139   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4140         (unspec:SD_HSI
4141           [(match_operand:SD_HSI 1 "register_operand" "0")
4142            (match_operand:SD_HSI 2 "register_operand" "w")
4143            (vec_select:<VEL>
4144              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4145              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4146           SQRDMLH_AS))]
4147    "TARGET_SIMD_RDMA"
4148    {
4149      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4150      return
4151       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
4152    }
4153    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4154 )
4155
4156 ;; vqdml[sa]l
4157
4158 (define_insn "aarch64_sqdmlal<mode>"
4159   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4160         (ss_plus:<VWIDE>
4161           (ss_ashift:<VWIDE>
4162               (mult:<VWIDE>
4163                 (sign_extend:<VWIDE>
4164                       (match_operand:VSD_HSI 2 "register_operand" "w"))
4165                 (sign_extend:<VWIDE>
4166                       (match_operand:VSD_HSI 3 "register_operand" "w")))
4167               (const_int 1))
4168           (match_operand:<VWIDE> 1 "register_operand" "0")))]
4169   "TARGET_SIMD"
4170   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4171   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4172 )
4173
4174 (define_insn "aarch64_sqdmlsl<mode>"
4175   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4176         (ss_minus:<VWIDE>
4177           (match_operand:<VWIDE> 1 "register_operand" "0")
4178           (ss_ashift:<VWIDE>
4179               (mult:<VWIDE>
4180                 (sign_extend:<VWIDE>
4181                       (match_operand:VSD_HSI 2 "register_operand" "w"))
4182                 (sign_extend:<VWIDE>
4183                       (match_operand:VSD_HSI 3 "register_operand" "w")))
4184               (const_int 1))))]
4185   "TARGET_SIMD"
4186   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4187   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4188 )
4189
4190 ;; vqdml[sa]l_lane
4191
4192 (define_insn "aarch64_sqdmlal_lane<mode>"
4193   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4194         (ss_plus:<VWIDE>
4195           (ss_ashift:<VWIDE>
4196             (mult:<VWIDE>
4197               (sign_extend:<VWIDE>
4198                 (match_operand:VD_HSI 2 "register_operand" "w"))
4199               (sign_extend:<VWIDE>
4200                 (vec_duplicate:VD_HSI
4201                   (vec_select:<VEL>
4202                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4203                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4204               ))
4205             (const_int 1))
4206           (match_operand:<VWIDE> 1 "register_operand" "0")))]
4207   "TARGET_SIMD"
4208   {
4209     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4210     return
4211       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4212   }
4213   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4214 )
4215
4216 (define_insn "aarch64_sqdmlsl_lane<mode>"
4217   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4218         (ss_minus:<VWIDE>
4219           (match_operand:<VWIDE> 1 "register_operand" "0")
4220           (ss_ashift:<VWIDE>
4221             (mult:<VWIDE>
4222               (sign_extend:<VWIDE>
4223                 (match_operand:VD_HSI 2 "register_operand" "w"))
4224               (sign_extend:<VWIDE>
4225                 (vec_duplicate:VD_HSI
4226                   (vec_select:<VEL>
4227                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4228                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4229               ))
4230             (const_int 1))))]
4231   "TARGET_SIMD"
4232   {
4233     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4234     return
4235       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4236   }
4237   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4238 )
4239
4240
4241 (define_insn "aarch64_sqdmlsl_laneq<mode>"
4242   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4243         (ss_minus:<VWIDE>
4244           (match_operand:<VWIDE> 1 "register_operand" "0")
4245           (ss_ashift:<VWIDE>
4246             (mult:<VWIDE>
4247               (sign_extend:<VWIDE>
4248                 (match_operand:VD_HSI 2 "register_operand" "w"))
4249               (sign_extend:<VWIDE>
4250                 (vec_duplicate:VD_HSI
4251                   (vec_select:<VEL>
4252                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4253                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4254               ))
4255             (const_int 1))))]
4256   "TARGET_SIMD"
4257   {
4258     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4259     return
4260       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4261   }
4262   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4263 )
4264
4265 (define_insn "aarch64_sqdmlal_laneq<mode>"
4266   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4267         (ss_plus:<VWIDE>
4268           (ss_ashift:<VWIDE>
4269             (mult:<VWIDE>
4270               (sign_extend:<VWIDE>
4271                 (match_operand:VD_HSI 2 "register_operand" "w"))
4272               (sign_extend:<VWIDE>
4273                 (vec_duplicate:VD_HSI
4274                   (vec_select:<VEL>
4275                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4276                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4277               ))
4278             (const_int 1))
4279           (match_operand:<VWIDE> 1 "register_operand" "0")))]
4280   "TARGET_SIMD"
4281   {
4282     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4283     return
4284       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4285   }
4286   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4287 )
4288
4289
4290 (define_insn "aarch64_sqdmlal_lane<mode>"
4291   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4292         (ss_plus:<VWIDE>
4293           (ss_ashift:<VWIDE>
4294             (mult:<VWIDE>
4295               (sign_extend:<VWIDE>
4296                 (match_operand:SD_HSI 2 "register_operand" "w"))
4297               (sign_extend:<VWIDE>
4298                 (vec_select:<VEL>
4299                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4300                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4301               )
4302             (const_int 1))
4303           (match_operand:<VWIDE> 1 "register_operand" "0")))]
4304   "TARGET_SIMD"
4305   {
4306     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4307     return
4308       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4309   }
4310   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4311 )
4312
4313 (define_insn "aarch64_sqdmlsl_lane<mode>"
4314   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4315         (ss_minus:<VWIDE>
4316           (match_operand:<VWIDE> 1 "register_operand" "0")
4317           (ss_ashift:<VWIDE>
4318             (mult:<VWIDE>
4319               (sign_extend:<VWIDE>
4320                 (match_operand:SD_HSI 2 "register_operand" "w"))
4321               (sign_extend:<VWIDE>
4322                 (vec_select:<VEL>
4323                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4324                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4325               )
4326             (const_int 1))))]
4327   "TARGET_SIMD"
4328   {
4329     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4330     return
4331       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4332   }
4333   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4334 )
4335
4336
4337 (define_insn "aarch64_sqdmlal_laneq<mode>"
4338   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4339         (ss_plus:<VWIDE>
4340           (ss_ashift:<VWIDE>
4341             (mult:<VWIDE>
4342               (sign_extend:<VWIDE>
4343                 (match_operand:SD_HSI 2 "register_operand" "w"))
4344               (sign_extend:<VWIDE>
4345                 (vec_select:<VEL>
4346                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4347                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4348               )
4349             (const_int 1))
4350           (match_operand:<VWIDE> 1 "register_operand" "0")))]
4351   "TARGET_SIMD"
4352   {
4353     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4354     return
4355       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4356   }
4357   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4358 )
4359
4360 (define_insn "aarch64_sqdmlsl_laneq<mode>"
4361   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4362         (ss_minus:<VWIDE>
4363           (match_operand:<VWIDE> 1 "register_operand" "0")
4364           (ss_ashift:<VWIDE>
4365             (mult:<VWIDE>
4366               (sign_extend:<VWIDE>
4367                 (match_operand:SD_HSI 2 "register_operand" "w"))
4368               (sign_extend:<VWIDE>
4369                 (vec_select:<VEL>
4370                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4371                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4372               )
4373             (const_int 1))))]
4374   "TARGET_SIMD"
4375   {
4376     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4377     return
4378       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4379   }
4380   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4381 )
4382
4383 ;; vqdml[sa]l_n
4384
4385 (define_insn "aarch64_sqdmlsl_n<mode>"
4386   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4387         (ss_minus:<VWIDE>
4388           (match_operand:<VWIDE> 1 "register_operand" "0")
4389           (ss_ashift:<VWIDE>
4390               (mult:<VWIDE>
4391                 (sign_extend:<VWIDE>
4392                       (match_operand:VD_HSI 2 "register_operand" "w"))
4393                 (sign_extend:<VWIDE>
4394                   (vec_duplicate:VD_HSI
4395                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4396               (const_int 1))))]
4397   "TARGET_SIMD"
4398   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4399   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4400 )
4401
4402 (define_insn "aarch64_sqdmlal_n<mode>"
4403   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4404         (ss_plus:<VWIDE>
4405           (ss_ashift:<VWIDE>
4406               (mult:<VWIDE>
4407                 (sign_extend:<VWIDE>
4408                       (match_operand:VD_HSI 2 "register_operand" "w"))
4409                 (sign_extend:<VWIDE>
4410                   (vec_duplicate:VD_HSI
4411                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4412               (const_int 1))
4413           (match_operand:<VWIDE> 1 "register_operand" "0")))]
4414   "TARGET_SIMD"
4415   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4416   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4417 )
4418
4419
4420 ;; sqdml[as]l2
4421
4422 (define_insn "aarch64_sqdmlal2<mode>_internal"
4423   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4424         (ss_plus:<VWIDE>
4425          (ss_ashift:<VWIDE>
4426              (mult:<VWIDE>
4427                (sign_extend:<VWIDE>
4428                  (vec_select:<VHALF>
4429                      (match_operand:VQ_HSI 2 "register_operand" "w")
4430                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4431                (sign_extend:<VWIDE>
4432                  (vec_select:<VHALF>
4433                      (match_operand:VQ_HSI 3 "register_operand" "w")
4434                      (match_dup 4))))
4435              (const_int 1))
4436           (match_operand:<VWIDE> 1 "register_operand" "0")))]
4437   "TARGET_SIMD"
4438   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4439   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4440 )
4441
4442 (define_insn "aarch64_sqdmlsl2<mode>_internal"
4443   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4444         (ss_minus:<VWIDE>
4445          (match_operand:<VWIDE> 1 "register_operand" "0")
4446          (ss_ashift:<VWIDE>
4447              (mult:<VWIDE>
4448                (sign_extend:<VWIDE>
4449                  (vec_select:<VHALF>
4450                      (match_operand:VQ_HSI 2 "register_operand" "w")
4451                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4452                (sign_extend:<VWIDE>
4453                  (vec_select:<VHALF>
4454                      (match_operand:VQ_HSI 3 "register_operand" "w")
4455                      (match_dup 4))))
4456              (const_int 1))))]
4457   "TARGET_SIMD"
4458   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4459   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4460 )
4461
4462 (define_expand "aarch64_sqdmlal2<mode>"
4463   [(match_operand:<VWIDE> 0 "register_operand")
4464    (match_operand:<VWIDE> 1 "register_operand")
4465    (match_operand:VQ_HSI 2 "register_operand")
4466    (match_operand:VQ_HSI 3 "register_operand")]
4467   "TARGET_SIMD"
4468 {
4469   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4470   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4471                                                   operands[2], operands[3], p));
4472   DONE;
4473 })
4474
4475 (define_expand "aarch64_sqdmlsl2<mode>"
4476   [(match_operand:<VWIDE> 0 "register_operand")
4477    (match_operand:<VWIDE> 1 "register_operand")
4478    (match_operand:VQ_HSI 2 "register_operand")
4479    (match_operand:VQ_HSI 3 "register_operand")]
4480   "TARGET_SIMD"
4481 {
4482   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4483   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4484                                                   operands[2], operands[3], p));
4485   DONE;
4486 })
4487
4488 ;; vqdml[sa]l2_lane
4489
4490 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4491   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4492         (SBINQOPS:<VWIDE>
4493           (match_operand:<VWIDE> 1 "register_operand" "0")
4494           (ss_ashift:<VWIDE>
4495               (mult:<VWIDE>
4496                 (sign_extend:<VWIDE>
4497                   (vec_select:<VHALF>
4498                     (match_operand:VQ_HSI 2 "register_operand" "w")
4499                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4500                 (sign_extend:<VWIDE>
4501                   (vec_duplicate:<VHALF>
4502                     (vec_select:<VEL>
4503                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4504                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4505                     ))))
4506               (const_int 1))))]
4507   "TARGET_SIMD"
4508   {
4509     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4510     return
4511      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4512   }
4513   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4514 )
4515
4516 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4517   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4518         (SBINQOPS:<VWIDE>
4519           (match_operand:<VWIDE> 1 "register_operand" "0")
4520           (ss_ashift:<VWIDE>
4521               (mult:<VWIDE>
4522                 (sign_extend:<VWIDE>
4523                   (vec_select:<VHALF>
4524                     (match_operand:VQ_HSI 2 "register_operand" "w")
4525                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4526                 (sign_extend:<VWIDE>
4527                   (vec_duplicate:<VHALF>
4528                     (vec_select:<VEL>
4529                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4530                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4531                     ))))
4532               (const_int 1))))]
4533   "TARGET_SIMD"
4534   {
4535     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4536     return
4537      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4538   }
4539   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4540 )
4541
4542 (define_expand "aarch64_sqdmlal2_lane<mode>"
4543   [(match_operand:<VWIDE> 0 "register_operand")
4544    (match_operand:<VWIDE> 1 "register_operand")
4545    (match_operand:VQ_HSI 2 "register_operand")
4546    (match_operand:<VCOND> 3 "register_operand")
4547    (match_operand:SI 4 "immediate_operand")]
4548   "TARGET_SIMD"
4549 {
4550   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4551   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4552                                                        operands[2], operands[3],
4553                                                        operands[4], p));
4554   DONE;
4555 })
4556
4557 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4558   [(match_operand:<VWIDE> 0 "register_operand")
4559    (match_operand:<VWIDE> 1 "register_operand")
4560    (match_operand:VQ_HSI 2 "register_operand")
4561    (match_operand:<VCONQ> 3 "register_operand")
4562    (match_operand:SI 4 "immediate_operand")]
4563   "TARGET_SIMD"
4564 {
4565   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4566   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4567                                                        operands[2], operands[3],
4568                                                        operands[4], p));
4569   DONE;
4570 })
4571
4572 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4573   [(match_operand:<VWIDE> 0 "register_operand")
4574    (match_operand:<VWIDE> 1 "register_operand")
4575    (match_operand:VQ_HSI 2 "register_operand")
4576    (match_operand:<VCOND> 3 "register_operand")
4577    (match_operand:SI 4 "immediate_operand")]
4578   "TARGET_SIMD"
4579 {
4580   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4581   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4582                                                        operands[2], operands[3],
4583                                                        operands[4], p));
4584   DONE;
4585 })
4586
4587 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4588   [(match_operand:<VWIDE> 0 "register_operand")
4589    (match_operand:<VWIDE> 1 "register_operand")
4590    (match_operand:VQ_HSI 2 "register_operand")
4591    (match_operand:<VCONQ> 3 "register_operand")
4592    (match_operand:SI 4 "immediate_operand")]
4593   "TARGET_SIMD"
4594 {
4595   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4596   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4597                                                        operands[2], operands[3],
4598                                                        operands[4], p));
4599   DONE;
4600 })
4601
4602 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4603   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4604         (SBINQOPS:<VWIDE>
4605           (match_operand:<VWIDE> 1 "register_operand" "0")
4606           (ss_ashift:<VWIDE>
4607             (mult:<VWIDE>
4608               (sign_extend:<VWIDE>
4609                 (vec_select:<VHALF>
4610                   (match_operand:VQ_HSI 2 "register_operand" "w")
4611                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4612               (sign_extend:<VWIDE>
4613                 (vec_duplicate:<VHALF>
4614                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4615             (const_int 1))))]
4616   "TARGET_SIMD"
4617   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4618   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4619 )
4620
4621 (define_expand "aarch64_sqdmlal2_n<mode>"
4622   [(match_operand:<VWIDE> 0 "register_operand")
4623    (match_operand:<VWIDE> 1 "register_operand")
4624    (match_operand:VQ_HSI 2 "register_operand")
4625    (match_operand:<VEL> 3 "register_operand")]
4626   "TARGET_SIMD"
4627 {
4628   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4629   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4630                                                     operands[2], operands[3],
4631                                                     p));
4632   DONE;
4633 })
4634
4635 (define_expand "aarch64_sqdmlsl2_n<mode>"
4636   [(match_operand:<VWIDE> 0 "register_operand")
4637    (match_operand:<VWIDE> 1 "register_operand")
4638    (match_operand:VQ_HSI 2 "register_operand")
4639    (match_operand:<VEL> 3 "register_operand")]
4640   "TARGET_SIMD"
4641 {
4642   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4643   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4644                                                     operands[2], operands[3],
4645                                                     p));
4646   DONE;
4647 })
4648
4649 ;; vqdmull
4650
4651 (define_insn "aarch64_sqdmull<mode>"
4652   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4653         (ss_ashift:<VWIDE>
4654              (mult:<VWIDE>
4655                (sign_extend:<VWIDE>
4656                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4657                (sign_extend:<VWIDE>
4658                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4659              (const_int 1)))]
4660   "TARGET_SIMD"
4661   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4662   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4663 )
4664
4665 ;; vqdmull_lane
4666
4667 (define_insn "aarch64_sqdmull_lane<mode>"
4668   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4669         (ss_ashift:<VWIDE>
4670              (mult:<VWIDE>
4671                (sign_extend:<VWIDE>
4672                  (match_operand:VD_HSI 1 "register_operand" "w"))
4673                (sign_extend:<VWIDE>
4674                  (vec_duplicate:VD_HSI
4675                    (vec_select:<VEL>
4676                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4677                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4678                ))
4679              (const_int 1)))]
4680   "TARGET_SIMD"
4681   {
4682     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4683     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4684   }
4685   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4686 )
4687
4688 (define_insn "aarch64_sqdmull_laneq<mode>"
4689   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4690         (ss_ashift:<VWIDE>
4691              (mult:<VWIDE>
4692                (sign_extend:<VWIDE>
4693                  (match_operand:VD_HSI 1 "register_operand" "w"))
4694                (sign_extend:<VWIDE>
4695                  (vec_duplicate:VD_HSI
4696                    (vec_select:<VEL>
4697                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4698                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4699                ))
4700              (const_int 1)))]
4701   "TARGET_SIMD"
4702   {
4703     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4704     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4705   }
4706   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4707 )
4708
4709 (define_insn "aarch64_sqdmull_lane<mode>"
4710   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4711         (ss_ashift:<VWIDE>
4712              (mult:<VWIDE>
4713                (sign_extend:<VWIDE>
4714                  (match_operand:SD_HSI 1 "register_operand" "w"))
4715                (sign_extend:<VWIDE>
4716                  (vec_select:<VEL>
4717                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4718                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4719                ))
4720              (const_int 1)))]
4721   "TARGET_SIMD"
4722   {
4723     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4724     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4725   }
4726   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4727 )
4728
4729 (define_insn "aarch64_sqdmull_laneq<mode>"
4730   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4731         (ss_ashift:<VWIDE>
4732              (mult:<VWIDE>
4733                (sign_extend:<VWIDE>
4734                  (match_operand:SD_HSI 1 "register_operand" "w"))
4735                (sign_extend:<VWIDE>
4736                  (vec_select:<VEL>
4737                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4738                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4739                ))
4740              (const_int 1)))]
4741   "TARGET_SIMD"
4742   {
4743     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4744     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4745   }
4746   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4747 )
4748
4749 ;; vqdmull_n
4750
4751 (define_insn "aarch64_sqdmull_n<mode>"
4752   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4753         (ss_ashift:<VWIDE>
4754              (mult:<VWIDE>
4755                (sign_extend:<VWIDE>
4756                  (match_operand:VD_HSI 1 "register_operand" "w"))
4757                (sign_extend:<VWIDE>
4758                  (vec_duplicate:VD_HSI
4759                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4760                )
4761              (const_int 1)))]
4762   "TARGET_SIMD"
4763   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4764   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4765 )
4766
4767 ;; vqdmull2
4768
4769
4770
4771 (define_insn "aarch64_sqdmull2<mode>_internal"
4772   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4773         (ss_ashift:<VWIDE>
4774              (mult:<VWIDE>
4775                (sign_extend:<VWIDE>
4776                  (vec_select:<VHALF>
4777                    (match_operand:VQ_HSI 1 "register_operand" "w")
4778                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4779                (sign_extend:<VWIDE>
4780                  (vec_select:<VHALF>
4781                    (match_operand:VQ_HSI 2 "register_operand" "w")
4782                    (match_dup 3)))
4783                )
4784              (const_int 1)))]
4785   "TARGET_SIMD"
4786   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4787   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4788 )
4789
4790 (define_expand "aarch64_sqdmull2<mode>"
4791   [(match_operand:<VWIDE> 0 "register_operand")
4792    (match_operand:VQ_HSI 1 "register_operand")
4793    (match_operand:VQ_HSI 2 "register_operand")]
4794   "TARGET_SIMD"
4795 {
4796   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4797   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4798                                                   operands[2], p));
4799   DONE;
4800 })
4801
4802 ;; vqdmull2_lane
4803
4804 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4805   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4806         (ss_ashift:<VWIDE>
4807              (mult:<VWIDE>
4808                (sign_extend:<VWIDE>
4809                  (vec_select:<VHALF>
4810                    (match_operand:VQ_HSI 1 "register_operand" "w")
4811                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4812                (sign_extend:<VWIDE>
4813                  (vec_duplicate:<VHALF>
4814                    (vec_select:<VEL>
4815                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4816                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4817                ))
4818              (const_int 1)))]
4819   "TARGET_SIMD"
4820   {
4821     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4822     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4823   }
4824   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4825 )
4826
4827 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4828   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4829         (ss_ashift:<VWIDE>
4830              (mult:<VWIDE>
4831                (sign_extend:<VWIDE>
4832                  (vec_select:<VHALF>
4833                    (match_operand:VQ_HSI 1 "register_operand" "w")
4834                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4835                (sign_extend:<VWIDE>
4836                  (vec_duplicate:<VHALF>
4837                    (vec_select:<VEL>
4838                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4839                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4840                ))
4841              (const_int 1)))]
4842   "TARGET_SIMD"
4843   {
4844     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4845     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4846   }
4847   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4848 )
4849
4850 (define_expand "aarch64_sqdmull2_lane<mode>"
4851   [(match_operand:<VWIDE> 0 "register_operand")
4852    (match_operand:VQ_HSI 1 "register_operand")
4853    (match_operand:<VCOND> 2 "register_operand")
4854    (match_operand:SI 3 "immediate_operand")]
4855   "TARGET_SIMD"
4856 {
4857   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4858   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4859                                                        operands[2], operands[3],
4860                                                        p));
4861   DONE;
4862 })
4863
4864 (define_expand "aarch64_sqdmull2_laneq<mode>"
4865   [(match_operand:<VWIDE> 0 "register_operand")
4866    (match_operand:VQ_HSI 1 "register_operand")
4867    (match_operand:<VCONQ> 2 "register_operand")
4868    (match_operand:SI 3 "immediate_operand")]
4869   "TARGET_SIMD"
4870 {
4871   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4872   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4873                                                        operands[2], operands[3],
4874                                                        p));
4875   DONE;
4876 })
4877
4878 ;; vqdmull2_n
4879
4880 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4881   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4882         (ss_ashift:<VWIDE>
4883              (mult:<VWIDE>
4884                (sign_extend:<VWIDE>
4885                  (vec_select:<VHALF>
4886                    (match_operand:VQ_HSI 1 "register_operand" "w")
4887                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4888                (sign_extend:<VWIDE>
4889                  (vec_duplicate:<VHALF>
4890                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4891                )
4892              (const_int 1)))]
4893   "TARGET_SIMD"
4894   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4895   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4896 )
4897
4898 (define_expand "aarch64_sqdmull2_n<mode>"
4899   [(match_operand:<VWIDE> 0 "register_operand")
4900    (match_operand:VQ_HSI 1 "register_operand")
4901    (match_operand:<VEL> 2 "register_operand")]
4902   "TARGET_SIMD"
4903 {
4904   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4905   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4906                                                     operands[2], p));
4907   DONE;
4908 })
4909
4910 ;; vshl
4911
4912 (define_insn "aarch64_<sur>shl<mode>"
4913   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4914         (unspec:VSDQ_I_DI
4915           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4916            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4917          VSHL))]
4918   "TARGET_SIMD"
4919   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4920   [(set_attr "type" "neon_shift_reg<q>")]
4921 )
4922
4923
4924 ;; vqshl
4925
4926 (define_insn "aarch64_<sur>q<r>shl<mode>"
4927   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4928         (unspec:VSDQ_I
4929           [(match_operand:VSDQ_I 1 "register_operand" "w")
4930            (match_operand:VSDQ_I 2 "register_operand" "w")]
4931          VQSHL))]
4932   "TARGET_SIMD"
4933   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4934   [(set_attr "type" "neon_sat_shift_reg<q>")]
4935 )
4936
4937 (define_expand "vec_widen_<sur>shiftl_lo_<mode>"
4938   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4939         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4940                          (match_operand:SI 2
4941                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4942                          VSHLL))]
4943   "TARGET_SIMD"
4944   {
4945     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4946     emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
4947                                                      p, operands[2]));
4948     DONE;
4949   }
4950 )
4951
4952 (define_expand "vec_widen_<sur>shiftl_hi_<mode>"
4953    [(set (match_operand:<VWIDE> 0 "register_operand")
4954         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4955                          (match_operand:SI 2
4956                            "immediate_operand" "i")]
4957                           VSHLL))]
4958    "TARGET_SIMD"
4959    {
4960     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4961     emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
4962                                                       p, operands[2]));
4963     DONE;
4964    }
4965 )
4966
4967 ;; vshll_n
4968
4969 (define_insn "aarch64_<sur>shll<mode>_internal"
4970   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4971         (unspec:<VWIDE> [(vec_select:<VHALF>
4972                             (match_operand:VQW 1 "register_operand" "w")
4973                             (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
4974                          (match_operand:SI 3
4975                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4976                          VSHLL))]
4977   "TARGET_SIMD"
4978   {
4979     if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4980       return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
4981     else
4982       return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
4983   }
4984   [(set_attr "type" "neon_shift_imm_long")]
4985 )
4986
4987 (define_insn "aarch64_<sur>shll2<mode>_internal"
4988   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4989         (unspec:<VWIDE> [(vec_select:<VHALF>
4990                             (match_operand:VQW 1 "register_operand" "w")
4991                             (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
4992                          (match_operand:SI 3
4993                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4994                          VSHLL))]
4995   "TARGET_SIMD"
4996   {
4997     if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4998       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
4999     else
5000       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
5001   }
5002   [(set_attr "type" "neon_shift_imm_long")]
5003 )
5004
5005 (define_insn "aarch64_<sur>shll_n<mode>"
5006   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5007         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
5008                          (match_operand:SI 2
5009                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
5010                          VSHLL))]
5011   "TARGET_SIMD"
5012   {
5013     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
5014       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
5015     else
5016       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
5017   }
5018   [(set_attr "type" "neon_shift_imm_long")]
5019 )
5020
5021 ;; vshll_high_n
5022
5023 (define_insn "aarch64_<sur>shll2_n<mode>"
5024   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5025         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
5026                          (match_operand:SI 2 "immediate_operand" "i")]
5027                          VSHLL))]
5028   "TARGET_SIMD"
5029   {
5030     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
5031       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
5032     else
5033       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
5034   }
5035   [(set_attr "type" "neon_shift_imm_long")]
5036 )
5037
5038 ;; vrshr_n
5039
5040 (define_insn "aarch64_<sur>shr_n<mode>"
5041   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5042         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
5043                            (match_operand:SI 2
5044                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5045                           VRSHR_N))]
5046   "TARGET_SIMD"
5047   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
5048   [(set_attr "type" "neon_sat_shift_imm<q>")]
5049 )
5050
5051 ;; v(r)sra_n
5052
5053 (define_insn "aarch64_<sur>sra_n<mode>"
5054   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5055         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
5056                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
5057                        (match_operand:SI 3
5058                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5059                       VSRA))]
5060   "TARGET_SIMD"
5061   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
5062   [(set_attr "type" "neon_shift_acc<q>")]
5063 )
5064
5065 ;; vs<lr>i_n
5066
5067 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
5068   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5069         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
5070                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
5071                        (match_operand:SI 3
5072                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
5073                       VSLRI))]
5074   "TARGET_SIMD"
5075   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
5076   [(set_attr "type" "neon_shift_imm<q>")]
5077 )
5078
5079 ;; vqshl(u)
5080
5081 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
5082   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5083         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
5084                        (match_operand:SI 2
5085                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
5086                       VQSHL_N))]
5087   "TARGET_SIMD"
5088   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
5089   [(set_attr "type" "neon_sat_shift_imm<q>")]
5090 )
5091
5092
5093 ;; vq(r)shr(u)n_n
5094
5095 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
5096   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5097         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
5098                             (match_operand:SI 2
5099                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5100                            VQSHRN_N))]
5101   "TARGET_SIMD"
5102   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
5103   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5104 )
5105
5106 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>"
5107   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5108         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
5109                              (match_operand:VQN 2 "register_operand" "w")
5110                              (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5111                             VQSHRN_N))]
5112   "TARGET_SIMD"
5113   "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
5114   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5115 )
5116
5117
5118 ;; cm(eq|ge|gt|lt|le)
5119 ;; Note, we have constraints for Dz and Z as different expanders
5120 ;; have different ideas of what should be passed to this pattern.
5121
5122 (define_insn "aarch64_cm<optab><mode>"
5123   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
5124         (neg:<V_INT_EQUIV>
5125           (COMPARISONS:<V_INT_EQUIV>
5126             (match_operand:VDQ_I 1 "register_operand" "w,w")
5127             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
5128           )))]
5129   "TARGET_SIMD"
5130   "@
5131   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
5132   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
5133   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
5134 )
5135
5136 (define_insn_and_split "aarch64_cm<optab>di"
5137   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
5138         (neg:DI
5139           (COMPARISONS:DI
5140             (match_operand:DI 1 "register_operand" "w,w,r")
5141             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
5142           )))
5143      (clobber (reg:CC CC_REGNUM))]
5144   "TARGET_SIMD"
5145   "#"
5146   "&& reload_completed"
5147   [(set (match_operand:DI 0 "register_operand")
5148         (neg:DI
5149           (COMPARISONS:DI
5150             (match_operand:DI 1 "register_operand")
5151             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
5152           )))]
5153   {
5154     /* If we are in the general purpose register file,
5155        we split to a sequence of comparison and store.  */
5156     if (GP_REGNUM_P (REGNO (operands[0]))
5157         && GP_REGNUM_P (REGNO (operands[1])))
5158       {
5159         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
5160         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
5161         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
5162         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5163         DONE;
5164       }
5165     /* Otherwise, we expand to a similar pattern which does not
5166        clobber CC_REGNUM.  */
5167   }
5168   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
5169 )
5170
5171 (define_insn "*aarch64_cm<optab>di"
5172   [(set (match_operand:DI 0 "register_operand" "=w,w")
5173         (neg:DI
5174           (COMPARISONS:DI
5175             (match_operand:DI 1 "register_operand" "w,w")
5176             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
5177           )))]
5178   "TARGET_SIMD && reload_completed"
5179   "@
5180   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
5181   cm<optab>\t%d0, %d1, #0"
5182   [(set_attr "type" "neon_compare, neon_compare_zero")]
5183 )
5184
5185 ;; cm(hs|hi)
5186
5187 (define_insn "aarch64_cm<optab><mode>"
5188   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5189         (neg:<V_INT_EQUIV>
5190           (UCOMPARISONS:<V_INT_EQUIV>
5191             (match_operand:VDQ_I 1 "register_operand" "w")
5192             (match_operand:VDQ_I 2 "register_operand" "w")
5193           )))]
5194   "TARGET_SIMD"
5195   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
5196   [(set_attr "type" "neon_compare<q>")]
5197 )
5198
5199 (define_insn_and_split "aarch64_cm<optab>di"
5200   [(set (match_operand:DI 0 "register_operand" "=w,r")
5201         (neg:DI
5202           (UCOMPARISONS:DI
5203             (match_operand:DI 1 "register_operand" "w,r")
5204             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
5205           )))
5206     (clobber (reg:CC CC_REGNUM))]
5207   "TARGET_SIMD"
5208   "#"
5209   "&& reload_completed"
5210   [(set (match_operand:DI 0 "register_operand")
5211         (neg:DI
5212           (UCOMPARISONS:DI
5213             (match_operand:DI 1 "register_operand")
5214             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
5215           )))]
5216   {
5217     /* If we are in the general purpose register file,
5218        we split to a sequence of comparison and store.  */
5219     if (GP_REGNUM_P (REGNO (operands[0]))
5220         && GP_REGNUM_P (REGNO (operands[1])))
5221       {
5222         machine_mode mode = CCmode;
5223         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
5224         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
5225         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5226         DONE;
5227       }
5228     /* Otherwise, we expand to a similar pattern which does not
5229        clobber CC_REGNUM.  */
5230   }
5231   [(set_attr "type" "neon_compare,multiple")]
5232 )
5233
5234 (define_insn "*aarch64_cm<optab>di"
5235   [(set (match_operand:DI 0 "register_operand" "=w")
5236         (neg:DI
5237           (UCOMPARISONS:DI
5238             (match_operand:DI 1 "register_operand" "w")
5239             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
5240           )))]
5241   "TARGET_SIMD && reload_completed"
5242   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
5243   [(set_attr "type" "neon_compare")]
5244 )
5245
5246 ;; cmtst
5247
5248 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
5249 ;; we don't have any insns using ne, and aarch64_vcond outputs
5250 ;; not (neg (eq (and x y) 0))
5251 ;; which is rewritten by simplify_rtx as
5252 ;; plus (eq (and x y) 0) -1.
5253
5254 (define_insn "aarch64_cmtst<mode>"
5255   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5256         (plus:<V_INT_EQUIV>
5257           (eq:<V_INT_EQUIV>
5258             (and:VDQ_I
5259               (match_operand:VDQ_I 1 "register_operand" "w")
5260               (match_operand:VDQ_I 2 "register_operand" "w"))
5261             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
5262           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
5263   ]
5264   "TARGET_SIMD"
5265   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5266   [(set_attr "type" "neon_tst<q>")]
5267 )
5268
5269 (define_insn_and_split "aarch64_cmtstdi"
5270   [(set (match_operand:DI 0 "register_operand" "=w,r")
5271         (neg:DI
5272           (ne:DI
5273             (and:DI
5274               (match_operand:DI 1 "register_operand" "w,r")
5275               (match_operand:DI 2 "register_operand" "w,r"))
5276             (const_int 0))))
5277     (clobber (reg:CC CC_REGNUM))]
5278   "TARGET_SIMD"
5279   "#"
5280   "&& reload_completed"
5281   [(set (match_operand:DI 0 "register_operand")
5282         (neg:DI
5283           (ne:DI
5284             (and:DI
5285               (match_operand:DI 1 "register_operand")
5286               (match_operand:DI 2 "register_operand"))
5287             (const_int 0))))]
5288   {
5289     /* If we are in the general purpose register file,
5290        we split to a sequence of comparison and store.  */
5291     if (GP_REGNUM_P (REGNO (operands[0]))
5292         && GP_REGNUM_P (REGNO (operands[1])))
5293       {
5294         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
5295         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
5296         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
5297         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
5298         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5299         DONE;
5300       }
5301     /* Otherwise, we expand to a similar pattern which does not
5302        clobber CC_REGNUM.  */
5303   }
5304   [(set_attr "type" "neon_tst,multiple")]
5305 )
5306
5307 (define_insn "*aarch64_cmtstdi"
5308   [(set (match_operand:DI 0 "register_operand" "=w")
5309         (neg:DI
5310           (ne:DI
5311             (and:DI
5312               (match_operand:DI 1 "register_operand" "w")
5313               (match_operand:DI 2 "register_operand" "w"))
5314             (const_int 0))))]
5315   "TARGET_SIMD"
5316   "cmtst\t%d0, %d1, %d2"
5317   [(set_attr "type" "neon_tst")]
5318 )
5319
5320 ;; fcm(eq|ge|gt|le|lt)
5321
5322 (define_insn "aarch64_cm<optab><mode>"
5323   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
5324         (neg:<V_INT_EQUIV>
5325           (COMPARISONS:<V_INT_EQUIV>
5326             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
5327             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
5328           )))]
5329   "TARGET_SIMD"
5330   "@
5331   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
5332   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
5333   [(set_attr "type" "neon_fp_compare_<stype><q>")]
5334 )
5335
5336 ;; fac(ge|gt)
5337 ;; Note we can also handle what would be fac(le|lt) by
5338 ;; generating fac(ge|gt).
5339
5340 (define_insn "aarch64_fac<optab><mode>"
5341   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5342         (neg:<V_INT_EQUIV>
5343           (FAC_COMPARISONS:<V_INT_EQUIV>
5344             (abs:VHSDF_HSDF
5345               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
5346             (abs:VHSDF_HSDF
5347               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
5348   )))]
5349   "TARGET_SIMD"
5350   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
5351   [(set_attr "type" "neon_fp_compare_<stype><q>")]
5352 )
5353
5354 ;; addp
5355
5356 (define_insn "aarch64_addp<mode>"
5357   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
5358         (unspec:VD_BHSI
5359           [(match_operand:VD_BHSI 1 "register_operand" "w")
5360            (match_operand:VD_BHSI 2 "register_operand" "w")]
5361           UNSPEC_ADDP))]
5362   "TARGET_SIMD"
5363   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5364   [(set_attr "type" "neon_reduc_add<q>")]
5365 )
5366
5367 (define_insn "aarch64_addpdi"
5368   [(set (match_operand:DI 0 "register_operand" "=w")
5369         (unspec:DI
5370           [(match_operand:V2DI 1 "register_operand" "w")]
5371           UNSPEC_ADDP))]
5372   "TARGET_SIMD"
5373   "addp\t%d0, %1.2d"
5374   [(set_attr "type" "neon_reduc_add")]
5375 )
5376
5377 ;; sqrt
5378
5379 (define_expand "sqrt<mode>2"
5380   [(set (match_operand:VHSDF 0 "register_operand")
5381         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
5382   "TARGET_SIMD"
5383 {
5384   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
5385     DONE;
5386 })
5387
5388 (define_insn "*sqrt<mode>2"
5389   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5390         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
5391   "TARGET_SIMD"
5392   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
5393   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
5394 )
5395
5396 ;; Patterns for vector struct loads and stores.
5397
5398 (define_insn "aarch64_simd_ld2<mode>"
5399   [(set (match_operand:OI 0 "register_operand" "=w")
5400         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5401                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5402                    UNSPEC_LD2))]
5403   "TARGET_SIMD"
5404   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5405   [(set_attr "type" "neon_load2_2reg<q>")]
5406 )
5407
5408 (define_insn "aarch64_simd_ld2r<mode>"
5409   [(set (match_operand:OI 0 "register_operand" "=w")
5410        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5411                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5412                   UNSPEC_LD2_DUP))]
5413   "TARGET_SIMD"
5414   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5415   [(set_attr "type" "neon_load2_all_lanes<q>")]
5416 )
5417
5418 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
5419   [(set (match_operand:OI 0 "register_operand" "=w")
5420         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5421                     (match_operand:OI 2 "register_operand" "0")
5422                     (match_operand:SI 3 "immediate_operand" "i")
5423                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5424                    UNSPEC_LD2_LANE))]
5425   "TARGET_SIMD"
5426   {
5427     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5428     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
5429   }
5430   [(set_attr "type" "neon_load2_one_lane")]
5431 )
5432
5433 (define_expand "vec_load_lanesoi<mode>"
5434   [(set (match_operand:OI 0 "register_operand")
5435         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
5436                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5437                    UNSPEC_LD2))]
5438   "TARGET_SIMD"
5439 {
5440   if (BYTES_BIG_ENDIAN)
5441     {
5442       rtx tmp = gen_reg_rtx (OImode);
5443       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5444       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
5445       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
5446     }
5447   else
5448     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
5449   DONE;
5450 })
5451
5452 (define_insn "aarch64_simd_st2<mode>"
5453   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5454         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5455                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5456                    UNSPEC_ST2))]
5457   "TARGET_SIMD"
5458   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5459   [(set_attr "type" "neon_store2_2reg<q>")]
5460 )
5461
5462 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5463 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
5464   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5465         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5466                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5467                     (match_operand:SI 2 "immediate_operand" "i")]
5468                    UNSPEC_ST2_LANE))]
5469   "TARGET_SIMD"
5470   {
5471     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5472     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5473   }
5474   [(set_attr "type" "neon_store2_one_lane<q>")]
5475 )
5476
5477 (define_expand "vec_store_lanesoi<mode>"
5478   [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5479         (unspec:OI [(match_operand:OI 1 "register_operand")
5480                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5481                    UNSPEC_ST2))]
5482   "TARGET_SIMD"
5483 {
5484   if (BYTES_BIG_ENDIAN)
5485     {
5486       rtx tmp = gen_reg_rtx (OImode);
5487       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5488       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5489       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5490     }
5491   else
5492     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5493   DONE;
5494 })
5495
5496 (define_insn "aarch64_simd_ld3<mode>"
5497   [(set (match_operand:CI 0 "register_operand" "=w")
5498         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5499                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5500                    UNSPEC_LD3))]
5501   "TARGET_SIMD"
5502   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5503   [(set_attr "type" "neon_load3_3reg<q>")]
5504 )
5505
5506 (define_insn "aarch64_simd_ld3r<mode>"
5507   [(set (match_operand:CI 0 "register_operand" "=w")
5508        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5509                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5510                   UNSPEC_LD3_DUP))]
5511   "TARGET_SIMD"
5512   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5513   [(set_attr "type" "neon_load3_all_lanes<q>")]
5514 )
5515
5516 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5517   [(set (match_operand:CI 0 "register_operand" "=w")
5518         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5519                     (match_operand:CI 2 "register_operand" "0")
5520                     (match_operand:SI 3 "immediate_operand" "i")
5521                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5522                    UNSPEC_LD3_LANE))]
5523   "TARGET_SIMD"
5524 {
5525     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5526     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5527 }
5528   [(set_attr "type" "neon_load3_one_lane")]
5529 )
5530
5531 (define_expand "vec_load_lanesci<mode>"
5532   [(set (match_operand:CI 0 "register_operand")
5533         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5534                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5535                    UNSPEC_LD3))]
5536   "TARGET_SIMD"
5537 {
5538   if (BYTES_BIG_ENDIAN)
5539     {
5540       rtx tmp = gen_reg_rtx (CImode);
5541       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5542       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5543       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5544     }
5545   else
5546     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5547   DONE;
5548 })
5549
5550 (define_insn "aarch64_simd_st3<mode>"
5551   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5552         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5553                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5554                    UNSPEC_ST3))]
5555   "TARGET_SIMD"
5556   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5557   [(set_attr "type" "neon_store3_3reg<q>")]
5558 )
5559
5560 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5561 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5562   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5563         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5564                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5565                      (match_operand:SI 2 "immediate_operand" "i")]
5566                     UNSPEC_ST3_LANE))]
5567   "TARGET_SIMD"
5568   {
5569     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5570     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5571   }
5572   [(set_attr "type" "neon_store3_one_lane<q>")]
5573 )
5574
5575 (define_expand "vec_store_lanesci<mode>"
5576   [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5577         (unspec:CI [(match_operand:CI 1 "register_operand")
5578                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5579                    UNSPEC_ST3))]
5580   "TARGET_SIMD"
5581 {
5582   if (BYTES_BIG_ENDIAN)
5583     {
5584       rtx tmp = gen_reg_rtx (CImode);
5585       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5586       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5587       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5588     }
5589   else
5590     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5591   DONE;
5592 })
5593
5594 (define_insn "aarch64_simd_ld4<mode>"
5595   [(set (match_operand:XI 0 "register_operand" "=w")
5596         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5597                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5598                    UNSPEC_LD4))]
5599   "TARGET_SIMD"
5600   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5601   [(set_attr "type" "neon_load4_4reg<q>")]
5602 )
5603
5604 (define_insn "aarch64_simd_ld4r<mode>"
5605   [(set (match_operand:XI 0 "register_operand" "=w")
5606        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5607                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5608                   UNSPEC_LD4_DUP))]
5609   "TARGET_SIMD"
5610   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5611   [(set_attr "type" "neon_load4_all_lanes<q>")]
5612 )
5613
5614 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5615   [(set (match_operand:XI 0 "register_operand" "=w")
5616         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5617                     (match_operand:XI 2 "register_operand" "0")
5618                     (match_operand:SI 3 "immediate_operand" "i")
5619                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5620                    UNSPEC_LD4_LANE))]
5621   "TARGET_SIMD"
5622 {
5623     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5624     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5625 }
5626   [(set_attr "type" "neon_load4_one_lane")]
5627 )
5628
5629 (define_expand "vec_load_lanesxi<mode>"
5630   [(set (match_operand:XI 0 "register_operand")
5631         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5632                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5633                    UNSPEC_LD4))]
5634   "TARGET_SIMD"
5635 {
5636   if (BYTES_BIG_ENDIAN)
5637     {
5638       rtx tmp = gen_reg_rtx (XImode);
5639       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5640       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5641       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5642     }
5643   else
5644     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5645   DONE;
5646 })
5647
5648 (define_insn "aarch64_simd_st4<mode>"
5649   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5650         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5651                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5652                    UNSPEC_ST4))]
5653   "TARGET_SIMD"
5654   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5655   [(set_attr "type" "neon_store4_4reg<q>")]
5656 )
5657
5658 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5659 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5660   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5661         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5662                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5663                      (match_operand:SI 2 "immediate_operand" "i")]
5664                     UNSPEC_ST4_LANE))]
5665   "TARGET_SIMD"
5666   {
5667     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5668     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5669   }
5670   [(set_attr "type" "neon_store4_one_lane<q>")]
5671 )
5672
5673 (define_expand "vec_store_lanesxi<mode>"
5674   [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5675         (unspec:XI [(match_operand:XI 1 "register_operand")
5676                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5677                    UNSPEC_ST4))]
5678   "TARGET_SIMD"
5679 {
5680   if (BYTES_BIG_ENDIAN)
5681     {
5682       rtx tmp = gen_reg_rtx (XImode);
5683       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5684       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5685       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5686     }
5687   else
5688     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5689   DONE;
5690 })
5691
5692 (define_insn_and_split "aarch64_rev_reglist<mode>"
5693 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5694         (unspec:VSTRUCT
5695                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5696                     (match_operand:V16QI 2 "register_operand" "w")]
5697                    UNSPEC_REV_REGLIST))]
5698   "TARGET_SIMD"
5699   "#"
5700   "&& reload_completed"
5701   [(const_int 0)]
5702 {
5703   int i;
5704   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5705   for (i = 0; i < nregs; i++)
5706     {
5707       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5708       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5709       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5710     }
5711   DONE;
5712 }
5713   [(set_attr "type" "neon_tbl1_q")
5714    (set_attr "length" "<insn_count>")]
5715 )
5716
5717 ;; Reload patterns for AdvSIMD register list operands.
5718
5719 (define_expand "mov<mode>"
5720   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5721         (match_operand:VSTRUCT 1 "general_operand"))]
5722   "TARGET_SIMD"
5723 {
5724   if (can_create_pseudo_p ())
5725     {
5726       if (GET_CODE (operands[0]) != REG)
5727         operands[1] = force_reg (<MODE>mode, operands[1]);
5728     }
5729 })
5730
5731
5732 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5733   [(match_operand:CI 0 "register_operand")
5734    (match_operand:DI 1 "register_operand")
5735    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5736   "TARGET_SIMD"
5737 {
5738   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5739   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5740   DONE;
5741 })
5742
5743 (define_insn "aarch64_ld1_x3_<mode>"
5744   [(set (match_operand:CI 0 "register_operand" "=w")
5745         (unspec:CI
5746           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5747            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5748   "TARGET_SIMD"
5749   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5750   [(set_attr "type" "neon_load1_3reg<q>")]
5751 )
5752
5753 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5754   [(match_operand:XI 0 "register_operand" "=w")
5755    (match_operand:DI 1 "register_operand" "r")
5756    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5757   "TARGET_SIMD"
5758 {
5759   rtx mem = gen_rtx_MEM (XImode, operands[1]);
5760   emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5761   DONE;
5762 })
5763
5764 (define_insn "aarch64_ld1_x4_<mode>"
5765   [(set (match_operand:XI 0 "register_operand" "=w")
5766         (unspec:XI
5767           [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5768            (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5769         UNSPEC_LD1))]
5770   "TARGET_SIMD"
5771   "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5772   [(set_attr "type" "neon_load1_4reg<q>")]
5773 )
5774
5775 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5776   [(match_operand:DI 0 "register_operand")
5777    (match_operand:OI 1 "register_operand")
5778    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5779   "TARGET_SIMD"
5780 {
5781   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5782   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5783   DONE;
5784 })
5785
5786 (define_insn "aarch64_st1_x2_<mode>"
5787    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5788          (unspec:OI
5789           [(match_operand:OI 1 "register_operand" "w")
5790           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5791   "TARGET_SIMD"
5792   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5793   [(set_attr "type" "neon_store1_2reg<q>")]
5794 )
5795
5796 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5797   [(match_operand:DI 0 "register_operand")
5798    (match_operand:CI 1 "register_operand")
5799    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5800   "TARGET_SIMD"
5801 {
5802   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5803   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5804   DONE;
5805 })
5806
5807 (define_insn "aarch64_st1_x3_<mode>"
5808    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5809         (unspec:CI
5810          [(match_operand:CI 1 "register_operand" "w")
5811           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5812   "TARGET_SIMD"
5813   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5814   [(set_attr "type" "neon_store1_3reg<q>")]
5815 )
5816
5817 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5818   [(match_operand:DI 0 "register_operand" "")
5819    (match_operand:XI 1 "register_operand" "")
5820    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5821   "TARGET_SIMD"
5822 {
5823   rtx mem = gen_rtx_MEM (XImode, operands[0]);
5824   emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5825   DONE;
5826 })
5827
5828 (define_insn "aarch64_st1_x4_<mode>"
5829   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5830         (unspec:XI
5831            [(match_operand:XI 1 "register_operand" "w")
5832            (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5833         UNSPEC_ST1))]
5834   "TARGET_SIMD"
5835   "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5836   [(set_attr "type" "neon_store1_4reg<q>")]
5837 )
5838
5839 (define_insn "*aarch64_mov<mode>"
5840   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5841         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5842   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5843    && (register_operand (operands[0], <MODE>mode)
5844        || register_operand (operands[1], <MODE>mode))"
5845   "@
5846    #
5847    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5848    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5849   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5850                      neon_load<nregs>_<nregs>reg_q")
5851    (set_attr "length" "<insn_count>,4,4")]
5852 )
5853
5854 (define_insn "aarch64_be_ld1<mode>"
5855   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5856         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5857                              "aarch64_simd_struct_operand" "Utv")]
5858         UNSPEC_LD1))]
5859   "TARGET_SIMD"
5860   "ld1\\t{%0<Vmtype>}, %1"
5861   [(set_attr "type" "neon_load1_1reg<q>")]
5862 )
5863
5864 (define_insn "aarch64_be_st1<mode>"
5865   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5866         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5867         UNSPEC_ST1))]
5868   "TARGET_SIMD"
5869   "st1\\t{%1<Vmtype>}, %0"
5870   [(set_attr "type" "neon_store1_1reg<q>")]
5871 )
5872
5873 (define_insn "*aarch64_be_movoi"
5874   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5875         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5876   "TARGET_SIMD && BYTES_BIG_ENDIAN
5877    && (register_operand (operands[0], OImode)
5878        || register_operand (operands[1], OImode))"
5879   "@
5880    #
5881    stp\\t%q1, %R1, %0
5882    ldp\\t%q0, %R0, %1"
5883   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5884    (set_attr "length" "8,4,4")]
5885 )
5886
5887 (define_insn "*aarch64_be_movci"
5888   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5889         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5890   "TARGET_SIMD && BYTES_BIG_ENDIAN
5891    && (register_operand (operands[0], CImode)
5892        || register_operand (operands[1], CImode))"
5893   "#"
5894   [(set_attr "type" "multiple")
5895    (set_attr "length" "12,4,4")]
5896 )
5897
5898 (define_insn "*aarch64_be_movxi"
5899   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5900         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5901   "TARGET_SIMD && BYTES_BIG_ENDIAN
5902    && (register_operand (operands[0], XImode)
5903        || register_operand (operands[1], XImode))"
5904   "#"
5905   [(set_attr "type" "multiple")
5906    (set_attr "length" "16,4,4")]
5907 )
5908
5909 (define_split
5910   [(set (match_operand:OI 0 "register_operand")
5911         (match_operand:OI 1 "register_operand"))]
5912   "TARGET_SIMD && reload_completed"
5913   [(const_int 0)]
5914 {
5915   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5916   DONE;
5917 })
5918
5919 (define_split
5920   [(set (match_operand:CI 0 "nonimmediate_operand")
5921         (match_operand:CI 1 "general_operand"))]
5922   "TARGET_SIMD && reload_completed"
5923   [(const_int 0)]
5924 {
5925   if (register_operand (operands[0], CImode)
5926       && register_operand (operands[1], CImode))
5927     {
5928       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5929       DONE;
5930     }
5931   else if (BYTES_BIG_ENDIAN)
5932     {
5933       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5934                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5935       emit_move_insn (gen_lowpart (V16QImode,
5936                                    simplify_gen_subreg (TImode, operands[0],
5937                                                         CImode, 32)),
5938                       gen_lowpart (V16QImode,
5939                                    simplify_gen_subreg (TImode, operands[1],
5940                                                         CImode, 32)));
5941       DONE;
5942     }
5943   else
5944     FAIL;
5945 })
5946
5947 (define_split
5948   [(set (match_operand:XI 0 "nonimmediate_operand")
5949         (match_operand:XI 1 "general_operand"))]
5950   "TARGET_SIMD && reload_completed"
5951   [(const_int 0)]
5952 {
5953   if (register_operand (operands[0], XImode)
5954       && register_operand (operands[1], XImode))
5955     {
5956       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5957       DONE;
5958     }
5959   else if (BYTES_BIG_ENDIAN)
5960     {
5961       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5962                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5963       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5964                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5965       DONE;
5966     }
5967   else
5968     FAIL;
5969 })
5970
5971 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5972   [(match_operand:VSTRUCT 0 "register_operand")
5973    (match_operand:DI 1 "register_operand")
5974    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5975   "TARGET_SIMD"
5976 {
5977   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5978   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5979                      * <VSTRUCT:nregs>);
5980
5981   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5982                                                                 mem));
5983   DONE;
5984 })
5985
5986 (define_insn "aarch64_ld2<mode>_dreg"
5987   [(set (match_operand:OI 0 "register_operand" "=w")
5988         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5989                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5990                    UNSPEC_LD2_DREG))]
5991   "TARGET_SIMD"
5992   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5993   [(set_attr "type" "neon_load2_2reg<q>")]
5994 )
5995
5996 (define_insn "aarch64_ld2<mode>_dreg"
5997   [(set (match_operand:OI 0 "register_operand" "=w")
5998         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5999                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6000                    UNSPEC_LD2_DREG))]
6001   "TARGET_SIMD"
6002   "ld1\\t{%S0.1d - %T0.1d}, %1"
6003   [(set_attr "type" "neon_load1_2reg<q>")]
6004 )
6005
6006 (define_insn "aarch64_ld3<mode>_dreg"
6007   [(set (match_operand:CI 0 "register_operand" "=w")
6008         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6009                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6010                    UNSPEC_LD3_DREG))]
6011   "TARGET_SIMD"
6012   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6013   [(set_attr "type" "neon_load3_3reg<q>")]
6014 )
6015
6016 (define_insn "aarch64_ld3<mode>_dreg"
6017   [(set (match_operand:CI 0 "register_operand" "=w")
6018         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6019                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6020                    UNSPEC_LD3_DREG))]
6021   "TARGET_SIMD"
6022   "ld1\\t{%S0.1d - %U0.1d}, %1"
6023   [(set_attr "type" "neon_load1_3reg<q>")]
6024 )
6025
6026 (define_insn "aarch64_ld4<mode>_dreg"
6027   [(set (match_operand:XI 0 "register_operand" "=w")
6028         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6029                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6030                    UNSPEC_LD4_DREG))]
6031   "TARGET_SIMD"
6032   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6033   [(set_attr "type" "neon_load4_4reg<q>")]
6034 )
6035
6036 (define_insn "aarch64_ld4<mode>_dreg"
6037   [(set (match_operand:XI 0 "register_operand" "=w")
6038         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6039                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6040                    UNSPEC_LD4_DREG))]
6041   "TARGET_SIMD"
6042   "ld1\\t{%S0.1d - %V0.1d}, %1"
6043   [(set_attr "type" "neon_load1_4reg<q>")]
6044 )
6045
6046 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
6047  [(match_operand:VSTRUCT 0 "register_operand")
6048   (match_operand:DI 1 "register_operand")
6049   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6050   "TARGET_SIMD"
6051 {
6052   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
6053   set_mem_size (mem, <VSTRUCT:nregs> * 8);
6054
6055   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
6056   DONE;
6057 })
6058
6059 (define_expand "aarch64_ld1<VALL_F16:mode>"
6060  [(match_operand:VALL_F16 0 "register_operand")
6061   (match_operand:DI 1 "register_operand")]
6062   "TARGET_SIMD"
6063 {
6064   machine_mode mode = <VALL_F16:MODE>mode;
6065   rtx mem = gen_rtx_MEM (mode, operands[1]);
6066
6067   if (BYTES_BIG_ENDIAN)
6068     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
6069   else
6070     emit_move_insn (operands[0], mem);
6071   DONE;
6072 })
6073
6074 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
6075  [(match_operand:VSTRUCT 0 "register_operand")
6076   (match_operand:DI 1 "register_operand")
6077   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6078   "TARGET_SIMD"
6079 {
6080   machine_mode mode = <VSTRUCT:MODE>mode;
6081   rtx mem = gen_rtx_MEM (mode, operands[1]);
6082
6083   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
6084   DONE;
6085 })
6086
6087 (define_expand "aarch64_ld1x2<VQ:mode>"
6088  [(match_operand:OI 0 "register_operand")
6089   (match_operand:DI 1 "register_operand")
6090   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6091   "TARGET_SIMD"
6092 {
6093   machine_mode mode = OImode;
6094   rtx mem = gen_rtx_MEM (mode, operands[1]);
6095
6096   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
6097   DONE;
6098 })
6099
6100 (define_expand "aarch64_ld1x2<VDC:mode>"
6101  [(match_operand:OI 0 "register_operand")
6102   (match_operand:DI 1 "register_operand")
6103   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6104   "TARGET_SIMD"
6105 {
6106   machine_mode mode = OImode;
6107   rtx mem = gen_rtx_MEM (mode, operands[1]);
6108
6109   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
6110   DONE;
6111 })
6112
6113
6114 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6115   [(match_operand:VSTRUCT 0 "register_operand")
6116         (match_operand:DI 1 "register_operand")
6117         (match_operand:VSTRUCT 2 "register_operand")
6118         (match_operand:SI 3 "immediate_operand")
6119         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6120   "TARGET_SIMD"
6121 {
6122   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
6123   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6124                      * <VSTRUCT:nregs>);
6125
6126   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
6127   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6128         operands[0], mem, operands[2], operands[3]));
6129   DONE;
6130 })
6131
6132 ;; Expanders for builtins to extract vector registers from large
6133 ;; opaque integer modes.
6134
6135 ;; D-register list.
6136
6137 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
6138  [(match_operand:VDC 0 "register_operand")
6139   (match_operand:VSTRUCT 1 "register_operand")
6140   (match_operand:SI 2 "immediate_operand")]
6141   "TARGET_SIMD"
6142 {
6143   int part = INTVAL (operands[2]);
6144   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
6145   int offset = part * 16;
6146
6147   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
6148   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
6149   DONE;
6150 })
6151
6152 ;; Q-register list.
6153
6154 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
6155  [(match_operand:VQ 0 "register_operand")
6156   (match_operand:VSTRUCT 1 "register_operand")
6157   (match_operand:SI 2 "immediate_operand")]
6158   "TARGET_SIMD"
6159 {
6160   int part = INTVAL (operands[2]);
6161   int offset = part * 16;
6162
6163   emit_move_insn (operands[0],
6164                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
6165   DONE;
6166 })
6167
6168 ;; Permuted-store expanders for neon intrinsics.
6169
6170 ;; Permute instructions
6171
6172 ;; vec_perm support
6173
6174 (define_expand "vec_perm<mode>"
6175   [(match_operand:VB 0 "register_operand")
6176    (match_operand:VB 1 "register_operand")
6177    (match_operand:VB 2 "register_operand")
6178    (match_operand:VB 3 "register_operand")]
6179   "TARGET_SIMD"
6180 {
6181   aarch64_expand_vec_perm (operands[0], operands[1],
6182                            operands[2], operands[3], <nunits>);
6183   DONE;
6184 })
6185
6186 (define_insn "aarch64_tbl1<mode>"
6187   [(set (match_operand:VB 0 "register_operand" "=w")
6188         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
6189                     (match_operand:VB 2 "register_operand" "w")]
6190                    UNSPEC_TBL))]
6191   "TARGET_SIMD"
6192   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
6193   [(set_attr "type" "neon_tbl1<q>")]
6194 )
6195
6196 ;; Two source registers.
6197
6198 (define_insn "aarch64_tbl2v16qi"
6199   [(set (match_operand:V16QI 0 "register_operand" "=w")
6200         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
6201                        (match_operand:V16QI 2 "register_operand" "w")]
6202                       UNSPEC_TBL))]
6203   "TARGET_SIMD"
6204   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
6205   [(set_attr "type" "neon_tbl2_q")]
6206 )
6207
6208 (define_insn "aarch64_tbl3<mode>"
6209   [(set (match_operand:VB 0 "register_operand" "=w")
6210         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
6211                       (match_operand:VB 2 "register_operand" "w")]
6212                       UNSPEC_TBL))]
6213   "TARGET_SIMD"
6214   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
6215   [(set_attr "type" "neon_tbl3")]
6216 )
6217
6218 (define_insn "aarch64_tbx4<mode>"
6219   [(set (match_operand:VB 0 "register_operand" "=w")
6220         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6221                       (match_operand:OI 2 "register_operand" "w")
6222                       (match_operand:VB 3 "register_operand" "w")]
6223                       UNSPEC_TBX))]
6224   "TARGET_SIMD"
6225   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
6226   [(set_attr "type" "neon_tbl4")]
6227 )
6228
6229 ;; Three source registers.
6230
6231 (define_insn "aarch64_qtbl3<mode>"
6232   [(set (match_operand:VB 0 "register_operand" "=w")
6233         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
6234                       (match_operand:VB 2 "register_operand" "w")]
6235                       UNSPEC_TBL))]
6236   "TARGET_SIMD"
6237   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
6238   [(set_attr "type" "neon_tbl3")]
6239 )
6240
6241 (define_insn "aarch64_qtbx3<mode>"
6242   [(set (match_operand:VB 0 "register_operand" "=w")
6243         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6244                       (match_operand:CI 2 "register_operand" "w")
6245                       (match_operand:VB 3 "register_operand" "w")]
6246                       UNSPEC_TBX))]
6247   "TARGET_SIMD"
6248   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
6249   [(set_attr "type" "neon_tbl3")]
6250 )
6251
6252 ;; Four source registers.
6253
6254 (define_insn "aarch64_qtbl4<mode>"
6255   [(set (match_operand:VB 0 "register_operand" "=w")
6256         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
6257                       (match_operand:VB 2 "register_operand" "w")]
6258                       UNSPEC_TBL))]
6259   "TARGET_SIMD"
6260   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
6261   [(set_attr "type" "neon_tbl4")]
6262 )
6263
6264 (define_insn "aarch64_qtbx4<mode>"
6265   [(set (match_operand:VB 0 "register_operand" "=w")
6266         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6267                       (match_operand:XI 2 "register_operand" "w")
6268                       (match_operand:VB 3 "register_operand" "w")]
6269                       UNSPEC_TBX))]
6270   "TARGET_SIMD"
6271   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
6272   [(set_attr "type" "neon_tbl4")]
6273 )
6274
6275 (define_insn_and_split "aarch64_combinev16qi"
6276   [(set (match_operand:OI 0 "register_operand" "=w")
6277         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
6278                     (match_operand:V16QI 2 "register_operand" "w")]
6279                    UNSPEC_CONCAT))]
6280   "TARGET_SIMD"
6281   "#"
6282   "&& reload_completed"
6283   [(const_int 0)]
6284 {
6285   aarch64_split_combinev16qi (operands);
6286   DONE;
6287 }
6288 [(set_attr "type" "multiple")]
6289 )
6290
6291 ;; This instruction's pattern is generated directly by
6292 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6293 ;; need corresponding changes there.
6294 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
6295   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6296         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
6297                           (match_operand:VALL_F16 2 "register_operand" "w")]
6298          PERMUTE))]
6299   "TARGET_SIMD"
6300   "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
6301   [(set_attr "type" "neon_permute<q>")]
6302 )
6303
6304 ;; This instruction's pattern is generated directly by
6305 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6306 ;; need corresponding changes there.  Note that the immediate (third)
6307 ;; operand is a lane index not a byte index.
6308 (define_insn "aarch64_ext<mode>"
6309   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6310         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
6311                           (match_operand:VALL_F16 2 "register_operand" "w")
6312                           (match_operand:SI 3 "immediate_operand" "i")]
6313          UNSPEC_EXT))]
6314   "TARGET_SIMD"
6315 {
6316   operands[3] = GEN_INT (INTVAL (operands[3])
6317       * GET_MODE_UNIT_SIZE (<MODE>mode));
6318   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
6319 }
6320   [(set_attr "type" "neon_ext<q>")]
6321 )
6322
6323 ;; This instruction's pattern is generated directly by
6324 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6325 ;; need corresponding changes there.
6326 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
6327   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6328         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
6329                     REVERSE))]
6330   "TARGET_SIMD"
6331   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
6332   [(set_attr "type" "neon_rev<q>")]
6333 )
6334
6335 (define_insn "aarch64_st2<mode>_dreg"
6336   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6337         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
6338                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6339                    UNSPEC_ST2))]
6340   "TARGET_SIMD"
6341   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
6342   [(set_attr "type" "neon_store2_2reg")]
6343 )
6344
6345 (define_insn "aarch64_st2<mode>_dreg"
6346   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6347         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
6348                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6349                    UNSPEC_ST2))]
6350   "TARGET_SIMD"
6351   "st1\\t{%S1.1d - %T1.1d}, %0"
6352   [(set_attr "type" "neon_store1_2reg")]
6353 )
6354
6355 (define_insn "aarch64_st3<mode>_dreg"
6356   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6357         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6358                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6359                    UNSPEC_ST3))]
6360   "TARGET_SIMD"
6361   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6362   [(set_attr "type" "neon_store3_3reg")]
6363 )
6364
6365 (define_insn "aarch64_st3<mode>_dreg"
6366   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6367         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6368                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6369                    UNSPEC_ST3))]
6370   "TARGET_SIMD"
6371   "st1\\t{%S1.1d - %U1.1d}, %0"
6372   [(set_attr "type" "neon_store1_3reg")]
6373 )
6374
6375 (define_insn "aarch64_st4<mode>_dreg"
6376   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6377         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6378                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6379                    UNSPEC_ST4))]
6380   "TARGET_SIMD"
6381   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
6382   [(set_attr "type" "neon_store4_4reg")]
6383 )
6384
6385 (define_insn "aarch64_st4<mode>_dreg"
6386   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6387         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6388                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6389                    UNSPEC_ST4))]
6390   "TARGET_SIMD"
6391   "st1\\t{%S1.1d - %V1.1d}, %0"
6392   [(set_attr "type" "neon_store1_4reg")]
6393 )
6394
6395 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
6396  [(match_operand:DI 0 "register_operand")
6397   (match_operand:VSTRUCT 1 "register_operand")
6398   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6399   "TARGET_SIMD"
6400 {
6401   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6402   set_mem_size (mem, <VSTRUCT:nregs> * 8);
6403
6404   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
6405   DONE;
6406 })
6407
6408 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
6409  [(match_operand:DI 0 "register_operand")
6410   (match_operand:VSTRUCT 1 "register_operand")
6411   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6412   "TARGET_SIMD"
6413 {
6414   machine_mode mode = <VSTRUCT:MODE>mode;
6415   rtx mem = gen_rtx_MEM (mode, operands[0]);
6416
6417   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
6418   DONE;
6419 })
6420
6421 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6422  [(match_operand:DI 0 "register_operand")
6423   (match_operand:VSTRUCT 1 "register_operand")
6424   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
6425   (match_operand:SI 2 "immediate_operand")]
6426   "TARGET_SIMD"
6427 {
6428   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6429   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6430                      * <VSTRUCT:nregs>);
6431
6432   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6433                 mem, operands[1], operands[2]));
6434   DONE;
6435 })
6436
6437 (define_expand "aarch64_st1<VALL_F16:mode>"
6438  [(match_operand:DI 0 "register_operand")
6439   (match_operand:VALL_F16 1 "register_operand")]
6440   "TARGET_SIMD"
6441 {
6442   machine_mode mode = <VALL_F16:MODE>mode;
6443   rtx mem = gen_rtx_MEM (mode, operands[0]);
6444
6445   if (BYTES_BIG_ENDIAN)
6446     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
6447   else
6448     emit_move_insn (mem, operands[1]);
6449   DONE;
6450 })
6451
6452 ;; Expander for builtins to insert vector registers into large
6453 ;; opaque integer modes.
6454
6455 ;; Q-register list.  We don't need a D-reg inserter as we zero
6456 ;; extend them in arm_neon.h and insert the resulting Q-regs.
6457
6458 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
6459  [(match_operand:VSTRUCT 0 "register_operand")
6460   (match_operand:VSTRUCT 1 "register_operand")
6461   (match_operand:VQ 2 "register_operand")
6462   (match_operand:SI 3 "immediate_operand")]
6463   "TARGET_SIMD"
6464 {
6465   int part = INTVAL (operands[3]);
6466   int offset = part * 16;
6467
6468   emit_move_insn (operands[0], operands[1]);
6469   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
6470                   operands[2]);
6471   DONE;
6472 })
6473
6474 ;; Standard pattern name vec_init<mode><Vel>.
6475
6476 (define_expand "vec_init<mode><Vel>"
6477   [(match_operand:VALL_F16 0 "register_operand")
6478    (match_operand 1 "" "")]
6479   "TARGET_SIMD"
6480 {
6481   aarch64_expand_vector_init (operands[0], operands[1]);
6482   DONE;
6483 })
6484
6485 (define_expand "vec_init<mode><Vhalf>"
6486   [(match_operand:VQ_NO2E 0 "register_operand")
6487    (match_operand 1 "" "")]
6488   "TARGET_SIMD"
6489 {
6490   aarch64_expand_vector_init (operands[0], operands[1]);
6491   DONE;
6492 })
6493
6494 (define_insn "*aarch64_simd_ld1r<mode>"
6495   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6496         (vec_duplicate:VALL_F16
6497           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
6498   "TARGET_SIMD"
6499   "ld1r\\t{%0.<Vtype>}, %1"
6500   [(set_attr "type" "neon_load1_all_lanes")]
6501 )
6502
6503 (define_insn "aarch64_simd_ld1<mode>_x2"
6504   [(set (match_operand:OI 0 "register_operand" "=w")
6505         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6506                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6507                    UNSPEC_LD1))]
6508   "TARGET_SIMD"
6509   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6510   [(set_attr "type" "neon_load1_2reg<q>")]
6511 )
6512
6513 (define_insn "aarch64_simd_ld1<mode>_x2"
6514   [(set (match_operand:OI 0 "register_operand" "=w")
6515         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6516                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6517                    UNSPEC_LD1))]
6518   "TARGET_SIMD"
6519   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6520   [(set_attr "type" "neon_load1_2reg<q>")]
6521 )
6522
6523
6524 (define_insn "@aarch64_frecpe<mode>"
6525   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6526         (unspec:VHSDF_HSDF
6527          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6528          UNSPEC_FRECPE))]
6529   "TARGET_SIMD"
6530   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6531   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6532 )
6533
6534 (define_insn "aarch64_frecpx<mode>"
6535   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6536         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6537          UNSPEC_FRECPX))]
6538   "TARGET_SIMD"
6539   "frecpx\t%<s>0, %<s>1"
6540   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6541 )
6542
6543 (define_insn "@aarch64_frecps<mode>"
6544   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6545         (unspec:VHSDF_HSDF
6546           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6547           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6548           UNSPEC_FRECPS))]
6549   "TARGET_SIMD"
6550   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6551   [(set_attr "type" "neon_fp_recps_<stype><q>")]
6552 )
6553
6554 (define_insn "aarch64_urecpe<mode>"
6555   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6556         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6557                 UNSPEC_URECPE))]
6558  "TARGET_SIMD"
6559  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6560   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6561
6562 ;; Standard pattern name vec_extract<mode><Vel>.
6563
6564 (define_expand "vec_extract<mode><Vel>"
6565   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6566    (match_operand:VALL_F16 1 "register_operand")
6567    (match_operand:SI 2 "immediate_operand")]
6568   "TARGET_SIMD"
6569 {
6570     emit_insn
6571       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6572     DONE;
6573 })
6574
6575 ;; Extract a 64-bit vector from one half of a 128-bit vector.
6576 (define_expand "vec_extract<mode><Vhalf>"
6577   [(match_operand:<VHALF> 0 "register_operand")
6578    (match_operand:VQMOV_NO2E 1 "register_operand")
6579    (match_operand 2 "immediate_operand")]
6580   "TARGET_SIMD"
6581 {
6582   int start = INTVAL (operands[2]);
6583   if (start != 0 && start != <nunits> / 2)
6584     FAIL;
6585   rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
6586   emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
6587   DONE;
6588 })
6589
6590 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
6591 (define_expand "vec_extractv2dfv1df"
6592   [(match_operand:V1DF 0 "register_operand")
6593    (match_operand:V2DF 1 "register_operand")
6594    (match_operand 2 "immediate_operand")]
6595   "TARGET_SIMD"
6596 {
6597   /* V1DF is rarely used by other patterns, so it should be better to hide
6598      it in a subreg destination of a normal DF op.  */
6599   rtx scalar0 = gen_lowpart (DFmode, operands[0]);
6600   emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
6601   DONE;
6602 })
6603
6604 ;; aes
6605
6606 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6607   [(set (match_operand:V16QI 0 "register_operand" "=w")
6608         (unspec:V16QI
6609                 [(xor:V16QI
6610                  (match_operand:V16QI 1 "register_operand" "%0")
6611                  (match_operand:V16QI 2 "register_operand" "w"))]
6612          CRYPTO_AES))]
6613   "TARGET_SIMD && TARGET_AES"
6614   "aes<aes_op>\\t%0.16b, %2.16b"
6615   [(set_attr "type" "crypto_aese")]
6616 )
6617
6618 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6619   [(set (match_operand:V16QI 0 "register_operand" "=w")
6620         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6621          CRYPTO_AESMC))]
6622   "TARGET_SIMD && TARGET_AES"
6623   "aes<aesmc_op>\\t%0.16b, %1.16b"
6624   [(set_attr "type" "crypto_aesmc")]
6625 )
6626
6627 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6628 ;; and enforce the register dependency without scheduling or register
6629 ;; allocation messing up the order or introducing moves inbetween.
6630 ;;  Mash the two together during combine.
6631
6632 (define_insn "*aarch64_crypto_aese_fused"
6633   [(set (match_operand:V16QI 0 "register_operand" "=w")
6634         (unspec:V16QI
6635           [(unspec:V16QI
6636            [(xor:V16QI
6637                 (match_operand:V16QI 1 "register_operand" "%0")
6638                 (match_operand:V16QI 2 "register_operand" "w"))]
6639              UNSPEC_AESE)]
6640         UNSPEC_AESMC))]
6641   "TARGET_SIMD && TARGET_AES
6642    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6643   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6644   [(set_attr "type" "crypto_aese")
6645    (set_attr "length" "8")]
6646 )
6647
6648 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6649 ;; and enforce the register dependency without scheduling or register
6650 ;; allocation messing up the order or introducing moves inbetween.
6651 ;;  Mash the two together during combine.
6652
6653 (define_insn "*aarch64_crypto_aesd_fused"
6654   [(set (match_operand:V16QI 0 "register_operand" "=w")
6655         (unspec:V16QI
6656           [(unspec:V16QI
6657                     [(xor:V16QI
6658                         (match_operand:V16QI 1 "register_operand" "%0")
6659                         (match_operand:V16QI 2 "register_operand" "w"))]
6660                 UNSPEC_AESD)]
6661           UNSPEC_AESIMC))]
6662   "TARGET_SIMD && TARGET_AES
6663    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6664   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6665   [(set_attr "type" "crypto_aese")
6666    (set_attr "length" "8")]
6667 )
6668
6669 ;; sha1
6670
6671 (define_insn "aarch64_crypto_sha1hsi"
6672   [(set (match_operand:SI 0 "register_operand" "=w")
6673         (unspec:SI [(match_operand:SI 1
6674                        "register_operand" "w")]
6675          UNSPEC_SHA1H))]
6676   "TARGET_SIMD && TARGET_SHA2"
6677   "sha1h\\t%s0, %s1"
6678   [(set_attr "type" "crypto_sha1_fast")]
6679 )
6680
6681 (define_insn "aarch64_crypto_sha1hv4si"
6682   [(set (match_operand:SI 0 "register_operand" "=w")
6683         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6684                      (parallel [(const_int 0)]))]
6685          UNSPEC_SHA1H))]
6686   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6687   "sha1h\\t%s0, %s1"
6688   [(set_attr "type" "crypto_sha1_fast")]
6689 )
6690
6691 (define_insn "aarch64_be_crypto_sha1hv4si"
6692   [(set (match_operand:SI 0 "register_operand" "=w")
6693         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6694                      (parallel [(const_int 3)]))]
6695          UNSPEC_SHA1H))]
6696   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6697   "sha1h\\t%s0, %s1"
6698   [(set_attr "type" "crypto_sha1_fast")]
6699 )
6700
6701 (define_insn "aarch64_crypto_sha1su1v4si"
6702   [(set (match_operand:V4SI 0 "register_operand" "=w")
6703         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6704                       (match_operand:V4SI 2 "register_operand" "w")]
6705          UNSPEC_SHA1SU1))]
6706   "TARGET_SIMD && TARGET_SHA2"
6707   "sha1su1\\t%0.4s, %2.4s"
6708   [(set_attr "type" "crypto_sha1_fast")]
6709 )
6710
6711 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6712   [(set (match_operand:V4SI 0 "register_operand" "=w")
6713         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6714                       (match_operand:SI 2 "register_operand" "w")
6715                       (match_operand:V4SI 3 "register_operand" "w")]
6716          CRYPTO_SHA1))]
6717   "TARGET_SIMD && TARGET_SHA2"
6718   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6719   [(set_attr "type" "crypto_sha1_slow")]
6720 )
6721
6722 (define_insn "aarch64_crypto_sha1su0v4si"
6723   [(set (match_operand:V4SI 0 "register_operand" "=w")
6724         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6725                       (match_operand:V4SI 2 "register_operand" "w")
6726                       (match_operand:V4SI 3 "register_operand" "w")]
6727          UNSPEC_SHA1SU0))]
6728   "TARGET_SIMD && TARGET_SHA2"
6729   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6730   [(set_attr "type" "crypto_sha1_xor")]
6731 )
6732
6733 ;; sha256
6734
6735 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6736   [(set (match_operand:V4SI 0 "register_operand" "=w")
6737         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6738                       (match_operand:V4SI 2 "register_operand" "w")
6739                       (match_operand:V4SI 3 "register_operand" "w")]
6740          CRYPTO_SHA256))]
6741   "TARGET_SIMD && TARGET_SHA2"
6742   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6743   [(set_attr "type" "crypto_sha256_slow")]
6744 )
6745
6746 (define_insn "aarch64_crypto_sha256su0v4si"
6747   [(set (match_operand:V4SI 0 "register_operand" "=w")
6748         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6749                       (match_operand:V4SI 2 "register_operand" "w")]
6750          UNSPEC_SHA256SU0))]
6751   "TARGET_SIMD && TARGET_SHA2"
6752   "sha256su0\\t%0.4s, %2.4s"
6753   [(set_attr "type" "crypto_sha256_fast")]
6754 )
6755
6756 (define_insn "aarch64_crypto_sha256su1v4si"
6757   [(set (match_operand:V4SI 0 "register_operand" "=w")
6758         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6759                       (match_operand:V4SI 2 "register_operand" "w")
6760                       (match_operand:V4SI 3 "register_operand" "w")]
6761          UNSPEC_SHA256SU1))]
6762   "TARGET_SIMD && TARGET_SHA2"
6763   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6764   [(set_attr "type" "crypto_sha256_slow")]
6765 )
6766
6767 ;; sha512
6768
6769 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6770   [(set (match_operand:V2DI 0 "register_operand" "=w")
6771         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6772                       (match_operand:V2DI 2 "register_operand" "w")
6773                       (match_operand:V2DI 3 "register_operand" "w")]
6774          CRYPTO_SHA512))]
6775   "TARGET_SIMD && TARGET_SHA3"
6776   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6777   [(set_attr "type" "crypto_sha512")]
6778 )
6779
6780 (define_insn "aarch64_crypto_sha512su0qv2di"
6781   [(set (match_operand:V2DI 0 "register_operand" "=w")
6782         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6783                       (match_operand:V2DI 2 "register_operand" "w")]
6784          UNSPEC_SHA512SU0))]
6785   "TARGET_SIMD && TARGET_SHA3"
6786   "sha512su0\\t%0.2d, %2.2d"
6787   [(set_attr "type" "crypto_sha512")]
6788 )
6789
6790 (define_insn "aarch64_crypto_sha512su1qv2di"
6791   [(set (match_operand:V2DI 0 "register_operand" "=w")
6792         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6793                       (match_operand:V2DI 2 "register_operand" "w")
6794                       (match_operand:V2DI 3 "register_operand" "w")]
6795          UNSPEC_SHA512SU1))]
6796   "TARGET_SIMD && TARGET_SHA3"
6797   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6798   [(set_attr "type" "crypto_sha512")]
6799 )
6800
6801 ;; sha3
6802
6803 (define_insn "eor3q<mode>4"
6804   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6805         (xor:VQ_I
6806          (xor:VQ_I
6807           (match_operand:VQ_I 2 "register_operand" "w")
6808           (match_operand:VQ_I 3 "register_operand" "w"))
6809          (match_operand:VQ_I 1 "register_operand" "w")))]
6810   "TARGET_SIMD && TARGET_SHA3"
6811   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6812   [(set_attr "type" "crypto_sha3")]
6813 )
6814
6815 (define_insn "aarch64_rax1qv2di"
6816   [(set (match_operand:V2DI 0 "register_operand" "=w")
6817         (xor:V2DI
6818          (rotate:V2DI
6819           (match_operand:V2DI 2 "register_operand" "w")
6820           (const_int 1))
6821          (match_operand:V2DI 1 "register_operand" "w")))]
6822   "TARGET_SIMD && TARGET_SHA3"
6823   "rax1\\t%0.2d, %1.2d, %2.2d"
6824   [(set_attr "type" "crypto_sha3")]
6825 )
6826
6827 (define_insn "aarch64_xarqv2di"
6828   [(set (match_operand:V2DI 0 "register_operand" "=w")
6829         (rotatert:V2DI
6830          (xor:V2DI
6831           (match_operand:V2DI 1 "register_operand" "%w")
6832           (match_operand:V2DI 2 "register_operand" "w"))
6833          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6834   "TARGET_SIMD && TARGET_SHA3"
6835   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6836   [(set_attr "type" "crypto_sha3")]
6837 )
6838
6839 (define_insn "bcaxq<mode>4"
6840   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6841         (xor:VQ_I
6842          (and:VQ_I
6843           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6844           (match_operand:VQ_I 2 "register_operand" "w"))
6845          (match_operand:VQ_I 1 "register_operand" "w")))]
6846   "TARGET_SIMD && TARGET_SHA3"
6847   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6848   [(set_attr "type" "crypto_sha3")]
6849 )
6850
6851 ;; SM3
6852
6853 (define_insn "aarch64_sm3ss1qv4si"
6854   [(set (match_operand:V4SI 0 "register_operand" "=w")
6855         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6856                       (match_operand:V4SI 2 "register_operand" "w")
6857                       (match_operand:V4SI 3 "register_operand" "w")]
6858          UNSPEC_SM3SS1))]
6859   "TARGET_SIMD && TARGET_SM4"
6860   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6861   [(set_attr "type" "crypto_sm3")]
6862 )
6863
6864
6865 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6866   [(set (match_operand:V4SI 0 "register_operand" "=w")
6867         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6868                       (match_operand:V4SI 2 "register_operand" "w")
6869                       (match_operand:V4SI 3 "register_operand" "w")
6870                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6871          CRYPTO_SM3TT))]
6872   "TARGET_SIMD && TARGET_SM4"
6873   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6874   [(set_attr "type" "crypto_sm3")]
6875 )
6876
6877 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6878   [(set (match_operand:V4SI 0 "register_operand" "=w")
6879         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6880                       (match_operand:V4SI 2 "register_operand" "w")
6881                       (match_operand:V4SI 3 "register_operand" "w")]
6882          CRYPTO_SM3PART))]
6883   "TARGET_SIMD && TARGET_SM4"
6884   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6885   [(set_attr "type" "crypto_sm3")]
6886 )
6887
6888 ;; SM4
6889
6890 (define_insn "aarch64_sm4eqv4si"
6891   [(set (match_operand:V4SI 0 "register_operand" "=w")
6892         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6893                       (match_operand:V4SI 2 "register_operand" "w")]
6894          UNSPEC_SM4E))]
6895   "TARGET_SIMD && TARGET_SM4"
6896   "sm4e\\t%0.4s, %2.4s"
6897   [(set_attr "type" "crypto_sm4")]
6898 )
6899
6900 (define_insn "aarch64_sm4ekeyqv4si"
6901   [(set (match_operand:V4SI 0 "register_operand" "=w")
6902         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6903                       (match_operand:V4SI 2 "register_operand" "w")]
6904          UNSPEC_SM4EKEY))]
6905   "TARGET_SIMD && TARGET_SM4"
6906   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6907   [(set_attr "type" "crypto_sm4")]
6908 )
6909
6910 ;; fp16fml
6911
6912 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6913   [(set (match_operand:VDQSF 0 "register_operand")
6914         (unspec:VDQSF
6915          [(match_operand:VDQSF 1 "register_operand")
6916           (match_operand:<VFMLA_W> 2 "register_operand")
6917           (match_operand:<VFMLA_W> 3 "register_operand")]
6918          VFMLA16_LOW))]
6919   "TARGET_F16FML"
6920 {
6921   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6922                                             <nunits> * 2, false);
6923   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6924                                             <nunits> * 2, false);
6925
6926   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6927                                                                 operands[1],
6928                                                                 operands[2],
6929                                                                 operands[3],
6930                                                                 p1, p2));
6931   DONE;
6932
6933 })
6934
6935 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6936   [(set (match_operand:VDQSF 0 "register_operand")
6937         (unspec:VDQSF
6938          [(match_operand:VDQSF 1 "register_operand")
6939           (match_operand:<VFMLA_W> 2 "register_operand")
6940           (match_operand:<VFMLA_W> 3 "register_operand")]
6941          VFMLA16_HIGH))]
6942   "TARGET_F16FML"
6943 {
6944   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6945   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6946
6947   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6948                                                                  operands[1],
6949                                                                  operands[2],
6950                                                                  operands[3],
6951                                                                  p1, p2));
6952   DONE;
6953 })
6954
6955 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6956   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6957         (fma:VDQSF
6958          (float_extend:VDQSF
6959           (vec_select:<VFMLA_SEL_W>
6960            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6961            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6962          (float_extend:VDQSF
6963           (vec_select:<VFMLA_SEL_W>
6964            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6965            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6966          (match_operand:VDQSF 1 "register_operand" "0")))]
6967   "TARGET_F16FML"
6968   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6969   [(set_attr "type" "neon_fp_mul_s")]
6970 )
6971
6972 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6973   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6974         (fma:VDQSF
6975          (float_extend:VDQSF
6976           (neg:<VFMLA_SEL_W>
6977            (vec_select:<VFMLA_SEL_W>
6978             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6979             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6980          (float_extend:VDQSF
6981           (vec_select:<VFMLA_SEL_W>
6982            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6983            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6984          (match_operand:VDQSF 1 "register_operand" "0")))]
6985   "TARGET_F16FML"
6986   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6987   [(set_attr "type" "neon_fp_mul_s")]
6988 )
6989
6990 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6991   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6992         (fma:VDQSF
6993          (float_extend:VDQSF
6994           (vec_select:<VFMLA_SEL_W>
6995            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6996            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6997          (float_extend:VDQSF
6998           (vec_select:<VFMLA_SEL_W>
6999            (match_operand:<VFMLA_W> 3 "register_operand" "w")
7000            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
7001          (match_operand:VDQSF 1 "register_operand" "0")))]
7002   "TARGET_F16FML"
7003   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
7004   [(set_attr "type" "neon_fp_mul_s")]
7005 )
7006
7007 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
7008   [(set (match_operand:VDQSF 0 "register_operand" "=w")
7009         (fma:VDQSF
7010          (float_extend:VDQSF
7011           (neg:<VFMLA_SEL_W>
7012            (vec_select:<VFMLA_SEL_W>
7013             (match_operand:<VFMLA_W> 2 "register_operand" "w")
7014             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
7015          (float_extend:VDQSF
7016           (vec_select:<VFMLA_SEL_W>
7017            (match_operand:<VFMLA_W> 3 "register_operand" "w")
7018            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
7019          (match_operand:VDQSF 1 "register_operand" "0")))]
7020   "TARGET_F16FML"
7021   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
7022   [(set_attr "type" "neon_fp_mul_s")]
7023 )
7024
7025 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
7026   [(set (match_operand:V2SF 0 "register_operand")
7027         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7028                            (match_operand:V4HF 2 "register_operand")
7029                            (match_operand:V4HF 3 "register_operand")
7030                            (match_operand:SI 4 "aarch64_imm2")]
7031          VFMLA16_LOW))]
7032   "TARGET_F16FML"
7033 {
7034     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
7035     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7036
7037     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
7038                                                             operands[1],
7039                                                             operands[2],
7040                                                             operands[3],
7041                                                             p1, lane));
7042     DONE;
7043 }
7044 )
7045
7046 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
7047   [(set (match_operand:V2SF 0 "register_operand")
7048         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7049                            (match_operand:V4HF 2 "register_operand")
7050                            (match_operand:V4HF 3 "register_operand")
7051                            (match_operand:SI 4 "aarch64_imm2")]
7052          VFMLA16_HIGH))]
7053   "TARGET_F16FML"
7054 {
7055     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
7056     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7057
7058     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
7059                                                              operands[1],
7060                                                              operands[2],
7061                                                              operands[3],
7062                                                              p1, lane));
7063     DONE;
7064 })
7065
7066 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
7067   [(set (match_operand:V2SF 0 "register_operand" "=w")
7068         (fma:V2SF
7069          (float_extend:V2SF
7070            (vec_select:V2HF
7071             (match_operand:V4HF 2 "register_operand" "w")
7072             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
7073          (float_extend:V2SF
7074            (vec_duplicate:V2HF
7075             (vec_select:HF
7076              (match_operand:V4HF 3 "register_operand" "x")
7077              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7078          (match_operand:V2SF 1 "register_operand" "0")))]
7079   "TARGET_F16FML"
7080   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
7081   [(set_attr "type" "neon_fp_mul_s")]
7082 )
7083
7084 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
7085   [(set (match_operand:V2SF 0 "register_operand" "=w")
7086         (fma:V2SF
7087          (float_extend:V2SF
7088           (neg:V2HF
7089            (vec_select:V2HF
7090             (match_operand:V4HF 2 "register_operand" "w")
7091             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
7092          (float_extend:V2SF
7093           (vec_duplicate:V2HF
7094            (vec_select:HF
7095             (match_operand:V4HF 3 "register_operand" "x")
7096             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7097          (match_operand:V2SF 1 "register_operand" "0")))]
7098   "TARGET_F16FML"
7099   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
7100   [(set_attr "type" "neon_fp_mul_s")]
7101 )
7102
7103 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
7104   [(set (match_operand:V2SF 0 "register_operand" "=w")
7105         (fma:V2SF
7106          (float_extend:V2SF
7107            (vec_select:V2HF
7108             (match_operand:V4HF 2 "register_operand" "w")
7109             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
7110          (float_extend:V2SF
7111            (vec_duplicate:V2HF
7112             (vec_select:HF
7113              (match_operand:V4HF 3 "register_operand" "x")
7114              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7115          (match_operand:V2SF 1 "register_operand" "0")))]
7116   "TARGET_F16FML"
7117   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
7118   [(set_attr "type" "neon_fp_mul_s")]
7119 )
7120
7121 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
7122   [(set (match_operand:V2SF 0 "register_operand" "=w")
7123         (fma:V2SF
7124          (float_extend:V2SF
7125            (neg:V2HF
7126             (vec_select:V2HF
7127              (match_operand:V4HF 2 "register_operand" "w")
7128              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
7129          (float_extend:V2SF
7130            (vec_duplicate:V2HF
7131             (vec_select:HF
7132              (match_operand:V4HF 3 "register_operand" "x")
7133              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7134          (match_operand:V2SF 1 "register_operand" "0")))]
7135   "TARGET_F16FML"
7136   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
7137   [(set_attr "type" "neon_fp_mul_s")]
7138 )
7139
7140 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
7141   [(set (match_operand:V4SF 0 "register_operand")
7142         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7143                            (match_operand:V8HF 2 "register_operand")
7144                            (match_operand:V8HF 3 "register_operand")
7145                            (match_operand:SI 4 "aarch64_lane_imm3")]
7146          VFMLA16_LOW))]
7147   "TARGET_F16FML"
7148 {
7149     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
7150     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
7151
7152     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
7153                                                               operands[1],
7154                                                               operands[2],
7155                                                               operands[3],
7156                                                               p1, lane));
7157     DONE;
7158 })
7159
7160 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
7161   [(set (match_operand:V4SF 0 "register_operand")
7162         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7163                            (match_operand:V8HF 2 "register_operand")
7164                            (match_operand:V8HF 3 "register_operand")
7165                            (match_operand:SI 4 "aarch64_lane_imm3")]
7166          VFMLA16_HIGH))]
7167   "TARGET_F16FML"
7168 {
7169     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
7170     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
7171
7172     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
7173                                                                operands[1],
7174                                                                operands[2],
7175                                                                operands[3],
7176                                                                p1, lane));
7177     DONE;
7178 })
7179
7180 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
7181   [(set (match_operand:V4SF 0 "register_operand" "=w")
7182         (fma:V4SF
7183          (float_extend:V4SF
7184           (vec_select:V4HF
7185             (match_operand:V8HF 2 "register_operand" "w")
7186             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
7187          (float_extend:V4SF
7188           (vec_duplicate:V4HF
7189            (vec_select:HF
7190             (match_operand:V8HF 3 "register_operand" "x")
7191             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7192          (match_operand:V4SF 1 "register_operand" "0")))]
7193   "TARGET_F16FML"
7194   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
7195   [(set_attr "type" "neon_fp_mul_s")]
7196 )
7197
7198 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
7199   [(set (match_operand:V4SF 0 "register_operand" "=w")
7200         (fma:V4SF
7201           (float_extend:V4SF
7202            (neg:V4HF
7203             (vec_select:V4HF
7204              (match_operand:V8HF 2 "register_operand" "w")
7205              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
7206          (float_extend:V4SF
7207           (vec_duplicate:V4HF
7208            (vec_select:HF
7209             (match_operand:V8HF 3 "register_operand" "x")
7210             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7211          (match_operand:V4SF 1 "register_operand" "0")))]
7212   "TARGET_F16FML"
7213   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
7214   [(set_attr "type" "neon_fp_mul_s")]
7215 )
7216
7217 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
7218   [(set (match_operand:V4SF 0 "register_operand" "=w")
7219         (fma:V4SF
7220          (float_extend:V4SF
7221           (vec_select:V4HF
7222             (match_operand:V8HF 2 "register_operand" "w")
7223             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
7224          (float_extend:V4SF
7225           (vec_duplicate:V4HF
7226            (vec_select:HF
7227             (match_operand:V8HF 3 "register_operand" "x")
7228             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7229          (match_operand:V4SF 1 "register_operand" "0")))]
7230   "TARGET_F16FML"
7231   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
7232   [(set_attr "type" "neon_fp_mul_s")]
7233 )
7234
7235 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
7236   [(set (match_operand:V4SF 0 "register_operand" "=w")
7237         (fma:V4SF
7238          (float_extend:V4SF
7239           (neg:V4HF
7240            (vec_select:V4HF
7241             (match_operand:V8HF 2 "register_operand" "w")
7242             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
7243          (float_extend:V4SF
7244           (vec_duplicate:V4HF
7245            (vec_select:HF
7246             (match_operand:V8HF 3 "register_operand" "x")
7247             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7248          (match_operand:V4SF 1 "register_operand" "0")))]
7249   "TARGET_F16FML"
7250   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
7251   [(set_attr "type" "neon_fp_mul_s")]
7252 )
7253
7254 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
7255   [(set (match_operand:V2SF 0 "register_operand")
7256         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7257                       (match_operand:V4HF 2 "register_operand")
7258                       (match_operand:V8HF 3 "register_operand")
7259                       (match_operand:SI 4 "aarch64_lane_imm3")]
7260          VFMLA16_LOW))]
7261   "TARGET_F16FML"
7262 {
7263     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
7264     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
7265
7266     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
7267                                                              operands[1],
7268                                                              operands[2],
7269                                                              operands[3],
7270                                                              p1, lane));
7271     DONE;
7272
7273 })
7274
7275 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
7276   [(set (match_operand:V2SF 0 "register_operand")
7277         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7278                       (match_operand:V4HF 2 "register_operand")
7279                       (match_operand:V8HF 3 "register_operand")
7280                       (match_operand:SI 4 "aarch64_lane_imm3")]
7281          VFMLA16_HIGH))]
7282   "TARGET_F16FML"
7283 {
7284     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
7285     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
7286
7287     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
7288                                                               operands[1],
7289                                                               operands[2],
7290                                                               operands[3],
7291                                                               p1, lane));
7292     DONE;
7293
7294 })
7295
7296 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
7297   [(set (match_operand:V2SF 0 "register_operand" "=w")
7298         (fma:V2SF
7299          (float_extend:V2SF
7300            (vec_select:V2HF
7301             (match_operand:V4HF 2 "register_operand" "w")
7302             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
7303          (float_extend:V2SF
7304           (vec_duplicate:V2HF
7305            (vec_select:HF
7306             (match_operand:V8HF 3 "register_operand" "x")
7307             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7308          (match_operand:V2SF 1 "register_operand" "0")))]
7309   "TARGET_F16FML"
7310   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
7311   [(set_attr "type" "neon_fp_mul_s")]
7312 )
7313
7314 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
7315   [(set (match_operand:V2SF 0 "register_operand" "=w")
7316         (fma:V2SF
7317          (float_extend:V2SF
7318           (neg:V2HF
7319            (vec_select:V2HF
7320             (match_operand:V4HF 2 "register_operand" "w")
7321             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
7322          (float_extend:V2SF
7323           (vec_duplicate:V2HF
7324            (vec_select:HF
7325             (match_operand:V8HF 3 "register_operand" "x")
7326             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7327          (match_operand:V2SF 1 "register_operand" "0")))]
7328   "TARGET_F16FML"
7329   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
7330   [(set_attr "type" "neon_fp_mul_s")]
7331 )
7332
7333 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
7334   [(set (match_operand:V2SF 0 "register_operand" "=w")
7335         (fma:V2SF
7336          (float_extend:V2SF
7337            (vec_select:V2HF
7338             (match_operand:V4HF 2 "register_operand" "w")
7339             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
7340          (float_extend:V2SF
7341           (vec_duplicate:V2HF
7342            (vec_select:HF
7343             (match_operand:V8HF 3 "register_operand" "x")
7344             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7345          (match_operand:V2SF 1 "register_operand" "0")))]
7346   "TARGET_F16FML"
7347   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
7348   [(set_attr "type" "neon_fp_mul_s")]
7349 )
7350
7351 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
7352   [(set (match_operand:V2SF 0 "register_operand" "=w")
7353         (fma:V2SF
7354          (float_extend:V2SF
7355           (neg:V2HF
7356            (vec_select:V2HF
7357             (match_operand:V4HF 2 "register_operand" "w")
7358             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
7359          (float_extend:V2SF
7360           (vec_duplicate:V2HF
7361            (vec_select:HF
7362             (match_operand:V8HF 3 "register_operand" "x")
7363             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7364          (match_operand:V2SF 1 "register_operand" "0")))]
7365   "TARGET_F16FML"
7366   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
7367   [(set_attr "type" "neon_fp_mul_s")]
7368 )
7369
7370 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
7371   [(set (match_operand:V4SF 0 "register_operand")
7372         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7373                       (match_operand:V8HF 2 "register_operand")
7374                       (match_operand:V4HF 3 "register_operand")
7375                       (match_operand:SI 4 "aarch64_imm2")]
7376          VFMLA16_LOW))]
7377   "TARGET_F16FML"
7378 {
7379     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
7380     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7381
7382     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
7383                                                              operands[1],
7384                                                              operands[2],
7385                                                              operands[3],
7386                                                              p1, lane));
7387     DONE;
7388 })
7389
7390 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
7391   [(set (match_operand:V4SF 0 "register_operand")
7392         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7393                       (match_operand:V8HF 2 "register_operand")
7394                       (match_operand:V4HF 3 "register_operand")
7395                       (match_operand:SI 4 "aarch64_imm2")]
7396          VFMLA16_HIGH))]
7397   "TARGET_F16FML"
7398 {
7399     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
7400     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7401
7402     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
7403                                                               operands[1],
7404                                                               operands[2],
7405                                                               operands[3],
7406                                                               p1, lane));
7407     DONE;
7408 })
7409
7410 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
7411   [(set (match_operand:V4SF 0 "register_operand" "=w")
7412         (fma:V4SF
7413          (float_extend:V4SF
7414           (vec_select:V4HF
7415            (match_operand:V8HF 2 "register_operand" "w")
7416            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
7417          (float_extend:V4SF
7418           (vec_duplicate:V4HF
7419            (vec_select:HF
7420             (match_operand:V4HF 3 "register_operand" "x")
7421             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7422          (match_operand:V4SF 1 "register_operand" "0")))]
7423   "TARGET_F16FML"
7424   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
7425   [(set_attr "type" "neon_fp_mul_s")]
7426 )
7427
7428 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
7429   [(set (match_operand:V4SF 0 "register_operand" "=w")
7430         (fma:V4SF
7431          (float_extend:V4SF
7432           (neg:V4HF
7433            (vec_select:V4HF
7434             (match_operand:V8HF 2 "register_operand" "w")
7435             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
7436          (float_extend:V4SF
7437           (vec_duplicate:V4HF
7438            (vec_select:HF
7439             (match_operand:V4HF 3 "register_operand" "x")
7440             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7441          (match_operand:V4SF 1 "register_operand" "0")))]
7442   "TARGET_F16FML"
7443   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
7444   [(set_attr "type" "neon_fp_mul_s")]
7445 )
7446
7447 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
7448   [(set (match_operand:V4SF 0 "register_operand" "=w")
7449         (fma:V4SF
7450          (float_extend:V4SF
7451           (vec_select:V4HF
7452            (match_operand:V8HF 2 "register_operand" "w")
7453            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
7454          (float_extend:V4SF
7455           (vec_duplicate:V4HF
7456            (vec_select:HF
7457             (match_operand:V4HF 3 "register_operand" "x")
7458             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7459          (match_operand:V4SF 1 "register_operand" "0")))]
7460   "TARGET_F16FML"
7461   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
7462   [(set_attr "type" "neon_fp_mul_s")]
7463 )
7464
7465 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
7466   [(set (match_operand:V4SF 0 "register_operand" "=w")
7467         (fma:V4SF
7468          (float_extend:V4SF
7469           (neg:V4HF
7470            (vec_select:V4HF
7471             (match_operand:V8HF 2 "register_operand" "w")
7472             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
7473          (float_extend:V4SF
7474           (vec_duplicate:V4HF
7475            (vec_select:HF
7476             (match_operand:V4HF 3 "register_operand" "x")
7477             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7478          (match_operand:V4SF 1 "register_operand" "0")))]
7479   "TARGET_F16FML"
7480   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
7481   [(set_attr "type" "neon_fp_mul_s")]
7482 )
7483
7484 ;; pmull
7485
7486 (define_insn "aarch64_crypto_pmulldi"
7487   [(set (match_operand:TI 0 "register_operand" "=w")
7488         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
7489                      (match_operand:DI 2 "register_operand" "w")]
7490                     UNSPEC_PMULL))]
7491  "TARGET_SIMD && TARGET_AES"
7492  "pmull\\t%0.1q, %1.1d, %2.1d"
7493   [(set_attr "type" "crypto_pmull")]
7494 )
7495
7496 (define_insn "aarch64_crypto_pmullv2di"
7497  [(set (match_operand:TI 0 "register_operand" "=w")
7498        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
7499                    (match_operand:V2DI 2 "register_operand" "w")]
7500                   UNSPEC_PMULL2))]
7501   "TARGET_SIMD && TARGET_AES"
7502   "pmull2\\t%0.1q, %1.2d, %2.2d"
7503   [(set_attr "type" "crypto_pmull")]
7504 )
7505
7506 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
7507 (define_insn "<optab><Vnarrowq><mode>2"
7508   [(set (match_operand:VQN 0 "register_operand" "=w")
7509         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
7510   "TARGET_SIMD"
7511   "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
7512   [(set_attr "type" "neon_shift_imm_long")]
7513 )
7514
7515 (define_expand "aarch64_<su>xtl<mode>"
7516   [(set (match_operand:VQN 0 "register_operand" "=w")
7517         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
7518   "TARGET_SIMD"
7519   ""
7520 )
7521
7522 (define_expand "aarch64_xtn<mode>"
7523   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7524         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7525   "TARGET_SIMD"
7526   ""
7527 )
7528
7529 ;; Truncate a 128-bit integer vector to a 64-bit vector.
7530 (define_insn "trunc<mode><Vnarrowq>2"
7531   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7532         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7533   "TARGET_SIMD"
7534   "xtn\t%0.<Vntype>, %1.<Vtype>"
7535   [(set_attr "type" "neon_shift_imm_narrow_q")]
7536 )
7537
7538 (define_insn "aarch64_xtn2<mode>_le"
7539   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7540         (vec_concat:<VNARROWQ2>
7541           (match_operand:<VNARROWQ> 1 "register_operand" "0")
7542           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
7543   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
7544   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
7545   [(set_attr "type" "neon_shift_imm_narrow_q")]
7546 )
7547
7548 (define_insn "aarch64_xtn2<mode>_be"
7549   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7550         (vec_concat:<VNARROWQ2>
7551           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
7552           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7553   "TARGET_SIMD && BYTES_BIG_ENDIAN"
7554   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
7555   [(set_attr "type" "neon_shift_imm_narrow_q")]
7556 )
7557
7558 (define_expand "aarch64_xtn2<mode>"
7559   [(match_operand:<VNARROWQ2> 0 "register_operand")
7560    (match_operand:<VNARROWQ> 1 "register_operand")
7561    (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
7562   "TARGET_SIMD"
7563   {
7564     if (BYTES_BIG_ENDIAN)
7565       emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
7566                                              operands[2]));
7567     else
7568       emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
7569                                              operands[2]));
7570     DONE;
7571   }
7572 )
7573
7574 (define_insn "aarch64_bfdot<mode>"
7575   [(set (match_operand:VDQSF 0 "register_operand" "=w")
7576         (plus:VDQSF
7577           (unspec:VDQSF
7578            [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
7579             (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
7580             UNSPEC_BFDOT)
7581           (match_operand:VDQSF 1 "register_operand" "0")))]
7582   "TARGET_BF16_SIMD"
7583   "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
7584   [(set_attr "type" "neon_dot<q>")]
7585 )
7586
7587 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
7588   [(set (match_operand:VDQSF 0 "register_operand" "=w")
7589         (plus:VDQSF
7590           (unspec:VDQSF
7591            [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
7592             (match_operand:VBF 3 "register_operand" "w")
7593             (match_operand:SI 4 "const_int_operand" "n")]
7594             UNSPEC_BFDOT)
7595           (match_operand:VDQSF 1 "register_operand" "0")))]
7596   "TARGET_BF16_SIMD"
7597 {
7598   int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
7599   int lane = INTVAL (operands[4]);
7600   operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
7601   return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
7602 }
7603   [(set_attr "type" "neon_dot<VDQSF:q>")]
7604 )
7605
7606 ;; vget_low/high_bf16
7607 (define_expand "aarch64_vget_lo_halfv8bf"
7608   [(match_operand:V4BF 0 "register_operand")
7609    (match_operand:V8BF 1 "register_operand")]
7610   "TARGET_BF16_SIMD"
7611 {
7612   rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
7613   emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
7614   DONE;
7615 })
7616
7617 (define_expand "aarch64_vget_hi_halfv8bf"
7618   [(match_operand:V4BF 0 "register_operand")
7619    (match_operand:V8BF 1 "register_operand")]
7620   "TARGET_BF16_SIMD"
7621 {
7622   rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
7623   emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
7624   DONE;
7625 })
7626
7627 ;; bfmmla
7628 (define_insn "aarch64_bfmmlaqv4sf"
7629   [(set (match_operand:V4SF 0 "register_operand" "=w")
7630         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
7631                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7632                                  (match_operand:V8BF 3 "register_operand" "w")]
7633                     UNSPEC_BFMMLA)))]
7634   "TARGET_BF16_SIMD"
7635   "bfmmla\\t%0.4s, %2.8h, %3.8h"
7636   [(set_attr "type" "neon_fp_mla_s_q")]
7637 )
7638
7639 ;; bfmlal<bt>
7640 (define_insn "aarch64_bfmlal<bt>v4sf"
7641   [(set (match_operand:V4SF 0 "register_operand" "=w")
7642         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7643                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7644                                   (match_operand:V8BF 3 "register_operand" "w")]
7645                      BF_MLA)))]
7646   "TARGET_BF16_SIMD"
7647   "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
7648   [(set_attr "type" "neon_fp_mla_s_q")]
7649 )
7650
7651 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
7652   [(set (match_operand:V4SF 0 "register_operand" "=w")
7653         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7654                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7655                                   (match_operand:VBF 3 "register_operand" "w")
7656                                   (match_operand:SI 4 "const_int_operand" "n")]
7657                      BF_MLA)))]
7658   "TARGET_BF16_SIMD"
7659 {
7660   operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
7661   return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
7662 }
7663   [(set_attr "type" "neon_fp_mla_s_scalar_q")]
7664 )
7665
7666 ;; 8-bit integer matrix multiply-accumulate
7667 (define_insn "aarch64_simd_<sur>mmlav16qi"
7668   [(set (match_operand:V4SI 0 "register_operand" "=w")
7669         (plus:V4SI
7670          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
7671                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
7672          (match_operand:V4SI 1 "register_operand" "0")))]
7673   "TARGET_I8MM"
7674   "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
7675   [(set_attr "type" "neon_mla_s_q")]
7676 )
7677
7678 ;; bfcvtn
7679 (define_insn "aarch64_bfcvtn<q><mode>"
7680   [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
7681         (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
7682                             UNSPEC_BFCVTN))]
7683   "TARGET_BF16_SIMD"
7684   "bfcvtn\\t%0.4h, %1.4s"
7685   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
7686 )
7687
7688 (define_insn "aarch64_bfcvtn2v8bf"
7689   [(set (match_operand:V8BF 0 "register_operand" "=w")
7690         (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
7691                       (match_operand:V4SF 2 "register_operand" "w")]
7692                       UNSPEC_BFCVTN2))]
7693   "TARGET_BF16_SIMD"
7694   "bfcvtn2\\t%0.8h, %2.4s"
7695   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
7696 )
7697
7698 (define_insn "aarch64_bfcvtbf"
7699   [(set (match_operand:BF 0 "register_operand" "=w")
7700         (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
7701                     UNSPEC_BFCVT))]
7702   "TARGET_BF16_FP"
7703   "bfcvt\\t%h0, %s1"
7704   [(set_attr "type" "f_cvt")]
7705 )
7706
7707 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
7708 (define_insn "aarch64_vbfcvt<mode>"
7709   [(set (match_operand:V4SF 0 "register_operand" "=w")
7710         (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
7711                       UNSPEC_BFCVTN))]
7712   "TARGET_BF16_SIMD"
7713   "shll\\t%0.4s, %1.4h, #16"
7714   [(set_attr "type" "neon_shift_imm_long")]
7715 )
7716
7717 (define_insn "aarch64_vbfcvt_highv8bf"
7718   [(set (match_operand:V4SF 0 "register_operand" "=w")
7719         (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
7720                       UNSPEC_BFCVTN2))]
7721   "TARGET_BF16_SIMD"
7722   "shll2\\t%0.4s, %1.8h, #16"
7723   [(set_attr "type" "neon_shift_imm_long")]
7724 )
7725
7726 (define_insn "aarch64_bfcvtsf"
7727   [(set (match_operand:SF 0 "register_operand" "=w")
7728         (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
7729                     UNSPEC_BFCVT))]
7730   "TARGET_BF16_FP"
7731   "shl\\t%d0, %d1, #16"
7732   [(set_attr "type" "neon_shift_imm")]
7733 )