1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; NOTE(review): this excerpt appears to have lines elided (the original
;; line numbers embedded in the text jump, e.g. 23 -> 26), so pattern
;; conditions, braces and closing parens may be missing from view.
;; Comments describe only what is visible -- confirm against the full file.
;;
;; Standard move expander for all AdvSIMD vector modes (incl. FP16 moves).
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
23 (match_operand:VALL_F16MOV 1 "general_operand"))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move: only enabled when the target tolerates unaligned
;; accesses (!STRICT_ALIGNMENT).  May not fail during expansion, so a
;; mem := non-register source is legitimized by forcing it into a register.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand")
43 (match_operand:VALL 1 "general_operand"))]
44 "TARGET_SIMD && !STRICT_ALIGNMENT"
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector.  Two
;; alternatives: lane 0 of a SIMD register (DUP element form) or a
;; general-purpose register (DUP general form, marked ?r as a last resort).
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Floating-point variant: only the SIMD-register source form, since FP
;; values already live in the vector register file.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
;; Broadcast one selected lane of a vector to all lanes of the result.
;; The lane index is converted to the architectural (endian-adjusted)
;; numbering before printing.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
;; Same, but the source vector has the opposite width (64 <-> 128 bit)
;; of the destination, so the lane is numbered in <VSWAP_WIDTH>mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector move.  Alternatives: load, store of zero via xzr, store,
;; reg-reg SIMD move, SIMD->GP (umov), GP->SIMD (fmov), GP-GP, and
;; immediate materialization (Dn).
104 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
105 [(set (match_operand:VDMOV 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VDMOV 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
;; 128-bit vector move.  Store of zero uses STP xzr, xzr (hence the Umn
;; mem-pair constraint); GP<->SIMD transfers need two instructions, so
;; those alternatives are "multiple" with length 8 and are split later.
132 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
133 [(set (match_operand:VQMOV 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQMOV 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes; only valid when the selected lane is architectural
;; lane 0 after endian adjustment.
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
190 "ldp\\t%d0, %d2, %z1"
191 [(set_attr "type" "neon_ldp")]
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
204 "stp\\t%d1, %d3, %z0"
205 [(set_attr "type" "neon_stp")]
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
218 "ldp\\t%q0, %q2, %z1"
219 [(set_attr "type" "neon_ldp_q")]
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "stp\\t%q1, %q3, %z0"
232 [(set_attr "type" "neon_stp_q")]
;; NOTE(review): the "(define_split" header lines for the next two
;; patterns appear to be elided from this excerpt -- only the match
;; templates and conditions are visible.
;; Post-reload split of a Q-register move whose operands both ended up in
;; general-purpose registers: emit two DImode reg-reg moves.
237 [(set (match_operand:VQMOV 0 "register_operand" "")
238 (match_operand:VQMOV 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Post-reload split of a cross-file (FP<->GP) Q-register move.
249 [(set (match_operand:VQMOV 0 "register_operand" "")
250 (match_operand:VQMOV 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit move into two 64-bit halves.  GP source: write the low
;; and high halves of the vector register separately.  Otherwise: extract
;; the low and high halves of the source into the destination halves.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQMOV 0)
262 (match_operand:VQMOV 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
286 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
;; Extract one half (selected by the parallel of ascending lane indices)
;; of a 128-bit vector into a 64-bit result.
292 (define_expand "aarch64_get_half<mode>"
293 [(set (match_operand:<VHALF> 0 "register_operand")
295 (match_operand:VQMOV 1 "register_operand")
296 (match_operand 2 "ascending_int_parallel")))]
;; Move the low half of a Q register to either a SIMD or a GP register.
;; When the destination is a SIMD register it is the same D register, so
;; after reload the insn splits into a plain subreg move.
300 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
301 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
303 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
304 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
309 "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
310 [(set (match_dup 0) (match_dup 1))]
312 operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
314 [(set_attr "type" "mov_reg,neon_to_gp<q>")
315 (set_attr "length" "4")]
;; Move the high half of a Q register to a SIMD or GP register.
318 (define_insn "aarch64_simd_mov_from_<mode>high"
319 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
321 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
322 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
327 [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
328 (set_attr "length" "4")]
;; Vector OR-NOT.  Note the RTL negates operand 1 while the ORN encoding
;; negates its second source, hence %2 before %1 in the output template.
331 (define_insn "orn<mode>3"
332 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
333 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
334 (match_operand:VDQ_I 2 "register_operand" "w")))]
336 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
337 [(set_attr "type" "neon_logic<q>")]
;; Vector AND-NOT (bit clear); operand order swapped for the same reason.
340 (define_insn "bic<mode>3"
341 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
342 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
343 (match_operand:VDQ_I 2 "register_operand" "w")))]
345 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
346 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
349 (define_insn "add<mode>3"
350 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
351 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
352 (match_operand:VDQ_I 2 "register_operand" "w")))]
354 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
355 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
358 (define_insn "sub<mode>3"
359 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
360 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
361 (match_operand:VDQ_I 2 "register_operand" "w")))]
363 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
364 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply (no 64-bit element MUL exists, hence VDQ_BHSI).
367 (define_insn "mul<mode>3"
368 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
369 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
370 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
372 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
373 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte swap within each element, implemented with the REV family.
376 (define_insn "bswap<mode>2"
377 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
378 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
380 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
381 [(set_attr "type" "neon_rev<q>")]
;; Bit reversal within each byte (RBIT), byte-vector modes only.
384 (define_insn "aarch64_rbit<mode>"
385 [(set (match_operand:VB 0 "register_operand" "=w")
386 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
389 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
390 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as: byte-swap each element, bit-reverse
;; each byte (together a full per-element bit reversal), then count leading
;; zeros.  The rbit step operates on the QI-element view of the vector.
393 (define_expand "ctz<mode>2"
394 [(set (match_operand:VS 0 "register_operand")
395 (ctz:VS (match_operand:VS 1 "register_operand")))]
398 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
399 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
401 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
402 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign(x, y) = x with its sign bit XORed with y's sign bit.
;; Build a sign-bit mask (-1 << (unit_bits - 1)) in the integer-equivalent
;; mode, AND it with y to isolate y's sign, then XOR into x.
407 (define_expand "xorsign<mode>3"
408 [(match_operand:VHSDF 0 "register_operand")
409 (match_operand:VHSDF 1 "register_operand")
410 (match_operand:VHSDF 2 "register_operand")]
414 machine_mode imode = <V_INT_EQUIV>mode;
415 rtx v_bitmask = gen_reg_rtx (imode);
416 rtx op1x = gen_reg_rtx (imode);
417 rtx op2x = gen_reg_rtx (imode);
419 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
420 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
422 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
424 emit_move_insn (v_bitmask,
425 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
426 HOST_WIDE_INT_M1U << bits));
428 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
429 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
430 emit_move_insn (operands[0],
431 lowpart_subreg (<MODE>mode, op1x, imode));
436 ;; The fcadd and fcmla patterns are made UNSPEC explicitly, due to the
437 ;; fact that their usage needs to guarantee that the source vectors are
438 ;; contiguous. It would be wrong to describe the operation without being able
439 ;; to describe the permute that is also required, but even if that is done
440 ;; the permute would have been created as a LOAD_LANES which means the values
441 ;; in the registers are in the wrong order.
;; Complex add with rotation (FCADD #90 / #270).
442 (define_insn "aarch64_fcadd<rot><mode>"
443 [(set (match_operand:VHSDF 0 "register_operand" "=w")
444 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
445 (match_operand:VHSDF 2 "register_operand" "w")]
448 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
449 [(set_attr "type" "neon_fcadd")]
;; Optab entry point for complex add; little-endian only.
452 (define_expand "cadd<rot><mode>3"
453 [(set (match_operand:VHSDF 0 "register_operand")
454 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
455 (match_operand:VHSDF 2 "register_operand")]
457 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
;; Complex multiply-accumulate with rotation; accumulator is tied to the
;; destination ("0" constraint).
460 (define_insn "aarch64_fcmla<rot><mode>"
461 [(set (match_operand:VHSDF 0 "register_operand" "=w")
462 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
463 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
464 (match_operand:VHSDF 3 "register_operand" "w")]
467 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
468 [(set_attr "type" "neon_fcmla")]
;; FCMLA with a lane-selected (complex-pair) multiplier.
472 (define_insn "aarch64_fcmla_lane<rot><mode>"
473 [(set (match_operand:VHSDF 0 "register_operand" "=w")
474 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
475 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
476 (match_operand:VHSDF 3 "register_operand" "w")
477 (match_operand:SI 4 "const_int_operand" "n")]
481 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
482 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
484 [(set_attr "type" "neon_fcmla")]
;; V4HF accumulator with a lane chosen from a V8HF (128-bit) multiplier.
487 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
488 [(set (match_operand:V4HF 0 "register_operand" "=w")
489 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
490 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
491 (match_operand:V8HF 3 "register_operand" "w")
492 (match_operand:SI 4 "const_int_operand" "n")]
496 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
497 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
499 [(set_attr "type" "neon_fcmla")]
;; Q-form FCMLA with the lane taken from a 64-bit half-width multiplier;
;; the lane count is halved because lanes index complex pairs.
502 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
503 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
504 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
505 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
506 (match_operand:<VHALF> 3 "register_operand" "w")
507 (match_operand:SI 4 "const_int_operand" "n")]
511 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
513 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
514 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
516 [(set_attr "type" "neon_fcmla")]
519 ;; The complex mla/mls operations always need to expand to two instructions.
520 ;; The first operation does half the computation and the second does the
521 ;; remainder. Because of this, expand early.
;; Expands to two chained FCMLA instructions (rotations <rotsplit1> and
;; <rotsplit2>), accumulating through a temporary.
522 (define_expand "cml<fcmac1><conj_op><mode>4"
523 [(set (match_operand:VHSDF 0 "register_operand")
524 (plus:VHSDF (match_operand:VHSDF 1 "register_operand")
525 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
526 (match_operand:VHSDF 3 "register_operand")]
528 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
530 rtx tmp = gen_reg_rtx (<MODE>mode);
531 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1],
532 operands[3], operands[2]));
533 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
534 operands[3], operands[2]));
538 ;; The complex mul operations always need to expand to two instructions.
539 ;; The first operation does half the computation and the second does the
540 ;; remainder. Because of this, expand early.
;; Like cml, but the accumulator chain starts from a zero register.
541 (define_expand "cmul<conj_op><mode>3"
542 [(set (match_operand:VHSDF 0 "register_operand")
543 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
544 (match_operand:VHSDF 2 "register_operand")]
546 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
548 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
549 rtx res1 = gen_reg_rtx (<MODE>mode);
550 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
551 operands[2], operands[1]));
552 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
553 operands[2], operands[1]));
557 ;; These instructions map to the __builtins for the Dot Product operations.
558 (define_insn "aarch64_<sur>dot<vsi2qi>"
559 [(set (match_operand:VS 0 "register_operand" "=w")
560 (plus:VS (match_operand:VS 1 "register_operand" "0")
561 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
562 (match_operand:<VSI2QI> 3 "register_operand" "w")]
565 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
566 [(set_attr "type" "neon_dot<q>")]
569 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot
570 ;; (vector) Dot Product operation.
571 (define_insn "aarch64_usdot<vsi2qi>"
572 [(set (match_operand:VS 0 "register_operand" "=w")
574 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
575 (match_operand:<VSI2QI> 3 "register_operand" "w")]
577 (match_operand:VS 1 "register_operand" "0")))]
579 "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
580 [(set_attr "type" "neon_dot<q>")]
583 ;; These expands map to the Dot Product optab the vectorizer checks for.
584 ;; The auto-vectorizer expects a dot product builtin that also does an
585 ;; accumulation into the provided register.
586 ;; Given the following pattern
588 ;; for (i=0; i<len; i++) {
594 ;; This can be auto-vectorized to
595 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
597 ;; given enough iterations. However the vectorizer can keep unrolling the loop
598 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
599 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
602 ;; and so the vectorizer provides r, in which the result has to be accumulated.
603 (define_expand "<sur>dot_prod<vsi2qi>"
604 [(set (match_operand:VS 0 "register_operand")
605 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
606 (match_operand:<VSI2QI> 2 "register_operand")]
608 (match_operand:VS 3 "register_operand")))]
612 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
614 emit_insn (gen_rtx_SET (operands[0], operands[3]));
618 ;; These instructions map to the __builtins for the Dot Product
619 ;; indexed operations.
;; Lane-indexed dot product with a 64-bit (V8QI) multiplier.
620 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
621 [(set (match_operand:VS 0 "register_operand" "=w")
622 (plus:VS (match_operand:VS 1 "register_operand" "0")
623 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
624 (match_operand:V8QI 3 "register_operand" "<h_con>")
625 (match_operand:SI 4 "immediate_operand" "i")]
629 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
630 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
632 [(set_attr "type" "neon_dot<q>")]
;; Lane-indexed dot product with a 128-bit (V16QI) multiplier.
635 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
636 [(set (match_operand:VS 0 "register_operand" "=w")
637 (plus:VS (match_operand:VS 1 "register_operand" "0")
638 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
639 (match_operand:V16QI 3 "register_operand" "<h_con>")
640 (match_operand:SI 4 "immediate_operand" "i")]
644 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
645 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
647 [(set_attr "type" "neon_dot<q>")]
650 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
651 ;; (by element) Dot Product operations.
;; Lanes index groups of four bytes, hence the "nunits / 4" adjustment.
652 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
653 [(set (match_operand:VS 0 "register_operand" "=w")
655 (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
656 (match_operand:VB 3 "register_operand" "w")
657 (match_operand:SI 4 "immediate_operand" "i")]
659 (match_operand:VS 1 "register_operand" "0")))]
662 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
663 int lane = INTVAL (operands[4]);
664 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
665 return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
667 [(set_attr "type" "neon_dot<VS:q>")]
;; copysign(x, y): bit-select via BSL with a sign-bit mask -- sign bit taken
;; from y (operand 2), remaining bits from x (operand 1).
670 (define_expand "copysign<mode>3"
671 [(match_operand:VHSDF 0 "register_operand")
672 (match_operand:VHSDF 1 "register_operand")
673 (match_operand:VHSDF 2 "register_operand")]
674 "TARGET_FLOAT && TARGET_SIMD"
676 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
677 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
679 emit_move_insn (v_bitmask,
680 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
681 HOST_WIDE_INT_M1U << bits));
682 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
683 operands[2], operands[1]));
;; Multiply a vector by one broadcast lane of another vector (MUL/FMUL
;; by-element form), matched by combine.
688 (define_insn "*aarch64_mul3_elt<mode>"
689 [(set (match_operand:VMUL 0 "register_operand" "=w")
693 (match_operand:VMUL 1 "register_operand" "<h_con>")
694 (parallel [(match_operand:SI 2 "immediate_operand")])))
695 (match_operand:VMUL 3 "register_operand" "w")))]
698 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
699 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
701 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Same, with the lane selected from the opposite-width vector mode.
704 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
705 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
706 (mult:VMUL_CHANGE_NLANES
707 (vec_duplicate:VMUL_CHANGE_NLANES
709 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
710 (parallel [(match_operand:SI 2 "immediate_operand")])))
711 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
714 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
715 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
717 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a scalar duplicated into all lanes: uses element [0] form.
720 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
721 [(set (match_operand:VMUL 0 "register_operand" "=w")
724 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
725 (match_operand:VMUL 2 "register_operand" "w")))]
727 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
728 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
731 (define_insn "@aarch64_rsqrte<mode>"
732 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
733 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
736 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
737 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root Newton-Raphson step (FRSQRTS).
739 (define_insn "@aarch64_rsqrts<mode>"
740 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
741 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
742 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
745 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
746 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Optab entry: expand rsqrt via the approximate-sqrt helper (recip=true).
748 (define_expand "rsqrt<mode>2"
749 [(set (match_operand:VALLF 0 "register_operand")
750 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
754 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Scalar DF multiply by a selected lane of a V2DF register, emitted with
;; the by-element FMUL form.
758 (define_insn "*aarch64_mul3_elt_to_64v2df"
759 [(set (match_operand:DF 0 "register_operand" "=w")
762 (match_operand:V2DF 1 "register_operand" "w")
763 (parallel [(match_operand:SI 2 "immediate_operand")]))
764 (match_operand:DF 3 "register_operand" "w")))]
767 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
768 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
770 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negation.
773 (define_insn "neg<mode>2"
774 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
775 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
777 "neg\t%0.<Vtype>, %1.<Vtype>"
778 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (RTL abs form).
781 (define_insn "abs<mode>2"
782 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
783 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
785 "abs\t%0.<Vtype>, %1.<Vtype>"
786 [(set_attr "type" "neon_abs<q>")]
789 ;; The intrinsic version of integer ABS must not be allowed to
790 ;; combine with any operation with an integrated ABS step, such
;; Intrinsic ABS, kept as an unspec so combine cannot fuse it.
792 (define_insn "aarch64_abs<mode>"
793 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
795 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
798 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
799 [(set_attr "type" "neon_abs<q>")]
802 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
803 ;; This isn't accurate as ABS always treats its input as a signed value.
804 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
805 ;; Whereas SABD would return 192 (-64 signed) on the above example.
806 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
807 (define_insn "aarch64_<su>abd<mode>"
808 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
811 (match_operand:VDQ_BHSI 1 "register_operand" "w")
812 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
817 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
818 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference of the high halves ([SU]ABDL2).
821 (define_insn "aarch64_<sur>abdl2<mode>_3"
822 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
823 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
824 (match_operand:VDQV_S 2 "register_operand" "w")]
827 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
828 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute-difference accumulate ([SU]ABAL); operand 3 is the
;; accumulator, tied to the destination.
831 (define_insn "aarch64_<sur>abal<mode>_4"
832 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
833 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
834 (match_operand:VDQV_S 2 "register_operand" "w")
835 (match_operand:<VDBLW> 3 "register_operand" "0")]
838 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
839 [(set_attr "type" "neon_arith_acc<q>")]
;; Pairwise add-accumulate long ([SU]ADALP); operand 1 is the accumulator.
842 (define_insn "aarch64_<sur>adalp<mode>"
843 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
844 (unspec:<VDBLW> [(match_operand:VDQV_S 2 "register_operand" "w")
845 (match_operand:<VDBLW> 1 "register_operand" "0")]
848 "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
849 [(set_attr "type" "neon_reduc_add<q>")]
852 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
853 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
854 ;; reduction of the difference into a V4SI vector and accumulate that into
855 ;; operand 3 before copying that into the result operand 0.
856 ;; Perform that with a sequence of:
857 ;; UABDL2 tmp.8h, op1.16b, op2.16b
858 ;; UABAL tmp.8h, op1.16b, op2.16b
859 ;; UADALP op3.4s, tmp.8h
860 ;; MOV op0, op3 // should be eliminated in later passes.
862 ;; For TARGET_DOTPROD we do:
863 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
864 ;; UABD tmp2.16b, op1.16b, op2.16b
865 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
866 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
868 ;; The signed version just uses the signed variants of the above instructions
869 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
872 (define_expand "<sur>sadv16qi"
873 [(use (match_operand:V4SI 0 "register_operand"))
874 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
875 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
876 (use (match_operand:V4SI 3 "register_operand"))]
;; Dot-product path: ABD then UDOT against an all-ones vector.
881 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
882 rtx abd = gen_reg_rtx (V16QImode);
883 emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
884 emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
;; Fallback path: ABDL2 + ABAL + ADALP widening-reduction chain.
888 rtx reduc = gen_reg_rtx (V8HImode);
889 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
891 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
892 operands[2], reduc));
893 emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
894 emit_move_insn (operands[0], operands[3]);
;; Absolute-difference accumulate ([SU]ABA); operand 1 is the accumulator,
;; tied to the destination.
899 (define_insn "aarch64_<su>aba<mode>"
900 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
901 (plus:VDQ_BHSI (minus:VDQ_BHSI
903 (match_operand:VDQ_BHSI 2 "register_operand" "w")
904 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
908 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
910 "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
911 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD), vector and scalar modes.
914 (define_insn "fabd<mode>3"
915 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
918 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
919 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
921 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
922 [(set_attr "type" "neon_fp_abd_<stype><q>")]
925 ;; For AND (vector, register) and BIC (vector, immediate)
;; Second alternative ties op1 to the destination and emits BIC with the
;; inverted immediate (Db constraint).
926 (define_insn "and<mode>3"
927 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
928 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
929 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
932 switch (which_alternative)
935 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
937 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
943 [(set_attr "type" "neon_logic<q>")]
946 ;; For ORR (vector, register) and ORR (vector, immediate)
947 (define_insn "ior<mode>3"
948 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
949 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
950 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
953 switch (which_alternative)
956 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
958 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
964 [(set_attr "type" "neon_logic<q>")]
;; Vector exclusive OR (EOR).
967 (define_insn "xor<mode>3"
968 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
969 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
970 (match_operand:VDQ_I 2 "register_operand" "w")))]
972 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
973 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT.
976 (define_insn "one_cmpl<mode>2"
977 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
978 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
980 "not\t%0.<Vbtype>, %1.<Vbtype>"
981 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of a vector (vec_merge form).  Operand 2
;; is a one-hot mask; its log2 gives the lane, endian-adjusted.  Source may
;; be lane 0 of a SIMD reg (INS element), a GP reg (INS general), or
;; memory (LD1 single lane).
984 (define_insn "aarch64_simd_vec_set<mode>"
985 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
987 (vec_duplicate:VALL_F16
988 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
989 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
990 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
993 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
994 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
995 switch (which_alternative)
998 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1000 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1002 return "ld1\\t{%0.<Vetype>}[%p2], %1";
1007 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Copy lane %4 of operand 3 into lane %p2 of the destination (INS
;; element-to-element); operand 1 supplies the unchanged lanes.
1010 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1011 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1013 (vec_duplicate:VALL_F16
1015 (match_operand:VALL_F16 3 "register_operand" "w")
1017 [(match_operand:SI 4 "immediate_operand" "i")])))
1018 (match_operand:VALL_F16 1 "register_operand" "0")
1019 (match_operand:SI 2 "immediate_operand" "i")))]
1022 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1023 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1024 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1026 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1028 [(set_attr "type" "neon_ins<q>")]
;; Same, with the source lane taken from the opposite-width vector mode.
1031 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1032 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1033 (vec_merge:VALL_F16_NO_V2Q
1034 (vec_duplicate:VALL_F16_NO_V2Q
1036 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1038 [(match_operand:SI 4 "immediate_operand" "i")])))
1039 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1040 (match_operand:SI 2 "immediate_operand" "i")))]
1043 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1044 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1045 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1046 INTVAL (operands[4]));
1048 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1050 [(set_attr "type" "neon_ins<q>")]
1053 (define_expand "signbit<mode>2"
1054 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1055 (use (match_operand:VDQSF 1 "register_operand"))]
1058 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1059 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1061 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1063 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1068 (define_insn "aarch64_simd_lshr<mode>"
1069 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1070 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1071 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1073 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1074 [(set_attr "type" "neon_shift_imm<q>")]
1077 (define_insn "aarch64_simd_ashr<mode>"
1078 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1079 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1080 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1082 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1083 [(set_attr "type" "neon_shift_imm<q>")]
1086 (define_insn "*aarch64_simd_sra<mode>"
1087 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1090 (match_operand:VDQ_I 1 "register_operand" "w")
1091 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1092 (match_operand:VDQ_I 3 "register_operand" "0")))]
1094 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1095 [(set_attr "type" "neon_shift_acc<q>")]
1098 (define_insn "aarch64_simd_imm_shl<mode>"
1099 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1100 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1101 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
1103 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1104 [(set_attr "type" "neon_shift_imm<q>")]
1107 (define_insn "aarch64_simd_reg_sshl<mode>"
1108 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1109 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1110 (match_operand:VDQ_I 2 "register_operand" "w")))]
1112 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1113 [(set_attr "type" "neon_shift_reg<q>")]
1116 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1117 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1118 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1119 (match_operand:VDQ_I 2 "register_operand" "w")]
1120 UNSPEC_ASHIFT_UNSIGNED))]
1122 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1123 [(set_attr "type" "neon_shift_reg<q>")]
1126 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1127 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1128 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1129 (match_operand:VDQ_I 2 "register_operand" "w")]
1130 UNSPEC_ASHIFT_SIGNED))]
1132 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1133 [(set_attr "type" "neon_shift_reg<q>")]
1136 (define_expand "ashl<mode>3"
1137 [(match_operand:VDQ_I 0 "register_operand")
1138 (match_operand:VDQ_I 1 "register_operand")
1139 (match_operand:SI 2 "general_operand")]
1142 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1145 if (CONST_INT_P (operands[2]))
1147 shift_amount = INTVAL (operands[2]);
1148 if (shift_amount >= 0 && shift_amount < bit_width)
1150 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1152 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1159 operands[2] = force_reg (SImode, operands[2]);
1161 rtx tmp = gen_reg_rtx (<MODE>mode);
1162 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1165 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1169 (define_expand "lshr<mode>3"
1170 [(match_operand:VDQ_I 0 "register_operand")
1171 (match_operand:VDQ_I 1 "register_operand")
1172 (match_operand:SI 2 "general_operand")]
1175 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1178 if (CONST_INT_P (operands[2]))
1180 shift_amount = INTVAL (operands[2]);
1181 if (shift_amount > 0 && shift_amount <= bit_width)
1183 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1185 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1192 operands[2] = force_reg (SImode, operands[2]);
1194 rtx tmp = gen_reg_rtx (SImode);
1195 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1196 emit_insn (gen_negsi2 (tmp, operands[2]));
1197 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1198 convert_to_mode (<VEL>mode, tmp, 0)));
1199 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1204 (define_expand "ashr<mode>3"
1205 [(match_operand:VDQ_I 0 "register_operand")
1206 (match_operand:VDQ_I 1 "register_operand")
1207 (match_operand:SI 2 "general_operand")]
1210 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1213 if (CONST_INT_P (operands[2]))
1215 shift_amount = INTVAL (operands[2]);
1216 if (shift_amount > 0 && shift_amount <= bit_width)
1218 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1220 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1227 operands[2] = force_reg (SImode, operands[2]);
1229 rtx tmp = gen_reg_rtx (SImode);
1230 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1231 emit_insn (gen_negsi2 (tmp, operands[2]));
1232 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1234 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1239 (define_expand "vashl<mode>3"
1240 [(match_operand:VDQ_I 0 "register_operand")
1241 (match_operand:VDQ_I 1 "register_operand")
1242 (match_operand:VDQ_I 2 "register_operand")]
1245 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1250 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1251 ;; Negating individual lanes most certainly offsets the
1252 ;; gain from vectorization.
1253 (define_expand "vashr<mode>3"
1254 [(match_operand:VDQ_BHSI 0 "register_operand")
1255 (match_operand:VDQ_BHSI 1 "register_operand")
1256 (match_operand:VDQ_BHSI 2 "register_operand")]
1259 rtx neg = gen_reg_rtx (<MODE>mode);
1260 emit (gen_neg<mode>2 (neg, operands[2]));
1261 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1267 (define_expand "aarch64_ashr_simddi"
1268 [(match_operand:DI 0 "register_operand")
1269 (match_operand:DI 1 "register_operand")
1270 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1273 /* An arithmetic shift right by 64 fills the result with copies of the sign
1274 bit, just like asr by 63 - however the standard pattern does not handle
1276 if (INTVAL (operands[2]) == 64)
1277 operands[2] = GEN_INT (63);
1278 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1283 (define_expand "vlshr<mode>3"
1284 [(match_operand:VDQ_BHSI 0 "register_operand")
1285 (match_operand:VDQ_BHSI 1 "register_operand")
1286 (match_operand:VDQ_BHSI 2 "register_operand")]
1289 rtx neg = gen_reg_rtx (<MODE>mode);
1290 emit (gen_neg<mode>2 (neg, operands[2]));
1291 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1296 (define_expand "aarch64_lshr_simddi"
1297 [(match_operand:DI 0 "register_operand")
1298 (match_operand:DI 1 "register_operand")
1299 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1302 if (INTVAL (operands[2]) == 64)
1303 emit_move_insn (operands[0], const0_rtx);
1305 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1310 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1311 (define_insn "vec_shr_<mode>"
1312 [(set (match_operand:VD 0 "register_operand" "=w")
1313 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1314 (match_operand:SI 2 "immediate_operand" "i")]
1318 if (BYTES_BIG_ENDIAN)
1319 return "shl %d0, %d1, %2";
1321 return "ushr %d0, %d1, %2";
1323 [(set_attr "type" "neon_shift_imm")]
1326 (define_expand "vec_set<mode>"
1327 [(match_operand:VALL_F16 0 "register_operand")
1328 (match_operand:<VEL> 1 "register_operand")
1329 (match_operand:SI 2 "immediate_operand")]
1332 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1333 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1334 GEN_INT (elem), operands[0]));
1340 (define_insn "aarch64_mla<mode>"
1341 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1342 (plus:VDQ_BHSI (mult:VDQ_BHSI
1343 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1344 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1345 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1347 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1348 [(set_attr "type" "neon_mla_<Vetype><q>")]
1351 (define_insn "*aarch64_mla_elt<mode>"
1352 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1355 (vec_duplicate:VDQHS
1357 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1358 (parallel [(match_operand:SI 2 "immediate_operand")])))
1359 (match_operand:VDQHS 3 "register_operand" "w"))
1360 (match_operand:VDQHS 4 "register_operand" "0")))]
1363 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1364 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1366 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1369 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1370 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1373 (vec_duplicate:VDQHS
1375 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1376 (parallel [(match_operand:SI 2 "immediate_operand")])))
1377 (match_operand:VDQHS 3 "register_operand" "w"))
1378 (match_operand:VDQHS 4 "register_operand" "0")))]
1381 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1382 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1384 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1387 (define_insn "*aarch64_mla_elt_merge<mode>"
1388 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1390 (mult:VDQHS (vec_duplicate:VDQHS
1391 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1392 (match_operand:VDQHS 2 "register_operand" "w"))
1393 (match_operand:VDQHS 3 "register_operand" "0")))]
1395 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1396 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1399 (define_insn "aarch64_mls<mode>"
1400 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1401 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1402 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1403 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1405 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1406 [(set_attr "type" "neon_mla_<Vetype><q>")]
1409 (define_insn "*aarch64_mls_elt<mode>"
1410 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1412 (match_operand:VDQHS 4 "register_operand" "0")
1414 (vec_duplicate:VDQHS
1416 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1417 (parallel [(match_operand:SI 2 "immediate_operand")])))
1418 (match_operand:VDQHS 3 "register_operand" "w"))))]
1421 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1422 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1424 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1427 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1428 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1430 (match_operand:VDQHS 4 "register_operand" "0")
1432 (vec_duplicate:VDQHS
1434 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1435 (parallel [(match_operand:SI 2 "immediate_operand")])))
1436 (match_operand:VDQHS 3 "register_operand" "w"))))]
1439 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1440 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1442 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1445 (define_insn "*aarch64_mls_elt_merge<mode>"
1446 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1448 (match_operand:VDQHS 1 "register_operand" "0")
1449 (mult:VDQHS (vec_duplicate:VDQHS
1450 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1451 (match_operand:VDQHS 3 "register_operand" "w"))))]
1453 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1454 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1457 ;; Max/Min operations.
1458 (define_insn "<su><maxmin><mode>3"
1459 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1460 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1461 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1463 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1464 [(set_attr "type" "neon_minmax<q>")]
1467 (define_expand "<su><maxmin>v2di3"
1468 [(set (match_operand:V2DI 0 "register_operand")
1469 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1470 (match_operand:V2DI 2 "register_operand")))]
1473 enum rtx_code cmp_operator;
1494 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1495 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1496 operands[2], cmp_fmt, operands[1], operands[2]));
1500 ;; Pairwise Integer Max/Min operations.
1501 (define_insn "aarch64_<maxmin_uns>p<mode>"
1502 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1503 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1504 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1507 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1508 [(set_attr "type" "neon_minmax<q>")]
1511 ;; Pairwise FP Max/Min operations.
1512 (define_insn "aarch64_<maxmin_uns>p<mode>"
1513 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1514 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1515 (match_operand:VHSDF 2 "register_operand" "w")]
1518 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1519 [(set_attr "type" "neon_minmax<q>")]
1522 ;; vec_concat gives a new vector with the low elements from operand 1, and
1523 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1524 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1525 ;; What that means is that the RTL descriptions of the below patterns
1526 ;; need to change depending on endianness.
1528 ;; Move to the low architectural bits of the register.
1529 ;; On little-endian this is { operand, zeroes }
1530 ;; On big-endian this is { zeroes, operand }
1532 (define_insn "move_lo_quad_internal_<mode>"
1533 [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1534 (vec_concat:VQMOV_NO2E
1535 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1536 (vec_duplicate:<VHALF> (const_int 0))))]
1537 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1542 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1543 (set_attr "length" "4")
1544 (set_attr "arch" "simd,fp,simd")]
1547 (define_insn "move_lo_quad_internal_<mode>"
1548 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1550 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1552 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1557 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1558 (set_attr "length" "4")
1559 (set_attr "arch" "simd,fp,simd")]
1562 (define_insn "move_lo_quad_internal_be_<mode>"
1563 [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1564 (vec_concat:VQMOV_NO2E
1565 (vec_duplicate:<VHALF> (const_int 0))
1566 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1567 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1572 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1573 (set_attr "length" "4")
1574 (set_attr "arch" "simd,fp,simd")]
1577 (define_insn "move_lo_quad_internal_be_<mode>"
1578 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1581 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1582 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1587 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1588 (set_attr "length" "4")
1589 (set_attr "arch" "simd,fp,simd")]
1592 (define_expand "move_lo_quad_<mode>"
1593 [(match_operand:VQMOV 0 "register_operand")
1594 (match_operand:VQMOV 1 "register_operand")]
1597 if (BYTES_BIG_ENDIAN)
1598 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1600 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1605 ;; Move operand1 to the high architectural bits of the register, keeping
1606 ;; the low architectural bits of operand2.
1607 ;; For little-endian this is { operand2, operand1 }
1608 ;; For big-endian this is { operand1, operand2 }
1610 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1611 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1615 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1616 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1617 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1619 ins\\t%0.d[1], %1.d[0]
1621 [(set_attr "type" "neon_ins")]
1624 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1625 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1627 (match_operand:<VHALF> 1 "register_operand" "w,r")
1630 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1631 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1633 ins\\t%0.d[1], %1.d[0]
1635 [(set_attr "type" "neon_ins")]
1638 (define_expand "move_hi_quad_<mode>"
1639 [(match_operand:VQMOV 0 "register_operand")
1640 (match_operand:<VHALF> 1 "register_operand")]
1643 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1644 if (BYTES_BIG_ENDIAN)
1645 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1648 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1653 ;; Narrowing operations.
1656 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1657 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1658 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1660 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1661 [(set_attr "type" "neon_shift_imm_narrow_q")]
1664 (define_expand "vec_pack_trunc_<mode>"
1665 [(match_operand:<VNARROWD> 0 "register_operand")
1666 (match_operand:VDN 1 "register_operand")
1667 (match_operand:VDN 2 "register_operand")]
1670 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1671 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1672 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1674 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1675 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1676 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1682 (define_insn "vec_pack_trunc_<mode>"
1683 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1684 (vec_concat:<VNARROWQ2>
1685 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1686 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1689 if (BYTES_BIG_ENDIAN)
1690 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1692 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1694 [(set_attr "type" "multiple")
1695 (set_attr "length" "8")]
1698 ;; Widening operations.
1700 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1701 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1702 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1703 (match_operand:VQW 1 "register_operand" "w")
1704 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1707 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1708 [(set_attr "type" "neon_shift_imm_long")]
1711 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1712 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1713 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1714 (match_operand:VQW 1 "register_operand" "w")
1715 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1718 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1719 [(set_attr "type" "neon_shift_imm_long")]
1722 (define_expand "vec_unpack<su>_hi_<mode>"
1723 [(match_operand:<VWIDE> 0 "register_operand")
1724 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1727 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1728 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1734 (define_expand "vec_unpack<su>_lo_<mode>"
1735 [(match_operand:<VWIDE> 0 "register_operand")
1736 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1739 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1740 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1746 ;; Widening arithmetic.
1748 (define_insn "*aarch64_<su>mlal_lo<mode>"
1749 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1752 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1753 (match_operand:VQW 2 "register_operand" "w")
1754 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1755 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1756 (match_operand:VQW 4 "register_operand" "w")
1758 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1760 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1761 [(set_attr "type" "neon_mla_<Vetype>_long")]
1764 (define_insn "*aarch64_<su>mlal_hi<mode>"
1765 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1768 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1769 (match_operand:VQW 2 "register_operand" "w")
1770 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1771 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772 (match_operand:VQW 4 "register_operand" "w")
1774 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1776 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1777 [(set_attr "type" "neon_mla_<Vetype>_long")]
1780 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1781 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1783 (match_operand:<VWIDE> 1 "register_operand" "0")
1785 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1786 (match_operand:VQW 2 "register_operand" "w")
1787 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1788 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1789 (match_operand:VQW 4 "register_operand" "w")
1792 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1793 [(set_attr "type" "neon_mla_<Vetype>_long")]
1796 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
1797 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1799 (match_operand:<VWIDE> 1 "register_operand" "0")
1801 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802 (match_operand:VQW 2 "register_operand" "w")
1803 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1804 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1805 (match_operand:VQW 4 "register_operand" "w")
1808 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1809 [(set_attr "type" "neon_mla_<Vetype>_long")]
1812 (define_expand "aarch64_<su>mlsl_hi<mode>"
1813 [(match_operand:<VWIDE> 0 "register_operand")
1814 (match_operand:<VWIDE> 1 "register_operand")
1815 (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
1816 (match_operand:VQW 3 "register_operand")]
1819 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1820 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
1821 operands[2], p, operands[3]));
1826 (define_insn "*aarch64_<su>mlal<mode>"
1827 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1831 (match_operand:VD_BHSI 1 "register_operand" "w"))
1833 (match_operand:VD_BHSI 2 "register_operand" "w")))
1834 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1836 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1837 [(set_attr "type" "neon_mla_<Vetype>_long")]
1840 (define_insn "aarch64_<su>mlsl<mode>"
1841 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1843 (match_operand:<VWIDE> 1 "register_operand" "0")
1846 (match_operand:VD_BHSI 2 "register_operand" "w"))
1848 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1850 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1851 [(set_attr "type" "neon_mla_<Vetype>_long")]
1854 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1855 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1856 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1857 (match_operand:VQW 1 "register_operand" "w")
1858 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1859 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1860 (match_operand:VQW 2 "register_operand" "w")
1863 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1864 [(set_attr "type" "neon_mul_<Vetype>_long")]
1867 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
1868 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1869 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
1870 (match_operand:VD_BHSI 1 "register_operand" "w"))
1872 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
1874 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1875 [(set_attr "type" "neon_mul_<Vetype>_long")]
1878 (define_expand "vec_widen_<su>mult_lo_<mode>"
1879 [(match_operand:<VWIDE> 0 "register_operand")
1880 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1881 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1884 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1885 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1892 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1894 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1895 (match_operand:VQW 1 "register_operand" "w")
1896 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1897 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1898 (match_operand:VQW 2 "register_operand" "w")
1901 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1902 [(set_attr "type" "neon_mul_<Vetype>_long")]
1905 (define_expand "vec_widen_<su>mult_hi_<mode>"
1906 [(match_operand:<VWIDE> 0 "register_operand")
1907 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1908 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1911 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1912 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1920 ;; vmull_lane_s16 intrinsics
1921 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
1922 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1925 (match_operand:<VCOND> 1 "register_operand" "w"))
1927 (vec_duplicate:<VCOND>
1929 (match_operand:VDQHS 2 "register_operand" "<vwx>")
1930 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
1933 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
1934 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
1936 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
1939 ;; vmlal_lane_s16 intrinsics
1940 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
1941 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1945 (match_operand:<VCOND> 2 "register_operand" "w"))
1947 (vec_duplicate:<VCOND>
1949 (match_operand:VDQHS 3 "register_operand" "<vwx>")
1950 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
1951 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1954 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1955 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
1957 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
1960 ;; FP vector operations.
1961 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1962 ;; double-precision (64-bit) floating-point data types and arithmetic as
1963 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1964 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1966 ;; Floating-point operations can raise an exception. Vectorizing such
1967 ;; operations is safe because of the reasons explained below.
1969 ;; ARMv8 permits an extension to enable trapped floating-point
1970 ;; exception handling, however this is an optional feature. In the
1971 ;; event of a floating-point exception being raised by vectorised
1972 ;; code then:
1973 ;; 1. If trapped floating-point exceptions are available, then a trap
1974 ;; will be taken when any lane raises an enabled exception. A trap
1975 ;; handler may determine which lane raised the exception.
1976 ;; 2. Alternatively a sticky exception flag is set in the
1977 ;; floating-point status register (FPSR). Software may explicitly
1978 ;; test the exception flags, in which case the tests will either
1979 ;; prevent vectorisation, allowing precise identification of the
1980 ;; failing operation, or if tested outside of vectorisable regions
1981 ;; then the specific operation and lane are not of interest.
1983 ;; FP arithmetic operations.
1985 (define_insn "add<mode>3"
1986 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1987 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1988 (match_operand:VHSDF 2 "register_operand" "w")))]
1990 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1991 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1994 (define_insn "sub<mode>3"
1995 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1996 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1997 (match_operand:VHSDF 2 "register_operand" "w")))]
1999 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2000 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2003 (define_insn "mul<mode>3"
2004 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2005 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2006 (match_operand:VHSDF 2 "register_operand" "w")))]
2008 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2009 [(set_attr "type" "neon_fp_mul_<stype><q>")]
2012 (define_expand "div<mode>3"
2013 [(set (match_operand:VHSDF 0 "register_operand")
2014 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2015 (match_operand:VHSDF 2 "register_operand")))]
2018 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2021 operands[1] = force_reg (<MODE>mode, operands[1]);
2024 (define_insn "*div<mode>3"
2025 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2026 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2027 (match_operand:VHSDF 2 "register_operand" "w")))]
2029 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2030 [(set_attr "type" "neon_fp_div_<stype><q>")]
2033 (define_insn "neg<mode>2"
2034 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2035 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2037 "fneg\\t%0.<Vtype>, %1.<Vtype>"
2038 [(set_attr "type" "neon_fp_neg_<stype><q>")]
2041 (define_insn "abs<mode>2"
2042 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2043 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2045 "fabs\\t%0.<Vtype>, %1.<Vtype>"
2046 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: operand 0 = op1 * op2 + op3.  The "0" constraint
;; ties the accumulator (operand 3) to the destination, matching FMLA's
;; destructive accumulate semantics.
2049 (define_insn "fma<mode>4"
2050 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2051 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2052 (match_operand:VHSDF 2 "register_operand" "w")
2053 (match_operand:VHSDF 3 "register_operand" "0")))]
2055 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2056 [(set_attr "type" "neon_fp_mla_<stype><q>")]

;; FMLA by-element form: one multiplicand is a single lane of operand 1,
;; duplicated across the vector.  The lane number is converted to the
;; architectural (endian-corrected) index before printing.
2059 (define_insn "*aarch64_fma4_elt<mode>"
2060 [(set (match_operand:VDQF 0 "register_operand" "=w")
2064 (match_operand:VDQF 1 "register_operand" "<h_con>")
2065 (parallel [(match_operand:SI 2 "immediate_operand")])))
2066 (match_operand:VDQF 3 "register_operand" "w")
2067 (match_operand:VDQF 4 "register_operand" "0")))]
2070 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2071 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2073 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; As above, but the lane source has the "swapped width" mode (e.g. a
;; V2SF lane feeding a V4SF multiply); lane index is taken in
;; <VSWAP_WIDTH>mode for the endianness correction.
2076 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2077 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2079 (vec_duplicate:VDQSF
2081 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2082 (parallel [(match_operand:SI 2 "immediate_operand")])))
2083 (match_operand:VDQSF 3 "register_operand" "w")
2084 (match_operand:VDQSF 4 "register_operand" "0")))]
2087 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2088 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2090 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLA where the duplicated multiplicand comes from a scalar register,
;; so lane 0 of operand 1 is always used and no endian fixup is needed.
2093 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
2094 [(set (match_operand:VMUL 0 "register_operand" "=w")
2097 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2098 (match_operand:VMUL 2 "register_operand" "w")
2099 (match_operand:VMUL 3 "register_operand" "0")))]
2101 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2102 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]

;; Scalar DF fma taking one multiplicand from a V2DF lane; uses the
;; vector-indexed FMLA on the .2d form.
2105 (define_insn "*aarch64_fma4_elt_to_64v2df"
2106 [(set (match_operand:DF 0 "register_operand" "=w")
2109 (match_operand:V2DF 1 "register_operand" "w")
2110 (parallel [(match_operand:SI 2 "immediate_operand")]))
2111 (match_operand:DF 3 "register_operand" "w")
2112 (match_operand:DF 4 "register_operand" "0")))]
2115 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2116 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2118 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract: operand 0 = -op1 * op2 + op3, implemented
;; with FMLS (accumulator tied to the destination via the "0" constraint).
2121 (define_insn "fnma<mode>4"
2122 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2124 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2125 (match_operand:VHSDF 2 "register_operand" "w")
2126 (match_operand:VHSDF 3 "register_operand" "0")))]
2128 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2129 [(set_attr "type" "neon_fp_mla_<stype><q>")]

;; FMLS by-element form; mirrors *aarch64_fma4_elt<mode> with the first
;; multiplicand negated.  Lane index is endian-corrected before printing.
2132 (define_insn "*aarch64_fnma4_elt<mode>"
2133 [(set (match_operand:VDQF 0 "register_operand" "=w")
2136 (match_operand:VDQF 3 "register_operand" "w"))
2139 (match_operand:VDQF 1 "register_operand" "<h_con>")
2140 (parallel [(match_operand:SI 2 "immediate_operand")])))
2141 (match_operand:VDQF 4 "register_operand" "0")))]
2144 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2145 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2147 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLS by-element with the lane source in the swapped-width vector mode.
2150 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2151 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2154 (match_operand:VDQSF 3 "register_operand" "w"))
2155 (vec_duplicate:VDQSF
2157 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2158 (parallel [(match_operand:SI 2 "immediate_operand")])))
2159 (match_operand:VDQSF 4 "register_operand" "0")))]
2162 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2163 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2165 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLS with the duplicated multiplicand taken from a scalar register
;; (always lane 0, no endian fixup needed).
2168 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2169 [(set (match_operand:VMUL 0 "register_operand" "=w")
2172 (match_operand:VMUL 2 "register_operand" "w"))
2174 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2175 (match_operand:VMUL 3 "register_operand" "0")))]
2177 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2178 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]

;; Scalar DF multiply-subtract taking one multiplicand from a V2DF lane.
2181 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2182 [(set (match_operand:DF 0 "register_operand" "=w")
2185 (match_operand:V2DF 1 "register_operand" "w")
2186 (parallel [(match_operand:SI 2 "immediate_operand")]))
2188 (match_operand:DF 3 "register_operand" "w"))
2189 (match_operand:DF 4 "register_operand" "0")))]
2192 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2193 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2195 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2198 ;; Vector versions of the floating-point frint patterns.
2199 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Each rounding variant maps to FRINT<suffix> on the whole vector.
2200 (define_insn "<frint_pattern><mode>2"
2201 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2202 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2205 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2206 [(set_attr "type" "neon_fp_round_<stype><q>")]

2209 ;; Vector versions of the fcvt standard patterns.
2210 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per <frint_suffix>) then convert to signed/unsigned integer
;; (FIXUORS selects fix vs unsigned_fix; <su> prints s/u).
2211 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2212 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2213 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2214 [(match_operand:VHSDF 1 "register_operand" "w")]
2217 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2218 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2221 ;; HF Scalar variants of related SIMD instructions.
;; Half-precision scalar round-and-convert to HImode integer; requires
;; the ARMv8.2-A FP16 instructions (TARGET_SIMD_F16INST).
2222 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2223 [(set (match_operand:HI 0 "register_operand" "=w")
2224 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2226 "TARGET_SIMD_F16INST"
2227 "fcvt<frint_suffix><su>\t%h0, %h1"
2228 [(set_attr "type" "neon_fp_to_int_s")]

;; Truncating (round-toward-zero) HF -> HI conversion, FCVTZS/FCVTZU.
2231 (define_insn "<optab>_trunchfhi2"
2232 [(set (match_operand:HI 0 "register_operand" "=w")
2233 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2234 "TARGET_SIMD_F16INST"
2235 "fcvtz<su>\t%h0, %h1"
2236 [(set_attr "type" "neon_fp_to_int_s")]

;; HI -> HF integer-to-float conversion, SCVTF/UCVTF on the h regs.
2239 (define_insn "<optab>hihf2"
2240 [(set (match_operand:HF 0 "register_operand" "=w")
2241 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2242 "TARGET_SIMD_F16INST"
2243 "<su_optab>cvtf\t%h0, %h1"
2244 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of two with a float->int conversion
;; into a single fixed-point FCVTZ with an #fbits immediate.  The
;; condition checks that the constant's log2 fits the element width.
2247 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2248 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2249 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2251 (match_operand:VDQF 1 "register_operand" "w")
2252 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2255 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2256 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2258 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2260 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2261 output_asm_insn (buf, operands);
2264 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]

;; Standard-pattern expander for float->int (fix/fixuns) conversions.
2267 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2268 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2269 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2270 [(match_operand:VHSDF 1 "register_operand")]

;; Standard-pattern expander for the truncating fix_trunc forms.
2275 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2276 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2277 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2278 [(match_operand:VHSDF 1 "register_operand")]

;; ftrunc (round toward zero, result still FP) expander.
2283 (define_expand "ftrunc<VHSDF:mode>2"
2284 [(set (match_operand:VHSDF 0 "register_operand")
2285 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]

;; Vector int->float conversion, SCVTF/UCVTF.
2290 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2291 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2293 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2295 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2296 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2299 ;; Conversions between vectors of floats and doubles.
2300 ;; Contains a mix of patterns to match standard pattern names
2301 ;; and those for intrinsics.

2303 ;; Float widening operations.

;; Widen the low half of a 128-bit FP vector (selected by the
;; vect_par_cnst_lo_half parallel in operand 2) with FCVTL.
2305 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2307 (float_extend:<VWIDE> (vec_select:<VHALF>
2308 (match_operand:VQ_HSF 1 "register_operand" "w")
2309 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2312 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2313 [(set_attr "type" "neon_fp_cvt_widen_s")]
2316 ;; Convert between fixed-point and floating-point (vector modes)

;; Float -> fixed-point: FCVTZS/FCVTZU with an immediate fractional-bit
;; count in operand 2.
2318 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2319 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2320 (unspec:<VHSDF:FCVT_TARGET>
2321 [(match_operand:VHSDF 1 "register_operand" "w")
2322 (match_operand:SI 2 "immediate_operand" "i")]
2325 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2326 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]

;; Fixed-point -> float: SCVTF/UCVTF with an immediate fractional-bit
;; count in operand 2.
2329 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2330 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2331 (unspec:<VDQ_HSDI:FCVT_TARGET>
2332 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2333 (match_operand:SI 2 "immediate_operand" "i")]
2336 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2337 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2340 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2341 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2342 ;; the meaning of HI and LO changes depending on the target endianness.
2343 ;; While elsewhere we map the higher numbered elements of a vector to
2344 ;; the lower architectural lanes of the vector, for these patterns we want
2345 ;; to always treat "hi" as referring to the higher architectural lanes.
2346 ;; Consequently, while the patterns below look inconsistent with our
2347 ;; other big-endian patterns their behavior is as required.

;; Expand the "lo" half widening conversion: build the lo-half lane
;; selector (last argument false) and emit the FCVTL insn above.
2349 (define_expand "vec_unpacks_lo_<mode>"
2350 [(match_operand:<VWIDE> 0 "register_operand")
2351 (match_operand:VQ_HSF 1 "register_operand")]
2354 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2355 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Widen the high half of a 128-bit FP vector (selected by the
;; vect_par_cnst_hi_half parallel in operand 2) with FCVTL2.
2361 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2362 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2363 (float_extend:<VWIDE> (vec_select:<VHALF>
2364 (match_operand:VQ_HSF 1 "register_operand" "w")
2365 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2368 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2369 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expand the "hi" half widening conversion: build the hi-half lane
;; selector (last argument true) and emit the FCVTL2 insn
;; aarch64_simd_vec_unpacks_hi_<mode>.  Emitting the _lo_ variant here
;; would not match: its operand 2 predicate is vect_par_cnst_lo_half,
;; while p is a hi-half parallel.
2372 (define_expand "vec_unpacks_hi_<mode>"
2373 [(match_operand:<VWIDE> 0 "register_operand")
2374 (match_operand:VQ_HSF 1 "register_operand")]
2377 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2378 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a 64-bit FP vector to its double-width mode with FCVTL
;; (intrinsic form; whole input vector is the low half).
2383 (define_insn "aarch64_float_extend_lo_<Vwide>"
2384 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2385 (float_extend:<VWIDE>
2386 (match_operand:VDF 1 "register_operand" "w")))]
2388 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2389 [(set_attr "type" "neon_fp_cvt_widen_s")]
2392 ;; Float narrowing operations.

;; Narrow a double-width FP vector into a 64-bit result with FCVTN.
2394 (define_insn "aarch64_float_truncate_lo_<mode>"
2395 [(set (match_operand:VDF 0 "register_operand" "=w")
2397 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2399 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2400 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2: narrow operand 2 into the high half of the destination while
;; keeping operand 1 (tied to the output) in the low half.  Little-endian
;; form: (vec_concat low-kept high-narrowed).
2403 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2404 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2406 (match_operand:VDF 1 "register_operand" "0")
2408 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2409 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2410 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2411 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]

;; Big-endian form of the same FCVTN2 — the vec_concat operand order is
;; swapped because lane numbering reverses with BYTES_BIG_ENDIAN.
2414 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2415 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2418 (match_operand:<VWIDE> 2 "register_operand" "w"))
2419 (match_operand:VDF 1 "register_operand" "0")))]
2420 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2421 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2422 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le or _be FCVTN2 insn above based on target
;; endianness, so callers need not care about lane ordering.
2425 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2426 [(match_operand:<VDBL> 0 "register_operand")
2427 (match_operand:VDF 1 "register_operand")
2428 (match_operand:<VWIDE> 2 "register_operand")]
2431 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2432 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2433 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2434 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN narrows one input into a
;; temporary's low half, then FCVTN2 narrows the other into the high
;; half.  Which operand is "lo" vs "hi" flips on big-endian.
2439 (define_expand "vec_pack_trunc_v2df"
2440 [(set (match_operand:V4SF 0 "register_operand")
2442 (float_truncate:V2SF
2443 (match_operand:V2DF 1 "register_operand"))
2444 (float_truncate:V2SF
2445 (match_operand:V2DF 2 "register_operand"))
2449 rtx tmp = gen_reg_rtx (V2SFmode);
2450 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2451 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2453 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2454 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2455 tmp, operands[hi]));
;; Pack two scalar DFs into a V2SF: assemble the two scalars into a V2DF
;; temporary (move_lo_quad / move_hi_quad), then narrow it with FCVTN.
;; Endianness again swaps which input lands in which half.
2460 (define_expand "vec_pack_trunc_df"
2461 [(set (match_operand:V2SF 0 "register_operand")
2464 (match_operand:DF 1 "register_operand")
2466 (match_operand:DF 2 "register_operand"))
2470 rtx tmp = gen_reg_rtx (V2SFmode);
2471 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2472 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2474 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2475 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2476 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2482 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2484 ;; a = (b < c) ? b : c;
2485 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2486 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2489 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2490 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2491 ;; operand will be returned when both operands are zero (i.e. they may not
2492 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2493 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2494 ;; NaNs.

;; smax/smin standard patterns; emitted as FMAXNM/FMINNM (the "number"
;; forms) since NaN/signed-zero behavior is unspecified here anyway.
2496 (define_insn "<su><maxmin><mode>3"
2497 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2498 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2499 (match_operand:VHSDF 2 "register_operand" "w")))]
2501 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2502 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2505 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2506 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2507 ;; which implement the IEEE fmax ()/fmin () functions.
;; Kept as an unspec so the exact IEEE NaN semantics of the chosen
;; instruction are preserved (no RTL smax/smin simplification applies).
2508 (define_insn "<maxmin_uns><mode>3"
2509 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2510 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2511 (match_operand:VHSDF 2 "register_operand" "w")]
2514 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2515 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2518 ;; 'across lanes' add.

;; Integer add-reduction to a scalar: perform the across-lanes add into
;; a scratch vector, then extract architectural lane 0 (endian-corrected).
2520 (define_expand "reduc_plus_scal_<mode>"
2521 [(match_operand:<VEL> 0 "register_operand")
2522 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2526 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2527 rtx scratch = gen_reg_rtx (<MODE>mode);
2528 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2529 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Pairwise FP add (FADDP) of two vectors.
2534 (define_insn "aarch64_faddp<mode>"
2535 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2536 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2537 (match_operand:VHSDF 2 "register_operand" "w")]
2540 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2541 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]

;; Across-lanes integer add (ADDV / ADDP depending on <vp>), result in
;; the low element of the destination.
2544 (define_insn "aarch64_reduc_plus_internal<mode>"
2545 [(set (match_operand:VDQV 0 "register_operand" "=w")
2546 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2549 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2550 [(set_attr "type" "neon_reduc_add<q>")]
2553 ;; ADDV with result zero-extended to SI/DImode (for popcount).
;; Lets combine fold the zero_extend into the reduction itself.
2554 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
2555 [(set (match_operand:GPI 0 "register_operand" "=w")
2557 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
2560 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
2561 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]

;; V2SI has no ADDV; a self-pairwise ADDP leaves the sum in every lane.
2564 (define_insn "aarch64_reduc_plus_internalv2si"
2565 [(set (match_operand:V2SI 0 "register_operand" "=w")
2566 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2569 "addp\\t%0.2s, %1.2s, %1.2s"
2570 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with a single scalar-output FADDP.
2573 (define_insn "reduc_plus_scal_<mode>"
2574 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2575 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2578 "faddp\\t%<Vetype>0, %1.<Vtype>"
2579 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]

;; V4SF reduces with two rounds of pairwise FADDP, then a lane extract
;; of architectural lane 0.
2582 (define_expand "reduc_plus_scal_v4sf"
2583 [(set (match_operand:SF 0 "register_operand")
2584 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2588 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2589 rtx scratch = gen_reg_rtx (V4SFmode);
2590 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2591 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2592 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits per element (CLS).
2596 (define_insn "clrsb<mode>2"
2597 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2598 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2600 "cls\\t%0.<Vtype>, %1.<Vtype>"
2601 [(set_attr "type" "neon_cls<q>")]

;; Count leading zeros per element (CLZ).
2604 (define_insn "clz<mode>2"
2605 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2606 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2608 "clz\\t%0.<Vtype>, %1.<Vtype>"
2609 [(set_attr "type" "neon_cls<q>")]

;; Population count per byte (CNT); byte vectors only.
2612 (define_insn "popcount<mode>2"
2613 [(set (match_operand:VB 0 "register_operand" "=w")
2614 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2616 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2617 [(set_attr "type" "neon_cnt<q>")]
2620 ;; 'across lanes' max and min ops.

2622 ;; Template for outputting a scalar, so we can create __builtins which can be
2623 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP max/min reduction to scalar: reduce into a scratch vector, then
;; extract architectural lane 0 (endian-corrected).
2624 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2625 [(match_operand:<VEL> 0 "register_operand")
2626 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2630 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2631 rtx scratch = gen_reg_rtx (<MODE>mode);
2632 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2634 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));

2639 ;; Likewise for integer cases, signed and unsigned.
2640 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2641 [(match_operand:<VEL> 0 "register_operand")
2642 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2646 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2647 rtx scratch = gen_reg_rtx (<MODE>mode);
2648 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2650 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes max/min (SMAXV/UMAXV/SMINV/UMINV).
2655 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2656 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2657 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2660 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2661 [(set_attr "type" "neon_reduc_minmax<q>")]

;; V2SI has no across-lanes form; use the self-pairwise variant.
2664 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2665 [(set (match_operand:V2SI 0 "register_operand" "=w")
2666 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2669 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2670 [(set_attr "type" "neon_reduc_minmax")]

;; FP across-lanes max/min (<vp> selects the v/p instruction form).
2673 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2674 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2675 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2678 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2679 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2682 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2684 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2687 ;; Thus our BSL is of the form:
2688 ;; op0 = bsl (mask, op2, op3)
2689 ;; We can use any of:
2692 ;; bsl mask, op1, op2
2693 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2694 ;; bit op0, op2, mask
2695 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2696 ;; bif op0, op1, mask

2698 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2699 ;; Some forms of straight-line code may generate the equivalent form
2700 ;; in *aarch64_simd_bsl<mode>_alt.

;; BSL as XOR/AND/XOR: op0 = ((op3 ^ op2) & mask) ^ op3.  Each
;; constraint alternative ties a different input to the output, picking
;; BSL, BIT or BIF accordingly.
2702 (define_insn "aarch64_simd_bsl<mode>_internal"
2703 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2707 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2708 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2709 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2710 (match_dup:<V_INT_EQUIV> 3)
2714 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2715 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2716 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2717 [(set_attr "type" "neon_bsl<q>")]
2720 ;; We need this form in addition to the above pattern to match the case
2721 ;; when combine tries merging three insns such that the second operand of
2722 ;; the outer XOR matches the second operand of the inner XOR rather than
2723 ;; the first. The two are equivalent but since recog doesn't try all
2724 ;; permutations of commutative operations, we have to have a separate pattern.

;; Commuted form of the BSL identity: op0 = ((op3 ^ op2) & mask) ^ op2.
2726 (define_insn "*aarch64_simd_bsl<mode>_alt"
2727 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2731 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2732 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2733 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2734 (match_dup:<V_INT_EQUIV> 2)))]
2737 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2738 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2739 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2740 [(set_attr "type" "neon_bsl<q>")]
2743 ;; DImode is special, we want to avoid computing operations which are
2744 ;; more naturally computed in general purpose registers in the vector
2745 ;; registers. If we do that, we need to move all three operands from general
2746 ;; purpose registers to vector registers, then back again. However, we
2747 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2748 ;; optimizations based on the component operations of a BSL.

2750 ;; That means we need a splitter back to the individual operations, if they
2751 ;; would be better calculated on the integer side.

;; DImode BSL.  The fourth alternative keeps everything in GP registers
;; (early-clobber &r) and is split back into xor/and/xor after the
;; register allocation decision is known.
2753 (define_insn_and_split "aarch64_simd_bsldi_internal"
2754 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2758 (match_operand:DI 3 "register_operand" "w,0,w,r")
2759 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2760 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2765 bsl\\t%0.8b, %2.8b, %3.8b
2766 bit\\t%0.8b, %2.8b, %1.8b
2767 bif\\t%0.8b, %3.8b, %1.8b
2769 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2770 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2772 /* Split back to individual operations. If we're before reload, and
2773 able to create a temporary register, do so. If we're after reload,
2774 we've got an early-clobber destination register, so use that.
2775 Otherwise, we can't create pseudos and we can't yet guarantee that
2776 operands[0] is safe to write, so FAIL to split. */
2779 if (reload_completed)
2780 scratch = operands[0];
2781 else if (can_create_pseudo_p ())
2782 scratch = gen_reg_rtx (DImode);
;; Compute op0 = ((op2 ^ op3) & op1) ^ op3 on the GP side.
2786 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2787 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2788 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2791 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2792 (set_attr "length" "4,4,4,12")]
;; Commuted DImode BSL (outer XOR with operand 2 instead of operand 3);
;; same GP-register split strategy as aarch64_simd_bsldi_internal.
2795 (define_insn_and_split "aarch64_simd_bsldi_alt"
2796 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2800 (match_operand:DI 3 "register_operand" "w,w,0,r")
2801 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2802 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2807 bsl\\t%0.8b, %3.8b, %2.8b
2808 bit\\t%0.8b, %3.8b, %1.8b
2809 bif\\t%0.8b, %2.8b, %1.8b
2811 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2812 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2814 /* Split back to individual operations. If we're before reload, and
2815 able to create a temporary register, do so. If we're after reload,
2816 we've got an early-clobber destination register, so use that.
2817 Otherwise, we can't create pseudos and we can't yet guarantee that
2818 operands[0] is safe to write, so FAIL to split. */
2821 if (reload_completed)
2822 scratch = operands[0];
2823 else if (can_create_pseudo_p ())
2824 scratch = gen_reg_rtx (DImode);
;; Compute op0 = ((op2 ^ op3) & op1) ^ op2 on the GP side.
2828 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2829 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2830 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2833 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2834 (set_attr "length" "4,4,4,12")]
;; Public BSL expander.  For FP modes the select is done on the integer
;; view: operands are punned to <V_INT_EQUIV> with gen_lowpart, the
;; integer BSL insn runs on a temporary, and the result is punned back.
2837 (define_expand "aarch64_simd_bsl<mode>"
2838 [(match_operand:VALLDIF 0 "register_operand")
2839 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2840 (match_operand:VALLDIF 2 "register_operand")
2841 (match_operand:VALLDIF 3 "register_operand")]
2844 /* We can't alias operands together if they have different modes. */
2845 rtx tmp = operands[0];
2846 if (FLOAT_MODE_P (<MODE>mode))
2848 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2849 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2850 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2852 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2853 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2857 if (tmp != operands[0])
2858 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: select operand 1 where mask (operand 3) is set, else
;; operand 2.  The (-1, 0) and (0, -1) constant cases collapse to a
;; move or a one's-complement of the mask; otherwise force the values
;; into registers and emit a BSL.
2863 (define_expand "vcond_mask_<mode><v_int_equiv>"
2864 [(match_operand:VALLDI 0 "register_operand")
2865 (match_operand:VALLDI 1 "nonmemory_operand")
2866 (match_operand:VALLDI 2 "nonmemory_operand")
2867 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2870 /* If we have (a = (P) ? -1 : 0);
2871 Then we can simply move the generated mask (result must be int). */
2872 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2873 && operands[2] == CONST0_RTX (<MODE>mode))
2874 emit_move_insn (operands[0], operands[3]);
2875 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2876 else if (operands[1] == CONST0_RTX (<MODE>mode)
2877 && operands[2] == CONSTM1_RTX (<MODE>mode))
2878 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2881 if (!REG_P (operands[1]))
2882 operands[1] = force_reg (<MODE>mode, operands[1]);
2883 if (!REG_P (operands[2]))
2884 operands[2] = force_reg (<MODE>mode, operands[2]);
2885 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2886 operands[1], operands[2]));
2892 ;; Patterns comparing two vectors to produce a mask.

;; Integer vector compare producing an all-ones/all-zeros mask per lane.
;; Unsigned GT/GE comparisons with swapped operands implement LTU/LEU;
;; NE is implemented as NOT (EQ).  A zero operand 3 can use the
;; compare-against-zero instruction forms.
2894 (define_expand "vec_cmp<mode><mode>"
2895 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2896 (match_operator 1 "comparison_operator"
2897 [(match_operand:VSDQ_I_DI 2 "register_operand")
2898 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2901 rtx mask = operands[0];
2902 enum rtx_code code = GET_CODE (operands[1]);
2912 if (operands[3] == CONST0_RTX (<MODE>mode))
2917 if (!REG_P (operands[3]))
2918 operands[3] = force_reg (<MODE>mode, operands[3]);
2926 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2930 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2934 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2938 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2942 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2946 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2950 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2954 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2958 /* Handle NE as !EQ. */
2959 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2960 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask))
2964 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare producing an integer mask.  Ordered comparisons map
;; to FCMEQ/FCMGE/FCMGT (operands swapped for LT/LE); NE is NOT (EQ).
;; Unordered (UN*) comparisons first compute per-operand NaN masks with
;; self-FCMEQ, zero out NaN lanes, compare, then OR in the unordered
;; lanes; ORDERED/UNORDERED/UNEQ are built from self-FCMEQ masks.
;; LTGT avoids spurious FP exceptions by using (a > b) | (b > a).
2974 (define_expand "vec_cmp<mode><v_int_equiv>"
2975 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2976 (match_operator 1 "comparison_operator"
2977 [(match_operand:VDQF 2 "register_operand")
2978 (match_operand:VDQF 3 "nonmemory_operand")]))]
2981 int use_zero_form = 0;
2982 enum rtx_code code = GET_CODE (operands[1]);
2983 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2985 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2994 if (operands[3] == CONST0_RTX (<MODE>mode))
3001 if (!REG_P (operands[3]))
3002 operands[3] = force_reg (<MODE>mode, operands[3]);
3012 comparison = gen_aarch64_cmlt<mode>;
3017 std::swap (operands[2], operands[3]);
3021 comparison = gen_aarch64_cmgt<mode>;
3026 comparison = gen_aarch64_cmle<mode>;
3031 std::swap (operands[2], operands[3]);
3035 comparison = gen_aarch64_cmge<mode>;
3039 comparison = gen_aarch64_cmeq<mode>;
3057 /* All of the above must not raise any FP exceptions. Thus we first
3058 check each operand for NaNs and force any elements containing NaN to
3059 zero before using them in the compare.
3060 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
3061 (cm<cc> (isnan (a) ? 0.0 : a,
3062 isnan (b) ? 0.0 : b))
3063 We use the following transformations for doing the comparisions:
3067 a UNLT b -> b GT a. */
3069 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
3070 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
3071 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
3072 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
3073 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
3074 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
3075 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
3076 lowpart_subreg (<V_INT_EQUIV>mode,
3079 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
3080 lowpart_subreg (<V_INT_EQUIV>mode,
3083 gcc_assert (comparison != NULL);
3084 emit_insn (comparison (operands[0],
3085 lowpart_subreg (<MODE>mode,
3086 tmp0, <V_INT_EQUIV>mode),
3087 lowpart_subreg (<MODE>mode,
3088 tmp1, <V_INT_EQUIV>mode)));
3089 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
3099 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
3100 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
3106 a NE b -> ~(a EQ b) */
3107 gcc_assert (comparison != NULL);
3108 emit_insn (comparison (operands[0], operands[2], operands[3]));
3110 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3114 /* LTGT is not guranteed to not generate a FP exception. So let's
3115 go the faster way : ((a > b) || (b > a)). */
3116 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
3117 operands[2], operands[3]));
3118 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
3119 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
3125 /* cmeq (a, a) & cmeq (b, b). */
3126 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
3127 operands[2], operands[2]));
3128 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
3129 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
3131 if (code == UNORDERED)
3132 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3133 else if (code == UNEQ)
3135 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3136 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned integer vector compare: identical to vec_cmp since the
;; rtx code in operand 1 (GTU/GEU/...) already encodes signedness.
3147 (define_expand "vec_cmpu<mode><mode>"
3148 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3149 (match_operator 1 "comparison_operator"
3150 [(match_operand:VSDQ_I_DI 2 "register_operand")
3151 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3154 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3155 operands[2], operands[3]));
;; vcond: compute the comparison mask with vec_cmp, then select with
;; vcond_mask.  NE is rewritten as EQ with operands 1/2 swapped so the
;; extra mask inversion in vec_cmp's NE handling is avoided.
3159 (define_expand "vcond<mode><mode>"
3160 [(set (match_operand:VALLDI 0 "register_operand")
3161 (if_then_else:VALLDI
3162 (match_operator 3 "comparison_operator"
3163 [(match_operand:VALLDI 4 "register_operand")
3164 (match_operand:VALLDI 5 "nonmemory_operand")])
3165 (match_operand:VALLDI 1 "nonmemory_operand")
3166 (match_operand:VALLDI 2 "nonmemory_operand")))]
3169 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3170 enum rtx_code code = GET_CODE (operands[3]);
3172 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3173 it as well as switch operands 1/2 in order to avoid the additional
3177 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3178 operands[4], operands[5]);
3179 std::swap (operands[1], operands[2]);
3181 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3182 operands[4], operands[5]));
3183 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3184 operands[2], mask));
3189 (define_expand "vcond<v_cmp_mixed><mode>"
3190 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3191 (if_then_else:<V_cmp_mixed>
3192 (match_operator 3 "comparison_operator"
3193 [(match_operand:VDQF_COND 4 "register_operand")
3194 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3195 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3196 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3199 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3200 enum rtx_code code = GET_CODE (operands[3]);
3202 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3203 it as well as switch operands 1/2 in order to avoid the additional
3207 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3208 operands[4], operands[5]);
3209 std::swap (operands[1], operands[2]);
3211 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3212 operands[4], operands[5]));
3213 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3214 operands[0], operands[1],
3215 operands[2], mask));
3220 (define_expand "vcondu<mode><mode>"
3221 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3222 (if_then_else:VSDQ_I_DI
3223 (match_operator 3 "comparison_operator"
3224 [(match_operand:VSDQ_I_DI 4 "register_operand")
3225 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3226 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3227 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3230 rtx mask = gen_reg_rtx (<MODE>mode);
3231 enum rtx_code code = GET_CODE (operands[3]);
3233 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3234 it as well as switch operands 1/2 in order to avoid the additional
3238 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3239 operands[4], operands[5]);
3240 std::swap (operands[1], operands[2]);
3242 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3243 operands[4], operands[5]));
3244 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3245 operands[2], mask));
3249 (define_expand "vcondu<mode><v_cmp_mixed>"
3250 [(set (match_operand:VDQF 0 "register_operand")
3252 (match_operator 3 "comparison_operator"
3253 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3254 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3255 (match_operand:VDQF 1 "nonmemory_operand")
3256 (match_operand:VDQF 2 "nonmemory_operand")))]
3259 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3260 enum rtx_code code = GET_CODE (operands[3]);
3262 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3263 it as well as switch operands 1/2 in order to avoid the additional
3267 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3268 operands[4], operands[5]);
3269 std::swap (operands[1], operands[2]);
3271 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3273 operands[4], operands[5]));
3274 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3275 operands[2], mask));
3279 ;; Patterns for AArch64 SIMD Intrinsics.
3281 ;; Lane extraction with sign extension to general purpose register.
3282 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3283 [(set (match_operand:GPI 0 "register_operand" "=r")
3285 (vec_select:<VDQQH:VEL>
3286 (match_operand:VDQQH 1 "register_operand" "w")
3287 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3290 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3291 INTVAL (operands[2]));
3292 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3294 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3297 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3298 [(set (match_operand:GPI 0 "register_operand" "=r")
3300 (vec_select:<VDQQH:VEL>
3301 (match_operand:VDQQH 1 "register_operand" "w")
3302 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3305 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3306 INTVAL (operands[2]));
3307 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3309 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3312 ;; Lane extraction of a value, neither sign nor zero extension
3313 ;; is guaranteed so upper bits should be considered undefined.
3314 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3315 ;; Extracting lane zero is split into a simple move when it is between SIMD
3316 ;; registers or a store.
3317 (define_insn_and_split "aarch64_get_lane<mode>"
3318 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3320 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3321 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3324 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3325 switch (which_alternative)
3328 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3330 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3332 return "st1\\t{%1.<Vetype>}[%2], %0";
3337 "&& reload_completed
3338 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
3339 [(set (match_dup 0) (match_dup 1))]
3341 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
3343 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3346 (define_insn "load_pair_lanes<mode>"
3347 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3349 (match_operand:VDC 1 "memory_operand" "Utq")
3350 (match_operand:VDC 2 "memory_operand" "m")))]
3351 "TARGET_SIMD && !STRICT_ALIGNMENT
3352 && rtx_equal_p (XEXP (operands[2], 0),
3353 plus_constant (Pmode,
3354 XEXP (operands[1], 0),
3355 GET_MODE_SIZE (<MODE>mode)))"
3357 [(set_attr "type" "neon_load1_1reg_q")]
3360 (define_insn "store_pair_lanes<mode>"
3361 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3363 (match_operand:VDC 1 "register_operand" "w, r")
3364 (match_operand:VDC 2 "register_operand" "w, r")))]
3368 stp\\t%x1, %x2, %y0"
3369 [(set_attr "type" "neon_stp, store_16")]
3372 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3375 (define_insn "@aarch64_combinez<mode>"
3376 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3378 (match_operand:VDC 1 "general_operand" "w,?r,m")
3379 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3380 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3385 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3386 (set_attr "arch" "simd,fp,simd")]
3389 (define_insn "@aarch64_combinez_be<mode>"
3390 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3392 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3393 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3394 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3399 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3400 (set_attr "arch" "simd,fp,simd")]
3403 (define_expand "aarch64_combine<mode>"
3404 [(match_operand:<VDBL> 0 "register_operand")
3405 (match_operand:VDC 1 "register_operand")
3406 (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
3409 if (operands[2] == CONST0_RTX (<MODE>mode))
3411 if (BYTES_BIG_ENDIAN)
3412 emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
3415 emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
3419 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3424 (define_expand "@aarch64_simd_combine<mode>"
3425 [(match_operand:<VDBL> 0 "register_operand")
3426 (match_operand:VDC 1 "register_operand")
3427 (match_operand:VDC 2 "register_operand")]
3430 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3431 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3434 [(set_attr "type" "multiple")]
3437 ;; <su><addsub>l<q>.
3439 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3440 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3441 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3442 (match_operand:VQW 1 "register_operand" "w")
3443 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3445 (match_operand:VQW 2 "register_operand" "w")
3448 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3449 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3452 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3453 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3454 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3455 (match_operand:VQW 1 "register_operand" "w")
3456 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3457 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3458 (match_operand:VQW 2 "register_operand" "w")
3461 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3462 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3465 (define_expand "vec_widen_<su>addl_lo_<mode>"
3466 [(match_operand:<VWIDE> 0 "register_operand")
3467 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3468 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3471 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3472 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
3477 (define_expand "vec_widen_<su>addl_hi_<mode>"
3478 [(match_operand:<VWIDE> 0 "register_operand")
3479 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3480 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3483 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3484 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
3489 (define_expand "vec_widen_<su>subl_lo_<mode>"
3490 [(match_operand:<VWIDE> 0 "register_operand")
3491 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3492 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3495 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3496 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
3501 (define_expand "vec_widen_<su>subl_hi_<mode>"
3502 [(match_operand:<VWIDE> 0 "register_operand")
3503 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3504 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3507 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
3513 (define_expand "aarch64_saddl2<mode>"
3514 [(match_operand:<VWIDE> 0 "register_operand")
3515 (match_operand:VQW 1 "register_operand")
3516 (match_operand:VQW 2 "register_operand")]
3519 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3525 (define_expand "aarch64_uaddl2<mode>"
3526 [(match_operand:<VWIDE> 0 "register_operand")
3527 (match_operand:VQW 1 "register_operand")
3528 (match_operand:VQW 2 "register_operand")]
3531 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3532 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3537 (define_expand "aarch64_ssubl2<mode>"
3538 [(match_operand:<VWIDE> 0 "register_operand")
3539 (match_operand:VQW 1 "register_operand")
3540 (match_operand:VQW 2 "register_operand")]
3543 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3544 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3549 (define_expand "aarch64_usubl2<mode>"
3550 [(match_operand:<VWIDE> 0 "register_operand")
3551 (match_operand:VQW 1 "register_operand")
3552 (match_operand:VQW 2 "register_operand")]
3555 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3556 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3561 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3562 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3563 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3564 (match_operand:VD_BHSI 1 "register_operand" "w"))
3566 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3568 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3569 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3572 ;; <su><addsub>w<q>.
3574 (define_expand "widen_ssum<mode>3"
3575 [(set (match_operand:<VDBLW> 0 "register_operand")
3576 (plus:<VDBLW> (sign_extend:<VDBLW>
3577 (match_operand:VQW 1 "register_operand"))
3578 (match_operand:<VDBLW> 2 "register_operand")))]
3581 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3582 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3584 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3586 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3591 (define_expand "widen_ssum<mode>3"
3592 [(set (match_operand:<VWIDE> 0 "register_operand")
3593 (plus:<VWIDE> (sign_extend:<VWIDE>
3594 (match_operand:VD_BHSI 1 "register_operand"))
3595 (match_operand:<VWIDE> 2 "register_operand")))]
3598 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3602 (define_expand "widen_usum<mode>3"
3603 [(set (match_operand:<VDBLW> 0 "register_operand")
3604 (plus:<VDBLW> (zero_extend:<VDBLW>
3605 (match_operand:VQW 1 "register_operand"))
3606 (match_operand:<VDBLW> 2 "register_operand")))]
3609 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3610 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3612 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3614 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3619 (define_expand "widen_usum<mode>3"
3620 [(set (match_operand:<VWIDE> 0 "register_operand")
3621 (plus:<VWIDE> (zero_extend:<VWIDE>
3622 (match_operand:VD_BHSI 1 "register_operand"))
3623 (match_operand:<VWIDE> 2 "register_operand")))]
3626 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3630 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3631 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3632 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3634 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3636 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3637 [(set_attr "type" "neon_sub_widen")]
3640 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3642 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3645 (match_operand:VQW 2 "register_operand" "w")
3646 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3648 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3649 [(set_attr "type" "neon_sub_widen")]
3652 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3654 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3657 (match_operand:VQW 2 "register_operand" "w")
3658 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3660 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3661 [(set_attr "type" "neon_sub_widen")]
3664 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3665 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3667 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3668 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3670 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3671 [(set_attr "type" "neon_add_widen")]
3674 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3675 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3679 (match_operand:VQW 2 "register_operand" "w")
3680 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3681 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3683 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3684 [(set_attr "type" "neon_add_widen")]
3687 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3688 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3692 (match_operand:VQW 2 "register_operand" "w")
3693 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3694 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3696 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3697 [(set_attr "type" "neon_add_widen")]
3700 (define_expand "aarch64_saddw2<mode>"
3701 [(match_operand:<VWIDE> 0 "register_operand")
3702 (match_operand:<VWIDE> 1 "register_operand")
3703 (match_operand:VQW 2 "register_operand")]
3706 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3707 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3712 (define_expand "aarch64_uaddw2<mode>"
3713 [(match_operand:<VWIDE> 0 "register_operand")
3714 (match_operand:<VWIDE> 1 "register_operand")
3715 (match_operand:VQW 2 "register_operand")]
3718 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3719 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3725 (define_expand "aarch64_ssubw2<mode>"
3726 [(match_operand:<VWIDE> 0 "register_operand")
3727 (match_operand:<VWIDE> 1 "register_operand")
3728 (match_operand:VQW 2 "register_operand")]
3731 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3732 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3737 (define_expand "aarch64_usubw2<mode>"
3738 [(match_operand:<VWIDE> 0 "register_operand")
3739 (match_operand:<VWIDE> 1 "register_operand")
3740 (match_operand:VQW 2 "register_operand")]
3743 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3744 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3749 ;; <su><r>h<addsub>.
3751 (define_expand "<u>avg<mode>3_floor"
3752 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3753 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3754 (match_operand:VDQ_BHSI 2 "register_operand")]
3759 (define_expand "<u>avg<mode>3_ceil"
3760 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3761 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3762 (match_operand:VDQ_BHSI 2 "register_operand")]
3767 (define_insn "aarch64_<sur>h<addsub><mode>"
3768 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3769 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3770 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3773 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3774 [(set_attr "type" "neon_<addsub>_halve<q>")]
3777 ;; <r><addsub>hn<q>.
3779 (define_insn "aarch64_<sur><addsub>hn<mode>"
3780 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3781 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3782 (match_operand:VQN 2 "register_operand" "w")]
3785 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3786 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3789 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3790 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3791 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3792 (match_operand:VQN 2 "register_operand" "w")
3793 (match_operand:VQN 3 "register_operand" "w")]
3796 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3797 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3802 (define_insn "aarch64_pmul<mode>"
3803 [(set (match_operand:VB 0 "register_operand" "=w")
3804 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3805 (match_operand:VB 2 "register_operand" "w")]
3808 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3809 [(set_attr "type" "neon_mul_<Vetype><q>")]
3814 (define_insn "aarch64_fmulx<mode>"
3815 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3817 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3818 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3821 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3822 [(set_attr "type" "neon_fp_mul_<stype>")]
3825 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3827 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3828 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3830 [(match_operand:VDQSF 1 "register_operand" "w")
3831 (vec_duplicate:VDQSF
3833 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3834 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3838 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3839 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3841 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3844 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3846 (define_insn "*aarch64_mulx_elt<mode>"
3847 [(set (match_operand:VDQF 0 "register_operand" "=w")
3849 [(match_operand:VDQF 1 "register_operand" "w")
3852 (match_operand:VDQF 2 "register_operand" "w")
3853 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3857 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3858 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3860 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3865 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3866 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3868 [(match_operand:VHSDF 1 "register_operand" "w")
3869 (vec_duplicate:VHSDF
3870 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3873 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3874 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3877 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3878 ;; vmulxd_lane_f64 == vmulx_lane_f64
3879 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3881 (define_insn "*aarch64_vgetfmulx<mode>"
3882 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3884 [(match_operand:<VEL> 1 "register_operand" "w")
3886 (match_operand:VDQF 2 "register_operand" "w")
3887 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3891 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3892 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3894 [(set_attr "type" "fmul<Vetype>")]
3898 (define_insn "aarch64_<su_optab>q<addsub><mode>"
3899 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3900 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3901 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3903 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3904 [(set_attr "type" "neon_q<addsub><q>")]
3907 ;; suqadd and usqadd
3909 (define_insn "aarch64_<sur>qadd<mode>"
3910 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3911 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3912 (match_operand:VSDQ_I 2 "register_operand" "w")]
3915 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3916 [(set_attr "type" "neon_qadd<q>")]
3921 (define_insn "aarch64_sqmovun<mode>"
3922 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3923 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3926 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3927 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3930 ;; sqmovn and uqmovn
3932 (define_insn "aarch64_<sur>qmovn<mode>"
3933 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3934 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3937 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3938 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3941 (define_insn "aarch64_<su>qxtn2<mode>_le"
3942 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3943 (vec_concat:<VNARROWQ2>
3944 (match_operand:<VNARROWQ> 1 "register_operand" "0")
3945 (SAT_TRUNC:<VNARROWQ>
3946 (match_operand:VQN 2 "register_operand" "w"))))]
3947 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3948 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
3949 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3952 (define_insn "aarch64_<su>qxtn2<mode>_be"
3953 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3954 (vec_concat:<VNARROWQ2>
3955 (SAT_TRUNC:<VNARROWQ>
3956 (match_operand:VQN 2 "register_operand" "w"))
3957 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
3958 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3959 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
3960 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3963 (define_expand "aarch64_<su>qxtn2<mode>"
3964 [(match_operand:<VNARROWQ2> 0 "register_operand")
3965 (match_operand:<VNARROWQ> 1 "register_operand")
3966 (SAT_TRUNC:<VNARROWQ>
3967 (match_operand:VQN 2 "register_operand"))]
3970 if (BYTES_BIG_ENDIAN)
3971 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
3974 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
3982 (define_insn "aarch64_s<optab><mode>"
3983 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3985 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3987 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3988 [(set_attr "type" "neon_<optab><q>")]
3993 (define_insn "aarch64_sq<r>dmulh<mode>"
3994 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3996 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3997 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
4000 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4001 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
4006 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
4007 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4009 [(match_operand:VDQHS 1 "register_operand" "w")
4011 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4012 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4016 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4017 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
4018 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4021 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
4022 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4024 [(match_operand:VDQHS 1 "register_operand" "w")
4026 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4027 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4031 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4032 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
4033 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4036 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
4037 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4039 [(match_operand:SD_HSI 1 "register_operand" "w")
4041 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4042 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4046 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4047 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
4048 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4051 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
4052 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4054 [(match_operand:SD_HSI 1 "register_operand" "w")
4056 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4057 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4061 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4062 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
4063 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4068 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
4069 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
4071 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
4072 (match_operand:VSDQ_HSI 2 "register_operand" "w")
4073 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
4076 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4077 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4080 ;; sqrdml[as]h_lane.
4082 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
4083 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4085 [(match_operand:VDQHS 1 "register_operand" "0")
4086 (match_operand:VDQHS 2 "register_operand" "w")
4088 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4089 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4093 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4095 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4097 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4100 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
4101 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4103 [(match_operand:SD_HSI 1 "register_operand" "0")
4104 (match_operand:SD_HSI 2 "register_operand" "w")
4106 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4107 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4111 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4113 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
4115 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4118 ;; sqrdml[as]h_laneq.
4120 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4121 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4123 [(match_operand:VDQHS 1 "register_operand" "0")
4124 (match_operand:VDQHS 2 "register_operand" "w")
4126 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4127 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4131 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4133 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4135 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4138 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4139 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4141 [(match_operand:SD_HSI 1 "register_operand" "0")
4142 (match_operand:SD_HSI 2 "register_operand" "w")
4144 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4145 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4149 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4151 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
4153 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4158 (define_insn "aarch64_sqdmlal<mode>"
4159 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4163 (sign_extend:<VWIDE>
4164 (match_operand:VSD_HSI 2 "register_operand" "w"))
4165 (sign_extend:<VWIDE>
4166 (match_operand:VSD_HSI 3 "register_operand" "w")))
4168 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4170 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4171 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; NOTE(review): interior lines of these patterns appear to be missing from
;; this extract (unbalanced RTL); the code below is left byte-identical.

;; SQDMLSL: signed saturating doubling multiply-subtract long,
;; vector/scalar HI/SI sources widened into the accumulator (operand 1).
4174 (define_insn "aarch64_sqdmlsl<mode>"
4175 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4177 (match_operand:<VWIDE> 1 "register_operand" "0")
4180 (sign_extend:<VWIDE>
4181 (match_operand:VSD_HSI 2 "register_operand" "w"))
4182 (sign_extend:<VWIDE>
4183 (match_operand:VSD_HSI 3 "register_operand" "w")))
4186 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4187 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]

;; SQDMLAL against a duplicated lane of a 64-bit vector (<VCOND>);
;; operand 4 (the lane index) is endian-corrected before printing.
4192 (define_insn "aarch64_sqdmlal_lane<mode>"
4193 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4197 (sign_extend:<VWIDE>
4198 (match_operand:VD_HSI 2 "register_operand" "w"))
4199 (sign_extend:<VWIDE>
4200 (vec_duplicate:VD_HSI
4202 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4203 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4206 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4209 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4211 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4213 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; SQDMLSL by lane of a 64-bit vector (<VCOND>), lane endian-corrected.
4216 (define_insn "aarch64_sqdmlsl_lane<mode>"
4217 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4219 (match_operand:<VWIDE> 1 "register_operand" "0")
4222 (sign_extend:<VWIDE>
4223 (match_operand:VD_HSI 2 "register_operand" "w"))
4224 (sign_extend:<VWIDE>
4225 (vec_duplicate:VD_HSI
4227 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4228 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4233 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4235 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4237 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; SQDMLSL by lane of a 128-bit vector (<VCONQ> = the "laneq" form).
4241 (define_insn "aarch64_sqdmlsl_laneq<mode>"
4242 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4244 (match_operand:<VWIDE> 1 "register_operand" "0")
4247 (sign_extend:<VWIDE>
4248 (match_operand:VD_HSI 2 "register_operand" "w"))
4249 (sign_extend:<VWIDE>
4250 (vec_duplicate:VD_HSI
4252 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4253 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4258 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4260 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4262 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; SQDMLAL by lane of a 128-bit vector (laneq form).
4265 (define_insn "aarch64_sqdmlal_laneq<mode>"
4266 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4270 (sign_extend:<VWIDE>
4271 (match_operand:VD_HSI 2 "register_operand" "w"))
4272 (sign_extend:<VWIDE>
4273 (vec_duplicate:VD_HSI
4275 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4276 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4279 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4282 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4284 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4286 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) SQDMLAL by lane: no vec_duplicate needed since the
;; multiplicand is a scalar element.
4290 (define_insn "aarch64_sqdmlal_lane<mode>"
4291 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4295 (sign_extend:<VWIDE>
4296 (match_operand:SD_HSI 2 "register_operand" "w"))
4297 (sign_extend:<VWIDE>
4299 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4300 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4303 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4306 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4308 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4310 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) SQDMLSL by lane of a 64-bit vector.
4313 (define_insn "aarch64_sqdmlsl_lane<mode>"
4314 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4316 (match_operand:<VWIDE> 1 "register_operand" "0")
4319 (sign_extend:<VWIDE>
4320 (match_operand:SD_HSI 2 "register_operand" "w"))
4321 (sign_extend:<VWIDE>
4323 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4324 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4329 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4331 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4333 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) SQDMLAL by lane of a 128-bit vector (laneq).
4337 (define_insn "aarch64_sqdmlal_laneq<mode>"
4338 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4342 (sign_extend:<VWIDE>
4343 (match_operand:SD_HSI 2 "register_operand" "w"))
4344 (sign_extend:<VWIDE>
4346 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4347 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4350 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4353 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4355 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4357 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) SQDMLSL by lane of a 128-bit vector (laneq).
4360 (define_insn "aarch64_sqdmlsl_laneq<mode>"
4361 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4363 (match_operand:<VWIDE> 1 "register_operand" "0")
4366 (sign_extend:<VWIDE>
4367 (match_operand:SD_HSI 2 "register_operand" "w"))
4368 (sign_extend:<VWIDE>
4370 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4371 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4376 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4378 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4380 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; "_n" forms: multiply every element by a single scalar (operand 3),
;; printed as lane 0 of the scalar's register.
4385 (define_insn "aarch64_sqdmlsl_n<mode>"
4386 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4388 (match_operand:<VWIDE> 1 "register_operand" "0")
4391 (sign_extend:<VWIDE>
4392 (match_operand:VD_HSI 2 "register_operand" "w"))
4393 (sign_extend:<VWIDE>
4394 (vec_duplicate:VD_HSI
4395 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4398 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4399 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; SQDMLAL "_n" form (accumulate variant of the above).
4402 (define_insn "aarch64_sqdmlal_n<mode>"
4403 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4407 (sign_extend:<VWIDE>
4408 (match_operand:VD_HSI 2 "register_operand" "w"))
4409 (sign_extend:<VWIDE>
4410 (vec_duplicate:VD_HSI
4411 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4413 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4415 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4416 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; NOTE(review): interior lines of these patterns appear to be missing from
;; this extract (unbalanced RTL); the code below is left byte-identical.

;; SQDMLAL2: high-half variant — sources come from the upper halves of
;; 128-bit vectors, selected via a vect_par_cnst_hi_half parallel.
4422 (define_insn "aarch64_sqdmlal2<mode>_internal"
4423 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4427 (sign_extend:<VWIDE>
4429 (match_operand:VQ_HSI 2 "register_operand" "w")
4430 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4431 (sign_extend:<VWIDE>
4433 (match_operand:VQ_HSI 3 "register_operand" "w")
4436 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4438 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4439 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; SQDMLSL2: high-half multiply-subtract counterpart.
4442 (define_insn "aarch64_sqdmlsl2<mode>_internal"
4443 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4445 (match_operand:<VWIDE> 1 "register_operand" "0")
4448 (sign_extend:<VWIDE>
4450 (match_operand:VQ_HSI 2 "register_operand" "w")
4451 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4452 (sign_extend:<VWIDE>
4454 (match_operand:VQ_HSI 3 "register_operand" "w")
4458 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4459 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expander: builds the hi-half selector and forwards to the
;; _internal insn above.
4462 (define_expand "aarch64_sqdmlal2<mode>"
4463 [(match_operand:<VWIDE> 0 "register_operand")
4464 (match_operand:<VWIDE> 1 "register_operand")
4465 (match_operand:VQ_HSI 2 "register_operand")
4466 (match_operand:VQ_HSI 3 "register_operand")]
4469 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4470 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4471 operands[2], operands[3], p));

;; Expander for the subtract form; same hi-half selector trick.
4475 (define_expand "aarch64_sqdmlsl2<mode>"
4476 [(match_operand:<VWIDE> 0 "register_operand")
4477 (match_operand:<VWIDE> 1 "register_operand")
4478 (match_operand:VQ_HSI 2 "register_operand")
4479 (match_operand:VQ_HSI 3 "register_operand")]
4482 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4483 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4484 operands[2], operands[3], p));

;; SQDML{A,S}L2 by lane (64-bit vector <VCOND>); SBINQOPS:as selects
;; the "a"/"s" letter in the mnemonic.  Lane is endian-corrected.
4490 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4491 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4493 (match_operand:<VWIDE> 1 "register_operand" "0")
4496 (sign_extend:<VWIDE>
4498 (match_operand:VQ_HSI 2 "register_operand" "w")
4499 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4500 (sign_extend:<VWIDE>
4501 (vec_duplicate:<VHALF>
4503 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4504 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4509 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4511 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4513 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Same but taking the lane from a 128-bit vector (<VCONQ>, laneq).
4516 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4517 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4519 (match_operand:<VWIDE> 1 "register_operand" "0")
4522 (sign_extend:<VWIDE>
4524 (match_operand:VQ_HSI 2 "register_operand" "w")
4525 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4526 (sign_extend:<VWIDE>
4527 (vec_duplicate:<VHALF>
4529 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4530 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4535 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4537 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4539 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Four thin expanders wiring the builtin-facing names onto the
;; _internal lane/laneq insns, supplying the hi-half selector.
4542 (define_expand "aarch64_sqdmlal2_lane<mode>"
4543 [(match_operand:<VWIDE> 0 "register_operand")
4544 (match_operand:<VWIDE> 1 "register_operand")
4545 (match_operand:VQ_HSI 2 "register_operand")
4546 (match_operand:<VCOND> 3 "register_operand")
4547 (match_operand:SI 4 "immediate_operand")]
4550 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4551 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4552 operands[2], operands[3],

4557 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4558 [(match_operand:<VWIDE> 0 "register_operand")
4559 (match_operand:<VWIDE> 1 "register_operand")
4560 (match_operand:VQ_HSI 2 "register_operand")
4561 (match_operand:<VCONQ> 3 "register_operand")
4562 (match_operand:SI 4 "immediate_operand")]
4565 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4566 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4567 operands[2], operands[3],

4572 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4573 [(match_operand:<VWIDE> 0 "register_operand")
4574 (match_operand:<VWIDE> 1 "register_operand")
4575 (match_operand:VQ_HSI 2 "register_operand")
4576 (match_operand:<VCOND> 3 "register_operand")
4577 (match_operand:SI 4 "immediate_operand")]
4580 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4581 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4582 operands[2], operands[3],

4587 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4588 [(match_operand:<VWIDE> 0 "register_operand")
4589 (match_operand:<VWIDE> 1 "register_operand")
4590 (match_operand:VQ_HSI 2 "register_operand")
4591 (match_operand:<VCONQ> 3 "register_operand")
4592 (match_operand:SI 4 "immediate_operand")]
4595 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4596 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4597 operands[2], operands[3],

;; SQDML{A,S}L2 "_n": high half multiplied by one broadcast scalar,
;; printed as lane 0.
4602 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4603 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4605 (match_operand:<VWIDE> 1 "register_operand" "0")
4608 (sign_extend:<VWIDE>
4610 (match_operand:VQ_HSI 2 "register_operand" "w")
4611 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4612 (sign_extend:<VWIDE>
4613 (vec_duplicate:<VHALF>
4614 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4617 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4618 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expanders for the _n forms, again just supplying the hi-half mask.
4621 (define_expand "aarch64_sqdmlal2_n<mode>"
4622 [(match_operand:<VWIDE> 0 "register_operand")
4623 (match_operand:<VWIDE> 1 "register_operand")
4624 (match_operand:VQ_HSI 2 "register_operand")
4625 (match_operand:<VEL> 3 "register_operand")]
4628 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4629 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4630 operands[2], operands[3],

4635 (define_expand "aarch64_sqdmlsl2_n<mode>"
4636 [(match_operand:<VWIDE> 0 "register_operand")
4637 (match_operand:<VWIDE> 1 "register_operand")
4638 (match_operand:VQ_HSI 2 "register_operand")
4639 (match_operand:<VEL> 3 "register_operand")]
4642 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4643 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4644 operands[2], operands[3],
;; NOTE(review): interior lines of these patterns appear to be missing from
;; this extract (unbalanced RTL); the code below is left byte-identical.

;; SQDMULL: signed saturating doubling multiply long (no accumulator).
4651 (define_insn "aarch64_sqdmull<mode>"
4652 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4655 (sign_extend:<VWIDE>
4656 (match_operand:VSD_HSI 1 "register_operand" "w"))
4657 (sign_extend:<VWIDE>
4658 (match_operand:VSD_HSI 2 "register_operand" "w")))
4661 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4662 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]

;; SQDMULL by lane of a 64-bit vector; lane endian-corrected.
4667 (define_insn "aarch64_sqdmull_lane<mode>"
4668 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4671 (sign_extend:<VWIDE>
4672 (match_operand:VD_HSI 1 "register_operand" "w"))
4673 (sign_extend:<VWIDE>
4674 (vec_duplicate:VD_HSI
4676 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4677 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4682 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4683 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4685 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; SQDMULL by lane of a 128-bit vector (laneq form).
4688 (define_insn "aarch64_sqdmull_laneq<mode>"
4689 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4692 (sign_extend:<VWIDE>
4693 (match_operand:VD_HSI 1 "register_operand" "w"))
4694 (sign_extend:<VWIDE>
4695 (vec_duplicate:VD_HSI
4697 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4698 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4703 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4704 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4706 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) SQDMULL by lane — no vec_duplicate for scalar result.
4709 (define_insn "aarch64_sqdmull_lane<mode>"
4710 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4713 (sign_extend:<VWIDE>
4714 (match_operand:SD_HSI 1 "register_operand" "w"))
4715 (sign_extend:<VWIDE>
4717 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4718 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4723 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4724 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4726 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) SQDMULL by lane of a 128-bit vector (laneq).
4729 (define_insn "aarch64_sqdmull_laneq<mode>"
4730 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4733 (sign_extend:<VWIDE>
4734 (match_operand:SD_HSI 1 "register_operand" "w"))
4735 (sign_extend:<VWIDE>
4737 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4738 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4743 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4744 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4746 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; SQDMULL "_n": every element times one broadcast scalar (lane 0).
4751 (define_insn "aarch64_sqdmull_n<mode>"
4752 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4755 (sign_extend:<VWIDE>
4756 (match_operand:VD_HSI 1 "register_operand" "w"))
4757 (sign_extend:<VWIDE>
4758 (vec_duplicate:VD_HSI
4759 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4763 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4764 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; SQDMULL2: high-half sources via a vect_par_cnst_hi_half selector.
4771 (define_insn "aarch64_sqdmull2<mode>_internal"
4772 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4775 (sign_extend:<VWIDE>
4777 (match_operand:VQ_HSI 1 "register_operand" "w")
4778 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4779 (sign_extend:<VWIDE>
4781 (match_operand:VQ_HSI 2 "register_operand" "w")
4786 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4787 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Expander building the hi-half selector for sqdmull2.
4790 (define_expand "aarch64_sqdmull2<mode>"
4791 [(match_operand:<VWIDE> 0 "register_operand")
4792 (match_operand:VQ_HSI 1 "register_operand")
4793 (match_operand:VQ_HSI 2 "register_operand")]
4796 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4797 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],

;; SQDMULL2 by lane of a 64-bit vector; lane endian-corrected.
4804 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4805 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4808 (sign_extend:<VWIDE>
4810 (match_operand:VQ_HSI 1 "register_operand" "w")
4811 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4812 (sign_extend:<VWIDE>
4813 (vec_duplicate:<VHALF>
4815 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4816 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4821 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4822 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4824 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; SQDMULL2 by lane of a 128-bit vector (laneq).
4827 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4828 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4831 (sign_extend:<VWIDE>
4833 (match_operand:VQ_HSI 1 "register_operand" "w")
4834 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4835 (sign_extend:<VWIDE>
4836 (vec_duplicate:<VHALF>
4838 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4839 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4844 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4845 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4847 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Lane/laneq expanders for sqdmull2.
4850 (define_expand "aarch64_sqdmull2_lane<mode>"
4851 [(match_operand:<VWIDE> 0 "register_operand")
4852 (match_operand:VQ_HSI 1 "register_operand")
4853 (match_operand:<VCOND> 2 "register_operand")
4854 (match_operand:SI 3 "immediate_operand")]
4857 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4858 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4859 operands[2], operands[3],

4864 (define_expand "aarch64_sqdmull2_laneq<mode>"
4865 [(match_operand:<VWIDE> 0 "register_operand")
4866 (match_operand:VQ_HSI 1 "register_operand")
4867 (match_operand:<VCONQ> 2 "register_operand")
4868 (match_operand:SI 3 "immediate_operand")]
4871 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4872 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4873 operands[2], operands[3],

;; SQDMULL2 "_n": high half times a broadcast scalar (lane 0).
4880 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4881 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4884 (sign_extend:<VWIDE>
4886 (match_operand:VQ_HSI 1 "register_operand" "w")
4887 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4888 (sign_extend:<VWIDE>
4889 (vec_duplicate:<VHALF>
4890 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4894 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4895 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Expander for the _n form.
4898 (define_expand "aarch64_sqdmull2_n<mode>"
4899 [(match_operand:<VWIDE> 0 "register_operand")
4900 (match_operand:VQ_HSI 1 "register_operand")
4901 (match_operand:<VEL> 2 "register_operand")]
4904 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4905 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; NOTE(review): interior lines of these patterns appear to be missing from
;; this extract (unbalanced RTL); the code below is left byte-identical.

;; Register-controlled vector shifts (SSHL/USHL and rounding variants,
;; chosen by the <sur> iterator attribute).
4912 (define_insn "aarch64_<sur>shl<mode>"
4913 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4915 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4916 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4919 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4920 [(set_attr "type" "neon_shift_reg<q>")]

;; Saturating (optionally rounding) register-controlled shifts.
4926 (define_insn "aarch64_<sur>q<r>shl<mode>"
4927 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4929 [(match_operand:VSDQ_I 1 "register_operand" "w")
4930 (match_operand:VSDQ_I 2 "register_operand" "w")]
4933 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4934 [(set_attr "type" "neon_sat_shift_reg<q>")]

;; Widening shift-left of the LOW half, for the vectorizer.
4937 (define_expand "vec_widen_<sur>shiftl_lo_<mode>"
4938 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4939 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4941 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4945 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4946 emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],

;; Widening shift-left of the HIGH half.
4952 (define_expand "vec_widen_<sur>shiftl_hi_<mode>"
4953 [(set (match_operand:<VWIDE> 0 "register_operand")
4954 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4956 "immediate_operand" "i")]
4960 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4961 emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],

;; SHLL on the low half; when the shift count equals the element
;; bit-width, the plain SHLL encoding is used instead of S/USHLL.
4969 (define_insn "aarch64_<sur>shll<mode>_internal"
4970 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4971 (unspec:<VWIDE> [(vec_select:<VHALF>
4972 (match_operand:VQW 1 "register_operand" "w")
4973 (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
4975 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4979 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4980 return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
4982 return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
4984 [(set_attr "type" "neon_shift_imm_long")]

;; SHLL2: same as above but on the high half.
4987 (define_insn "aarch64_<sur>shll2<mode>_internal"
4988 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4989 (unspec:<VWIDE> [(vec_select:<VHALF>
4990 (match_operand:VQW 1 "register_operand" "w")
4991 (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
4993 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4997 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4998 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
5000 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
5002 [(set_attr "type" "neon_shift_imm_long")]

;; SHLL_N on a 64-bit vector: widening shift by immediate.
5005 (define_insn "aarch64_<sur>shll_n<mode>"
5006 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5007 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
5009 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
5013 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
5014 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
5016 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
5018 [(set_attr "type" "neon_shift_imm_long")]

;; SHLL2_N: widening shift by immediate on the high half.
5023 (define_insn "aarch64_<sur>shll2_n<mode>"
5024 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5025 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
5026 (match_operand:SI 2 "immediate_operand" "i")]
5030 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
5031 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
5033 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
5035 [(set_attr "type" "neon_shift_imm_long")]

;; Right shift by immediate (rounding/signed variants via <sur>).
5040 (define_insn "aarch64_<sur>shr_n<mode>"
5041 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5042 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
5044 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5047 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
5048 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Shift-right and accumulate (SSRA/USRA family); operand 1 is the
;; accumulator tied to the destination.
5053 (define_insn "aarch64_<sur>sra_n<mode>"
5054 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5055 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
5056 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
5058 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5061 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
5062 [(set_attr "type" "neon_shift_acc<q>")]

;; Shift-and-insert (SLI/SRI): destination bits outside the shifted
;; field are preserved from operand 1.
5067 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
5068 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5069 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
5070 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
5072 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
5075 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
5076 [(set_attr "type" "neon_shift_imm<q>")]

;; Saturating shift-left by immediate (SQSHL/UQSHL/SQSHLU).
5081 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
5082 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5083 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
5085 "aarch64_simd_shift_imm_<ve_mode>" "i")]
5088 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
5089 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Saturating shift-right-narrow by immediate (SQSHRN family).
5095 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
5096 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5097 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
5099 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5102 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
5103 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]

;; High-half saturating shift-right-narrow (SQSHRN2 family); operand 1
;; supplies the existing low half of the destination.
5106 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>"
5107 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5108 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
5109 (match_operand:VQN 2 "register_operand" "w")
5110 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5113 "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
5114 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5118 ;; cm(eq|ge|gt|lt|le)
5119 ;; Note, we have constraints for Dz and Z as different expanders
5120 ;; have different ideas of what should be passed to this pattern.
;; NOTE(review): interior lines of these patterns appear to be missing from
;; this extract (unbalanced RTL); the code below is left byte-identical.

;; Signed vector compares (CMEQ/CMGE/CMGT/...); alternative 2 compares
;; against zero via the ZDz constraint.
5122 (define_insn "aarch64_cm<optab><mode>"
5123 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
5125 (COMPARISONS:<V_INT_EQUIV>
5126 (match_operand:VDQ_I 1 "register_operand" "w,w")
5127 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
5131 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
5132 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
5133 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]

;; DImode signed compare: when the operands land in general registers
;; after reload, split to a CC compare + conditional store; otherwise
;; fall through to the CC-free SIMD pattern below.
5136 (define_insn_and_split "aarch64_cm<optab>di"
5137 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
5140 (match_operand:DI 1 "register_operand" "w,w,r")
5141 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
5143 (clobber (reg:CC CC_REGNUM))]
5146 "&& reload_completed"
5147 [(set (match_operand:DI 0 "register_operand")
5150 (match_operand:DI 1 "register_operand")
5151 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
5154 /* If we are in the general purpose register file,
5155 we split to a sequence of comparison and store.  */
5156 if (GP_REGNUM_P (REGNO (operands[0]))
5157 && GP_REGNUM_P (REGNO (operands[1])))
5159 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
5160 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
5161 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
5162 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5165 /* Otherwise, we expand to a similar pattern which does not
5166 clobber CC_REGNUM.  */
5168 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]

;; Post-reload DImode compare kept entirely in SIMD registers.
5171 (define_insn "*aarch64_cm<optab>di"
5172 [(set (match_operand:DI 0 "register_operand" "=w,w")
5175 (match_operand:DI 1 "register_operand" "w,w")
5176 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
5178 "TARGET_SIMD && reload_completed"
5180 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
5181 cm<optab>\t%d0, %d1, #0"
5182 [(set_attr "type" "neon_compare, neon_compare_zero")]

;; Unsigned vector compares (CMHI/CMHS via UCOMPARISONS).
5187 (define_insn "aarch64_cm<optab><mode>"
5188 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5190 (UCOMPARISONS:<V_INT_EQUIV>
5191 (match_operand:VDQ_I 1 "register_operand" "w")
5192 (match_operand:VDQ_I 2 "register_operand" "w")
5195 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
5196 [(set_attr "type" "neon_compare<q>")]

;; DImode unsigned compare; same GP-vs-SIMD split strategy as the
;; signed DI pattern, but unsigned comparisons always use CCmode.
5199 (define_insn_and_split "aarch64_cm<optab>di"
5200 [(set (match_operand:DI 0 "register_operand" "=w,r")
5203 (match_operand:DI 1 "register_operand" "w,r")
5204 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
5206 (clobber (reg:CC CC_REGNUM))]
5209 "&& reload_completed"
5210 [(set (match_operand:DI 0 "register_operand")
5213 (match_operand:DI 1 "register_operand")
5214 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
5217 /* If we are in the general purpose register file,
5218 we split to a sequence of comparison and store.  */
5219 if (GP_REGNUM_P (REGNO (operands[0]))
5220 && GP_REGNUM_P (REGNO (operands[1])))
5222 machine_mode mode = CCmode;
5223 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
5224 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
5225 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5228 /* Otherwise, we expand to a similar pattern which does not
5229 clobber CC_REGNUM.  */
5231 [(set_attr "type" "neon_compare,multiple")]

;; Post-reload unsigned DImode compare in SIMD registers.
5234 (define_insn "*aarch64_cm<optab>di"
5235 [(set (match_operand:DI 0 "register_operand" "=w")
5238 (match_operand:DI 1 "register_operand" "w")
5239 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
5241 "TARGET_SIMD && reload_completed"
5242 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
5243 [(set_attr "type" "neon_compare")]

;; CMTST: test-bits compare, written with the plus/eq/-1 canonical form
;; described in the comment preceding this pattern in the full file.
5254 (define_insn "aarch64_cmtst<mode>"
5255 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5259 (match_operand:VDQ_I 1 "register_operand" "w")
5260 (match_operand:VDQ_I 2 "register_operand" "w"))
5261 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
5262 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
5265 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5266 [(set_attr "type" "neon_tst<q>")]

;; DImode CMTST; in GP registers, split to AND + NE compare + store.
5269 (define_insn_and_split "aarch64_cmtstdi"
5270 [(set (match_operand:DI 0 "register_operand" "=w,r")
5274 (match_operand:DI 1 "register_operand" "w,r")
5275 (match_operand:DI 2 "register_operand" "w,r"))
5277 (clobber (reg:CC CC_REGNUM))]
5280 "&& reload_completed"
5281 [(set (match_operand:DI 0 "register_operand")
5285 (match_operand:DI 1 "register_operand")
5286 (match_operand:DI 2 "register_operand"))
5289 /* If we are in the general purpose register file,
5290 we split to a sequence of comparison and store.  */
5291 if (GP_REGNUM_P (REGNO (operands[0]))
5292 && GP_REGNUM_P (REGNO (operands[1])))
5294 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
5295 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
5296 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
5297 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
5298 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5301 /* Otherwise, we expand to a similar pattern which does not
5302 clobber CC_REGNUM.  */
5304 [(set_attr "type" "neon_tst,multiple")]

;; Post-reload DImode CMTST in SIMD registers.
5307 (define_insn "*aarch64_cmtstdi"
5308 [(set (match_operand:DI 0 "register_operand" "=w")
5312 (match_operand:DI 1 "register_operand" "w")
5313 (match_operand:DI 2 "register_operand" "w"))
5316 "cmtst\t%d0, %d1, %d2"
5317 [(set_attr "type" "neon_tst")]
5320 ;; fcm(eq|ge|gt|le|lt)
;; NOTE(review): interior lines of these patterns appear to be missing from
;; this extract (unbalanced RTL); the code below is left byte-identical.

;; Floating-point vector compares (FCMEQ/FCMGE/...); second alternative
;; compares against zero via the YDz constraint.
5322 (define_insn "aarch64_cm<optab><mode>"
5323 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
5325 (COMPARISONS:<V_INT_EQUIV>
5326 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
5327 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
5331 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
5332 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
5333 [(set_attr "type" "neon_fp_compare_<stype><q>")]

;; FACGE/FACGT: absolute-value floating-point compares.
5340 (define_insn "aarch64_fac<optab><mode>"
5341 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5343 (FAC_COMPARISONS:<V_INT_EQUIV>
5345 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
5347 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
5350 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
5351 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; NOTE(review): interior lines of these patterns appear to be missing from
;; this extract (unbalanced RTL); the code below is left byte-identical.

;; ADDP: pairwise add of two 64-bit integer vectors.
5356 (define_insn "aarch64_addp<mode>"
5357 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
5359 [(match_operand:VD_BHSI 1 "register_operand" "w")
5360 (match_operand:VD_BHSI 2 "register_operand" "w")]
5363 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5364 [(set_attr "type" "neon_reduc_add<q>")]

;; Scalar ADDP: reduce a V2DI pair to one DI result.
5367 (define_insn "aarch64_addpdi"
5368 [(set (match_operand:DI 0 "register_operand" "=w")
5370 [(match_operand:V2DI 1 "register_operand" "w")]
5374 [(set_attr "type" "neon_reduc_add")]

;; sqrt expander: optionally replaced by the approximate-sqrt sequence
;; (aarch64_emit_approx_sqrt) before falling back to the FSQRT insn.
5379 (define_expand "sqrt<mode>2"
5380 [(set (match_operand:VHSDF 0 "register_operand")
5381 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
5384 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))

;; Plain vector FSQRT.
5388 (define_insn "*sqrt<mode>2"
5389 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5390 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
5392 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
5393 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
5396 ;; Patterns for vector struct loads and stores.
;; NOTE(review): interior lines of these patterns appear to be missing from
;; this extract (unbalanced RTL); the code below is left byte-identical.

;; LD2: load a two-register structure (OImode holds the register pair);
;; the VQ unspec operand only carries the element mode.
5398 (define_insn "aarch64_simd_ld2<mode>"
5399 [(set (match_operand:OI 0 "register_operand" "=w")
5400 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5401 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5404 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5405 [(set_attr "type" "neon_load2_2reg<q>")]

;; LD2R: load one element pair and replicate it to all lanes.
5408 (define_insn "aarch64_simd_ld2r<mode>"
5409 [(set (match_operand:OI 0 "register_operand" "=w")
5410 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5411 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5414 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5415 [(set_attr "type" "neon_load2_all_lanes<q>")]

;; LD2 single-lane: merge one structure element into lane %3 of an
;; existing register pair (operand 2); lane is endian-corrected.
5418 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
5419 [(set (match_operand:OI 0 "register_operand" "=w")
5420 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5421 (match_operand:OI 2 "register_operand" "0")
5422 (match_operand:SI 3 "immediate_operand" "i")
5423 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5427 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5428 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
5430 [(set_attr "type" "neon_load2_one_lane")]

;; vec_load_lanes expander: on big-endian, load into a temp and permute
;; the register list so lane numbering matches GCC's vector extension.
5433 (define_expand "vec_load_lanesoi<mode>"
5434 [(set (match_operand:OI 0 "register_operand")
5435 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
5436 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5440 if (BYTES_BIG_ENDIAN)
5442 rtx tmp = gen_reg_rtx (OImode);
5443 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5444 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
5445 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
5448 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; ST2: store a two-register structure.
5452 (define_insn "aarch64_simd_st2<mode>"
5453 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5454 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5455 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5458 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5459 [(set_attr "type" "neon_store2_2reg<q>")]
5462 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5463 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
5464 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5465 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5466 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5467 (match_operand:SI 2 "immediate_operand" "i")]
5471 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5472 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5474 [(set_attr "type" "neon_store2_one_lane<q>")]
5477 (define_expand "vec_store_lanesoi<mode>"
5478 [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5479 (unspec:OI [(match_operand:OI 1 "register_operand")
5480 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5484 if (BYTES_BIG_ENDIAN)
5486 rtx tmp = gen_reg_rtx (OImode);
5487 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5488 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5489 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5492 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; LD3/ST3 family: same structure as the LD2/ST2 patterns above but for
;; three-register (CImode) lists.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
;; LD3: load three interleaved vectors.
5496 (define_insn "aarch64_simd_ld3<mode>"
5497 [(set (match_operand:CI 0 "register_operand" "=w")
5498 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5499 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5502 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5503 [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one 3-element structure, replicate to all lanes.
5506 (define_insn "aarch64_simd_ld3r<mode>"
5507 [(set (match_operand:CI 0 "register_operand" "=w")
5508 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5509 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5512 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5513 [(set_attr "type" "neon_load3_all_lanes<q>")]
;; LD3 (single lane): lane index flipped for big-endian assembly output.
5516 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5517 [(set (match_operand:CI 0 "register_operand" "=w")
5518 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5519 (match_operand:CI 2 "register_operand" "0")
5520 (match_operand:SI 3 "immediate_operand" "i")
5521 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5525 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5526 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5528 [(set_attr "type" "neon_load3_one_lane")]
;; Standard-name expander; reverses the loaded register list on
;; big-endian targets via aarch64_rev_reglistci.
5531 (define_expand "vec_load_lanesci<mode>"
5532 [(set (match_operand:CI 0 "register_operand")
5533 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5534 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5538 if (BYTES_BIG_ENDIAN)
5540 rtx tmp = gen_reg_rtx (CImode);
5541 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5542 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5543 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5546 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
;; ST3: store three interleaved vectors.
5550 (define_insn "aarch64_simd_st3<mode>"
5551 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5552 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5553 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5556 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5557 [(set_attr "type" "neon_store3_3reg<q>")]
5560 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST3 (single lane).
5561 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5562 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5563 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5564 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5565 (match_operand:SI 2 "immediate_operand" "i")]
5569 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5570 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5572 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-name expander for 3-vector stores; big-endian reverses the
;; register list before the store.
5575 (define_expand "vec_store_lanesci<mode>"
5576 [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5577 (unspec:CI [(match_operand:CI 1 "register_operand")
5578 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5582 if (BYTES_BIG_ENDIAN)
5584 rtx tmp = gen_reg_rtx (CImode);
5585 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5586 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5587 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5590 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; LD4/ST4 family: four-register (XImode) analogue of the LD2/LD3
;; patterns above.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
;; LD4: load four interleaved vectors.
5594 (define_insn "aarch64_simd_ld4<mode>"
5595 [(set (match_operand:XI 0 "register_operand" "=w")
5596 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5597 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5600 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5601 [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure, replicate to all lanes.
5604 (define_insn "aarch64_simd_ld4r<mode>"
5605 [(set (match_operand:XI 0 "register_operand" "=w")
5606 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5607 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5610 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5611 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; LD4 (single lane): lane index flipped for big-endian assembly output.
5614 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5615 [(set (match_operand:XI 0 "register_operand" "=w")
5616 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5617 (match_operand:XI 2 "register_operand" "0")
5618 (match_operand:SI 3 "immediate_operand" "i")
5619 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5623 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5624 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5626 [(set_attr "type" "neon_load4_one_lane")]
;; Standard-name expander; big-endian reverses the loaded register list.
5629 (define_expand "vec_load_lanesxi<mode>"
5630 [(set (match_operand:XI 0 "register_operand")
5631 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5632 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5636 if (BYTES_BIG_ENDIAN)
5638 rtx tmp = gen_reg_rtx (XImode);
5639 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5640 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5641 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5644 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
;; ST4: store four interleaved vectors.
5648 (define_insn "aarch64_simd_st4<mode>"
5649 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5650 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5651 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5654 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5655 [(set_attr "type" "neon_store4_4reg<q>")]
5658 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST4 (single lane).
5659 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5660 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5661 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5662 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5663 (match_operand:SI 2 "immediate_operand" "i")]
5667 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5668 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5670 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard-name expander for 4-vector stores; big-endian reverses the
;; register list before the store.
5673 (define_expand "vec_store_lanesxi<mode>"
5674 [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5675 (unspec:XI [(match_operand:XI 1 "register_operand")
5676 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5680 if (BYTES_BIG_ENDIAN)
5682 rtx tmp = gen_reg_rtx (XImode);
5683 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5684 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5685 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5688 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse the lane order of every vector in a structure register list
;; (used by the big-endian paths of the vec_load/store_lanes expanders).
;; After reload it splits into one TBL (aarch64_tbl1v16qi) per
;; 128-bit register in the list, indexed by the mask in operand 2.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
5692 (define_insn_and_split "aarch64_rev_reglist<mode>"
5693 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5695 [(match_operand:VSTRUCT 1 "register_operand" "w")
5696 (match_operand:V16QI 2 "register_operand" "w")]
5697 UNSPEC_REV_REGLIST))]
5700 "&& reload_completed"
5704 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5705 for (i = 0; i < nregs; i++)
5707 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5708 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5709 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5713 [(set_attr "type" "neon_tbl1_q")
5714 (set_attr "length" "<insn_count>")]
5717 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for structure (register-list) modes: before reload,
;; force the source into a register when the destination is not one.
5719 (define_expand "mov<mode>"
5720 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5721 (match_operand:VSTRUCT 1 "general_operand"))]
5724 if (can_create_pseudo_p ())
5726 if (GET_CODE (operands[0]) != REG)
5727 operands[1] = force_reg (<MODE>mode, operands[1]);
;; LD1/ST1 multi-register forms (x2/x3/x4): de-interleave-free loads and
;; stores of consecutive vectors, used by the vld1q_*_x{2,3,4} /
;; vst1q_*_x{2,3,4} intrinsics.  Each expander wraps the pointer in a
;; MEM of the structure mode and forwards to the matching insn.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
5732 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5733 [(match_operand:CI 0 "register_operand")
5734 (match_operand:DI 1 "register_operand")
5735 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5738 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5739 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
;; LD1 of three consecutive registers.
5743 (define_insn "aarch64_ld1_x3_<mode>"
5744 [(set (match_operand:CI 0 "register_operand" "=w")
5746 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5747 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5749 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5750 [(set_attr "type" "neon_load1_3reg<q>")]
5753 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5754 [(match_operand:XI 0 "register_operand" "=w")
5755 (match_operand:DI 1 "register_operand" "r")
5756 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5759 rtx mem = gen_rtx_MEM (XImode, operands[1]);
5760 emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
;; LD1 of four consecutive registers.
5764 (define_insn "aarch64_ld1_x4_<mode>"
5765 [(set (match_operand:XI 0 "register_operand" "=w")
5767 [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5768 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5771 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5772 [(set_attr "type" "neon_load1_4reg<q>")]
5775 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5776 [(match_operand:DI 0 "register_operand")
5777 (match_operand:OI 1 "register_operand")
5778 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5781 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5782 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
;; ST1 of two consecutive registers.
5786 (define_insn "aarch64_st1_x2_<mode>"
5787 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5789 [(match_operand:OI 1 "register_operand" "w")
5790 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5792 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5793 [(set_attr "type" "neon_store1_2reg<q>")]
5796 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5797 [(match_operand:DI 0 "register_operand")
5798 (match_operand:CI 1 "register_operand")
5799 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5802 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5803 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
;; ST1 of three consecutive registers.
5807 (define_insn "aarch64_st1_x3_<mode>"
5808 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5810 [(match_operand:CI 1 "register_operand" "w")
5811 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5813 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5814 [(set_attr "type" "neon_store1_3reg<q>")]
5817 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5818 [(match_operand:DI 0 "register_operand" "")
5819 (match_operand:XI 1 "register_operand" "")
5820 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5823 rtx mem = gen_rtx_MEM (XImode, operands[0]);
5824 emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
;; ST1 of four consecutive registers.
5828 (define_insn "aarch64_st1_x4_<mode>"
5829 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5831 [(match_operand:XI 1 "register_operand" "w")
5832 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5835 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5836 [(set_attr "type" "neon_store1_4reg<q>")]
;; Little-endian structure-mode move: reg-reg (split elsewhere), or
;; whole-list ST1/LD1 to/from memory.  Big-endian uses the *_be_*
;; patterns below instead.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
5839 (define_insn "*aarch64_mov<mode>"
5840 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5841 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5842 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5843 && (register_operand (operands[0], <MODE>mode)
5844 || register_operand (operands[1], <MODE>mode))"
5847 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5848 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5849 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5850 neon_load<nregs>_<nregs>reg_q")
5851 (set_attr "length" "<insn_count>,4,4")]
;; Big-endian element-ordered single-vector load (LD1 keeps memory
;; element order regardless of endianness).
5854 (define_insn "aarch64_be_ld1<mode>"
5855 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5856 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5857 "aarch64_simd_struct_operand" "Utv")]
5860 "ld1\\t{%0<Vmtype>}, %1"
5861 [(set_attr "type" "neon_load1_1reg<q>")]
;; Big-endian element-ordered single-vector store.
5864 (define_insn "aarch64_be_st1<mode>"
5865 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5866 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5869 "st1\\t{%1<Vmtype>}, %0"
5870 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian structure moves: plain memory operands ("m"/"o") instead
;; of ST1/LD1, split after reload by the define_splits further down.
5873 (define_insn "*aarch64_be_movoi"
5874 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5875 (match_operand:OI 1 "general_operand" " w,w,m"))]
5876 "TARGET_SIMD && BYTES_BIG_ENDIAN
5877 && (register_operand (operands[0], OImode)
5878 || register_operand (operands[1], OImode))"
5883 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5884 (set_attr "length" "8,4,4")]
5887 (define_insn "*aarch64_be_movci"
5888 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5889 (match_operand:CI 1 "general_operand" " w,w,o"))]
5890 "TARGET_SIMD && BYTES_BIG_ENDIAN
5891 && (register_operand (operands[0], CImode)
5892 || register_operand (operands[1], CImode))"
5894 [(set_attr "type" "multiple")
5895 (set_attr "length" "12,4,4")]
5898 (define_insn "*aarch64_be_movxi"
5899 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5900 (match_operand:XI 1 "general_operand" " w,w,o"))]
5901 "TARGET_SIMD && BYTES_BIG_ENDIAN
5902 && (register_operand (operands[0], XImode)
5903 || register_operand (operands[1], XImode))"
5905 [(set_attr "type" "multiple")
5906 (set_attr "length" "16,4,4")]
;; Post-reload splitters for OI/CI/XI structure moves: reg-reg moves
;; are decomposed into 2/3/4 TImode register moves via
;; aarch64_simd_emit_reg_reg_move; big-endian memory moves are
;; decomposed into smaller (OI/TI) piecewise moves.
;; NOTE(review): the (define_split ...) header lines and several
;; interior lines are missing from this extract — verify against
;; upstream aarch64-simd.md.
5910 [(set (match_operand:OI 0 "register_operand")
5911 (match_operand:OI 1 "register_operand"))]
5912 "TARGET_SIMD && reload_completed"
5915 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; CImode split: 3 TImode reg moves, or OI + TI pieces for memory.
5920 [(set (match_operand:CI 0 "nonimmediate_operand")
5921 (match_operand:CI 1 "general_operand"))]
5922 "TARGET_SIMD && reload_completed"
5925 if (register_operand (operands[0], CImode)
5926 && register_operand (operands[1], CImode))
5928 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5931 else if (BYTES_BIG_ENDIAN)
5933 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5934 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5935 emit_move_insn (gen_lowpart (V16QImode,
5936 simplify_gen_subreg (TImode, operands[0],
5938 gen_lowpart (V16QImode,
5939 simplify_gen_subreg (TImode, operands[1],
;; XImode split: 4 TImode reg moves, or two OImode halves for memory.
5948 [(set (match_operand:XI 0 "nonimmediate_operand")
5949 (match_operand:XI 1 "general_operand"))]
5950 "TARGET_SIMD && reload_completed"
5953 if (register_operand (operands[0], XImode)
5954 && register_operand (operands[1], XImode))
5956 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5959 else if (BYTES_BIG_ENDIAN)
5961 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5962 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5963 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5964 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; LDnR intrinsic expander: builds a BLK MEM sized to one n-element
;; structure and forwards to the aarch64_simd_ld<n>r insn.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
5971 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5972 [(match_operand:VSTRUCT 0 "register_operand")
5973 (match_operand:DI 1 "register_operand")
5974 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5977 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5978 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5981 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; D-register LDn loads.  For the 64-bit-vector modes (VD) a real
;; LD2/LD3/LD4 is used; for DI/DF (DX) the same effect is an LD1 of
;; n consecutive 1-element "1d" registers.
5986 (define_insn "aarch64_ld2<mode>_dreg"
5987 [(set (match_operand:OI 0 "register_operand" "=w")
5988 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5989 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5992 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5993 [(set_attr "type" "neon_load2_2reg<q>")]
5996 (define_insn "aarch64_ld2<mode>_dreg"
5997 [(set (match_operand:OI 0 "register_operand" "=w")
5998 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5999 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6002 "ld1\\t{%S0.1d - %T0.1d}, %1"
6003 [(set_attr "type" "neon_load1_2reg<q>")]
6006 (define_insn "aarch64_ld3<mode>_dreg"
6007 [(set (match_operand:CI 0 "register_operand" "=w")
6008 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6009 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6012 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6013 [(set_attr "type" "neon_load3_3reg<q>")]
6016 (define_insn "aarch64_ld3<mode>_dreg"
6017 [(set (match_operand:CI 0 "register_operand" "=w")
6018 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6019 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6022 "ld1\\t{%S0.1d - %U0.1d}, %1"
6023 [(set_attr "type" "neon_load1_3reg<q>")]
6026 (define_insn "aarch64_ld4<mode>_dreg"
6027 [(set (match_operand:XI 0 "register_operand" "=w")
6028 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6029 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6032 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6033 [(set_attr "type" "neon_load4_4reg<q>")]
6036 (define_insn "aarch64_ld4<mode>_dreg"
6037 [(set (match_operand:XI 0 "register_operand" "=w")
6038 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6039 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6042 "ld1\\t{%S0.1d - %V0.1d}, %1"
6043 [(set_attr "type" "neon_load1_4reg<q>")]
;; Intrinsic expander for D-register LDn: BLK MEM of nregs * 8 bytes,
;; forwarded to the _dreg insns above.
6046 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
6047 [(match_operand:VSTRUCT 0 "register_operand")
6048 (match_operand:DI 1 "register_operand")
6049 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6052 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
6053 set_mem_size (mem, <VSTRUCT:nregs> * 8);
6055 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; vld1 intrinsic expander: big-endian goes through the element-ordered
;; aarch64_be_ld1 insn, little-endian is a plain move from memory.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
6059 (define_expand "aarch64_ld1<VALL_F16:mode>"
6060 [(match_operand:VALL_F16 0 "register_operand")
6061 (match_operand:DI 1 "register_operand")]
6064 machine_mode mode = <VALL_F16:MODE>mode;
6065 rtx mem = gen_rtx_MEM (mode, operands[1]);
6067 if (BYTES_BIG_ENDIAN)
6068 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
6070 emit_move_insn (operands[0], mem);
;; Q-register LDn intrinsic expander: forwards to aarch64_simd_ldN.
6074 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
6075 [(match_operand:VSTRUCT 0 "register_operand")
6076 (match_operand:DI 1 "register_operand")
6077 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6080 machine_mode mode = <VSTRUCT:MODE>mode;
6081 rtx mem = gen_rtx_MEM (mode, operands[1]);
6083 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; vld1q_*_x2 expander (Q-register element modes).
6087 (define_expand "aarch64_ld1x2<VQ:mode>"
6088 [(match_operand:OI 0 "register_operand")
6089 (match_operand:DI 1 "register_operand")
6090 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6093 machine_mode mode = OImode;
6094 rtx mem = gen_rtx_MEM (mode, operands[1]);
6096 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
;; vld1_*_x2 expander (D-register element modes).
6100 (define_expand "aarch64_ld1x2<VDC:mode>"
6101 [(match_operand:OI 0 "register_operand")
6102 (match_operand:DI 1 "register_operand")
6103 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6106 machine_mode mode = OImode;
6107 rtx mem = gen_rtx_MEM (mode, operands[1]);
6109 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
;; vldN_lane intrinsic expander: bounds-checks the lane index, builds
;; a BLK MEM of one n-element structure, and forwards to the
;; vec_load_lanes*_lane insn.
6114 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6115 [(match_operand:VSTRUCT 0 "register_operand")
6116 (match_operand:DI 1 "register_operand")
6117 (match_operand:VSTRUCT 2 "register_operand")
6118 (match_operand:SI 3 "immediate_operand")
6119 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6122 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
6123 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6126 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
6127 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6128 operands[0], mem, operands[2], operands[3]));
6132 ;; Expanders for builtins to extract vector registers from large
6133 ;; opaque integer modes.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
;; Extract D-register `part` from a structure value: take the 128-bit
;; SUBREG at part * 16 bytes and return its low 64-bit half.
6137 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
6138 [(match_operand:VDC 0 "register_operand")
6139 (match_operand:VSTRUCT 1 "register_operand")
6140 (match_operand:SI 2 "immediate_operand")]
6143 int part = INTVAL (operands[2]);
6144 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
6145 int offset = part * 16;
6147 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
6148 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extract Q-register `part` from a structure value (SUBREG at
;; part * 16 bytes).
6154 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
6155 [(match_operand:VQ 0 "register_operand")
6156 (match_operand:VSTRUCT 1 "register_operand")
6157 (match_operand:SI 2 "immediate_operand")]
6160 int part = INTVAL (operands[2]);
6161 int offset = part * 16;
6163 emit_move_insn (operands[0],
6164 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
6168 ;; Permuted-store expanders for neon intrinsics.
6170 ;; Permute instructions
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
;; Standard-name variable permute on byte vectors; lowered by
;; aarch64_expand_vec_perm.
6174 (define_expand "vec_perm<mode>"
6175 [(match_operand:VB 0 "register_operand")
6176 (match_operand:VB 1 "register_operand")
6177 (match_operand:VB 2 "register_operand")
6178 (match_operand:VB 3 "register_operand")]
6181 aarch64_expand_vec_perm (operands[0], operands[1],
6182 operands[2], operands[3], <nunits>);
;; TBL with a single source register.
6186 (define_insn "aarch64_tbl1<mode>"
6187 [(set (match_operand:VB 0 "register_operand" "=w")
6188 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
6189 (match_operand:VB 2 "register_operand" "w")]
6192 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
6193 [(set_attr "type" "neon_tbl1<q>")]
6196 ;; Two source registers.
;; TBL across a 2-register (OImode) list.
6198 (define_insn "aarch64_tbl2v16qi"
6199 [(set (match_operand:V16QI 0 "register_operand" "=w")
6200 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
6201 (match_operand:V16QI 2 "register_operand" "w")]
6204 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
6205 [(set_attr "type" "neon_tbl2_q")]
6208 (define_insn "aarch64_tbl3<mode>"
6209 [(set (match_operand:VB 0 "register_operand" "=w")
6210 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
6211 (match_operand:VB 2 "register_operand" "w")]
6214 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
6215 [(set_attr "type" "neon_tbl3")]
;; TBX (insert variant): out-of-range indices leave the destination
;; lane (operand 1, tied to the output) unchanged.
6218 (define_insn "aarch64_tbx4<mode>"
6219 [(set (match_operand:VB 0 "register_operand" "=w")
6220 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6221 (match_operand:OI 2 "register_operand" "w")
6222 (match_operand:VB 3 "register_operand" "w")]
6225 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
6226 [(set_attr "type" "neon_tbl4")]
6229 ;; Three source registers.
6231 (define_insn "aarch64_qtbl3<mode>"
6232 [(set (match_operand:VB 0 "register_operand" "=w")
6233 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
6234 (match_operand:VB 2 "register_operand" "w")]
6237 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
6238 [(set_attr "type" "neon_tbl3")]
6241 (define_insn "aarch64_qtbx3<mode>"
6242 [(set (match_operand:VB 0 "register_operand" "=w")
6243 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6244 (match_operand:CI 2 "register_operand" "w")
6245 (match_operand:VB 3 "register_operand" "w")]
6248 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
6249 [(set_attr "type" "neon_tbl3")]
6252 ;; Four source registers.
6254 (define_insn "aarch64_qtbl4<mode>"
6255 [(set (match_operand:VB 0 "register_operand" "=w")
6256 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
6257 (match_operand:VB 2 "register_operand" "w")]
6260 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
6261 [(set_attr "type" "neon_tbl4")]
6264 (define_insn "aarch64_qtbx4<mode>"
6265 [(set (match_operand:VB 0 "register_operand" "=w")
6266 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6267 (match_operand:XI 2 "register_operand" "w")
6268 (match_operand:VB 3 "register_operand" "w")]
6271 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
6272 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into an OImode pair; split after reload
;; by aarch64_split_combinev16qi.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
6275 (define_insn_and_split "aarch64_combinev16qi"
6276 [(set (match_operand:OI 0 "register_operand" "=w")
6277 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
6278 (match_operand:V16QI 2 "register_operand" "w")]
6282 "&& reload_completed"
6285 aarch64_split_combinev16qi (operands);
6288 [(set_attr "type" "multiple")]
6291 ;; This instruction's pattern is generated directly by
6292 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6293 ;; need corresponding changes there.
;; Two-register permutes (TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2 via the
;; PERMUTE iterator).
6294 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
6295 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6296 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
6297 (match_operand:VALL_F16 2 "register_operand" "w")]
6300 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
6301 [(set_attr "type" "neon_permute<q>")]
6304 ;; This instruction's pattern is generated directly by
6305 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6306 ;; need corresponding changes there.  Note that the immediate (third)
6307 ;; operand is a lane index not a byte index.
;; EXT: extract a vector from a pair at a lane offset; the lane index
;; is scaled to a byte index when the assembly is emitted.
6308 (define_insn "aarch64_ext<mode>"
6309 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6310 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
6311 (match_operand:VALL_F16 2 "register_operand" "w")
6312 (match_operand:SI 3 "immediate_operand" "i")]
6316 operands[3] = GEN_INT (INTVAL (operands[3])
6317 * GET_MODE_UNIT_SIZE (<MODE>mode));
6318 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
6320 [(set_attr "type" "neon_ext<q>")]
6323 ;; This instruction's pattern is generated directly by
6324 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6325 ;; need corresponding changes there.
;; REV16/REV32/REV64 element reversal (REVERSE iterator).
6326 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
6327 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6328 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
6331 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
6332 [(set_attr "type" "neon_rev<q>")]
;; D-register STn stores, mirroring the ldN_dreg patterns: real
;; ST2/ST3/ST4 for the VD modes, ST1 of n "1d" registers for DI/DF (DX).
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
6335 (define_insn "aarch64_st2<mode>_dreg"
6336 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6337 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
6338 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6341 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
6342 [(set_attr "type" "neon_store2_2reg")]
6345 (define_insn "aarch64_st2<mode>_dreg"
6346 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6347 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
6348 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6351 "st1\\t{%S1.1d - %T1.1d}, %0"
6352 [(set_attr "type" "neon_store1_2reg")]
6355 (define_insn "aarch64_st3<mode>_dreg"
6356 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6357 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6358 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6361 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6362 [(set_attr "type" "neon_store3_3reg")]
6365 (define_insn "aarch64_st3<mode>_dreg"
6366 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6367 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6368 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6371 "st1\\t{%S1.1d - %U1.1d}, %0"
6372 [(set_attr "type" "neon_store1_3reg")]
6375 (define_insn "aarch64_st4<mode>_dreg"
6376 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6377 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6378 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6381 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
6382 [(set_attr "type" "neon_store4_4reg")]
6385 (define_insn "aarch64_st4<mode>_dreg"
6386 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6387 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6388 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6391 "st1\\t{%S1.1d - %V1.1d}, %0"
6392 [(set_attr "type" "neon_store1_4reg")]
;; D-register STn intrinsic expander: BLK MEM of nregs * 8 bytes,
;; forwarded to the _dreg insns above.
6395 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
6396 [(match_operand:DI 0 "register_operand")
6397 (match_operand:VSTRUCT 1 "register_operand")
6398 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6401 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6402 set_mem_size (mem, <VSTRUCT:nregs> * 8);
6404 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Q-register STn intrinsic expander.
6408 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
6409 [(match_operand:DI 0 "register_operand")
6410 (match_operand:VSTRUCT 1 "register_operand")
6411 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6414 machine_mode mode = <VSTRUCT:MODE>mode;
6415 rtx mem = gen_rtx_MEM (mode, operands[0]);
6417 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; vstN_lane intrinsic expander: BLK MEM of one n-element structure,
;; forwarded to the vec_store_lanes*_lane insn.
6421 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6422 [(match_operand:DI 0 "register_operand")
6423 (match_operand:VSTRUCT 1 "register_operand")
6424 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
6425 (match_operand:SI 2 "immediate_operand")]
6428 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6429 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6432 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6433 mem, operands[1], operands[2]));
;; vst1 intrinsic expander: big-endian uses the element-ordered
;; aarch64_be_st1 insn, little-endian is a plain move to memory.
6437 (define_expand "aarch64_st1<VALL_F16:mode>"
6438 [(match_operand:DI 0 "register_operand")
6439 (match_operand:VALL_F16 1 "register_operand")]
6442 machine_mode mode = <VALL_F16:MODE>mode;
6443 rtx mem = gen_rtx_MEM (mode, operands[0]);
6445 if (BYTES_BIG_ENDIAN)
6446 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
6448 emit_move_insn (mem, operands[1]);
6452 ;; Expander for builtins to insert vector registers into large
6453 ;; opaque integer modes.
6455 ;; Q-register list.  We don't need a D-reg inserter as we zero
6456 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; NOTE(review): interior source lines are missing from this extract
;; (line numbers jump) — verify against upstream aarch64-simd.md.
;; Copy the whole structure (op1 -> op0), then overwrite the Q-register
;; at slot `part` (offset part * 16 bytes) with operand 2.
6458 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
6459 [(match_operand:VSTRUCT 0 "register_operand")
6460 (match_operand:VSTRUCT 1 "register_operand")
6461 (match_operand:VQ 2 "register_operand")
6462 (match_operand:SI 3 "immediate_operand")]
6465 int part = INTVAL (operands[3]);
6466 int offset = part * 16;
6468 emit_move_insn (operands[0], operands[1]);
6469 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
6474 ;; Standard pattern name vec_init<mode><Vel>.
;; Build a vector from element values via aarch64_expand_vector_init.
6476 (define_expand "vec_init<mode><Vel>"
6477 [(match_operand:VALL_F16 0 "register_operand")
6478 (match_operand 1 "" "")]
6481 aarch64_expand_vector_init (operands[0], operands[1]);
;; Build a vector from two half-width vectors (Q modes only).
6485 (define_expand "vec_init<mode><Vhalf>"
6486 [(match_operand:VQ_NO2E 0 "register_operand")
6487 (match_operand 1 "" "")]
6490 aarch64_expand_vector_init (operands[0], operands[1]);
;; Load one element from memory and replicate it to every lane (LD1R).
6494 (define_insn "*aarch64_simd_ld1r<mode>"
6495 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6496 (vec_duplicate:VALL_F16
6497 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
6499 "ld1r\\t{%0.<Vtype>}, %1"
6500 [(set_attr "type" "neon_load1_all_lanes")]
;; LD1 x2: load two consecutive Q vectors into an OImode register pair.
;; The inner VQ unspec is only an iterator carrier (UNSPEC_VSTRUCTDUMMY).
6503 (define_insn "aarch64_simd_ld1<mode>_x2"
6504 [(set (match_operand:OI 0 "register_operand" "=w")
6505 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6506 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6509 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6510 [(set_attr "type" "neon_load1_2reg<q>")]
;; Same LD1 x2 shape for the D-register (VDC) element modes.
6513 (define_insn "aarch64_simd_ld1<mode>_x2"
6514 [(set (match_operand:OI 0 "register_operand" "=w")
6515 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6516 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6519 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6520 [(set_attr "type" "neon_load1_2reg<q>")]
;; Floating-point reciprocal estimate (FRECPE), vector and scalar forms.
6524 (define_insn "@aarch64_frecpe<mode>"
6525 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6527 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6530 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6531 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Floating-point reciprocal exponent (FRECPX), scalar only.
6534 (define_insn "aarch64_frecpx<mode>"
6535 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6536 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6539 "frecpx\t%<s>0, %<s>1"
6540 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
;; Floating-point reciprocal step (FRECPS), the Newton-Raphson
;; refinement companion to FRECPE.
6543 (define_insn "@aarch64_frecps<mode>"
6544 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6546 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6547 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6550 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6551 [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate (URECPE) on 32-bit lanes.
6554 (define_insn "aarch64_urecpe<mode>"
6555 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6556 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6559 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6560 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6562 ;; Standard pattern name vec_extract<mode><Vel>.
;; Extract the scalar element at immediate index operand 2 via the
;; get_lane pattern.
6564 (define_expand "vec_extract<mode><Vel>"
6565 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6566 (match_operand:VALL_F16 1 "register_operand")
6567 (match_operand:SI 2 "immediate_operand")]
6571 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6575 ;; Extract a 64-bit vector from one half of a 128-bit vector.
;; Only start indices 0 and nunits/2 select a proper half; other values
;; are rejected by the visible check.  A stepped parallel selects the
;; chosen contiguous half for aarch64_get_half.
6576 (define_expand "vec_extract<mode><Vhalf>"
6577 [(match_operand:<VHALF> 0 "register_operand")
6578 (match_operand:VQMOV_NO2E 1 "register_operand")
6579 (match_operand 2 "immediate_operand")]
6582 int start = INTVAL (operands[2]);
6583 if (start != 0 && start != <nunits> / 2)
6585 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
6586 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
6590 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
6591 (define_expand "vec_extractv2dfv1df"
6592 [(match_operand:V1DF 0 "register_operand")
6593 (match_operand:V2DF 1 "register_operand")
6594 (match_operand 2 "immediate_operand")]
6597 /* V1DF is rarely used by other patterns, so it should be better to hide
6598 it in a subreg destination of a normal DF op. */
6599 rtx scalar0 = gen_lowpart (DFmode, operands[0]);
6600 emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
;; AES single-round encrypt/decrypt (AESE/AESD).  Operand 1 is tied to
;; the output ("%0") and commutative with operand 2, matching the
;; instruction's read-modify-write state register.
6606 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6607 [(set (match_operand:V16QI 0 "register_operand" "=w")
6610 (match_operand:V16QI 1 "register_operand" "%0")
6611 (match_operand:V16QI 2 "register_operand" "w"))]
6613 "TARGET_SIMD && TARGET_AES"
6614 "aes<aes_op>\\t%0.16b, %2.16b"
6615 [(set_attr "type" "crypto_aese")]
;; AES mix-columns / inverse mix-columns (AESMC/AESIMC).
6618 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6619 [(set (match_operand:V16QI 0 "register_operand" "=w")
6620 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6622 "TARGET_SIMD && TARGET_AES"
6623 "aes<aesmc_op>\\t%0.16b, %1.16b"
6624 [(set_attr "type" "crypto_aesmc")]
6627 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6628 ;; and enforce the register dependency without scheduling or register
6629 ;; allocation messing up the order or introducing moves inbetween.
6630 ;; Mash the two together during combine.
;; Fused AESE+AESMC pair, emitted as two instructions (length 8) so the
;; CPU's macro-fusion sees them back-to-back.
6632 (define_insn "*aarch64_crypto_aese_fused"
6633 [(set (match_operand:V16QI 0 "register_operand" "=w")
6637 (match_operand:V16QI 1 "register_operand" "%0")
6638 (match_operand:V16QI 2 "register_operand" "w"))]
6641 "TARGET_SIMD && TARGET_AES
6642 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6643 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6644 [(set_attr "type" "crypto_aese")
6645 (set_attr "length" "8")]
6648 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6649 ;; and enforce the register dependency without scheduling or register
6650 ;; allocation messing up the order or introducing moves inbetween.
6651 ;; Mash the two together during combine.
;; Fused AESD+AESIMC pair; mirror of the encrypt case above.
6653 (define_insn "*aarch64_crypto_aesd_fused"
6654 [(set (match_operand:V16QI 0 "register_operand" "=w")
6658 (match_operand:V16QI 1 "register_operand" "%0")
6659 (match_operand:V16QI 2 "register_operand" "w"))]
6662 "TARGET_SIMD && TARGET_AES
6663 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6664 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6665 [(set_attr "type" "crypto_aese")
6666 (set_attr "length" "8")]
;; SHA1H fixed-rotate, plain SI-mode form.
6671 (define_insn "aarch64_crypto_sha1hsi"
6672 [(set (match_operand:SI 0 "register_operand" "=w")
6673 (unspec:SI [(match_operand:SI 1
6674 "register_operand" "w")]
6676 "TARGET_SIMD && TARGET_SHA2"
6678 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H taking lane 0 of a V4SI source — little-endian lane numbering,
;; hence the !BYTES_BIG_ENDIAN condition.
6681 (define_insn "aarch64_crypto_sha1hv4si"
6682 [(set (match_operand:SI 0 "register_operand" "=w")
6683 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6684 (parallel [(const_int 0)]))]
6686 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6688 [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian variant: the same architectural lane is index 3.
6691 (define_insn "aarch64_be_crypto_sha1hv4si"
6692 [(set (match_operand:SI 0 "register_operand" "=w")
6693 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6694 (parallel [(const_int 3)]))]
6696 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6698 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1 schedule update; operand 1 tied to the destination ("0").
6701 (define_insn "aarch64_crypto_sha1su1v4si"
6702 [(set (match_operand:V4SI 0 "register_operand" "=w")
6703 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6704 (match_operand:V4SI 2 "register_operand" "w")]
6706 "TARGET_SIMD && TARGET_SHA2"
6707 "sha1su1\\t%0.4s, %2.4s"
6708 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1M/SHA1P hash update (iterated via <sha1_op>); operand 2 is
;; the scalar e value (%s2), operand 3 the message schedule.
6711 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6712 [(set (match_operand:V4SI 0 "register_operand" "=w")
6713 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6714 (match_operand:SI 2 "register_operand" "w")
6715 (match_operand:V4SI 3 "register_operand" "w")]
6717 "TARGET_SIMD && TARGET_SHA2"
6718 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6719 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0 schedule update (three-input form).
6722 (define_insn "aarch64_crypto_sha1su0v4si"
6723 [(set (match_operand:V4SI 0 "register_operand" "=w")
6724 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6725 (match_operand:V4SI 2 "register_operand" "w")
6726 (match_operand:V4SI 3 "register_operand" "w")]
6728 "TARGET_SIMD && TARGET_SHA2"
6729 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6730 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256H/SHA256H2 hash update (iterated via <sha256_op>);
;; operand 1 is tied to the destination as the running hash state.
6735 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6736 [(set (match_operand:V4SI 0 "register_operand" "=w")
6737 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6738 (match_operand:V4SI 2 "register_operand" "w")
6739 (match_operand:V4SI 3 "register_operand" "w")]
6741 "TARGET_SIMD && TARGET_SHA2"
6742 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6743 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0 message schedule update (two-input form).
6746 (define_insn "aarch64_crypto_sha256su0v4si"
6747 [(set (match_operand:V4SI 0 "register_operand" "=w")
6748 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6749 (match_operand:V4SI 2 "register_operand" "w")]
6751 "TARGET_SIMD && TARGET_SHA2"
6752 "sha256su0\\t%0.4s, %2.4s"
6753 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1 message schedule update (three-input form).
6756 (define_insn "aarch64_crypto_sha256su1v4si"
6757 [(set (match_operand:V4SI 0 "register_operand" "=w")
6758 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6759 (match_operand:V4SI 2 "register_operand" "w")
6760 (match_operand:V4SI 3 "register_operand" "w")]
6762 "TARGET_SIMD && TARGET_SHA2"
6763 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6764 [(set_attr "type" "crypto_sha256_slow")]
;; SHA512H/SHA512H2 hash update on 64-bit lanes (requires SHA3 extension).
6769 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6770 [(set (match_operand:V2DI 0 "register_operand" "=w")
6771 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6772 (match_operand:V2DI 2 "register_operand" "w")
6773 (match_operand:V2DI 3 "register_operand" "w")]
6775 "TARGET_SIMD && TARGET_SHA3"
6776 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6777 [(set_attr "type" "crypto_sha512")]
;; SHA512SU0 message schedule update (two-input form).
6780 (define_insn "aarch64_crypto_sha512su0qv2di"
6781 [(set (match_operand:V2DI 0 "register_operand" "=w")
6782 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6783 (match_operand:V2DI 2 "register_operand" "w")]
6785 "TARGET_SIMD && TARGET_SHA3"
6786 "sha512su0\\t%0.2d, %2.2d"
6787 [(set_attr "type" "crypto_sha512")]
;; SHA512SU1 message schedule update (three-input form).
6790 (define_insn "aarch64_crypto_sha512su1qv2di"
6791 [(set (match_operand:V2DI 0 "register_operand" "=w")
6792 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6793 (match_operand:V2DI 2 "register_operand" "w")
6794 (match_operand:V2DI 3 "register_operand" "w")]
6796 "TARGET_SIMD && TARGET_SHA3"
6797 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6798 [(set_attr "type" "crypto_sha512")]
;; EOR3: three-way exclusive OR, 0 = 1 ^ 2 ^ 3 (SHA3 extension).
6803 (define_insn "eor3q<mode>4"
6804 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6807 (match_operand:VQ_I 2 "register_operand" "w")
6808 (match_operand:VQ_I 3 "register_operand" "w"))
6809 (match_operand:VQ_I 1 "register_operand" "w")))]
6810 "TARGET_SIMD && TARGET_SHA3"
6811 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6812 [(set_attr "type" "crypto_sha3")]
;; RAX1: rotate-and-XOR on 64-bit lanes (SHA3 extension).
6815 (define_insn "aarch64_rax1qv2di"
6816 [(set (match_operand:V2DI 0 "register_operand" "=w")
6819 (match_operand:V2DI 2 "register_operand" "w")
6821 (match_operand:V2DI 1 "register_operand" "w")))]
6822 "TARGET_SIMD && TARGET_SHA3"
6823 "rax1\\t%0.2d, %1.2d, %2.2d"
6824 [(set_attr "type" "crypto_sha3")]
;; XAR: XOR then rotate right by the immediate in operand 3.
;; Operands 1 and 2 are commutative ("%w").
6827 (define_insn "aarch64_xarqv2di"
6828 [(set (match_operand:V2DI 0 "register_operand" "=w")
6831 (match_operand:V2DI 1 "register_operand" "%w")
6832 (match_operand:V2DI 2 "register_operand" "w"))
6833 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6834 "TARGET_SIMD && TARGET_SHA3"
6835 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6836 [(set_attr "type" "crypto_sha3")]
;; BCAX: bit-clear and XOR, 0 = 1 ^ (2 & ~3) (SHA3 extension).
6839 (define_insn "bcaxq<mode>4"
6840 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6843 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6844 (match_operand:VQ_I 2 "register_operand" "w"))
6845 (match_operand:VQ_I 1 "register_operand" "w")))]
6846 "TARGET_SIMD && TARGET_SHA3"
6847 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6848 [(set_attr "type" "crypto_sha3")]
;; SM3SS1 rotating-XOR step (SM4 extension gate covers SM3 insns too).
6853 (define_insn "aarch64_sm3ss1qv4si"
6854 [(set (match_operand:V4SI 0 "register_operand" "=w")
6855 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6856 (match_operand:V4SI 2 "register_operand" "w")
6857 (match_operand:V4SI 3 "register_operand" "w")]
6859 "TARGET_SIMD && TARGET_SM4"
6860 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6861 [(set_attr "type" "crypto_sm3")]
;; SM3TT1A/1B/2A/2B (iterated via <sm3tt_op>); operand 4 is the 2-bit
;; immediate lane selector, operand 1 tied to the destination.
6865 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6866 [(set (match_operand:V4SI 0 "register_operand" "=w")
6867 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6868 (match_operand:V4SI 2 "register_operand" "w")
6869 (match_operand:V4SI 3 "register_operand" "w")
6870 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6872 "TARGET_SIMD && TARGET_SM4"
6873 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6874 [(set_attr "type" "crypto_sm3")]
;; SM3PARTW1/PARTW2 message expansion.
6877 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6878 [(set (match_operand:V4SI 0 "register_operand" "=w")
6879 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6880 (match_operand:V4SI 2 "register_operand" "w")
6881 (match_operand:V4SI 3 "register_operand" "w")]
6883 "TARGET_SIMD && TARGET_SM4"
6884 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6885 [(set_attr "type" "crypto_sm3")]
;; SM4E encryption round; operand 1 is the tied round state.
6890 (define_insn "aarch64_sm4eqv4si"
6891 [(set (match_operand:V4SI 0 "register_operand" "=w")
6892 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6893 (match_operand:V4SI 2 "register_operand" "w")]
6895 "TARGET_SIMD && TARGET_SM4"
6896 "sm4e\\t%0.4s, %2.4s"
6897 [(set_attr "type" "crypto_sm4")]
;; SM4EKEY round-key generation (not tied: both inputs plain "w").
6900 (define_insn "aarch64_sm4ekeyqv4si"
6901 [(set (match_operand:V4SI 0 "register_operand" "=w")
6902 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6903 (match_operand:V4SI 2 "register_operand" "w")]
6905 "TARGET_SIMD && TARGET_SM4"
6906 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6907 [(set_attr "type" "crypto_sm4")]
;; FMLAL/FMLSL: widening half-precision fused multiply-accumulate into
;; single-precision.  The builtin expanders below build lo/hi-half
;; selection parallels and hand off to the define_insns that follow.
;; _low selects the low halves (false), _high the high halves (true).
6912 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6913 [(set (match_operand:VDQSF 0 "register_operand")
6915 [(match_operand:VDQSF 1 "register_operand")
6916 (match_operand:<VFMLA_W> 2 "register_operand")
6917 (match_operand:<VFMLA_W> 3 "register_operand")]
6921 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6922 <nunits> * 2, false);
6923 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6924 <nunits> * 2, false);
6926 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
;; High-half counterpart of the expander above (true = hi half).
6935 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6936 [(set (match_operand:VDQSF 0 "register_operand")
6938 [(match_operand:VDQSF 1 "register_operand")
6939 (match_operand:<VFMLA_W> 2 "register_operand")
6940 (match_operand:<VFMLA_W> 3 "register_operand")]
6944 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6945 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6947 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
;; FMLAL (low halves): accumulate product of lo-half selections of
;; operands 2 and 3 into the tied accumulator operand 1.
6955 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6956 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6959 (vec_select:<VFMLA_SEL_W>
6960 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6961 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6963 (vec_select:<VFMLA_SEL_W>
6964 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6965 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6966 (match_operand:VDQSF 1 "register_operand" "0")))]
6968 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6969 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (low halves): as above but subtracting the product
;; (note the extra negation level in the RTL).
6972 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6973 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6977 (vec_select:<VFMLA_SEL_W>
6978 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6979 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6981 (vec_select:<VFMLA_SEL_W>
6982 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6983 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6984 (match_operand:VDQSF 1 "register_operand" "0")))]
6986 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6987 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (high halves).
6990 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6991 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6994 (vec_select:<VFMLA_SEL_W>
6995 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6996 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6998 (vec_select:<VFMLA_SEL_W>
6999 (match_operand:<VFMLA_W> 3 "register_operand" "w")
7000 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
7001 (match_operand:VDQSF 1 "register_operand" "0")))]
7003 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
7004 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (high halves).
7007 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
7008 [(set (match_operand:VDQSF 0 "register_operand" "=w")
7012 (vec_select:<VFMLA_SEL_W>
7013 (match_operand:<VFMLA_W> 2 "register_operand" "w")
7014 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
7016 (vec_select:<VFMLA_SEL_W>
7017 (match_operand:<VFMLA_W> 3 "register_operand" "w")
7018 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
7019 (match_operand:VDQSF 1 "register_operand" "0")))]
7021 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
7022 [(set_attr "type" "neon_fp_mul_s")]
;; Lane forms of FMLAL/FMLSL for V2SF results: one multiplicand is a
;; broadcast lane (2-bit index) of a V4HF register.  Expanders map the
;; user-visible lane number through aarch64_endian_lane_rtx.
7025 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
7026 [(set (match_operand:V2SF 0 "register_operand")
7027 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7028 (match_operand:V4HF 2 "register_operand")
7029 (match_operand:V4HF 3 "register_operand")
7030 (match_operand:SI 4 "aarch64_imm2")]
7034 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
7035 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7037 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
;; High-half lane expander (true = hi half).
7046 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
7047 [(set (match_operand:V2SF 0 "register_operand")
7048 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7049 (match_operand:V4HF 2 "register_operand")
7050 (match_operand:V4HF 3 "register_operand")
7051 (match_operand:SI 4 "aarch64_imm2")]
7055 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
7056 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7058 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
;; FMLAL lane, low half.  Operand 3 uses the "x" constraint (restricted
;; register range required by the by-element encoding).
7066 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
7067 [(set (match_operand:V2SF 0 "register_operand" "=w")
7071 (match_operand:V4HF 2 "register_operand" "w")
7072 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
7076 (match_operand:V4HF 3 "register_operand" "x")
7077 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7078 (match_operand:V2SF 1 "register_operand" "0")))]
7080 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
7081 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL lane, low half (subtracting form).
7084 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
7085 [(set (match_operand:V2SF 0 "register_operand" "=w")
7090 (match_operand:V4HF 2 "register_operand" "w")
7091 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
7095 (match_operand:V4HF 3 "register_operand" "x")
7096 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7097 (match_operand:V2SF 1 "register_operand" "0")))]
7099 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
7100 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 lane, high half.
7103 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
7104 [(set (match_operand:V2SF 0 "register_operand" "=w")
7108 (match_operand:V4HF 2 "register_operand" "w")
7109 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
7113 (match_operand:V4HF 3 "register_operand" "x")
7114 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7115 (match_operand:V2SF 1 "register_operand" "0")))]
7117 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
7118 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 lane, high half.
7121 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
7122 [(set (match_operand:V2SF 0 "register_operand" "=w")
7127 (match_operand:V4HF 2 "register_operand" "w")
7128 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
7132 (match_operand:V4HF 3 "register_operand" "x")
7133 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7134 (match_operand:V2SF 1 "register_operand" "0")))]
7136 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
7137 [(set_attr "type" "neon_fp_mul_s")]
;; Q-form laneq variants: V4SF result, V8HF sources, 3-bit lane index
;; selecting any of 8 half-precision lanes.
7140 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
7141 [(set (match_operand:V4SF 0 "register_operand")
7142 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7143 (match_operand:V8HF 2 "register_operand")
7144 (match_operand:V8HF 3 "register_operand")
7145 (match_operand:SI 4 "aarch64_lane_imm3")]
7149 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
7150 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
7152 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
;; High-half laneq expander.
7160 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
7161 [(set (match_operand:V4SF 0 "register_operand")
7162 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7163 (match_operand:V8HF 2 "register_operand")
7164 (match_operand:V8HF 3 "register_operand")
7165 (match_operand:SI 4 "aarch64_lane_imm3")]
7169 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
7170 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
7172 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL laneq, low half.
7180 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
7181 [(set (match_operand:V4SF 0 "register_operand" "=w")
7185 (match_operand:V8HF 2 "register_operand" "w")
7186 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
7190 (match_operand:V8HF 3 "register_operand" "x")
7191 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7192 (match_operand:V4SF 1 "register_operand" "0")))]
7194 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
7195 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL laneq, low half.
7198 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
7199 [(set (match_operand:V4SF 0 "register_operand" "=w")
7204 (match_operand:V8HF 2 "register_operand" "w")
7205 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
7209 (match_operand:V8HF 3 "register_operand" "x")
7210 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7211 (match_operand:V4SF 1 "register_operand" "0")))]
7213 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
7214 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 laneq, high half.
7217 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
7218 [(set (match_operand:V4SF 0 "register_operand" "=w")
7222 (match_operand:V8HF 2 "register_operand" "w")
7223 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
7227 (match_operand:V8HF 3 "register_operand" "x")
7228 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7229 (match_operand:V4SF 1 "register_operand" "0")))]
7231 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
7232 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 laneq, high half.
7235 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
7236 [(set (match_operand:V4SF 0 "register_operand" "=w")
7241 (match_operand:V8HF 2 "register_operand" "w")
7242 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
7246 (match_operand:V8HF 3 "register_operand" "x")
7247 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7248 (match_operand:V4SF 1 "register_operand" "0")))]
7250 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
7251 [(set_attr "type" "neon_fp_mul_s")]
;; Mixed-width laneq variants: V2SF result, V4HF vector multiplicand,
;; lane drawn from a full V8HF register (3-bit index).
7254 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
7255 [(set (match_operand:V2SF 0 "register_operand")
7256 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7257 (match_operand:V4HF 2 "register_operand")
7258 (match_operand:V8HF 3 "register_operand")
7259 (match_operand:SI 4 "aarch64_lane_imm3")]
7263 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
7264 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
7266 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; High-half expander of the same.
7275 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
7276 [(set (match_operand:V2SF 0 "register_operand")
7277 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7278 (match_operand:V4HF 2 "register_operand")
7279 (match_operand:V8HF 3 "register_operand")
7280 (match_operand:SI 4 "aarch64_lane_imm3")]
7284 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
7285 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
7287 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL laneq, low half.
7296 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
7297 [(set (match_operand:V2SF 0 "register_operand" "=w")
7301 (match_operand:V4HF 2 "register_operand" "w")
7302 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
7306 (match_operand:V8HF 3 "register_operand" "x")
7307 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7308 (match_operand:V2SF 1 "register_operand" "0")))]
7310 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
7311 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL laneq, low half.
7314 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
7315 [(set (match_operand:V2SF 0 "register_operand" "=w")
7320 (match_operand:V4HF 2 "register_operand" "w")
7321 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
7325 (match_operand:V8HF 3 "register_operand" "x")
7326 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7327 (match_operand:V2SF 1 "register_operand" "0")))]
7329 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
7330 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 laneq, high half.
7333 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
7334 [(set (match_operand:V2SF 0 "register_operand" "=w")
7338 (match_operand:V4HF 2 "register_operand" "w")
7339 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
7343 (match_operand:V8HF 3 "register_operand" "x")
7344 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7345 (match_operand:V2SF 1 "register_operand" "0")))]
7347 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
7348 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 laneq, high half.
7351 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
7352 [(set (match_operand:V2SF 0 "register_operand" "=w")
7357 (match_operand:V4HF 2 "register_operand" "w")
7358 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
7362 (match_operand:V8HF 3 "register_operand" "x")
7363 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7364 (match_operand:V2SF 1 "register_operand" "0")))]
7366 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
7367 [(set_attr "type" "neon_fp_mul_s")]
;; Q-form with a D-register lane source: V4SF result, V8HF vector
;; multiplicand, lane drawn from a V4HF register (2-bit index).
7370 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
7371 [(set (match_operand:V4SF 0 "register_operand")
7372 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7373 (match_operand:V8HF 2 "register_operand")
7374 (match_operand:V4HF 3 "register_operand")
7375 (match_operand:SI 4 "aarch64_imm2")]
7379 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
7380 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7382 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; High-half expander of the same.
7390 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
7391 [(set (match_operand:V4SF 0 "register_operand")
7392 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7393 (match_operand:V8HF 2 "register_operand")
7394 (match_operand:V4HF 3 "register_operand")
7395 (match_operand:SI 4 "aarch64_imm2")]
7399 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
7400 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7402 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL lane, low half.
7410 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
7411 [(set (match_operand:V4SF 0 "register_operand" "=w")
7415 (match_operand:V8HF 2 "register_operand" "w")
7416 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
7420 (match_operand:V4HF 3 "register_operand" "x")
7421 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7422 (match_operand:V4SF 1 "register_operand" "0")))]
7424 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
7425 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL lane, low half.
7428 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
7429 [(set (match_operand:V4SF 0 "register_operand" "=w")
7434 (match_operand:V8HF 2 "register_operand" "w")
7435 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
7439 (match_operand:V4HF 3 "register_operand" "x")
7440 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7441 (match_operand:V4SF 1 "register_operand" "0")))]
7443 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
7444 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 lane, high half.
7447 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
7448 [(set (match_operand:V4SF 0 "register_operand" "=w")
7452 (match_operand:V8HF 2 "register_operand" "w")
7453 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
7457 (match_operand:V4HF 3 "register_operand" "x")
7458 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7459 (match_operand:V4SF 1 "register_operand" "0")))]
7461 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
7462 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 lane, high half.
7465 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
7466 [(set (match_operand:V4SF 0 "register_operand" "=w")
7471 (match_operand:V8HF 2 "register_operand" "w")
7472 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
7476 (match_operand:V4HF 3 "register_operand" "x")
7477 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7478 (match_operand:V4SF 1 "register_operand" "0")))]
7480 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
7481 [(set_attr "type" "neon_fp_mul_s")]
;; Polynomial multiply long, 64x64 -> 128 bit (PMULL, D-register inputs).
7486 (define_insn "aarch64_crypto_pmulldi"
7487 [(set (match_operand:TI 0 "register_operand" "=w")
7488 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
7489 (match_operand:DI 2 "register_operand" "w")]
7491 "TARGET_SIMD && TARGET_AES"
7492 "pmull\\t%0.1q, %1.1d, %2.1d"
7493 [(set_attr "type" "crypto_pmull")]
;; PMULL2: same operation on the high 64-bit lanes of V2DI sources.
7496 (define_insn "aarch64_crypto_pmullv2di"
7497 [(set (match_operand:TI 0 "register_operand" "=w")
7498 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
7499 (match_operand:V2DI 2 "register_operand" "w")]
7501 "TARGET_SIMD && TARGET_AES"
7502 "pmull2\\t%0.1q, %1.2d, %2.2d"
7503 [(set_attr "type" "crypto_pmull")]
7506 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
;; SXTL/UXTL (iterated over ANY_EXTEND; <su> selects the mnemonic).
7507 (define_insn "<optab><Vnarrowq><mode>2"
7508 [(set (match_operand:VQN 0 "register_operand" "=w")
7509 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
7511 "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
7512 [(set_attr "type" "neon_shift_imm_long")]
;; Builtin-facing expander name for the extend pattern above.
7515 (define_expand "aarch64_<su>xtl<mode>"
7516 [(set (match_operand:VQN 0 "register_operand" "=w")
7517 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
;; Builtin-facing expander name for the narrowing truncate below.
7522 (define_expand "aarch64_xtn<mode>"
7523 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7524 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7529 ;; Truncate a 128-bit integer vector to a 64-bit vector.
7530 (define_insn "trunc<mode><Vnarrowq>2"
7531 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7532 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7534 "xtn\t%0.<Vntype>, %1.<Vtype>"
7535 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; XTN2: narrow into the high half of the destination while keeping the
;; tied low half (operand 1, constraint "0").  Little-endian places the
;; kept half first in the vec_concat ...
7538 (define_insn "aarch64_xtn2<mode>_le"
7539 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7540 (vec_concat:<VNARROWQ2>
7541 (match_operand:<VNARROWQ> 1 "register_operand" "0")
7542 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
7543 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
7544 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
7545 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; ... and big-endian swaps the vec_concat operand order.
7548 (define_insn "aarch64_xtn2<mode>_be"
7549 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7550 (vec_concat:<VNARROWQ2>
7551 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
7552 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7553 "TARGET_SIMD && BYTES_BIG_ENDIAN"
7554 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
7555 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Endian-dispatching expander for the two XTN2 insns above.
7558 (define_expand "aarch64_xtn2<mode>"
7559 [(match_operand:<VNARROWQ2> 0 "register_operand")
7560 (match_operand:<VNARROWQ> 1 "register_operand")
7561 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
7564 if (BYTES_BIG_ENDIAN)
7565 emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
7568 emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
7574 (define_insn "aarch64_bfdot<mode>"
7575 [(set (match_operand:VDQSF 0 "register_operand" "=w")
7578 [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
7579 (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
7581 (match_operand:VDQSF 1 "register_operand" "0")))]
7583 "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
7584 [(set_attr "type" "neon_dot<q>")]
;; BFDOT by-lane variant: operand 3 is a BF16 vector from which a
;; single 2x-BF16 element pair is selected by the immediate lane index
;; in operand 4.  The C output block remaps the lane number for
;; endianness: nunits is the BF16 element count of operand 3's mode,
;; and the index addresses pairs of elements, hence nunits / 2.
7587 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
7588 [(set (match_operand:VDQSF 0 "register_operand" "=w")
7591 [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
7592 (match_operand:VBF 3 "register_operand" "w")
7593 (match_operand:SI 4 "const_int_operand" "n")]
7595 (match_operand:VDQSF 1 "register_operand" "0")))]
;; Remap the lane index for big-endian lane numbering before printing.
7598 int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
7599 int lane = INTVAL (operands[4]);
7600 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
7601 return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
7603 [(set_attr "type" "neon_dot<VDQSF:q>")]
7606 ;; vget_low/high_bf16
;; vget_low_bf16: extract the low 64-bit half (elements 0-3) of a
;; V8BF vector into a V4BF result.  Builds a PARALLEL selecting the
;; low half (third argument "false" = low) and reuses the generic
;; aarch64_get_half insn.
7607 (define_expand "aarch64_vget_lo_halfv8bf"
7608 [(match_operand:V4BF 0 "register_operand")
7609 (match_operand:V8BF 1 "register_operand")]
7612 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
7613 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
;; vget_high_bf16: same as aarch64_vget_lo_halfv8bf but selects the
;; high 64-bit half (elements 4-7); third argument "true" = high.
7617 (define_expand "aarch64_vget_hi_halfv8bf"
7618 [(match_operand:V4BF 0 "register_operand")
7619 (match_operand:V8BF 1 "register_operand")]
7622 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
7623 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
;; BFMMLA: BFloat16 matrix multiply-accumulate into a V4SF
;; accumulator.  Operand 1 is the accumulator, tied to the destination;
;; operands 2 and 3 are the V8BF input matrices.
;; NOTE(review): the unspec name and insn condition lines are elided in
;; this extract (numbering skips 7633-7634).
7628 (define_insn "aarch64_bfmmlaqv4sf"
7629 [(set (match_operand:V4SF 0 "register_operand" "=w")
7630 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
7631 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7632 (match_operand:V8BF 3 "register_operand" "w")]
7635 "bfmmla\\t%0.4s, %2.8h, %3.8h"
7636 [(set_attr "type" "neon_fp_mla_s_q")]
;; BFMLALB/BFMLALT (selected by the <bt> iterator): BFloat16 widening
;; multiply-add of the even (B) or odd (T) BF16 elements into a V4SF
;; accumulator.  Operand 1 is the accumulator, tied to operand 0.
;; NOTE(review): the unspec name and insn condition lines are elided in
;; this extract (numbering skips 7645-7646).
7640 (define_insn "aarch64_bfmlal<bt>v4sf"
7641 [(set (match_operand:V4SF 0 "register_operand" "=w")
7642 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7643 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7644 (match_operand:V8BF 3 "register_operand" "w")]
7647 "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
7648 [(set_attr "type" "neon_fp_mla_s_q")]
;; BFMLALB/BFMLALT by-element variant: operand 3 supplies a BF16
;; vector and operand 4 an immediate lane index selecting one element.
;; The C block remaps the lane index for endianness before printing.
7651 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
7652 [(set (match_operand:V4SF 0 "register_operand" "=w")
7653 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7654 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7655 (match_operand:VBF 3 "register_operand" "w")
7656 (match_operand:SI 4 "const_int_operand" "n")]
;; Convert the architectural lane number to the memory-order lane
;; number expected on big-endian.
7660 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
7661 return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
7663 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
7666 ;; 8-bit integer matrix multiply-accumulate
;; SMMLA/UMMLA/USMMLA (selected by the MATMUL iterator via <sur>):
;; 8-bit integer matrix multiply-accumulate.  Operands 2 and 3 are the
;; V16QI inputs; operand 1 is the V4SI accumulator tied to operand 0.
;; NOTE(review): the (plus:V4SI wrapper line and insn condition are
;; elided in this extract (numbering skips 7669, 7673).
7667 (define_insn "aarch64_simd_<sur>mmlav16qi"
7668 [(set (match_operand:V4SI 0 "register_operand" "=w")
7670 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
7671 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
7672 (match_operand:V4SI 1 "register_operand" "0")))]
7674 "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
7675 [(set_attr "type" "neon_mla_s_q")]
;; BFCVTN: convert four SF elements to BF16, writing the low half of
;; the destination (V4SF_TO_BF covers the destination mode variants).
;; NOTE(review): the unspec name and insn condition lines are elided in
;; this extract (numbering skips 7682-7683).
7679 (define_insn "aarch64_bfcvtn<q><mode>"
7680 [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
7681 (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
7684 "bfcvtn\\t%0.4h, %1.4s"
7685 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; BFCVTN2: convert four SF elements (operand 2) to BF16 and insert
;; them into the high half of the destination; operand 1 carries the
;; existing low half and is tied to operand 0.
7688 (define_insn "aarch64_bfcvtn2v8bf"
7689 [(set (match_operand:V8BF 0 "register_operand" "=w")
7690 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
7691 (match_operand:V4SF 2 "register_operand" "w")]
7694 "bfcvtn2\\t%0.8h, %2.4s"
7695 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; BFCVT (scalar): convert a single SF value to BF16.
;; NOTE(review): the unspec name, insn condition, and output template
;; lines are elided in this extract (numbering skips 7701-7703).
7698 (define_insn "aarch64_bfcvtbf"
7699 [(set (match_operand:BF 0 "register_operand" "=w")
7700 (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
7704 [(set_attr "type" "f_cvt")]
7707 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
;; Widen BF16 to SF by a left shift of 16: SHLL moves each BF16
;; element into the high 16 bits of a 32-bit lane, which is exactly
;; the SF representation of that BF16 value (low mantissa bits zero).
7708 (define_insn "aarch64_vbfcvt<mode>"
7709 [(set (match_operand:V4SF 0 "register_operand" "=w")
7710 (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
7713 "shll\\t%0.4s, %1.4h, #16"
7714 [(set_attr "type" "neon_shift_imm_long")]
;; As aarch64_vbfcvt<mode>, but widens the high four BF16 elements of
;; a V8BF source using SHLL2.
7717 (define_insn "aarch64_vbfcvt_highv8bf"
7718 [(set (match_operand:V4SF 0 "register_operand" "=w")
7719 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
7722 "shll2\\t%0.4s, %1.8h, #16"
7723 [(set_attr "type" "neon_shift_imm_long")]
;; Scalar BF16 -> SF conversion via a 64-bit left shift of 16, placing
;; the BF16 bits in the top of the 32-bit SF value (lossless widening).
7726 (define_insn "aarch64_bfcvtsf"
7727 [(set (match_operand:SF 0 "register_operand" "=w")
7728 (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
7731 "shl\\t%d0, %d1, #16"
7732 [(set_attr "type" "neon_shift_imm")]