aarch64: Use canonical RTL for sqdmlal patterns
gcc/config/aarch64/aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
23 (match_operand:VALL_F16MOV 1 "general_operand"))]
24 "TARGET_SIMD"
25 "
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 ;; If the mode is 8 bytes wide, then we will be doing a
31 ;; normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
38 "
39 )
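
;; As an illustrative sketch (C, not part of this file), the zero-store
;; case the guard above keeps open lets a 16-byte clear avoid
;; materialising a SIMD zero register:
;;
;;   #include <string.h>
;;   void zero16 (void *p)
;;   {
;;     memset (p, 0, 16);   /* can become: stp xzr, xzr, [x0] */
;;   }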
40
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand")
43 (match_operand:VALL 1 "general_operand"))]
44 "TARGET_SIMD && !STRICT_ALIGNMENT"
45 {
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
52 })
53
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56 (vec_duplicate:VDQ_I
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58 "TARGET_SIMD"
59 "@
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
63 )
64
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
69 "TARGET_SIMD"
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
72 )
73
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
77 (vec_select:<VEL>
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80 )))]
81 "TARGET_SIMD"
82 {
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85 }
86 [(set_attr "type" "neon_dup<q>")]
87 )
88
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
92 (vec_select:<VEL>
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95 )))]
96 "TARGET_SIMD"
97 {
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100 }
101 [(set_attr "type" "neon_dup<q>")]
102 )
103
104 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
105 [(set (match_operand:VDMOV 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VDMOV 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
109 "TARGET_SIMD
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
112 {
113 switch (which_alternative)
114 {
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
122 case 7:
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
125 }
126 }
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
130 )
131
132 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
133 [(set (match_operand:VQMOV 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQMOV 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
137 "TARGET_SIMD
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
140 {
141 switch (which_alternative)
142 {
143 case 0:
144 return "ldr\t%q0, %1";
145 case 1:
146 return "stp\txzr, xzr, %0";
147 case 2:
148 return "str\t%q1, %0";
149 case 3:
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151 case 4:
152 case 5:
153 case 6:
154 return "#";
155 case 7:
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
157 default:
158 gcc_unreachable ();
159 }
160 }
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
165 )
166
167 ;; When storing lane zero we can use the normal STR and its more permissive
168 ;; addressing modes.
169
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
174 "TARGET_SIMD
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
178 )
179
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
185 "TARGET_SIMD
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
190 "ldp\\t%d0, %d2, %z1"
191 [(set_attr "type" "neon_ldp")]
192 )
193
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
199 "TARGET_SIMD
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
204 "stp\\t%d1, %d3, %z0"
205 [(set_attr "type" "neon_stp")]
206 )
207
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
213 "TARGET_SIMD
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
218 "ldp\\t%q0, %q2, %z1"
219 [(set_attr "type" "neon_ldp_q")]
220 )
221
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "stp\\t%q1, %q3, %z0"
232 [(set_attr "type" "neon_stp_q")]
233 )
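
;; A sketch of the kind of code these pairing patterns serve
;; (illustrative C only; adjacent accesses can be fused into a single
;; pair instruction):
;;
;;   #include <arm_neon.h>
;;   void copy32 (int32x4_t *dst, const int32x4_t *src)
;;   {
;;     dst[0] = src[0];   /* together these can become:           */
;;     dst[1] = src[1];   /*   ldp q0, q1, [x1]; stp q0, q1, [x0] */
;;   }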
234
235
236 (define_split
237 [(set (match_operand:VQMOV 0 "register_operand" "")
238 (match_operand:VQMOV 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
242 [(const_int 0)]
243 {
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
245 DONE;
246 })
247
248 (define_split
249 [(set (match_operand:VQMOV 0 "register_operand" "")
250 (match_operand:VQMOV 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
254 [(const_int 0)]
255 {
256 aarch64_split_simd_move (operands[0], operands[1]);
257 DONE;
258 })
259
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQMOV 0)
262 (match_operand:VQMOV 1))]
263 "TARGET_SIMD"
264 {
265 rtx dst = operands[0];
266 rtx src = operands[1];
267
268 if (GP_REGNUM_P (REGNO (src)))
269 {
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
272
273 emit_insn
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
275 emit_insn
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
277 }
278
279 else
280 {
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
286 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
287 }
288 DONE;
289 }
290 )
291
292 (define_expand "aarch64_get_half<mode>"
293 [(set (match_operand:<VHALF> 0 "register_operand")
294 (vec_select:<VHALF>
295 (match_operand:VQMOV 1 "register_operand")
296 (match_operand 2 "ascending_int_parallel")))]
297 "TARGET_SIMD"
298 )
299
300 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
301 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
302 (vec_select:<VHALF>
303 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
304 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
305 "TARGET_SIMD"
306 "@
307 #
308 umov\t%0, %1.d[0]"
309 "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
310 [(set (match_dup 0) (match_dup 1))]
311 {
312 operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
313 }
314 [(set_attr "type" "mov_reg,neon_to_gp<q>")
315 (set_attr "length" "4")]
316 )
317
318 (define_insn "aarch64_simd_mov_from_<mode>high"
319 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
320 (vec_select:<VHALF>
321 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
322 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
323 "TARGET_SIMD"
324 "@
325 dup\\t%d0, %1.d[1]
326 umov\t%0, %1.d[1]"
327 [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
328 (set_attr "length" "4")]
329 )
330
331 (define_insn "orn<mode>3"
332 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
333 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
334 (match_operand:VDQ_I 2 "register_operand" "w")))]
335 "TARGET_SIMD"
336 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
337 [(set_attr "type" "neon_logic<q>")]
338 )
339
340 (define_insn "bic<mode>3"
341 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
342 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
343 (match_operand:VDQ_I 2 "register_operand" "w")))]
344 "TARGET_SIMD"
345 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
346 [(set_attr "type" "neon_logic<q>")]
347 )
348
349 (define_insn "add<mode>3"
350 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
351 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
352 (match_operand:VDQ_I 2 "register_operand" "w")))]
353 "TARGET_SIMD"
354 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
355 [(set_attr "type" "neon_add<q>")]
356 )
357
358 (define_insn "sub<mode>3"
359 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
360 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
361 (match_operand:VDQ_I 2 "register_operand" "w")))]
362 "TARGET_SIMD"
363 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
364 [(set_attr "type" "neon_sub<q>")]
365 )
366
367 (define_insn "mul<mode>3"
368 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
369 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
370 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
371 "TARGET_SIMD"
372 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
373 [(set_attr "type" "neon_mul_<Vetype><q>")]
374 )
375
376 (define_insn "bswap<mode>2"
377 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
378 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
379 "TARGET_SIMD"
380 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
381 [(set_attr "type" "neon_rev<q>")]
382 )
383
384 (define_insn "aarch64_rbit<mode>"
385 [(set (match_operand:VB 0 "register_operand" "=w")
386 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
387 UNSPEC_RBIT))]
388 "TARGET_SIMD"
389 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
390 [(set_attr "type" "neon_rbit")]
391 )
392
393 (define_expand "ctz<mode>2"
394 [(set (match_operand:VS 0 "register_operand")
395 (ctz:VS (match_operand:VS 1 "register_operand")))]
396 "TARGET_SIMD"
397 {
398 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
399 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
400 <MODE>mode, 0);
401 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
402 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
403 DONE;
404 }
405 )
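
;; The expansion above computes CTZ as CLZ of the bit-reversed input:
;; the element-wise byte swap followed by a per-byte RBIT reverses all
;; the bits of each element.  A scalar sketch of the identity
;; (illustrative C only):
;;
;;   unsigned ctz32 (unsigned x)        /* assumes x != 0 */
;;   {
;;     unsigned r = 0;
;;     for (int i = 0; i < 32; i++)     /* reverse the bits of x */
;;       r |= ((x >> i) & 1u) << (31 - i);
;;     return __builtin_clz (r);        /* == __builtin_ctz (x) */
;;   }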
406
407 (define_expand "xorsign<mode>3"
408 [(match_operand:VHSDF 0 "register_operand")
409 (match_operand:VHSDF 1 "register_operand")
410 (match_operand:VHSDF 2 "register_operand")]
411 "TARGET_SIMD"
412 {
413
414 machine_mode imode = <V_INT_EQUIV>mode;
415 rtx v_bitmask = gen_reg_rtx (imode);
416 rtx op1x = gen_reg_rtx (imode);
417 rtx op2x = gen_reg_rtx (imode);
418
419 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
420 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
421
422 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
423
424 emit_move_insn (v_bitmask,
425 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
426 HOST_WIDE_INT_M1U << bits));
427
428 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
429 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
430 emit_move_insn (operands[0],
431 lowpart_subreg (<MODE>mode, op1x, imode));
432 DONE;
433 }
434 )
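
;; The expansion above XORs the sign bits of operand 2 into operand 1
;; on the integer view of the vectors.  A scalar sketch (illustrative
;; C, not part of this file):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float xorsignf (float a, float b)
;;   {
;;     uint32_t ua, ub;
;;     memcpy (&ua, &a, 4);
;;     memcpy (&ub, &b, 4);
;;     ua ^= ub & 0x80000000u;   /* flip a's sign where b is negative */
;;     memcpy (&a, &ua, 4);
;;     return a;
;;   }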
435
436 ;; The fcadd and fcmla patterns use UNSPECs explicitly because their use
437 ;; needs to guarantee that the source vectors are contiguous.  It would be
438 ;; wrong to describe the operation without also being able to describe the
439 ;; permute that is required, but even if that were done the permute would
440 ;; have been created as a LOAD_LANES, which means the values in the
441 ;; registers would be in the wrong order.
442 (define_insn "aarch64_fcadd<rot><mode>"
443 [(set (match_operand:VHSDF 0 "register_operand" "=w")
444 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
445 (match_operand:VHSDF 2 "register_operand" "w")]
446 FCADD))]
447 "TARGET_COMPLEX"
448 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
449 [(set_attr "type" "neon_fcadd")]
450 )
451
452 (define_expand "cadd<rot><mode>3"
453 [(set (match_operand:VHSDF 0 "register_operand")
454 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
455 (match_operand:VHSDF 2 "register_operand")]
456 FCADD))]
457 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
458 )
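
;; For reference, FCADD adds operand 2 rotated by #rot degrees in the
;; complex plane.  Per { real, imag } element pair this is (a sketch,
;; following the architectural definition):
;;
;;   /* rot = 90:  */  res.re = a.re - b.im;  res.im = a.im + b.re;
;;   /* rot = 270: */  res.re = a.re + b.im;  res.im = a.im - b.re;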
459
460 (define_insn "aarch64_fcmla<rot><mode>"
461 [(set (match_operand:VHSDF 0 "register_operand" "=w")
462 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
463 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
464 (match_operand:VHSDF 3 "register_operand" "w")]
465 FCMLA)))]
466 "TARGET_COMPLEX"
467 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
468 [(set_attr "type" "neon_fcmla")]
469 )
470
471
472 (define_insn "aarch64_fcmla_lane<rot><mode>"
473 [(set (match_operand:VHSDF 0 "register_operand" "=w")
474 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
475 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
476 (match_operand:VHSDF 3 "register_operand" "w")
477 (match_operand:SI 4 "const_int_operand" "n")]
478 FCMLA)))]
479 "TARGET_COMPLEX"
480 {
481 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
482 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
483 }
484 [(set_attr "type" "neon_fcmla")]
485 )
486
487 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
488 [(set (match_operand:V4HF 0 "register_operand" "=w")
489 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
490 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
491 (match_operand:V8HF 3 "register_operand" "w")
492 (match_operand:SI 4 "const_int_operand" "n")]
493 FCMLA)))]
494 "TARGET_COMPLEX"
495 {
496 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
497 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
498 }
499 [(set_attr "type" "neon_fcmla")]
500 )
501
502 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
503 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
504 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
505 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
506 (match_operand:<VHALF> 3 "register_operand" "w")
507 (match_operand:SI 4 "const_int_operand" "n")]
508 FCMLA)))]
509 "TARGET_COMPLEX"
510 {
511 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
512 operands[4]
513 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
514 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
515 }
516 [(set_attr "type" "neon_fcmla")]
517 )
518
519 ;; The complex mla/mls operations always need to expand to two instructions.
520 ;; The first operation does half the computation and the second does the
521 ;; remainder. Because of this, expand early.
522 (define_expand "cml<fcmac1><conj_op><mode>4"
523 [(set (match_operand:VHSDF 0 "register_operand")
524 (plus:VHSDF (match_operand:VHSDF 1 "register_operand")
525 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
526 (match_operand:VHSDF 3 "register_operand")]
527 FCMLA_OP)))]
528 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
529 {
530 rtx tmp = gen_reg_rtx (<MODE>mode);
531 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1],
532 operands[3], operands[2]));
533 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
534 operands[3], operands[2]));
535 DONE;
536 })
537
538 ;; The complex mul operations always need to expand to two instructions.
539 ;; The first operation does half the computation and the second does the
540 ;; remainder. Because of this, expand early.
541 (define_expand "cmul<conj_op><mode>3"
542 [(set (match_operand:VHSDF 0 "register_operand")
543 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
544 (match_operand:VHSDF 2 "register_operand")]
545 FCMUL_OP))]
546 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
547 {
548 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
549 rtx res1 = gen_reg_rtx (<MODE>mode);
550 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
551 operands[2], operands[1]));
552 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
553 operands[2], operands[1]));
554 DONE;
555 })
556
557 ;; These instructions map to the __builtins for the Dot Product operations.
558 (define_insn "aarch64_<sur>dot<vsi2qi>"
559 [(set (match_operand:VS 0 "register_operand" "=w")
560 (plus:VS (match_operand:VS 1 "register_operand" "0")
561 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
562 (match_operand:<VSI2QI> 3 "register_operand" "w")]
563 DOTPROD)))]
564 "TARGET_DOTPROD"
565 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
566 [(set_attr "type" "neon_dot<q>")]
567 )
568
569 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot
570 ;; (vector) Dot Product operation.
571 (define_insn "aarch64_usdot<vsi2qi>"
572 [(set (match_operand:VS 0 "register_operand" "=w")
573 (plus:VS
574 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
575 (match_operand:<VSI2QI> 3 "register_operand" "w")]
576 UNSPEC_USDOT)
577 (match_operand:VS 1 "register_operand" "0")))]
578 "TARGET_I8MM"
579 "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
580 [(set_attr "type" "neon_dot<q>")]
581 )
582
583 ;; These expands map to the Dot Product optab the vectorizer checks for.
584 ;; The auto-vectorizer expects a dot product builtin that also does an
585 ;; accumulation into the provided register.
586 ;; Given the following pattern
587 ;;
588 ;; for (i=0; i<len; i++) {
589 ;; c = a[i] * b[i];
590 ;; r += c;
591 ;; }
592 ;; return r;
593 ;;
594 ;; This can be auto-vectorized to
595 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
596 ;;
597 ;; given enough iterations.  However, the vectorizer can keep unrolling the loop:
598 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
599 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
600 ;; ...
601 ;;
602 ;; and so the vectorizer provides r, in which the result has to be accumulated.
603 (define_expand "<sur>dot_prod<vsi2qi>"
604 [(set (match_operand:VS 0 "register_operand")
605 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
606 (match_operand:<VSI2QI> 2 "register_operand")]
607 DOTPROD)
608 (match_operand:VS 3 "register_operand")))]
609 "TARGET_DOTPROD"
610 {
611 emit_insn (
612 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
613 operands[2]));
614 emit_insn (gen_rtx_SET (operands[0], operands[3]));
615 DONE;
616 })
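
;; As a sketch, the per-lane semantics of the accumulating dot product
;; for a V4SI result are (illustrative C only):
;;
;;   for (int i = 0; i < 4; i++)
;;     r[i] += a[4*i + 0] * b[4*i + 0]
;;           + a[4*i + 1] * b[4*i + 1]
;;           + a[4*i + 2] * b[4*i + 2]
;;           + a[4*i + 3] * b[4*i + 3];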
617
618 ;; These instructions map to the __builtins for the Dot Product
619 ;; indexed operations.
620 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
621 [(set (match_operand:VS 0 "register_operand" "=w")
622 (plus:VS (match_operand:VS 1 "register_operand" "0")
623 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
624 (match_operand:V8QI 3 "register_operand" "<h_con>")
625 (match_operand:SI 4 "immediate_operand" "i")]
626 DOTPROD)))]
627 "TARGET_DOTPROD"
628 {
629 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
630 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
631 }
632 [(set_attr "type" "neon_dot<q>")]
633 )
634
635 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
636 [(set (match_operand:VS 0 "register_operand" "=w")
637 (plus:VS (match_operand:VS 1 "register_operand" "0")
638 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
639 (match_operand:V16QI 3 "register_operand" "<h_con>")
640 (match_operand:SI 4 "immediate_operand" "i")]
641 DOTPROD)))]
642 "TARGET_DOTPROD"
643 {
644 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
645 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
646 }
647 [(set_attr "type" "neon_dot<q>")]
648 )
649
650 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
651 ;; (by element) Dot Product operations.
652 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
653 [(set (match_operand:VS 0 "register_operand" "=w")
654 (plus:VS
655 (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
656 (match_operand:VB 3 "register_operand" "w")
657 (match_operand:SI 4 "immediate_operand" "i")]
658 DOTPROD_I8MM)
659 (match_operand:VS 1 "register_operand" "0")))]
660 "TARGET_I8MM"
661 {
662 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
663 int lane = INTVAL (operands[4]);
664 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
665 return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
666 }
667 [(set_attr "type" "neon_dot<VS:q>")]
668 )
669
670 (define_expand "copysign<mode>3"
671 [(match_operand:VHSDF 0 "register_operand")
672 (match_operand:VHSDF 1 "register_operand")
673 (match_operand:VHSDF 2 "register_operand")]
674 "TARGET_FLOAT && TARGET_SIMD"
675 {
676 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
677 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
678
679 emit_move_insn (v_bitmask,
680 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
681 HOST_WIDE_INT_M1U << bits));
682 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
683 operands[2], operands[1]));
684 DONE;
685 }
686 )
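
;; The BSL above computes (mask & op2) | (~mask & op1) with the mask
;; covering only the sign bit, i.e. copysign.  A scalar sketch on the
;; bit pattern (illustrative C, not part of this file):
;;
;;   #include <stdint.h>
;;   uint32_t copysign_bits (uint32_t a, uint32_t b)
;;   {
;;     uint32_t mask = 0x80000000u;       /* sign bit only */
;;     return (b & mask) | (a & ~mask);   /* b's sign, a's magnitude */
;;   }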
687
688 (define_insn "*aarch64_mul3_elt<mode>"
689 [(set (match_operand:VMUL 0 "register_operand" "=w")
690 (mult:VMUL
691 (vec_duplicate:VMUL
692 (vec_select:<VEL>
693 (match_operand:VMUL 1 "register_operand" "<h_con>")
694 (parallel [(match_operand:SI 2 "immediate_operand")])))
695 (match_operand:VMUL 3 "register_operand" "w")))]
696 "TARGET_SIMD"
697 {
698 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
699 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
700 }
701 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
702 )
703
704 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
705 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
706 (mult:VMUL_CHANGE_NLANES
707 (vec_duplicate:VMUL_CHANGE_NLANES
708 (vec_select:<VEL>
709 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
710 (parallel [(match_operand:SI 2 "immediate_operand")])))
711 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
712 "TARGET_SIMD"
713 {
714 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
715 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
716 }
717 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
718 )
719
720 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
721 [(set (match_operand:VMUL 0 "register_operand" "=w")
722 (mult:VMUL
723 (vec_duplicate:VMUL
724 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
725 (match_operand:VMUL 2 "register_operand" "w")))]
726 "TARGET_SIMD"
727 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
728 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
729 )
730
731 (define_insn "@aarch64_rsqrte<mode>"
732 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
733 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
734 UNSPEC_RSQRTE))]
735 "TARGET_SIMD"
736 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
737 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
738
739 (define_insn "@aarch64_rsqrts<mode>"
740 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
741 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
742 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
743 UNSPEC_RSQRTS))]
744 "TARGET_SIMD"
745 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
746 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
747
748 (define_expand "rsqrt<mode>2"
749 [(set (match_operand:VALLF 0 "register_operand")
750 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
751 UNSPEC_RSQRT))]
752 "TARGET_SIMD"
753 {
754 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
755 DONE;
756 })
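
;; aarch64_emit_approx_sqrt refines an FRSQRTE estimate with FRSQRTS
;; steps.  A sketch of the iteration, with frsqrte/frsqrts as
;; stand-ins for the instructions (FRSQRTS computes (3 - a*b) / 2):
;;
;;   float rsqrtf (float a)
;;   {
;;     float x = frsqrte (a);           /* initial estimate */
;;     for (int i = 0; i < 2; i++)      /* step count depends on mode */
;;       x = x * frsqrts (a, x * x);    /* Newton: x *= (3 - a*x*x)/2 */
;;     return x;
;;   }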
757
758 (define_insn "*aarch64_mul3_elt_to_64v2df"
759 [(set (match_operand:DF 0 "register_operand" "=w")
760 (mult:DF
761 (vec_select:DF
762 (match_operand:V2DF 1 "register_operand" "w")
763 (parallel [(match_operand:SI 2 "immediate_operand")]))
764 (match_operand:DF 3 "register_operand" "w")))]
765 "TARGET_SIMD"
766 {
767 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
768 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
769 }
770 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
771 )
772
773 (define_insn "neg<mode>2"
774 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
775 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
776 "TARGET_SIMD"
777 "neg\t%0.<Vtype>, %1.<Vtype>"
778 [(set_attr "type" "neon_neg<q>")]
779 )
780
781 (define_insn "abs<mode>2"
782 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
783 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
784 "TARGET_SIMD"
785 "abs\t%0.<Vtype>, %1.<Vtype>"
786 [(set_attr "type" "neon_abs<q>")]
787 )
788
789 ;; The intrinsic version of integer ABS must not be allowed to
790 ;; combine with any operation that has an integrated ABS step, such
791 ;; as SABD.
792 (define_insn "aarch64_abs<mode>"
793 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
794 (unspec:VSDQ_I_DI
795 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
796 UNSPEC_ABS))]
797 "TARGET_SIMD"
798 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
799 [(set_attr "type" "neon_abs<q>")]
800 )
801
802 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
803 ;; This isn't accurate as ABS always treats its input as a signed value.
804 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64,
805 ;; whereas SABD would return 192 (-64 signed) for the same example.
806 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
807 (define_insn "aarch64_<su>abd<mode>"
808 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
809 (minus:VDQ_BHSI
810 (USMAX:VDQ_BHSI
811 (match_operand:VDQ_BHSI 1 "register_operand" "w")
812 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
813 (<max_opp>:VDQ_BHSI
814 (match_dup 1)
815 (match_dup 2))))]
816 "TARGET_SIMD"
817 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
818 [(set_attr "type" "neon_abd<q>")]
819 )
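
;; A scalar sketch of both forms for the example above (illustrative
;; C; the QImode wrap-around is shown with int8_t):
;;
;;   #include <stdint.h>
;;   int8_t abs_minus (int8_t a, int8_t b)   /* ABS (MINUS a b): wrong */
;;   {
;;     int8_t d = a - b;        /* 64 - -128 == 192, wraps to -64 */
;;     return d < 0 ? -d : d;   /* yields 64 */
;;   }
;;   uint8_t max_min (int8_t a, int8_t b)    /* max - min: correct */
;;   {
;;     return (a > b ? a : b) - (a > b ? b : a);   /* yields 192 */
;;   }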
820
821 (define_insn "aarch64_<sur>abdl2<mode>_3"
822 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
823 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
824 (match_operand:VDQV_S 2 "register_operand" "w")]
825 ABDL2))]
826 "TARGET_SIMD"
827 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
828 [(set_attr "type" "neon_abd<q>")]
829 )
830
831 (define_insn "aarch64_<sur>abal<mode>_4"
832 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
833 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
834 (match_operand:VDQV_S 2 "register_operand" "w")
835 (match_operand:<VDBLW> 3 "register_operand" "0")]
836 ABAL))]
837 "TARGET_SIMD"
838 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
839 [(set_attr "type" "neon_arith_acc<q>")]
840 )
841
842 (define_insn "aarch64_<sur>adalp<mode>"
843 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
844 (unspec:<VDBLW> [(match_operand:VDQV_S 2 "register_operand" "w")
845 (match_operand:<VDBLW> 1 "register_operand" "0")]
846 ADALP))]
847 "TARGET_SIMD"
848 "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
849 [(set_attr "type" "neon_reduc_add<q>")]
850 )
851
852 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
853 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
854 ;; reduction of the difference into a V4SI vector and accumulate that into
855 ;; operand 3 before copying that into the result operand 0.
856 ;; Perform that with a sequence of:
857 ;; UABDL2 tmp.8h, op1.16b, op2.16b
858 ;; UABAL tmp.8h, op1.16b, op2.16b
859 ;; UADALP op3.4s, tmp.8h
860 ;; MOV op0, op3 // should be eliminated in later passes.
861 ;;
862 ;; For TARGET_DOTPROD we do:
863 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
864 ;; UABD tmp2.16b, op1.16b, op2.16b
865 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
866 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
867 ;;
868 ;; The signed version just uses the signed variants of the above instructions
869 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
870 ;; unsigned.
871
872 (define_expand "<sur>sadv16qi"
873 [(use (match_operand:V4SI 0 "register_operand"))
874 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
875 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
876 (use (match_operand:V4SI 3 "register_operand"))]
877 "TARGET_SIMD"
878 {
879 if (TARGET_DOTPROD)
880 {
881 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
882 rtx abd = gen_reg_rtx (V16QImode);
883 emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
884 emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
885 abd, ones));
886 DONE;
887 }
888 rtx reduc = gen_reg_rtx (V8HImode);
889 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
890 operands[2]));
891 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
892 operands[2], reduc));
893 emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
894 emit_move_insn (operands[0], operands[3]);
895 DONE;
896 }
897 )
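
;; The overall optab semantics, as a sketch (illustrative C only; the
;; grouping of lanes in the widening reduction is not significant
;; because the result feeds a further reduction):
;;
;;   for (int i = 0; i < 16; i++)
;;     tmp[i] = abs (a[i] - b[i]);   /* absolute differences */
;;   for (int i = 0; i < 4; i++)     /* widen and accumulate */
;;     op0[i] = op3[i] + tmp[4*i] + tmp[4*i+1] + tmp[4*i+2] + tmp[4*i+3];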
898
899 (define_insn "aarch64_<su>aba<mode>"
900 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
901 (plus:VDQ_BHSI (minus:VDQ_BHSI
902 (USMAX:VDQ_BHSI
903 (match_operand:VDQ_BHSI 2 "register_operand" "w")
904 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
905 (<max_opp>:VDQ_BHSI
906 (match_dup 2)
907 (match_dup 3)))
908 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
909 "TARGET_SIMD"
910 "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
911 [(set_attr "type" "neon_arith_acc<q>")]
912 )
913
914 (define_insn "fabd<mode>3"
915 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
916 (abs:VHSDF_HSDF
917 (minus:VHSDF_HSDF
918 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
919 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
920 "TARGET_SIMD"
921 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
922 [(set_attr "type" "neon_fp_abd_<stype><q>")]
923 )
924
925 ;; For AND (vector, register) and BIC (vector, immediate)
926 (define_insn "and<mode>3"
927 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
928 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
929 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
930 "TARGET_SIMD"
931 {
932 switch (which_alternative)
933 {
934 case 0:
935 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
936 case 1:
937 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
938 AARCH64_CHECK_BIC);
939 default:
940 gcc_unreachable ();
941 }
942 }
943 [(set_attr "type" "neon_logic<q>")]
944 )
945
946 ;; For ORR (vector, register) and ORR (vector, immediate)
947 (define_insn "ior<mode>3"
948 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
949 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
950 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
951 "TARGET_SIMD"
952 {
953 switch (which_alternative)
954 {
955 case 0:
956 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
957 case 1:
958 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
959 AARCH64_CHECK_ORR);
960 default:
961 gcc_unreachable ();
962 }
963 }
964 [(set_attr "type" "neon_logic<q>")]
965 )
966
967 (define_insn "xor<mode>3"
968 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
969 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
970 (match_operand:VDQ_I 2 "register_operand" "w")))]
971 "TARGET_SIMD"
972 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
973 [(set_attr "type" "neon_logic<q>")]
974 )
975
976 (define_insn "one_cmpl<mode>2"
977 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
978 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
979 "TARGET_SIMD"
980 "not\t%0.<Vbtype>, %1.<Vbtype>"
981 [(set_attr "type" "neon_logic<q>")]
982 )
983
984 (define_insn "aarch64_simd_vec_set<mode>"
985 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
986 (vec_merge:VALL_F16
987 (vec_duplicate:VALL_F16
988 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
989 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
990 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
991 "TARGET_SIMD"
992 {
993 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
994 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
995 switch (which_alternative)
996 {
997 case 0:
998 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
999 case 1:
1000 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1001 case 2:
1002 return "ld1\\t{%0.<Vetype>}[%p2], %1";
1003 default:
1004 gcc_unreachable ();
1005 }
1006 }
1007 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
1008 )
1009
1010 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1011 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1012 (vec_merge:VALL_F16
1013 (vec_duplicate:VALL_F16
1014 (vec_select:<VEL>
1015 (match_operand:VALL_F16 3 "register_operand" "w")
1016 (parallel
1017 [(match_operand:SI 4 "immediate_operand" "i")])))
1018 (match_operand:VALL_F16 1 "register_operand" "0")
1019 (match_operand:SI 2 "immediate_operand" "i")))]
1020 "TARGET_SIMD"
1021 {
1022 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1023 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1024 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1025
1026 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1027 }
1028 [(set_attr "type" "neon_ins<q>")]
1029 )
1030
1031 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1032 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1033 (vec_merge:VALL_F16_NO_V2Q
1034 (vec_duplicate:VALL_F16_NO_V2Q
1035 (vec_select:<VEL>
1036 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1037 (parallel
1038 [(match_operand:SI 4 "immediate_operand" "i")])))
1039 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1040 (match_operand:SI 2 "immediate_operand" "i")))]
1041 "TARGET_SIMD"
1042 {
1043 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1044 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1045 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1046 INTVAL (operands[4]));
1047
1048 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1049 }
1050 [(set_attr "type" "neon_ins<q>")]
1051 )
1052
1053 (define_expand "signbit<mode>2"
1054 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1055 (use (match_operand:VDQSF 1 "register_operand"))]
1056 "TARGET_SIMD"
1057 {
1058 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1059 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1060 shift_amount);
1061 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1062
1063 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1064 shift_vector));
1065 DONE;
1066 })
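
;; The expansion reduces signbit to a logical shift right by
;; element-size - 1 on the integer view of the vector.  A scalar
;; sketch (illustrative C, not part of this file):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   uint32_t signbitf (float x)
;;   {
;;     uint32_t u;
;;     memcpy (&u, &x, 4);
;;     return u >> 31;   /* 1 for negative values, including -0.0f */
;;   }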
1067
1068 (define_insn "aarch64_simd_lshr<mode>"
1069 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1070 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1071 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1072 "TARGET_SIMD"
1073 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1074 [(set_attr "type" "neon_shift_imm<q>")]
1075 )
1076
1077 (define_insn "aarch64_simd_ashr<mode>"
1078 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1079 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1080 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1081 "TARGET_SIMD"
1082 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1083 [(set_attr "type" "neon_shift_imm<q>")]
1084 )
1085
1086 (define_insn "*aarch64_simd_sra<mode>"
1087 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1088 (plus:VDQ_I
1089 (SHIFTRT:VDQ_I
1090 (match_operand:VDQ_I 1 "register_operand" "w")
1091 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1092 (match_operand:VDQ_I 3 "register_operand" "0")))]
1093 "TARGET_SIMD"
1094 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1095 [(set_attr "type" "neon_shift_acc<q>")]
1096 )
1097
1098 (define_insn "aarch64_simd_imm_shl<mode>"
1099 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1100 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1101 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
1102 "TARGET_SIMD"
1103 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1104 [(set_attr "type" "neon_shift_imm<q>")]
1105 )
1106
1107 (define_insn "aarch64_simd_reg_sshl<mode>"
1108 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1109 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1110 (match_operand:VDQ_I 2 "register_operand" "w")))]
1111 "TARGET_SIMD"
1112 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1113 [(set_attr "type" "neon_shift_reg<q>")]
1114 )
1115
1116 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1117 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1118 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1119 (match_operand:VDQ_I 2 "register_operand" "w")]
1120 UNSPEC_ASHIFT_UNSIGNED))]
1121 "TARGET_SIMD"
1122 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1123 [(set_attr "type" "neon_shift_reg<q>")]
1124 )
1125
1126 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1127 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1128 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1129 (match_operand:VDQ_I 2 "register_operand" "w")]
1130 UNSPEC_ASHIFT_SIGNED))]
1131 "TARGET_SIMD"
1132 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1133 [(set_attr "type" "neon_shift_reg<q>")]
1134 )
1135
1136 (define_expand "ashl<mode>3"
1137 [(match_operand:VDQ_I 0 "register_operand")
1138 (match_operand:VDQ_I 1 "register_operand")
1139 (match_operand:SI 2 "general_operand")]
1140 "TARGET_SIMD"
1141 {
1142 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1143 int shift_amount;
1144
1145 if (CONST_INT_P (operands[2]))
1146 {
1147 shift_amount = INTVAL (operands[2]);
1148 if (shift_amount >= 0 && shift_amount < bit_width)
1149 {
1150 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1151 shift_amount);
1152 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1153 operands[1],
1154 tmp));
1155 DONE;
1156 }
1157 }
1158
1159 operands[2] = force_reg (SImode, operands[2]);
1160
1161 rtx tmp = gen_reg_rtx (<MODE>mode);
1162 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1163 operands[2],
1164 0)));
1165 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1166 DONE;
1167 })
1168
1169 (define_expand "lshr<mode>3"
1170 [(match_operand:VDQ_I 0 "register_operand")
1171 (match_operand:VDQ_I 1 "register_operand")
1172 (match_operand:SI 2 "general_operand")]
1173 "TARGET_SIMD"
1174 {
1175 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1176 int shift_amount;
1177
1178 if (CONST_INT_P (operands[2]))
1179 {
1180 shift_amount = INTVAL (operands[2]);
1181 if (shift_amount > 0 && shift_amount <= bit_width)
1182 {
1183 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1184 shift_amount);
1185 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1186 operands[1],
1187 tmp));
1188 DONE;
1189 }
1190 }
1191
1192 operands[2] = force_reg (SImode, operands[2]);
1193
1194 rtx tmp = gen_reg_rtx (SImode);
1195 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1196 emit_insn (gen_negsi2 (tmp, operands[2]));
1197 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1198 convert_to_mode (<VEL>mode, tmp, 0)));
1199 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1200 tmp1));
1201 DONE;
1202 })
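
;; Note how the variable-count case negates the shift amount: USHL by
;; a negative per-lane count shifts right.  A sketch, with ushl as a
;; stand-in for the instruction (0 < n < element width):
;;
;;   uint32_t lshr_var (uint32_t x, int n)
;;   {
;;     return ushl (x, -n);   /* USHL by a negative count = shift right */
;;   }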
1203
1204 (define_expand "ashr<mode>3"
1205 [(match_operand:VDQ_I 0 "register_operand")
1206 (match_operand:VDQ_I 1 "register_operand")
1207 (match_operand:SI 2 "general_operand")]
1208 "TARGET_SIMD"
1209 {
1210 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1211 int shift_amount;
1212
1213 if (CONST_INT_P (operands[2]))
1214 {
1215 shift_amount = INTVAL (operands[2]);
1216 if (shift_amount > 0 && shift_amount <= bit_width)
1217 {
1218 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1219 shift_amount);
1220 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1221 operands[1],
1222 tmp));
1223 DONE;
1224 }
1225 }
1226
1227 operands[2] = force_reg (SImode, operands[2]);
1228
1229 rtx tmp = gen_reg_rtx (SImode);
1230 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1231 emit_insn (gen_negsi2 (tmp, operands[2]));
1232 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1233 tmp, 0)));
1234 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1235 tmp1));
1236 DONE;
1237 })
1238
1239 (define_expand "vashl<mode>3"
1240 [(match_operand:VDQ_I 0 "register_operand")
1241 (match_operand:VDQ_I 1 "register_operand")
1242 (match_operand:VDQ_I 2 "register_operand")]
1243 "TARGET_SIMD"
1244 {
1245 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1246 operands[2]));
1247 DONE;
1248 })
1249
1250 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1251 ;; Negating individual lanes most certainly offsets the
1252 ;; gain from vectorization.
1253 (define_expand "vashr<mode>3"
1254 [(match_operand:VDQ_BHSI 0 "register_operand")
1255 (match_operand:VDQ_BHSI 1 "register_operand")
1256 (match_operand:VDQ_BHSI 2 "register_operand")]
1257 "TARGET_SIMD"
1258 {
1259 rtx neg = gen_reg_rtx (<MODE>mode);
1260 emit (gen_neg<mode>2 (neg, operands[2]));
1261 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1262 neg));
1263 DONE;
1264 })
1265
1266 ;; DI vector shift
1267 (define_expand "aarch64_ashr_simddi"
1268 [(match_operand:DI 0 "register_operand")
1269 (match_operand:DI 1 "register_operand")
1270 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1271 "TARGET_SIMD"
1272 {
1273 /* An arithmetic shift right by 64 fills the result with copies of the sign
1274 bit, just like asr by 63; however, the standard pattern does not handle
1275 a shift by 64. */
1276 if (INTVAL (operands[2]) == 64)
1277 operands[2] = GEN_INT (63);
1278 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1279 DONE;
1280 }
1281 )
1282
1283 (define_expand "vlshr<mode>3"
1284 [(match_operand:VDQ_BHSI 0 "register_operand")
1285 (match_operand:VDQ_BHSI 1 "register_operand")
1286 (match_operand:VDQ_BHSI 2 "register_operand")]
1287 "TARGET_SIMD"
1288 {
1289 rtx neg = gen_reg_rtx (<MODE>mode);
1290 emit (gen_neg<mode>2 (neg, operands[2]));
1291 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1292 neg));
1293 DONE;
1294 })
1295
1296 (define_expand "aarch64_lshr_simddi"
1297 [(match_operand:DI 0 "register_operand")
1298 (match_operand:DI 1 "register_operand")
1299 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1300 "TARGET_SIMD"
1301 {
1302 if (INTVAL (operands[2]) == 64)
1303 emit_move_insn (operands[0], const0_rtx);
1304 else
1305 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1306 DONE;
1307 }
1308 )
1309
1310 ;; For 64-bit modes we use shl/ushr, as this does not require a SIMD zero.
1311 (define_insn "vec_shr_<mode>"
1312 [(set (match_operand:VD 0 "register_operand" "=w")
1313 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1314 (match_operand:SI 2 "immediate_operand" "i")]
1315 UNSPEC_VEC_SHR))]
1316 "TARGET_SIMD"
1317 {
1318 if (BYTES_BIG_ENDIAN)
1319 return "shl %d0, %d1, %2";
1320 else
1321 return "ushr %d0, %d1, %2";
1322 }
1323 [(set_attr "type" "neon_shift_imm")]
1324 )
1325
1326 (define_expand "vec_set<mode>"
1327 [(match_operand:VALL_F16 0 "register_operand")
1328 (match_operand:<VEL> 1 "register_operand")
1329 (match_operand:SI 2 "immediate_operand")]
1330 "TARGET_SIMD"
1331 {
1332 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1333 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1334 GEN_INT (elem), operands[0]));
1335 DONE;
1336 }
1337 )
1338
1339
1340 (define_insn "aarch64_mla<mode>"
1341 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1342 (plus:VDQ_BHSI (mult:VDQ_BHSI
1343 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1344 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1345 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1346 "TARGET_SIMD"
1347 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1348 [(set_attr "type" "neon_mla_<Vetype><q>")]
1349 )
1350
1351 (define_insn "*aarch64_mla_elt<mode>"
1352 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1353 (plus:VDQHS
1354 (mult:VDQHS
1355 (vec_duplicate:VDQHS
1356 (vec_select:<VEL>
1357 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1358 (parallel [(match_operand:SI 2 "immediate_operand")])))
1359 (match_operand:VDQHS 3 "register_operand" "w"))
1360 (match_operand:VDQHS 4 "register_operand" "0")))]
1361 "TARGET_SIMD"
1362 {
1363 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1364 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1365 }
1366 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1367 )
1368
1369 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1370 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1371 (plus:VDQHS
1372 (mult:VDQHS
1373 (vec_duplicate:VDQHS
1374 (vec_select:<VEL>
1375 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1376 (parallel [(match_operand:SI 2 "immediate_operand")])))
1377 (match_operand:VDQHS 3 "register_operand" "w"))
1378 (match_operand:VDQHS 4 "register_operand" "0")))]
1379 "TARGET_SIMD"
1380 {
1381 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1382 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1383 }
1384 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1385 )
1386
1387 (define_insn "*aarch64_mla_elt_merge<mode>"
1388 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1389 (plus:VDQHS
1390 (mult:VDQHS (vec_duplicate:VDQHS
1391 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1392 (match_operand:VDQHS 2 "register_operand" "w"))
1393 (match_operand:VDQHS 3 "register_operand" "0")))]
1394 "TARGET_SIMD"
1395 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1396 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1397 )
1398
1399 (define_insn "aarch64_mls<mode>"
1400 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1401 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1402 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1403 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1404 "TARGET_SIMD"
1405 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1406 [(set_attr "type" "neon_mla_<Vetype><q>")]
1407 )
1408
1409 (define_insn "*aarch64_mls_elt<mode>"
1410 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1411 (minus:VDQHS
1412 (match_operand:VDQHS 4 "register_operand" "0")
1413 (mult:VDQHS
1414 (vec_duplicate:VDQHS
1415 (vec_select:<VEL>
1416 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1417 (parallel [(match_operand:SI 2 "immediate_operand")])))
1418 (match_operand:VDQHS 3 "register_operand" "w"))))]
1419 "TARGET_SIMD"
1420 {
1421 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1422 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1423 }
1424 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1425 )
1426
1427 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1428 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1429 (minus:VDQHS
1430 (match_operand:VDQHS 4 "register_operand" "0")
1431 (mult:VDQHS
1432 (vec_duplicate:VDQHS
1433 (vec_select:<VEL>
1434 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1435 (parallel [(match_operand:SI 2 "immediate_operand")])))
1436 (match_operand:VDQHS 3 "register_operand" "w"))))]
1437 "TARGET_SIMD"
1438 {
1439 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1440 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1441 }
1442 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1443 )
1444
1445 (define_insn "*aarch64_mls_elt_merge<mode>"
1446 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1447 (minus:VDQHS
1448 (match_operand:VDQHS 1 "register_operand" "0")
1449 (mult:VDQHS (vec_duplicate:VDQHS
1450 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1451 (match_operand:VDQHS 3 "register_operand" "w"))))]
1452 "TARGET_SIMD"
1453 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1454 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1455 )
1456
1457 ;; Max/Min operations.
1458 (define_insn "<su><maxmin><mode>3"
1459 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1460 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1461 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1462 "TARGET_SIMD"
1463 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1464 [(set_attr "type" "neon_minmax<q>")]
1465 )
1466
1467 (define_expand "<su><maxmin>v2di3"
1468 [(set (match_operand:V2DI 0 "register_operand")
1469 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1470 (match_operand:V2DI 2 "register_operand")))]
1471 "TARGET_SIMD"
1472 {
1473 enum rtx_code cmp_operator;
1474 rtx cmp_fmt;
1475
1476 switch (<CODE>)
1477 {
1478 case UMIN:
1479 cmp_operator = LTU;
1480 break;
1481 case SMIN:
1482 cmp_operator = LT;
1483 break;
1484 case UMAX:
1485 cmp_operator = GTU;
1486 break;
1487 case SMAX:
1488 cmp_operator = GT;
1489 break;
1490 default:
1491 gcc_unreachable ();
1492 }
1493
1494 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1495 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1496 operands[2], cmp_fmt, operands[1], operands[2]));
1497 DONE;
1498 })
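
;; AdvSIMD has no 64-bit integer SMAX/SMIN/UMAX/UMIN instructions, so
;; the expansion above selects lanes through a comparison and vcond.
;; Per lane this is simply (a sketch in C):
;;
;;   int64_t smax64 (int64_t a, int64_t b)
;;   {
;;     return a > b ? a : b;   /* becomes cmgt + bsl on vectors */
;;   }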
1499
1500 ;; Pairwise Integer Max/Min operations.
1501 (define_insn "aarch64_<maxmin_uns>p<mode>"
1502 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1503 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1504 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1505 MAXMINV))]
1506 "TARGET_SIMD"
1507 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1508 [(set_attr "type" "neon_minmax<q>")]
1509 )
1510
1511 ;; Pairwise FP Max/Min operations.
1512 (define_insn "aarch64_<maxmin_uns>p<mode>"
1513 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1514 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1515 (match_operand:VHSDF 2 "register_operand" "w")]
1516 FMAXMINV))]
1517 "TARGET_SIMD"
1518 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1519 [(set_attr "type" "neon_minmax<q>")]
1520 )
1521
1522 ;; vec_concat gives a new vector with the low elements from operand 1, and
1523 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1524 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1525 ;; What that means is that the RTL descriptions of the below patterns
1526 ;; need to change depending on endianness.
1527
1528 ;; Move to the low architectural bits of the register.
1529 ;; On little-endian this is { operand, zeroes }
1530 ;; On big-endian this is { zeroes, operand }
1531
1532 (define_insn "move_lo_quad_internal_<mode>"
1533 [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1534 (vec_concat:VQMOV_NO2E
1535 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1536 (vec_duplicate:<VHALF> (const_int 0))))]
1537 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1538 "@
1539 dup\\t%d0, %1.d[0]
1540 fmov\\t%d0, %1
1541 dup\\t%d0, %1"
1542 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1543 (set_attr "length" "4")
1544 (set_attr "arch" "simd,fp,simd")]
1545 )
1546
1547 (define_insn "move_lo_quad_internal_<mode>"
1548 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1549 (vec_concat:VQ_2E
1550 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1551 (const_int 0)))]
1552 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1553 "@
1554 dup\\t%d0, %1.d[0]
1555 fmov\\t%d0, %1
1556 dup\\t%d0, %1"
1557 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1558 (set_attr "length" "4")
1559 (set_attr "arch" "simd,fp,simd")]
1560 )
1561
1562 (define_insn "move_lo_quad_internal_be_<mode>"
1563 [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1564 (vec_concat:VQMOV_NO2E
1565 (vec_duplicate:<VHALF> (const_int 0))
1566 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1567 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1568 "@
1569 dup\\t%d0, %1.d[0]
1570 fmov\\t%d0, %1
1571 dup\\t%d0, %1"
1572 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1573 (set_attr "length" "4")
1574 (set_attr "arch" "simd,fp,simd")]
1575 )
1576
1577 (define_insn "move_lo_quad_internal_be_<mode>"
1578 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1579 (vec_concat:VQ_2E
1580 (const_int 0)
1581 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1582 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1583 "@
1584 dup\\t%d0, %1.d[0]
1585 fmov\\t%d0, %1
1586 dup\\t%d0, %1"
1587 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1588 (set_attr "length" "4")
1589 (set_attr "arch" "simd,fp,simd")]
1590 )
1591
1592 (define_expand "move_lo_quad_<mode>"
1593 [(match_operand:VQMOV 0 "register_operand")
1594 (match_operand:VQMOV 1 "register_operand")]
1595 "TARGET_SIMD"
1596 {
1597 if (BYTES_BIG_ENDIAN)
1598 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1599 else
1600 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1601 DONE;
1602 }
1603 )
1604
1605 ;; Move operand 1 to the high architectural bits of the register, keeping
1606 ;; the low architectural bits from operand 0.
1607 ;; For little-endian this is { low half of operand 0, operand 1 }
1608 ;; For big-endian this is { operand 1, low half of operand 0 }
1609
1610 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1611 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1612 (vec_concat:VQMOV
1613 (vec_select:<VHALF>
1614 (match_dup 0)
1615 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1616 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1617 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1618 "@
1619 ins\\t%0.d[1], %1.d[0]
1620 ins\\t%0.d[1], %1"
1621 [(set_attr "type" "neon_ins")]
1622 )
1623
1624 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1625 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1626 (vec_concat:VQMOV
1627 (match_operand:<VHALF> 1 "register_operand" "w,r")
1628 (vec_select:<VHALF>
1629 (match_dup 0)
1630 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1631 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1632 "@
1633 ins\\t%0.d[1], %1.d[0]
1634 ins\\t%0.d[1], %1"
1635 [(set_attr "type" "neon_ins")]
1636 )
1637
1638 (define_expand "move_hi_quad_<mode>"
1639 [(match_operand:VQMOV 0 "register_operand")
1640 (match_operand:<VHALF> 1 "register_operand")]
1641 "TARGET_SIMD"
1642 {
1643 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1644 if (BYTES_BIG_ENDIAN)
1645 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1646 operands[1], p));
1647 else
1648 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1649 operands[1], p));
1650 DONE;
1651 })
1652
1653 ;; Narrowing operations.
1654
1655 ;; For doubles.
1656 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1657 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1658 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1659 "TARGET_SIMD"
1660 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1661 [(set_attr "type" "neon_shift_imm_narrow_q")]
1662 )
1663
1664 (define_expand "vec_pack_trunc_<mode>"
1665 [(match_operand:<VNARROWD> 0 "register_operand")
1666 (match_operand:VDN 1 "register_operand")
1667 (match_operand:VDN 2 "register_operand")]
1668 "TARGET_SIMD"
1669 {
1670 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1671 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1672 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1673
1674 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1675 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1676 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1677 DONE;
1678 })
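
;; E.g. packing two V2SI vectors { a, b } and { c, d } into V4HI goes via
;; a V4SI temporary: move_lo_quad and move_hi_quad (with the lo/hi operand
;; swap above accounting for endianness) build { a, b, c, d }, and a single
;; xtn then narrows every lane, giving { (HI) a, (HI) b, (HI) c, (HI) d }.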
1679
1680 ;; For quads.
1681
1682 (define_insn "vec_pack_trunc_<mode>"
1683 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1684 (vec_concat:<VNARROWQ2>
1685 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1686 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1687 "TARGET_SIMD"
1688 {
1689 if (BYTES_BIG_ENDIAN)
1690 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1691 else
1692 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1693 }
1694 [(set_attr "type" "multiple")
1695 (set_attr "length" "8")]
1696 )
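
;; E.g. on little-endian, packing two V4SI vectors into a V8HI result:
;;   xtn   v0.4h, v1.4s    (low  half <- narrowed operand 1)
;;   xtn2  v0.8h, v2.4s    (high half <- narrowed operand 2)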
1697
1698 ;; Widening operations.
1699
1700 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1701 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1702 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1703 (match_operand:VQW 1 "register_operand" "w")
1704 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1705 )))]
1706 "TARGET_SIMD"
1707 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1708 [(set_attr "type" "neon_shift_imm_long")]
1709 )
1710
1711 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1712 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1713 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1714 (match_operand:VQW 1 "register_operand" "w")
1715 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1716 )))]
1717 "TARGET_SIMD"
1718 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1719 [(set_attr "type" "neon_shift_imm_long")]
1720 )
1721
1722 (define_expand "vec_unpack<su>_hi_<mode>"
1723 [(match_operand:<VWIDE> 0 "register_operand")
1724 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1725 "TARGET_SIMD"
1726 {
1727 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1728 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1729 operands[1], p));
1730 DONE;
1731 }
1732 )
1733
1734 (define_expand "vec_unpack<su>_lo_<mode>"
1735 [(match_operand:<VWIDE> 0 "register_operand")
1736 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1737 "TARGET_SIMD"
1738 {
1739 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1740 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1741 operands[1], p));
1742 DONE;
1743 }
1744 )
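
;; E.g. vec_unpacks_lo_v8hi emits "sxtl v0.4s, v1.4h", sign-extending the
;; four low-half HImode elements to SImode; vec_unpacku_lo_v8hi emits the
;; zero-extending uxtl instead.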
1745
1746 ;; Widening arithmetic.
1747
1748 (define_insn "*aarch64_<su>mlal_lo<mode>"
1749 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1750 (plus:<VWIDE>
1751 (mult:<VWIDE>
1752 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1753 (match_operand:VQW 2 "register_operand" "w")
1754 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1755 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1756 (match_operand:VQW 4 "register_operand" "w")
1757 (match_dup 3))))
1758 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1759 "TARGET_SIMD"
1760 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1761 [(set_attr "type" "neon_mla_<Vetype>_long")]
1762 )
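
;; E.g. for V8HI inputs, smlal computes, for each low-half index i in 0..3:
;;   op0[i] = op1[i] + (SI) op2[i] * (SI) op4[i]
;; i.e. a sign-extending multiply of the low vector halves plus an
;; accumulator; umlal is the zero-extending form.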
1763
1764 (define_insn "*aarch64_<su>mlal_hi<mode>"
1765 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1766 (plus:<VWIDE>
1767 (mult:<VWIDE>
1768 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1769 (match_operand:VQW 2 "register_operand" "w")
1770 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1771 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772 (match_operand:VQW 4 "register_operand" "w")
1773 (match_dup 3))))
1774 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1775 "TARGET_SIMD"
1776 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1777 [(set_attr "type" "neon_mla_<Vetype>_long")]
1778 )
1779
1780 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1781 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1782 (minus:<VWIDE>
1783 (match_operand:<VWIDE> 1 "register_operand" "0")
1784 (mult:<VWIDE>
1785 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1786 (match_operand:VQW 2 "register_operand" "w")
1787 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1788 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1789 (match_operand:VQW 4 "register_operand" "w")
1790 (match_dup 3))))))]
1791 "TARGET_SIMD"
1792 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1793 [(set_attr "type" "neon_mla_<Vetype>_long")]
1794 )
1795
1796 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
1797 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1798 (minus:<VWIDE>
1799 (match_operand:<VWIDE> 1 "register_operand" "0")
1800 (mult:<VWIDE>
1801 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802 (match_operand:VQW 2 "register_operand" "w")
1803 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1804 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1805 (match_operand:VQW 4 "register_operand" "w")
1806 (match_dup 3))))))]
1807 "TARGET_SIMD"
1808 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1809 [(set_attr "type" "neon_mla_<Vetype>_long")]
1810 )
1811
1812 (define_expand "aarch64_<su>mlsl_hi<mode>"
1813 [(match_operand:<VWIDE> 0 "register_operand")
1814 (match_operand:<VWIDE> 1 "register_operand")
1815 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))
1816 (match_operand:VQW 3 "register_operand")]
1817 "TARGET_SIMD"
1818 {
1819 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1820 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
1821 operands[2], p, operands[3]));
1822 DONE;
1823 }
1824 )
1825
1826 (define_insn "*aarch64_<su>mlal<mode>"
1827 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1828 (plus:<VWIDE>
1829 (mult:<VWIDE>
1830 (ANY_EXTEND:<VWIDE>
1831 (match_operand:VD_BHSI 1 "register_operand" "w"))
1832 (ANY_EXTEND:<VWIDE>
1833 (match_operand:VD_BHSI 2 "register_operand" "w")))
1834 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1835 "TARGET_SIMD"
1836 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1837 [(set_attr "type" "neon_mla_<Vetype>_long")]
1838 )
1839
1840 (define_insn "aarch64_<su>mlsl<mode>"
1841 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1842 (minus:<VWIDE>
1843 (match_operand:<VWIDE> 1 "register_operand" "0")
1844 (mult:<VWIDE>
1845 (ANY_EXTEND:<VWIDE>
1846 (match_operand:VD_BHSI 2 "register_operand" "w"))
1847 (ANY_EXTEND:<VWIDE>
1848 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1849 "TARGET_SIMD"
1850 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1851 [(set_attr "type" "neon_mla_<Vetype>_long")]
1852 )
1853
1854 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1855 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1856 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1857 (match_operand:VQW 1 "register_operand" "w")
1858 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1859 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1860 (match_operand:VQW 2 "register_operand" "w")
1861 (match_dup 3)))))]
1862 "TARGET_SIMD"
1863 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1864 [(set_attr "type" "neon_mul_<Vetype>_long")]
1865 )
1866
1867 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
1868 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1869 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
1870 (match_operand:VD_BHSI 1 "register_operand" "w"))
1871 (ANY_EXTEND:<VWIDE>
1872 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
1873 "TARGET_SIMD"
1874 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1875 [(set_attr "type" "neon_mul_<Vetype>_long")]
1876 )
1877
1878 (define_expand "vec_widen_<su>mult_lo_<mode>"
1879 [(match_operand:<VWIDE> 0 "register_operand")
1880 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1881 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1882 "TARGET_SIMD"
1883 {
1884 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1885 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1886 operands[1],
1887 operands[2], p));
1888 DONE;
1889 }
1890 )
1891
1892 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1894 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1895 (match_operand:VQW 1 "register_operand" "w")
1896 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1897 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1898 (match_operand:VQW 2 "register_operand" "w")
1899 (match_dup 3)))))]
1900 "TARGET_SIMD"
1901 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1902 [(set_attr "type" "neon_mul_<Vetype>_long")]
1903 )
1904
1905 (define_expand "vec_widen_<su>mult_hi_<mode>"
1906 [(match_operand:<VWIDE> 0 "register_operand")
1907 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1908 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1909 "TARGET_SIMD"
1910 {
1911 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1912 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1913 operands[1],
1914 operands[2], p));
1915 DONE;
1917 }
1918 )
1919
1920 ;; vmull_lane_s16 intrinsics
1921 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
1922 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1923 (mult:<VWIDE>
1924 (ANY_EXTEND:<VWIDE>
1925 (match_operand:<VCOND> 1 "register_operand" "w"))
1926 (ANY_EXTEND:<VWIDE>
1927 (vec_duplicate:<VCOND>
1928 (vec_select:<VEL>
1929 (match_operand:VDQHS 2 "register_operand" "<vwx>")
1930 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
1931 "TARGET_SIMD"
1932 {
1933 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
1934 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
1935 }
1936 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
1937 )
1938
1939 ;; vmlal_lane_s16 intrinsics
1940 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
1941 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1942 (plus:<VWIDE>
1943 (mult:<VWIDE>
1944 (ANY_EXTEND:<VWIDE>
1945 (match_operand:<VCOND> 2 "register_operand" "w"))
1946 (ANY_EXTEND:<VWIDE>
1947 (vec_duplicate:<VCOND>
1948 (vec_select:<VEL>
1949 (match_operand:VDQHS 3 "register_operand" "<vwx>")
1950 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
1951 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1952 "TARGET_SIMD"
1953 {
1954 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1955 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
1956 }
1957 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
1958 )
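
;; E.g. vmlal_lane_s16 (acc, a, b, lane) matches this pattern and computes,
;; for each index i of the V4HI input a:
;;   acc[i] += (SI) a[i] * (SI) b[lane]
;; with the scalar multiplier coming from a single lane of b.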
1959
1960 ;; FP vector operations.
1961 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1962 ;; double-precision (64-bit) floating-point data types and arithmetic as
1963 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1964 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1965 ;;
1966 ;; Floating-point operations can raise an exception.  Vectorizing such
1967 ;; operations is safe for the reasons explained below.
1968 ;;
1969 ;; ARMv8 permits an extension to enable trapped floating-point
1970 ;; exception handling, however this is an optional feature. In the
1971 ;; event of a floating-point exception being raised by vectorised
1972 ;; code then:
1973 ;; 1. If trapped floating-point exceptions are available, then a trap
1974 ;; will be taken when any lane raises an enabled exception. A trap
1975 ;; handler may determine which lane raised the exception.
1976 ;; 2. Alternatively a sticky exception flag is set in the
1977 ;; floating-point status register (FPSR). Software may explicitly
1978 ;; test the exception flags, in which case the tests will either occur
1979 ;; inside the vectorisable region, preventing vectorisation and allowing
1980 ;; precise identification of the failing operation, or outside it, in
1981 ;; which case the specific operation and lane are not of interest.
1982
1983 ;; FP arithmetic operations.
1984
1985 (define_insn "add<mode>3"
1986 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1987 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1988 (match_operand:VHSDF 2 "register_operand" "w")))]
1989 "TARGET_SIMD"
1990 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1991 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1992 )
1993
1994 (define_insn "sub<mode>3"
1995 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1996 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1997 (match_operand:VHSDF 2 "register_operand" "w")))]
1998 "TARGET_SIMD"
1999 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2000 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2001 )
2002
2003 (define_insn "mul<mode>3"
2004 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2005 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2006 (match_operand:VHSDF 2 "register_operand" "w")))]
2007 "TARGET_SIMD"
2008 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2009 [(set_attr "type" "neon_fp_mul_<stype><q>")]
2010 )
2011
2012 (define_expand "div<mode>3"
2013 [(set (match_operand:VHSDF 0 "register_operand")
2014 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2015 (match_operand:VHSDF 2 "register_operand")))]
2016 "TARGET_SIMD"
2017 {
2018 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2019 DONE;
2020
2021 operands[1] = force_reg (<MODE>mode, operands[1]);
2022 })
2023
2024 (define_insn "*div<mode>3"
2025 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2026 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2027 (match_operand:VHSDF 2 "register_operand" "w")))]
2028 "TARGET_SIMD"
2029 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2030 [(set_attr "type" "neon_fp_div_<stype><q>")]
2031 )
2032
2033 (define_insn "neg<mode>2"
2034 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2035 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2036 "TARGET_SIMD"
2037 "fneg\\t%0.<Vtype>, %1.<Vtype>"
2038 [(set_attr "type" "neon_fp_neg_<stype><q>")]
2039 )
2040
2041 (define_insn "abs<mode>2"
2042 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2043 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2044 "TARGET_SIMD"
2045 "fabs\\t%0.<Vtype>, %1.<Vtype>"
2046 [(set_attr "type" "neon_fp_abs_<stype><q>")]
2047 )
2048
2049 (define_insn "fma<mode>4"
2050 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2051 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2052 (match_operand:VHSDF 2 "register_operand" "w")
2053 (match_operand:VHSDF 3 "register_operand" "0")))]
2054 "TARGET_SIMD"
2055 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2056 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2057 )
2058
2059 (define_insn "*aarch64_fma4_elt<mode>"
2060 [(set (match_operand:VDQF 0 "register_operand" "=w")
2061 (fma:VDQF
2062 (vec_duplicate:VDQF
2063 (vec_select:<VEL>
2064 (match_operand:VDQF 1 "register_operand" "<h_con>")
2065 (parallel [(match_operand:SI 2 "immediate_operand")])))
2066 (match_operand:VDQF 3 "register_operand" "w")
2067 (match_operand:VDQF 4 "register_operand" "0")))]
2068 "TARGET_SIMD"
2069 {
2070 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2071 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2072 }
2073 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2074 )
2075
2076 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2077 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2078 (fma:VDQSF
2079 (vec_duplicate:VDQSF
2080 (vec_select:<VEL>
2081 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2082 (parallel [(match_operand:SI 2 "immediate_operand")])))
2083 (match_operand:VDQSF 3 "register_operand" "w")
2084 (match_operand:VDQSF 4 "register_operand" "0")))]
2085 "TARGET_SIMD"
2086 {
2087 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2088 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2089 }
2090 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2091 )
2092
2093 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
2094 [(set (match_operand:VMUL 0 "register_operand" "=w")
2095 (fma:VMUL
2096 (vec_duplicate:VMUL
2097 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2098 (match_operand:VMUL 2 "register_operand" "w")
2099 (match_operand:VMUL 3 "register_operand" "0")))]
2100 "TARGET_SIMD"
2101 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2102 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2103 )
2104
2105 (define_insn "*aarch64_fma4_elt_to_64v2df"
2106 [(set (match_operand:DF 0 "register_operand" "=w")
2107 (fma:DF
2108 (vec_select:DF
2109 (match_operand:V2DF 1 "register_operand" "w")
2110 (parallel [(match_operand:SI 2 "immediate_operand")]))
2111 (match_operand:DF 3 "register_operand" "w")
2112 (match_operand:DF 4 "register_operand" "0")))]
2113 "TARGET_SIMD"
2114 {
2115 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2116 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2117 }
2118 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2119 )
2120
2121 (define_insn "fnma<mode>4"
2122 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2123 (fma:VHSDF
2124 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2125 (match_operand:VHSDF 2 "register_operand" "w")
2126 (match_operand:VHSDF 3 "register_operand" "0")))]
2127 "TARGET_SIMD"
2128 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2129 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2130 )
2131
2132 (define_insn "*aarch64_fnma4_elt<mode>"
2133 [(set (match_operand:VDQF 0 "register_operand" "=w")
2134 (fma:VDQF
2135 (neg:VDQF
2136 (match_operand:VDQF 3 "register_operand" "w"))
2137 (vec_duplicate:VDQF
2138 (vec_select:<VEL>
2139 (match_operand:VDQF 1 "register_operand" "<h_con>")
2140 (parallel [(match_operand:SI 2 "immediate_operand")])))
2141 (match_operand:VDQF 4 "register_operand" "0")))]
2142 "TARGET_SIMD"
2143 {
2144 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2145 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2146 }
2147 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2148 )
2149
2150 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2151 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2152 (fma:VDQSF
2153 (neg:VDQSF
2154 (match_operand:VDQSF 3 "register_operand" "w"))
2155 (vec_duplicate:VDQSF
2156 (vec_select:<VEL>
2157 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2158 (parallel [(match_operand:SI 2 "immediate_operand")])))
2159 (match_operand:VDQSF 4 "register_operand" "0")))]
2160 "TARGET_SIMD"
2161 {
2162 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2163 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2164 }
2165 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2166 )
2167
2168 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2169 [(set (match_operand:VMUL 0 "register_operand" "=w")
2170 (fma:VMUL
2171 (neg:VMUL
2172 (match_operand:VMUL 2 "register_operand" "w"))
2173 (vec_duplicate:VMUL
2174 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2175 (match_operand:VMUL 3 "register_operand" "0")))]
2176 "TARGET_SIMD"
2177 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2178 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2179 )
2180
2181 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2182 [(set (match_operand:DF 0 "register_operand" "=w")
2183 (fma:DF
2184 (vec_select:DF
2185 (match_operand:V2DF 1 "register_operand" "w")
2186 (parallel [(match_operand:SI 2 "immediate_operand")]))
2187 (neg:DF
2188 (match_operand:DF 3 "register_operand" "w"))
2189 (match_operand:DF 4 "register_operand" "0")))]
2190 "TARGET_SIMD"
2191 {
2192 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2193 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2194 }
2195 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2196 )
2197
2198 ;; Vector versions of the floating-point frint patterns.
2199 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2200 (define_insn "<frint_pattern><mode>2"
2201 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2202 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2203 FRINT))]
2204 "TARGET_SIMD"
2205 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2206 [(set_attr "type" "neon_fp_round_<stype><q>")]
2207 )
2208
2209 ;; Vector versions of the fcvt standard patterns.
2210 ;; Expands to lbtrunc, lround, lceil, lfloor
2211 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2212 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2213 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2214 [(match_operand:VHSDF 1 "register_operand" "w")]
2215 FCVT)))]
2216 "TARGET_SIMD"
2217 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2218 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2219 )
2220
2221 ;; HF Scalar variants of related SIMD instructions.
2222 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2223 [(set (match_operand:HI 0 "register_operand" "=w")
2224 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2225 FCVT)))]
2226 "TARGET_SIMD_F16INST"
2227 "fcvt<frint_suffix><su>\t%h0, %h1"
2228 [(set_attr "type" "neon_fp_to_int_s")]
2229 )
2230
2231 (define_insn "<optab>_trunchfhi2"
2232 [(set (match_operand:HI 0 "register_operand" "=w")
2233 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2234 "TARGET_SIMD_F16INST"
2235 "fcvtz<su>\t%h0, %h1"
2236 [(set_attr "type" "neon_fp_to_int_s")]
2237 )
2238
2239 (define_insn "<optab>hihf2"
2240 [(set (match_operand:HF 0 "register_operand" "=w")
2241 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2242 "TARGET_SIMD_F16INST"
2243 "<su_optab>cvtf\t%h0, %h1"
2244 [(set_attr "type" "neon_int_to_fp_s")]
2245 )
2246
2247 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2248 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2249 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2250 [(mult:VDQF
2251 (match_operand:VDQF 1 "register_operand" "w")
2252 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2253 UNSPEC_FRINTZ)))]
2254 "TARGET_SIMD
2255 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2256 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2257 {
2258 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2259 char buf[64];
2260 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2261 output_asm_insn (buf, operands);
2262 return "";
2263 }
2264 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2265 )
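
;; E.g. for V4SF a source expression such as (int) (4.0f * x) matches the
;; pattern above with fbits == 2 and emits
;;   fcvtzs  v0.4s, v1.4s, #2
;; instead of a separate fmul and fcvtzs.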
2266
2267 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2268 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2269 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2270 [(match_operand:VHSDF 1 "register_operand")]
2271 UNSPEC_FRINTZ)))]
2272 "TARGET_SIMD"
2273 {})
2274
2275 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2276 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2277 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2278 [(match_operand:VHSDF 1 "register_operand")]
2279 UNSPEC_FRINTZ)))]
2280 "TARGET_SIMD"
2281 {})
2282
2283 (define_expand "ftrunc<VHSDF:mode>2"
2284 [(set (match_operand:VHSDF 0 "register_operand")
2285 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2286 UNSPEC_FRINTZ))]
2287 "TARGET_SIMD"
2288 {})
2289
2290 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2291 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2292 (FLOATUORS:VHSDF
2293 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2294 "TARGET_SIMD"
2295 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2296 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2297 )
2298
2299 ;; Conversions between vectors of floats and doubles.
2300 ;; Contains a mix of patterns to match standard pattern names
2301 ;; and those for intrinsics.
2302
2303 ;; Float widening operations.
2304
2305 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2307 (float_extend:<VWIDE> (vec_select:<VHALF>
2308 (match_operand:VQ_HSF 1 "register_operand" "w")
2309 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2310 )))]
2311 "TARGET_SIMD"
2312 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2313 [(set_attr "type" "neon_fp_cvt_widen_s")]
2314 )
2315
2316 ;; Convert between fixed-point and floating-point (vector modes)
2317
2318 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2319 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2320 (unspec:<VHSDF:FCVT_TARGET>
2321 [(match_operand:VHSDF 1 "register_operand" "w")
2322 (match_operand:SI 2 "immediate_operand" "i")]
2323 FCVT_F2FIXED))]
2324 "TARGET_SIMD"
2325 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2326 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2327 )
2328
2329 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2330 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2331 (unspec:<VDQ_HSDI:FCVT_TARGET>
2332 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2333 (match_operand:SI 2 "immediate_operand" "i")]
2334 FCVT_FIXED2F))]
2335 "TARGET_SIMD"
2336 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2337 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2338 )
2339
2340 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2341 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2342 ;; the meaning of HI and LO changes depending on the target endianness.
2343 ;; While elsewhere we map the higher numbered elements of a vector to
2344 ;; the lower architectural lanes of the vector, for these patterns we want
2345 ;; to always treat "hi" as referring to the higher architectural lanes.
2346 ;; Consequently, while the patterns below look inconsistent with our
2347 ;; other big-endian patterns their behavior is as required.
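;;
;; E.g. vec_unpacks_hi_v8hf should therefore always convert the four
;; architecturally-high HFmode lanes (an fcvtl2), on both little- and
;; big-endian targets.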
2348
2349 (define_expand "vec_unpacks_lo_<mode>"
2350 [(match_operand:<VWIDE> 0 "register_operand")
2351 (match_operand:VQ_HSF 1 "register_operand")]
2352 "TARGET_SIMD"
2353 {
2354 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2355 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2356 operands[1], p));
2357 DONE;
2358 }
2359 )
2360
2361 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2362 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2363 (float_extend:<VWIDE> (vec_select:<VHALF>
2364 (match_operand:VQ_HSF 1 "register_operand" "w")
2365 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2366 )))]
2367 "TARGET_SIMD"
2368 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2369 [(set_attr "type" "neon_fp_cvt_widen_s")]
2370 )
2371
2372 (define_expand "vec_unpacks_hi_<mode>"
2373 [(match_operand:<VWIDE> 0 "register_operand")
2374 (match_operand:VQ_HSF 1 "register_operand")]
2375 "TARGET_SIMD"
2376 {
2377 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2378 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2379 operands[1], p));
2380 DONE;
2381 }
2382 )

2383 (define_insn "aarch64_float_extend_lo_<Vwide>"
2384 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2385 (float_extend:<VWIDE>
2386 (match_operand:VDF 1 "register_operand" "w")))]
2387 "TARGET_SIMD"
2388 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2389 [(set_attr "type" "neon_fp_cvt_widen_s")]
2390 )
2391
2392 ;; Float narrowing operations.
2393
2394 (define_insn "aarch64_float_truncate_lo_<mode>"
2395 [(set (match_operand:VDF 0 "register_operand" "=w")
2396 (float_truncate:VDF
2397 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2398 "TARGET_SIMD"
2399 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2400 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2401 )
2402
2403 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2404 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2405 (vec_concat:<VDBL>
2406 (match_operand:VDF 1 "register_operand" "0")
2407 (float_truncate:VDF
2408 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2409 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2410 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2411 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2412 )
2413
2414 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2415 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2416 (vec_concat:<VDBL>
2417 (float_truncate:VDF
2418 (match_operand:<VWIDE> 2 "register_operand" "w"))
2419 (match_operand:VDF 1 "register_operand" "0")))]
2420 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2421 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2422 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2423 )
2424
2425 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2426 [(match_operand:<VDBL> 0 "register_operand")
2427 (match_operand:VDF 1 "register_operand")
2428 (match_operand:<VWIDE> 2 "register_operand")]
2429 "TARGET_SIMD"
2430 {
2431 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2432 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2433 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2434 emit_insn (gen (operands[0], operands[1], operands[2]));
2435 DONE;
2436 }
2437 )
2438
2439 (define_expand "vec_pack_trunc_v2df"
2440 [(set (match_operand:V4SF 0 "register_operand")
2441 (vec_concat:V4SF
2442 (float_truncate:V2SF
2443 (match_operand:V2DF 1 "register_operand"))
2444 (float_truncate:V2SF
2445 (match_operand:V2DF 2 "register_operand"))
2446 ))]
2447 "TARGET_SIMD"
2448 {
2449 rtx tmp = gen_reg_rtx (V2SFmode);
2450 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2451 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2452
2453 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2454 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2455 tmp, operands[hi]));
2456 DONE;
2457 }
2458 )
2459
2460 (define_expand "vec_pack_trunc_df"
2461 [(set (match_operand:V2SF 0 "register_operand")
2462 (vec_concat:V2SF
2463 (float_truncate:SF
2464 (match_operand:DF 1 "register_operand"))
2465 (float_truncate:SF
2466 (match_operand:DF 2 "register_operand"))
2467 ))]
2468 "TARGET_SIMD"
2469 {
2470 rtx tmp = gen_reg_rtx (V2DFmode);
2471 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2472 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2473
2474 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2475 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2476 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2477 DONE;
2478 }
2479 )
2480
2481 ;; FP Max/Min
2482 ;; Max/Min are introduced by idiom recognition in GCC's mid-end.  An
2483 ;; expression like:
2484 ;; a = (b < c) ? b : c;
2485 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2486 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2487 ;; -ffast-math.
2488 ;;
2489 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2490 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2491 ;; operand will be returned when both operands are zero (i.e. they may not
2492 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2493 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2494 ;; NaNs.
2495
2496 (define_insn "<su><maxmin><mode>3"
2497 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2498 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2499 (match_operand:VHSDF 2 "register_operand" "w")))]
2500 "TARGET_SIMD"
2501 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2502 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2503 )
2504
2505 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2506 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2507 ;; which implement the IEEE fmax ()/fmin () functions.
2508 (define_insn "<maxmin_uns><mode>3"
2509 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2510 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2511 (match_operand:VHSDF 2 "register_operand" "w")]
2512 FMAXMIN_UNS))]
2513 "TARGET_SIMD"
2514 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2515 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2516 )
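
;; E.g. fmaxnm implements the NaN handling that the C fmax () function
;; requires: fmaxnm (NaN, x) == x, whereas the plain fmax instruction
;; would return the NaN.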
2517
2518 ;; 'across lanes' add.
2519
2520 (define_expand "reduc_plus_scal_<mode>"
2521 [(match_operand:<VEL> 0 "register_operand")
2522 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2523 UNSPEC_ADDV)]
2524 "TARGET_SIMD"
2525 {
2526 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2527 rtx scratch = gen_reg_rtx (<MODE>mode);
2528 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2529 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2530 DONE;
2531 }
2532 )
2533
2534 (define_insn "aarch64_faddp<mode>"
2535 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2536 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2537 (match_operand:VHSDF 2 "register_operand" "w")]
2538 UNSPEC_FADDV))]
2539 "TARGET_SIMD"
2540 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2541 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2542 )
2543
2544 (define_insn "aarch64_reduc_plus_internal<mode>"
2545 [(set (match_operand:VDQV 0 "register_operand" "=w")
2546 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2547 UNSPEC_ADDV))]
2548 "TARGET_SIMD"
2549 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2550 [(set_attr "type" "neon_reduc_add<q>")]
2551 )
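
;; E.g. for V4SI this emits "addv s0, v1.4s", computing
;;   s0 = v1[0] + v1[1] + v1[2] + v1[3]
;; with the scalar sum left in the SIMD register file.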
2552
2553 ;; ADDV with result zero-extended to SI/DImode (for popcount).
2554 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
2555 [(set (match_operand:GPI 0 "register_operand" "=w")
2556 (zero_extend:GPI
2557 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
2558 UNSPEC_ADDV)))]
2559 "TARGET_SIMD"
2560 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
2561 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
2562 )
2563
2564 (define_insn "aarch64_reduc_plus_internalv2si"
2565 [(set (match_operand:V2SI 0 "register_operand" "=w")
2566 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2567 UNSPEC_ADDV))]
2568 "TARGET_SIMD"
2569 "addp\\t%0.2s, %1.2s, %1.2s"
2570 [(set_attr "type" "neon_reduc_add")]
2571 )
2572
2573 (define_insn "reduc_plus_scal_<mode>"
2574 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2575 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2576 UNSPEC_FADDV))]
2577 "TARGET_SIMD"
2578 "faddp\\t%<Vetype>0, %1.<Vtype>"
2579 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2580 )
2581
2582 (define_expand "reduc_plus_scal_v4sf"
2583 [(set (match_operand:SF 0 "register_operand")
2584 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2585 UNSPEC_FADDV))]
2586 "TARGET_SIMD"
2587 {
2588 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2589 rtx scratch = gen_reg_rtx (V4SFmode);
2590 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2591 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2592 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2593 DONE;
2594 })
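
;; E.g. for { a, b, c, d } the two faddp steps above compute
;;   { a+b, c+d, a+b, c+d } and then { (a+b)+(c+d), ... }
;; after which lane 0 holds the full sum.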
2595
2596 (define_insn "clrsb<mode>2"
2597 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2598 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2599 "TARGET_SIMD"
2600 "cls\\t%0.<Vtype>, %1.<Vtype>"
2601 [(set_attr "type" "neon_cls<q>")]
2602 )
2603
2604 (define_insn "clz<mode>2"
2605 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2606 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2607 "TARGET_SIMD"
2608 "clz\\t%0.<Vtype>, %1.<Vtype>"
2609 [(set_attr "type" "neon_cls<q>")]
2610 )
2611
2612 (define_insn "popcount<mode>2"
2613 [(set (match_operand:VB 0 "register_operand" "=w")
2614 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2615 "TARGET_SIMD"
2616 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2617 [(set_attr "type" "neon_cnt<q>")]
2618 )
2619
2620 ;; 'across lanes' max and min ops.
2621
2622 ;; Template for outputting a scalar, so we can create __builtins which can be
2623 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2624 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2625 [(match_operand:<VEL> 0 "register_operand")
2626 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2627 FMAXMINV)]
2628 "TARGET_SIMD"
2629 {
2630 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2631 rtx scratch = gen_reg_rtx (<MODE>mode);
2632 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2633 operands[1]));
2634 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2635 DONE;
2636 }
2637 )
2638
2639 ;; Likewise for integer cases, signed and unsigned.
2640 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2641 [(match_operand:<VEL> 0 "register_operand")
2642 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2643 MAXMINV)]
2644 "TARGET_SIMD"
2645 {
2646 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2647 rtx scratch = gen_reg_rtx (<MODE>mode);
2648 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2649 operands[1]));
2650 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2651 DONE;
2652 }
2653 )
2654
2655 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2656 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2657 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2658 MAXMINV))]
2659 "TARGET_SIMD"
2660 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2661 [(set_attr "type" "neon_reduc_minmax<q>")]
2662 )
2663
2664 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2665 [(set (match_operand:V2SI 0 "register_operand" "=w")
2666 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2667 MAXMINV))]
2668 "TARGET_SIMD"
2669 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2670 [(set_attr "type" "neon_reduc_minmax")]
2671 )
2672
2673 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2674 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2675 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2676 FMAXMINV))]
2677 "TARGET_SIMD"
2678 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2679 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2680 )
2681
2682 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2683 ;; allocation.
2684 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2685 ;; to select.
2686 ;;
2687 ;; Thus our BSL is of the form:
2688 ;; op0 = bsl (mask, op2, op3)
2689 ;; We can use any of:
2690 ;;
2691 ;; if (op0 = mask)
2692 ;; bsl op0, op2, op3
2693 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2694 ;; bit op0, op2, mask
2695 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2696 ;; bif op0, op3, mask
2697 ;;
2698 ;; The aarch64_simd_bsl<mode> expander expands to this pattern.
2699 ;; Some forms of straight-line code may generate the equivalent form
2700 ;; in *aarch64_simd_bsl<mode>_alt.
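;;
;; Bit-wise, the selection computed is roughly
;;   op0 = (op2 & op1) | (op3 & ~op1)
;; which is the xor/and/xor form matched below:
;;   op0 = ((op2 ^ op3) & op1) ^ op3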
2701
2702 (define_insn "aarch64_simd_bsl<mode>_internal"
2703 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2704 (xor:VDQ_I
2705 (and:VDQ_I
2706 (xor:VDQ_I
2707 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2708 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2709 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2710 (match_dup:<V_INT_EQUIV> 3)
2711 ))]
2712 "TARGET_SIMD"
2713 "@
2714 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2715 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2716 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2717 [(set_attr "type" "neon_bsl<q>")]
2718 )
2719
2720 ;; We need this form in addition to the above pattern to match the case
2721 ;; when combine tries merging three insns such that the second operand of
2722 ;; the outer XOR matches the second operand of the inner XOR rather than
2723 ;; the first. The two are equivalent but since recog doesn't try all
2724 ;; permutations of commutative operations, we have to have a separate pattern.
2725
2726 (define_insn "*aarch64_simd_bsl<mode>_alt"
2727 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2728 (xor:VDQ_I
2729 (and:VDQ_I
2730 (xor:VDQ_I
2731 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2732 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2733 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2734 (match_dup:<V_INT_EQUIV> 2)))]
2735 "TARGET_SIMD"
2736 "@
2737 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2738 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2739 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2740 [(set_attr "type" "neon_bsl<q>")]
2741 )
2742
2743 ;; DImode is special: we want to avoid computing, in the vector
2744 ;; registers, operations which are more naturally computed in general
2745 ;; purpose registers, as that would mean moving all three operands from
2746 ;; general purpose registers to vector registers and back.  However, we
2747 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2748 ;; optimizations based on the component operations of a BSL.
2749 ;;
2750 ;; That means we need a splitter back to the individual operations, if they
2751 ;; would be better calculated on the integer side.
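;;
;; E.g. on the general purpose side the split below becomes three scalar
;; instructions:
;;   eor  tmp, op2, op3
;;   and  tmp, tmp, op1
;;   eor  op0, tmp, op3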
2752
2753 (define_insn_and_split "aarch64_simd_bsldi_internal"
2754 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2755 (xor:DI
2756 (and:DI
2757 (xor:DI
2758 (match_operand:DI 3 "register_operand" "w,0,w,r")
2759 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2760 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2761 (match_dup:DI 3)
2762 ))]
2763 "TARGET_SIMD"
2764 "@
2765 bsl\\t%0.8b, %2.8b, %3.8b
2766 bit\\t%0.8b, %2.8b, %1.8b
2767 bif\\t%0.8b, %3.8b, %1.8b
2768 #"
2769 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2770 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2771 {
2772 /* Split back to individual operations. If we're before reload, and
2773 able to create a temporary register, do so. If we're after reload,
2774 we've got an early-clobber destination register, so use that.
2775 Otherwise, we can't create pseudos and we can't yet guarantee that
2776 operands[0] is safe to write, so FAIL to split. */
2777
2778 rtx scratch;
2779 if (reload_completed)
2780 scratch = operands[0];
2781 else if (can_create_pseudo_p ())
2782 scratch = gen_reg_rtx (DImode);
2783 else
2784 FAIL;
2785
2786 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2787 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2788 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2789 DONE;
2790 }
2791 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2792 (set_attr "length" "4,4,4,12")]
2793 )
2794
2795 (define_insn_and_split "aarch64_simd_bsldi_alt"
2796 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2797 (xor:DI
2798 (and:DI
2799 (xor:DI
2800 (match_operand:DI 3 "register_operand" "w,w,0,r")
2801 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2802 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2803 (match_dup:DI 2)
2804 ))]
2805 "TARGET_SIMD"
2806 "@
2807 bsl\\t%0.8b, %3.8b, %2.8b
2808 bit\\t%0.8b, %3.8b, %1.8b
2809 bif\\t%0.8b, %2.8b, %1.8b
2810 #"
2811 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2812 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2813 {
2814 /* Split back to individual operations. If we're before reload, and
2815 able to create a temporary register, do so. If we're after reload,
2816 we've got an early-clobber destination register, so use that.
2817 Otherwise, we can't create pseudos and we can't yet guarantee that
2818 operands[0] is safe to write, so FAIL to split. */
2819
2820 rtx scratch;
2821 if (reload_completed)
2822 scratch = operands[0];
2823 else if (can_create_pseudo_p ())
2824 scratch = gen_reg_rtx (DImode);
2825 else
2826 FAIL;
2827
2828 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2829 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2830 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2831 DONE;
2832 }
2833 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2834 (set_attr "length" "4,4,4,12")]
2835 )
2836
2837 (define_expand "aarch64_simd_bsl<mode>"
2838 [(match_operand:VALLDIF 0 "register_operand")
2839 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2840 (match_operand:VALLDIF 2 "register_operand")
2841 (match_operand:VALLDIF 3 "register_operand")]
2842 "TARGET_SIMD"
2843 {
2844 /* We can't alias operands together if they have different modes. */
2845 rtx tmp = operands[0];
2846 if (FLOAT_MODE_P (<MODE>mode))
2847 {
2848 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2849 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2850 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2851 }
2852 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2853 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2854 operands[1],
2855 operands[2],
2856 operands[3]));
2857 if (tmp != operands[0])
2858 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2859
2860 DONE;
2861 })
2862
2863 (define_expand "vcond_mask_<mode><v_int_equiv>"
2864 [(match_operand:VALLDI 0 "register_operand")
2865 (match_operand:VALLDI 1 "nonmemory_operand")
2866 (match_operand:VALLDI 2 "nonmemory_operand")
2867 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2868 "TARGET_SIMD"
2869 {
2870 /* If we have (a = (P) ? -1 : 0) then we can simply move
2871 the generated mask (the result must be an integer vector).  */
2872 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2873 && operands[2] == CONST0_RTX (<MODE>mode))
2874 emit_move_insn (operands[0], operands[3]);
2875 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2876 else if (operands[1] == CONST0_RTX (<MODE>mode)
2877 && operands[2] == CONSTM1_RTX (<MODE>mode))
2878 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2879 else
2880 {
2881 if (!REG_P (operands[1]))
2882 operands[1] = force_reg (<MODE>mode, operands[1]);
2883 if (!REG_P (operands[2]))
2884 operands[2] = force_reg (<MODE>mode, operands[2]);
2885 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2886 operands[1], operands[2]));
2887 }
2888
2889 DONE;
2890 })
2891
2892 ;; Patterns comparing two vectors to produce a mask.
2893
2894 (define_expand "vec_cmp<mode><mode>"
2895 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2896 (match_operator 1 "comparison_operator"
2897 [(match_operand:VSDQ_I_DI 2 "register_operand")
2898 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2899 "TARGET_SIMD"
2900 {
2901 rtx mask = operands[0];
2902 enum rtx_code code = GET_CODE (operands[1]);
2903
2904 switch (code)
2905 {
2906 case NE:
2907 case LE:
2908 case LT:
2909 case GE:
2910 case GT:
2911 case EQ:
2912 if (operands[3] == CONST0_RTX (<MODE>mode))
2913 break;
2914
2915 /* Fall through. */
2916 default:
2917 if (!REG_P (operands[3]))
2918 operands[3] = force_reg (<MODE>mode, operands[3]);
2919
2920 break;
2921 }
2922
2923 switch (code)
2924 {
2925 case LT:
2926 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2927 break;
2928
2929 case GE:
2930 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2931 break;
2932
2933 case LE:
2934 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2935 break;
2936
2937 case GT:
2938 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2939 break;
2940
2941 case LTU:
2942 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2943 break;
2944
2945 case GEU:
2946 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2947 break;
2948
2949 case LEU:
2950 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2951 break;
2952
2953 case GTU:
2954 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2955 break;
2956
2957 case NE:
2958 /* Handle NE as !EQ. */
2959 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2960 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2961 break;
2962
2963 case EQ:
2964 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2965 break;
2966
2967 default:
2968 gcc_unreachable ();
2969 }
2970
2971 DONE;
2972 })
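
;; E.g. there is no direct unsigned less-than comparison instruction, so
;; the expander above implements a LTU b by emitting the unsigned
;; greater-than comparison (cmhi) with the operands swapped: b GTU a.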
2973
2974 (define_expand "vec_cmp<mode><v_int_equiv>"
2975 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2976 (match_operator 1 "comparison_operator"
2977 [(match_operand:VDQF 2 "register_operand")
2978 (match_operand:VDQF 3 "nonmemory_operand")]))]
2979 "TARGET_SIMD"
2980 {
2981 int use_zero_form = 0;
2982 enum rtx_code code = GET_CODE (operands[1]);
2983 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2984
2985 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2986
2987 switch (code)
2988 {
2989 case LE:
2990 case LT:
2991 case GE:
2992 case GT:
2993 case EQ:
2994 if (operands[3] == CONST0_RTX (<MODE>mode))
2995 {
2996 use_zero_form = 1;
2997 break;
2998 }
2999 /* Fall through. */
3000 default:
3001 if (!REG_P (operands[3]))
3002 operands[3] = force_reg (<MODE>mode, operands[3]);
3003
3004 break;
3005 }
3006
3007 switch (code)
3008 {
3009 case LT:
3010 if (use_zero_form)
3011 {
3012 comparison = gen_aarch64_cmlt<mode>;
3013 break;
3014 }
3015 /* Fall through. */
3016 case UNLT:
3017 std::swap (operands[2], operands[3]);
3018 /* Fall through. */
3019 case UNGT:
3020 case GT:
3021 comparison = gen_aarch64_cmgt<mode>;
3022 break;
3023 case LE:
3024 if (use_zero_form)
3025 {
3026 comparison = gen_aarch64_cmle<mode>;
3027 break;
3028 }
3029 /* Fall through. */
3030 case UNLE:
3031 std::swap (operands[2], operands[3]);
3032 /* Fall through. */
3033 case UNGE:
3034 case GE:
3035 comparison = gen_aarch64_cmge<mode>;
3036 break;
3037 case NE:
3038 case EQ:
3039 comparison = gen_aarch64_cmeq<mode>;
3040 break;
3041 case UNEQ:
3042 case ORDERED:
3043 case UNORDERED:
3044 case LTGT:
3045 break;
3046 default:
3047 gcc_unreachable ();
3048 }
3049
3050 switch (code)
3051 {
3052 case UNGE:
3053 case UNGT:
3054 case UNLE:
3055 case UNLT:
3056 {
3057 /* All of the above must not raise any FP exceptions. Thus we first
3058 check each operand for NaNs and force any elements containing NaN to
3059 zero before using them in the compare.
3060 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
3061 (cm<cc> (isnan (a) ? 0.0 : a,
3062 isnan (b) ? 0.0 : b))
3063 We use the following transformations for doing the comparisons:
3064 a UNGE b -> a GE b
3065 a UNGT b -> a GT b
3066 a UNLE b -> b GE a
3067 a UNLT b -> b GT a. */
3068
3069 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
3070 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
3071 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
3072 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
3073 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
3074 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
3075 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
3076 lowpart_subreg (<V_INT_EQUIV>mode,
3077 operands[2],
3078 <MODE>mode)));
3079 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
3080 lowpart_subreg (<V_INT_EQUIV>mode,
3081 operands[3],
3082 <MODE>mode)));
3083 gcc_assert (comparison != NULL);
3084 emit_insn (comparison (operands[0],
3085 lowpart_subreg (<MODE>mode,
3086 tmp0, <V_INT_EQUIV>mode),
3087 lowpart_subreg (<MODE>mode,
3088 tmp1, <V_INT_EQUIV>mode)));
3089 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
3090 }
3091 break;
3092
3093 case LT:
3094 case LE:
3095 case GT:
3096 case GE:
3097 case EQ:
3098 case NE:
3099 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
3100 As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
3101 a GE b -> a GE b
3102 a GT b -> a GT b
3103 a LE b -> b GE a
3104 a LT b -> b GT a
3105 a EQ b -> a EQ b
3106 a NE b -> ~(a EQ b) */
3107 gcc_assert (comparison != NULL);
3108 emit_insn (comparison (operands[0], operands[2], operands[3]));
3109 if (code == NE)
3110 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3111 break;
3112
3113 case LTGT:
3114 /* LTGT is not guaranteed not to generate a FP exception.  So let's
3115 go the faster way: ((a > b) || (b > a)).  */
3116 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
3117 operands[2], operands[3]));
3118 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
3119 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
3120 break;
3121
3122 case ORDERED:
3123 case UNORDERED:
3124 case UNEQ:
3125 /* cmeq (a, a) & cmeq (b, b). */
3126 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
3127 operands[2], operands[2]));
3128 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
3129 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
3130
3131 if (code == UNORDERED)
3132 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3133 else if (code == UNEQ)
3134 {
3135 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3136 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3137 }
3138 break;
3139
3140 default:
3141 gcc_unreachable ();
3142 }
3143
3144 DONE;
3145 })
3146
3147 (define_expand "vec_cmpu<mode><mode>"
3148 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3149 (match_operator 1 "comparison_operator"
3150 [(match_operand:VSDQ_I_DI 2 "register_operand")
3151 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3152 "TARGET_SIMD"
3153 {
3154 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3155 operands[2], operands[3]));
3156 DONE;
3157 })
3158
3159 (define_expand "vcond<mode><mode>"
3160 [(set (match_operand:VALLDI 0 "register_operand")
3161 (if_then_else:VALLDI
3162 (match_operator 3 "comparison_operator"
3163 [(match_operand:VALLDI 4 "register_operand")
3164 (match_operand:VALLDI 5 "nonmemory_operand")])
3165 (match_operand:VALLDI 1 "nonmemory_operand")
3166 (match_operand:VALLDI 2 "nonmemory_operand")))]
3167 "TARGET_SIMD"
3168 {
3169 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3170 enum rtx_code code = GET_CODE (operands[3]);
3171
3172 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3173 it as well as switch operands 1/2 in order to avoid the additional
3174 NOT instruction. */
3175 if (code == NE)
3176 {
3177 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3178 operands[4], operands[5]);
3179 std::swap (operands[1], operands[2]);
3180 }
3181 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3182 operands[4], operands[5]));
3183 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3184 operands[2], mask));
3185
3186 DONE;
3187 })
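
;; E.g. vcond (x, y, a != b) is rewritten above to vcond (y, x, a == b),
;; trading the NOT instruction that NE would require for a swap of the
;; data operands.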
3188
3189 (define_expand "vcond<v_cmp_mixed><mode>"
3190 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3191 (if_then_else:<V_cmp_mixed>
3192 (match_operator 3 "comparison_operator"
3193 [(match_operand:VDQF_COND 4 "register_operand")
3194 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3195 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3196 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3197 "TARGET_SIMD"
3198 {
3199 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3200 enum rtx_code code = GET_CODE (operands[3]);
3201
3202 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3203 it as well as switch operands 1/2 in order to avoid the additional
3204 NOT instruction. */
3205 if (code == NE)
3206 {
3207 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3208 operands[4], operands[5]);
3209 std::swap (operands[1], operands[2]);
3210 }
3211 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3212 operands[4], operands[5]));
3213 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3214 operands[0], operands[1],
3215 operands[2], mask));
3216
3217 DONE;
3218 })
3219
3220 (define_expand "vcondu<mode><mode>"
3221 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3222 (if_then_else:VSDQ_I_DI
3223 (match_operator 3 "comparison_operator"
3224 [(match_operand:VSDQ_I_DI 4 "register_operand")
3225 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3226 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3227 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3228 "TARGET_SIMD"
3229 {
3230 rtx mask = gen_reg_rtx (<MODE>mode);
3231 enum rtx_code code = GET_CODE (operands[3]);
3232
3233 /* NE is handled as !EQ in the vec_cmp patterns; rather than emit the
3234 extra NOT instruction, invert the comparison to EQ and swap
3235 operands 1/2.  */
3236 if (code == NE)
3237 {
3238 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3239 operands[4], operands[5]);
3240 std::swap (operands[1], operands[2]);
3241 }
3242 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3243 operands[4], operands[5]));
3244 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3245 operands[2], mask));
3246 DONE;
3247 })
3248
3249 (define_expand "vcondu<mode><v_cmp_mixed>"
3250 [(set (match_operand:VDQF 0 "register_operand")
3251 (if_then_else:VDQF
3252 (match_operator 3 "comparison_operator"
3253 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3254 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3255 (match_operand:VDQF 1 "nonmemory_operand")
3256 (match_operand:VDQF 2 "nonmemory_operand")))]
3257 "TARGET_SIMD"
3258 {
3259 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3260 enum rtx_code code = GET_CODE (operands[3]);
3261
3262 /* NE is handled as !EQ in the vec_cmp patterns; rather than emit the
3263 extra NOT instruction, invert the comparison to EQ and swap
3264 operands 1/2.  */
3265 if (code == NE)
3266 {
3267 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3268 operands[4], operands[5]);
3269 std::swap (operands[1], operands[2]);
3270 }
3271 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3272 mask, operands[3],
3273 operands[4], operands[5]));
3274 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3275 operands[2], mask));
3276 DONE;
3277 })
3278
3279 ;; Patterns for AArch64 SIMD Intrinsics.
3280
3281 ;; Lane extraction with sign extension to general purpose register.
3282 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3283 [(set (match_operand:GPI 0 "register_operand" "=r")
3284 (sign_extend:GPI
3285 (vec_select:<VDQQH:VEL>
3286 (match_operand:VDQQH 1 "register_operand" "w")
3287 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3288 "TARGET_SIMD"
3289 {
3290 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3291 INTVAL (operands[2]));
3292 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3293 }
3294 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3295 )
3296
3297 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3298 [(set (match_operand:GPI 0 "register_operand" "=r")
3299 (zero_extend:GPI
3300 (vec_select:<VDQQH:VEL>
3301 (match_operand:VDQQH 1 "register_operand" "w")
3302 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3303 "TARGET_SIMD"
3304 {
3305 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3306 INTVAL (operands[2]));
3307 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3308 }
3309 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3310 )
3311
3312 ;; Lane extraction of a value; neither sign nor zero extension is
3313 ;; guaranteed, so the upper bits should be considered undefined.
3314 ;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
3315 ;; Extracting lane zero is split into a simple move when it is a move
3316 ;; between SIMD registers or a store to memory.
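;; For example, lane 1 of a V4SI value can be extracted as:
;;   umov w0, v1.s[1]      // to a general-purpose register
;;   dup  s0, v1.s[1]      // between SIMD registers
;;   st1  {v1.s}[1], [x0]  // directly to memory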
3317 (define_insn_and_split "aarch64_get_lane<mode>"
3318 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3319 (vec_select:<VEL>
3320 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3321 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3322 "TARGET_SIMD"
3323 {
3324 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3325 switch (which_alternative)
3326 {
3327 case 0:
3328 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3329 case 1:
3330 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3331 case 2:
3332 return "st1\\t{%1.<Vetype>}[%2], %0";
3333 default:
3334 gcc_unreachable ();
3335 }
3336 }
3337 "&& reload_completed
3338 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
3339 [(set (match_dup 0) (match_dup 1))]
3340 {
3341 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
3342 }
3343 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3344 )
3345
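;; Load two adjacent 64-bit values as a single 128-bit vector.  The insn
;; condition requires operand 2's address to be operand 1's address plus
;; the 8-byte mode size, so one q-register ldr covers both halves.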
3346 (define_insn "load_pair_lanes<mode>"
3347 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3348 (vec_concat:<VDBL>
3349 (match_operand:VDC 1 "memory_operand" "Utq")
3350 (match_operand:VDC 2 "memory_operand" "m")))]
3351 "TARGET_SIMD && !STRICT_ALIGNMENT
3352 && rtx_equal_p (XEXP (operands[2], 0),
3353 plus_constant (Pmode,
3354 XEXP (operands[1], 0),
3355 GET_MODE_SIZE (<MODE>mode)))"
3356 "ldr\\t%q0, %1"
3357 [(set_attr "type" "neon_load1_1reg_q")]
3358 )
3359
3360 (define_insn "store_pair_lanes<mode>"
3361 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3362 (vec_concat:<VDBL>
3363 (match_operand:VDC 1 "register_operand" "w, r")
3364 (match_operand:VDC 2 "register_operand" "w, r")))]
3365 "TARGET_SIMD"
3366 "@
3367 stp\\t%d1, %d2, %y0
3368 stp\\t%x1, %x2, %y0"
3369 [(set_attr "type" "neon_stp, store_16")]
3370 )
3371
3372 ;; In this insn, operand 1 should be the low half and operand 2 the high
3373 ;; half of the destination vector.
3374
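;; Concatenating with zero needs no explicit zeroing: writing a 64-bit
;; d register implicitly clears the upper half of the enclosing q
;; register, so a plain mov/fmov/ldr of the low half suffices.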
3375 (define_insn "@aarch64_combinez<mode>"
3376 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3377 (vec_concat:<VDBL>
3378 (match_operand:VDC 1 "general_operand" "w,?r,m")
3379 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3380 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3381 "@
3382 mov\\t%0.8b, %1.8b
3383 fmov\\t%d0, %1
3384 ldr\\t%d0, %1"
3385 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3386 (set_attr "arch" "simd,fp,simd")]
3387 )
3388
3389 (define_insn "@aarch64_combinez_be<mode>"
3390 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3391 (vec_concat:<VDBL>
3392 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3393 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3394 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3395 "@
3396 mov\\t%0.8b, %1.8b
3397 fmov\\t%d0, %1
3398 ldr\\t%d0, %1"
3399 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3400 (set_attr "arch" "simd,fp,simd")]
3401 )
3402
3403 (define_expand "aarch64_combine<mode>"
3404 [(match_operand:<VDBL> 0 "register_operand")
3405 (match_operand:VDC 1 "register_operand")
3406 (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
3407 "TARGET_SIMD"
3408 {
3409 if (operands[2] == CONST0_RTX (<MODE>mode))
3410 {
3411 if (BYTES_BIG_ENDIAN)
3412 emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
3413 operands[2]));
3414 else
3415 emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
3416 operands[2]));
3417 }
3418 else
3419 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3420 DONE;
3421 }
3422 )
3423
3424 (define_expand "@aarch64_simd_combine<mode>"
3425 [(match_operand:<VDBL> 0 "register_operand")
3426 (match_operand:VDC 1 "register_operand")
3427 (match_operand:VDC 2 "register_operand")]
3428 "TARGET_SIMD"
3429 {
3430 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3431 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3432 DONE;
3433 }
3434 [(set_attr "type" "multiple")]
3435 )
3436
3437 ;; <su><addsub>l<q>.
3438
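;; Widening add/subtract of vector halves, e.g. for V8HI sources:
;;   saddl  v0.4s, v1.4h, v2.4h   // low halves
;;   saddl2 v0.4s, v1.8h, v2.8h   // high halves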
3439 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3440 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3441 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3442 (match_operand:VQW 1 "register_operand" "w")
3443 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3445 (match_operand:VQW 2 "register_operand" "w")
3446 (match_dup 3)))))]
3447 "TARGET_SIMD"
3448 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3449 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3450 )
3451
3452 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3453 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3454 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3455 (match_operand:VQW 1 "register_operand" "w")
3456 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3457 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3458 (match_operand:VQW 2 "register_operand" "w")
3459 (match_dup 3)))))]
3460 "TARGET_SIMD"
3461 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3462 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3463 )
3464
3465 (define_expand "vec_widen_<su>addl_lo_<mode>"
3466 [(match_operand:<VWIDE> 0 "register_operand")
3467 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3468 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3469 "TARGET_SIMD"
3470 {
3471 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3472 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
3473 operands[2], p));
3474 DONE;
3475 })
3476
3477 (define_expand "vec_widen_<su>addl_hi_<mode>"
3478 [(match_operand:<VWIDE> 0 "register_operand")
3479 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3480 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3481 "TARGET_SIMD"
3482 {
3483 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3484 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
3485 operands[2], p));
3486 DONE;
3487 })
3488
3489 (define_expand "vec_widen_<su>subl_lo_<mode>"
3490 [(match_operand:<VWIDE> 0 "register_operand")
3491 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3492 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3493 "TARGET_SIMD"
3494 {
3495 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3496 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
3497 operands[2], p));
3498 DONE;
3499 })
3500
3501 (define_expand "vec_widen_<su>subl_hi_<mode>"
3502 [(match_operand:<VWIDE> 0 "register_operand")
3503 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
3504 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
3505 "TARGET_SIMD"
3506 {
3507 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
3509 operands[2], p));
3510 DONE;
3511 })
3512
3513 (define_expand "aarch64_saddl2<mode>"
3514 [(match_operand:<VWIDE> 0 "register_operand")
3515 (match_operand:VQW 1 "register_operand")
3516 (match_operand:VQW 2 "register_operand")]
3517 "TARGET_SIMD"
3518 {
3519 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3521 operands[2], p));
3522 DONE;
3523 })
3524
3525 (define_expand "aarch64_uaddl2<mode>"
3526 [(match_operand:<VWIDE> 0 "register_operand")
3527 (match_operand:VQW 1 "register_operand")
3528 (match_operand:VQW 2 "register_operand")]
3529 "TARGET_SIMD"
3530 {
3531 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3532 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3533 operands[2], p));
3534 DONE;
3535 })
3536
3537 (define_expand "aarch64_ssubl2<mode>"
3538 [(match_operand:<VWIDE> 0 "register_operand")
3539 (match_operand:VQW 1 "register_operand")
3540 (match_operand:VQW 2 "register_operand")]
3541 "TARGET_SIMD"
3542 {
3543 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3544 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3545 operands[2], p));
3546 DONE;
3547 })
3548
3549 (define_expand "aarch64_usubl2<mode>"
3550 [(match_operand:<VWIDE> 0 "register_operand")
3551 (match_operand:VQW 1 "register_operand")
3552 (match_operand:VQW 2 "register_operand")]
3553 "TARGET_SIMD"
3554 {
3555 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3556 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3557 operands[2], p));
3558 DONE;
3559 })
3560
3561 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3562 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3563 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3564 (match_operand:VD_BHSI 1 "register_operand" "w"))
3565 (ANY_EXTEND:<VWIDE>
3566 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3567 "TARGET_SIMD"
3568 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3569 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3570 )
3571
3572 ;; <su><addsub>w<q>.
3573
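;; <su>addw/<su>subw widen each element of the narrow operand and
;; accumulate into the wide operand.  For full-width sources the
;; widen_<su>sum expanders below use <su>addw on the low half followed
;; by <su>addw2 on the high half.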
3574 (define_expand "widen_ssum<mode>3"
3575 [(set (match_operand:<VDBLW> 0 "register_operand")
3576 (plus:<VDBLW> (sign_extend:<VDBLW>
3577 (match_operand:VQW 1 "register_operand"))
3578 (match_operand:<VDBLW> 2 "register_operand")))]
3579 "TARGET_SIMD"
3580 {
3581 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3582 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3583
3584 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3585 operands[1], p));
3586 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3587 DONE;
3588 }
3589 )
3590
3591 (define_expand "widen_ssum<mode>3"
3592 [(set (match_operand:<VWIDE> 0 "register_operand")
3593 (plus:<VWIDE> (sign_extend:<VWIDE>
3594 (match_operand:VD_BHSI 1 "register_operand"))
3595 (match_operand:<VWIDE> 2 "register_operand")))]
3596 "TARGET_SIMD"
3597 {
3598 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3599 DONE;
3600 })
3601
3602 (define_expand "widen_usum<mode>3"
3603 [(set (match_operand:<VDBLW> 0 "register_operand")
3604 (plus:<VDBLW> (zero_extend:<VDBLW>
3605 (match_operand:VQW 1 "register_operand"))
3606 (match_operand:<VDBLW> 2 "register_operand")))]
3607 "TARGET_SIMD"
3608 {
3609 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3610 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3611
3612 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3613 operands[1], p));
3614 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3615 DONE;
3616 }
3617 )
3618
3619 (define_expand "widen_usum<mode>3"
3620 [(set (match_operand:<VWIDE> 0 "register_operand")
3621 (plus:<VWIDE> (zero_extend:<VWIDE>
3622 (match_operand:VD_BHSI 1 "register_operand"))
3623 (match_operand:<VWIDE> 2 "register_operand")))]
3624 "TARGET_SIMD"
3625 {
3626 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3627 DONE;
3628 })
3629
3630 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3631 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3632 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3633 (ANY_EXTEND:<VWIDE>
3634 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3635 "TARGET_SIMD"
3636 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3637 [(set_attr "type" "neon_sub_widen")]
3638 )
3639
3640 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3642 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3643 (ANY_EXTEND:<VWIDE>
3644 (vec_select:<VHALF>
3645 (match_operand:VQW 2 "register_operand" "w")
3646 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3647 "TARGET_SIMD"
3648 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3649 [(set_attr "type" "neon_sub_widen")]
3650 )
3651
3652 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3654 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3655 (ANY_EXTEND:<VWIDE>
3656 (vec_select:<VHALF>
3657 (match_operand:VQW 2 "register_operand" "w")
3658 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3659 "TARGET_SIMD"
3660 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3661 [(set_attr "type" "neon_sub_widen")]
3662 )
3663
3664 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3665 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3666 (plus:<VWIDE>
3667 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3668 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3669 "TARGET_SIMD"
3670 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3671 [(set_attr "type" "neon_add_widen")]
3672 )
3673
3674 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3675 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3676 (plus:<VWIDE>
3677 (ANY_EXTEND:<VWIDE>
3678 (vec_select:<VHALF>
3679 (match_operand:VQW 2 "register_operand" "w")
3680 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3681 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3682 "TARGET_SIMD"
3683 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3684 [(set_attr "type" "neon_add_widen")]
3685 )
3686
3687 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3688 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3689 (plus:<VWIDE>
3690 (ANY_EXTEND:<VWIDE>
3691 (vec_select:<VHALF>
3692 (match_operand:VQW 2 "register_operand" "w")
3693 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3694 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3695 "TARGET_SIMD"
3696 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3697 [(set_attr "type" "neon_add_widen")]
3698 )
3699
3700 (define_expand "aarch64_saddw2<mode>"
3701 [(match_operand:<VWIDE> 0 "register_operand")
3702 (match_operand:<VWIDE> 1 "register_operand")
3703 (match_operand:VQW 2 "register_operand")]
3704 "TARGET_SIMD"
3705 {
3706 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3707 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3708 operands[2], p));
3709 DONE;
3710 })
3711
3712 (define_expand "aarch64_uaddw2<mode>"
3713 [(match_operand:<VWIDE> 0 "register_operand")
3714 (match_operand:<VWIDE> 1 "register_operand")
3715 (match_operand:VQW 2 "register_operand")]
3716 "TARGET_SIMD"
3717 {
3718 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3719 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3720 operands[2], p));
3721 DONE;
3722 })
3723
3725 (define_expand "aarch64_ssubw2<mode>"
3726 [(match_operand:<VWIDE> 0 "register_operand")
3727 (match_operand:<VWIDE> 1 "register_operand")
3728 (match_operand:VQW 2 "register_operand")]
3729 "TARGET_SIMD"
3730 {
3731 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3732 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3733 operands[2], p));
3734 DONE;
3735 })
3736
3737 (define_expand "aarch64_usubw2<mode>"
3738 [(match_operand:<VWIDE> 0 "register_operand")
3739 (match_operand:<VWIDE> 1 "register_operand")
3740 (match_operand:VQW 2 "register_operand")]
3741 "TARGET_SIMD"
3742 {
3743 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3744 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3745 operands[2], p));
3746 DONE;
3747 })
3748
3749 ;; <su><r>h<addsub>.
3750
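;; For the averaging expanders below, computed without intermediate
;; overflow:
;;   <u>avg3_floor -> <su>hadd:  (a + b) >> 1
;;   <u>avg3_ceil  -> <su>rhadd: (a + b + 1) >> 1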
3751 (define_expand "<u>avg<mode>3_floor"
3752 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3753 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3754 (match_operand:VDQ_BHSI 2 "register_operand")]
3755 HADD))]
3756 "TARGET_SIMD"
3757 )
3758
3759 (define_expand "<u>avg<mode>3_ceil"
3760 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3761 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3762 (match_operand:VDQ_BHSI 2 "register_operand")]
3763 RHADD))]
3764 "TARGET_SIMD"
3765 )
3766
3767 (define_insn "aarch64_<sur>h<addsub><mode>"
3768 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3769 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3770 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3771 HADDSUB))]
3772 "TARGET_SIMD"
3773 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3774 [(set_attr "type" "neon_<addsub>_halve<q>")]
3775 )
3776
3777 ;; <r><addsub>hn<q>.
3778
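;; These narrow by keeping the most significant half of each sum or
;; difference (rounded for the r forms), e.g.:
;;   addhn v0.4h, v1.4s, v2.4s   // v0[i] = (v1[i] + v2[i]) >> 16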
3779 (define_insn "aarch64_<sur><addsub>hn<mode>"
3780 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3781 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3782 (match_operand:VQN 2 "register_operand" "w")]
3783 ADDSUBHN))]
3784 "TARGET_SIMD"
3785 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3786 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3787 )
3788
3789 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3790 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3791 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3792 (match_operand:VQN 2 "register_operand" "w")
3793 (match_operand:VQN 3 "register_operand" "w")]
3794 ADDSUBHN2))]
3795 "TARGET_SIMD"
3796 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3797 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3798 )
3799
3800 ;; pmul.
3801
3802 (define_insn "aarch64_pmul<mode>"
3803 [(set (match_operand:VB 0 "register_operand" "=w")
3804 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3805 (match_operand:VB 2 "register_operand" "w")]
3806 UNSPEC_PMUL))]
3807 "TARGET_SIMD"
3808 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3809 [(set_attr "type" "neon_mul_<Vetype><q>")]
3810 )
3811
3812 ;; fmulx.
3813
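;; fmulx behaves like fmul except that +/-0.0 * +/-Inf returns +/-2.0
;; (with the sign given by the XOR of the operand signs) instead of NaN.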
3814 (define_insn "aarch64_fmulx<mode>"
3815 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3816 (unspec:VHSDF_HSDF
3817 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3818 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3819 UNSPEC_FMULX))]
3820 "TARGET_SIMD"
3821 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3822 [(set_attr "type" "neon_fp_mul_<stype>")]
3823 )
3824
3825 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3826
3827 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3828 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3829 (unspec:VDQSF
3830 [(match_operand:VDQSF 1 "register_operand" "w")
3831 (vec_duplicate:VDQSF
3832 (vec_select:<VEL>
3833 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3834 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3835 UNSPEC_FMULX))]
3836 "TARGET_SIMD"
3837 {
3838 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3839 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3840 }
3841 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3842 )
3843
3844 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3845
3846 (define_insn "*aarch64_mulx_elt<mode>"
3847 [(set (match_operand:VDQF 0 "register_operand" "=w")
3848 (unspec:VDQF
3849 [(match_operand:VDQF 1 "register_operand" "w")
3850 (vec_duplicate:VDQF
3851 (vec_select:<VEL>
3852 (match_operand:VDQF 2 "register_operand" "w")
3853 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3854 UNSPEC_FMULX))]
3855 "TARGET_SIMD"
3856 {
3857 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3858 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3859 }
3860 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3861 )
3862
3863 ;; vmulxq_lane
3864
3865 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3866 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3867 (unspec:VHSDF
3868 [(match_operand:VHSDF 1 "register_operand" "w")
3869 (vec_duplicate:VHSDF
3870 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3871 UNSPEC_FMULX))]
3872 "TARGET_SIMD"
3873 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3874 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3875 )
3876
3877 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3878 ;; vmulxd_lane_f64 == vmulx_lane_f64
3879 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3880
3881 (define_insn "*aarch64_vgetfmulx<mode>"
3882 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3883 (unspec:<VEL>
3884 [(match_operand:<VEL> 1 "register_operand" "w")
3885 (vec_select:<VEL>
3886 (match_operand:VDQF 2 "register_operand" "w")
3887 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3888 UNSPEC_FMULX))]
3889 "TARGET_SIMD"
3890 {
3891 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3892 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3893 }
3894 [(set_attr "type" "fmul<Vetype>")]
3895 )

3896 ;; <su>q<addsub>
3897
3898 (define_insn "aarch64_<su_optab>q<addsub><mode>"
3899 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3900 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3901 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3902 "TARGET_SIMD"
3903 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3904 [(set_attr "type" "neon_q<addsub><q>")]
3905 )
3906
3907 ;; suqadd and usqadd
3908
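;; These accumulate into operand 0 (note the "0" tie): suqadd does a
;; signed saturating add of an unsigned value, usqadd an unsigned
;; saturating add of a signed value.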
3909 (define_insn "aarch64_<sur>qadd<mode>"
3910 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3911 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3912 (match_operand:VSDQ_I 2 "register_operand" "w")]
3913 USSUQADD))]
3914 "TARGET_SIMD"
3915 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3916 [(set_attr "type" "neon_qadd<q>")]
3917 )
3918
3919 ;; sqmovun
3920
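;; sqxtun narrows a signed source to the unsigned type of half the
;; width, saturating negative inputs to zero.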
3921 (define_insn "aarch64_sqmovun<mode>"
3922 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3923 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3924 UNSPEC_SQXTUN))]
3925 "TARGET_SIMD"
3926 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3927 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3928 )
3929
3930 ;; sqmovn and uqmovn
3931
3932 (define_insn "aarch64_<sur>qmovn<mode>"
3933 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3934 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3935 SUQMOVN))]
3936 "TARGET_SIMD"
3937 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3938 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3939 )
3940
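;; The _le/_be variants below exist because the lane order of vec_concat
;; in RTL is reversed on big-endian targets; the expander dispatches on
;; BYTES_BIG_ENDIAN.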
3941 (define_insn "aarch64_<su>qxtn2<mode>_le"
3942 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3943 (vec_concat:<VNARROWQ2>
3944 (match_operand:<VNARROWQ> 1 "register_operand" "0")
3945 (SAT_TRUNC:<VNARROWQ>
3946 (match_operand:VQN 2 "register_operand" "w"))))]
3947 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3948 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
3949 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3950 )
3951
3952 (define_insn "aarch64_<su>qxtn2<mode>_be"
3953 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3954 (vec_concat:<VNARROWQ2>
3955 (SAT_TRUNC:<VNARROWQ>
3956 (match_operand:VQN 2 "register_operand" "w"))
3957 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
3958 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3959 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
3960 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3961 )
3962
3963 (define_expand "aarch64_<su>qxtn2<mode>"
3964 [(match_operand:<VNARROWQ2> 0 "register_operand")
3965 (match_operand:<VNARROWQ> 1 "register_operand")
3966 (SAT_TRUNC:<VNARROWQ>
3967 (match_operand:VQN 2 "register_operand"))]
3968 "TARGET_SIMD"
3969 {
3970 if (BYTES_BIG_ENDIAN)
3971 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
3972 operands[2]));
3973 else
3974 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
3975 operands[2]));
3976 DONE;
3977 }
3978 )
3979
3980 ;; <su>q<absneg>
3981
3982 (define_insn "aarch64_s<optab><mode>"
3983 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3984 (UNQOPS:VSDQ_I
3985 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3986 "TARGET_SIMD"
3987 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3988 [(set_attr "type" "neon_<optab><q>")]
3989 )
3990
3991 ;; sq<r>dmulh.
3992
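;; Saturating doubling multiply returning the high half, e.g. for
;; 16-bit elements:
;;   sqdmulh:  sat ((2 * a * b) >> 16)
;;   sqrdmulh: sat ((2 * a * b + (1 << 15)) >> 16)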
3993 (define_insn "aarch64_sq<r>dmulh<mode>"
3994 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3995 (unspec:VSDQ_HSI
3996 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3997 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3998 VQDMULH))]
3999 "TARGET_SIMD"
4000 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4001 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
4002 )
4003
4004 ;; sq<r>dmulh_lane
4005
4006 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
4007 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4008 (unspec:VDQHS
4009 [(match_operand:VDQHS 1 "register_operand" "w")
4010 (vec_select:<VEL>
4011 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4012 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4013 VQDMULH))]
4014 "TARGET_SIMD"
4015 "*
4016 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4017 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
4018 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4019 )
4020
4021 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
4022 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4023 (unspec:VDQHS
4024 [(match_operand:VDQHS 1 "register_operand" "w")
4025 (vec_select:<VEL>
4026 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4027 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4028 VQDMULH))]
4029 "TARGET_SIMD"
4030 "*
4031 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4032 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
4033 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4034 )
4035
4036 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
4037 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4038 (unspec:SD_HSI
4039 [(match_operand:SD_HSI 1 "register_operand" "w")
4040 (vec_select:<VEL>
4041 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4042 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4043 VQDMULH))]
4044 "TARGET_SIMD"
4045 "*
4046 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4047 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
4048 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4049 )
4050
4051 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
4052 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4053 (unspec:SD_HSI
4054 [(match_operand:SD_HSI 1 "register_operand" "w")
4055 (vec_select:<VEL>
4056 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4057 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4058 VQDMULH))]
4059 "TARGET_SIMD"
4060 "*
4061 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4062 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
4063 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
4064 )
4065
4066 ;; sqrdml[as]h.
4067
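;; Rounding doubling multiply-accumulate/subtract returning the high
;; half, e.g. for 16-bit elements:
;;   sqrdml<as>h: sat (((acc << 16) +/- 2 * a * b + (1 << 15)) >> 16)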
4068 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
4069 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
4070 (unspec:VSDQ_HSI
4071 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
4072 (match_operand:VSDQ_HSI 2 "register_operand" "w")
4073 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
4074 SQRDMLH_AS))]
4075 "TARGET_SIMD_RDMA"
4076 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4077 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4078 )
4079
4080 ;; sqrdml[as]h_lane.
4081
4082 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
4083 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4084 (unspec:VDQHS
4085 [(match_operand:VDQHS 1 "register_operand" "0")
4086 (match_operand:VDQHS 2 "register_operand" "w")
4087 (vec_select:<VEL>
4088 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4089 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4090 SQRDMLH_AS))]
4091 "TARGET_SIMD_RDMA"
4092 {
4093 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4094 return
4095 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4096 }
4097 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4098 )
4099
4100 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
4101 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4102 (unspec:SD_HSI
4103 [(match_operand:SD_HSI 1 "register_operand" "0")
4104 (match_operand:SD_HSI 2 "register_operand" "w")
4105 (vec_select:<VEL>
4106 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4107 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4108 SQRDMLH_AS))]
4109 "TARGET_SIMD_RDMA"
4110 {
4111 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4112 return
4113 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
4114 }
4115 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4116 )
4117
4118 ;; sqrdml[as]h_laneq.
4119
4120 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4121 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4122 (unspec:VDQHS
4123 [(match_operand:VDQHS 1 "register_operand" "0")
4124 (match_operand:VDQHS 2 "register_operand" "w")
4125 (vec_select:<VEL>
4126 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4127 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4128 SQRDMLH_AS))]
4129 "TARGET_SIMD_RDMA"
4130 {
4131 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4132 return
4133 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4134 }
4135 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4136 )
4137
4138 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4139 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4140 (unspec:SD_HSI
4141 [(match_operand:SD_HSI 1 "register_operand" "0")
4142 (match_operand:SD_HSI 2 "register_operand" "w")
4143 (vec_select:<VEL>
4144 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4145 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4146 SQRDMLH_AS))]
4147 "TARGET_SIMD_RDMA"
4148 {
4149 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4150 return
4151 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
4152 }
4153 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4154 )
4155
4156 ;; vqdml[sa]l
4157
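;; Saturating doubling multiply-accumulate long.  The canonical RTL
;; expresses the architectural semantics sat (acc +/- sat (2 * a * b))
;; with (ss_ashift (mult (sign_extend a) (sign_extend b)) (const_int 1))
;; for the doubled product, wrapped in ss_plus/ss_minus with the
;; accumulator.  E.g. with <mode> == V4HI this implements the ACLE
;; intrinsic vqdmlal_s16 and emits sqdmlal v0.4s, v1.4h, v2.4h.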
4158 (define_insn "aarch64_sqdmlal<mode>"
4159 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4160 (ss_plus:<VWIDE>
4161 (ss_ashift:<VWIDE>
4162 (mult:<VWIDE>
4163 (sign_extend:<VWIDE>
4164 (match_operand:VSD_HSI 2 "register_operand" "w"))
4165 (sign_extend:<VWIDE>
4166 (match_operand:VSD_HSI 3 "register_operand" "w")))
4167 (const_int 1))
4168 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4169 "TARGET_SIMD"
4170 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4171 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4172 )
4173
4174 (define_insn "aarch64_sqdmlsl<mode>"
4175 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4176 (ss_minus:<VWIDE>
4177 (match_operand:<VWIDE> 1 "register_operand" "0")
4178 (ss_ashift:<VWIDE>
4179 (mult:<VWIDE>
4180 (sign_extend:<VWIDE>
4181 (match_operand:VSD_HSI 2 "register_operand" "w"))
4182 (sign_extend:<VWIDE>
4183 (match_operand:VSD_HSI 3 "register_operand" "w")))
4184 (const_int 1))))]
4185 "TARGET_SIMD"
4186 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4187 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4188 )
4189
4190 ;; vqdml[sa]l_lane
4191
4192 (define_insn "aarch64_sqdmlal_lane<mode>"
4193 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4194 (ss_plus:<VWIDE>
4195 (ss_ashift:<VWIDE>
4196 (mult:<VWIDE>
4197 (sign_extend:<VWIDE>
4198 (match_operand:VD_HSI 2 "register_operand" "w"))
4199 (sign_extend:<VWIDE>
4200 (vec_duplicate:VD_HSI
4201 (vec_select:<VEL>
4202 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4203 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4204 ))
4205 (const_int 1))
4206 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4207 "TARGET_SIMD"
4208 {
4209 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4210 return
4211 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4212 }
4213 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4214 )
4215
4216 (define_insn "aarch64_sqdmlsl_lane<mode>"
4217 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4218 (ss_minus:<VWIDE>
4219 (match_operand:<VWIDE> 1 "register_operand" "0")
4220 (ss_ashift:<VWIDE>
4221 (mult:<VWIDE>
4222 (sign_extend:<VWIDE>
4223 (match_operand:VD_HSI 2 "register_operand" "w"))
4224 (sign_extend:<VWIDE>
4225 (vec_duplicate:VD_HSI
4226 (vec_select:<VEL>
4227 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4228 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4229 ))
4230 (const_int 1))))]
4231 "TARGET_SIMD"
4232 {
4233 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4234 return
4235 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4236 }
4237 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4238 )
4239
4241 (define_insn "aarch64_sqdmlsl_laneq<mode>"
4242 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4243 (ss_minus:<VWIDE>
4244 (match_operand:<VWIDE> 1 "register_operand" "0")
4245 (ss_ashift:<VWIDE>
4246 (mult:<VWIDE>
4247 (sign_extend:<VWIDE>
4248 (match_operand:VD_HSI 2 "register_operand" "w"))
4249 (sign_extend:<VWIDE>
4250 (vec_duplicate:VD_HSI
4251 (vec_select:<VEL>
4252 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4253 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4254 ))
4255 (const_int 1))))]
4256 "TARGET_SIMD"
4257 {
4258 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4259 return
4260 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4261 }
4262 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4263 )
4264
4265 (define_insn "aarch64_sqdmlal_laneq<mode>"
4266 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4267 (ss_plus:<VWIDE>
4268 (ss_ashift:<VWIDE>
4269 (mult:<VWIDE>
4270 (sign_extend:<VWIDE>
4271 (match_operand:VD_HSI 2 "register_operand" "w"))
4272 (sign_extend:<VWIDE>
4273 (vec_duplicate:VD_HSI
4274 (vec_select:<VEL>
4275 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4276 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4277 ))
4278 (const_int 1))
4279 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4280 "TARGET_SIMD"
4281 {
4282 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4283 return
4284 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4285 }
4286 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4287 )
4288
4290 (define_insn "aarch64_sqdmlal_lane<mode>"
4291 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4292 (ss_plus:<VWIDE>
4293 (ss_ashift:<VWIDE>
4294 (mult:<VWIDE>
4295 (sign_extend:<VWIDE>
4296 (match_operand:SD_HSI 2 "register_operand" "w"))
4297 (sign_extend:<VWIDE>
4298 (vec_select:<VEL>
4299 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4300 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4301 )
4302 (const_int 1))
4303 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4304 "TARGET_SIMD"
4305 {
4306 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4307 return
4308 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4309 }
4310 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4311 )
4312
4313 (define_insn "aarch64_sqdmlsl_lane<mode>"
4314 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4315 (ss_minus:<VWIDE>
4316 (match_operand:<VWIDE> 1 "register_operand" "0")
4317 (ss_ashift:<VWIDE>
4318 (mult:<VWIDE>
4319 (sign_extend:<VWIDE>
4320 (match_operand:SD_HSI 2 "register_operand" "w"))
4321 (sign_extend:<VWIDE>
4322 (vec_select:<VEL>
4323 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4324 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4325 )
4326 (const_int 1))))]
4327 "TARGET_SIMD"
4328 {
4329 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4330 return
4331 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4332 }
4333 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4334 )
4335
4337 (define_insn "aarch64_sqdmlal_laneq<mode>"
4338 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4339 (ss_plus:<VWIDE>
4340 (ss_ashift:<VWIDE>
4341 (mult:<VWIDE>
4342 (sign_extend:<VWIDE>
4343 (match_operand:SD_HSI 2 "register_operand" "w"))
4344 (sign_extend:<VWIDE>
4345 (vec_select:<VEL>
4346 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4347 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4348 )
4349 (const_int 1))
4350 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4351 "TARGET_SIMD"
4352 {
4353 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4354 return
4355 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4356 }
4357 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4358 )
4359
4360 (define_insn "aarch64_sqdmlsl_laneq<mode>"
4361 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4362 (ss_minus:<VWIDE>
4363 (match_operand:<VWIDE> 1 "register_operand" "0")
4364 (ss_ashift:<VWIDE>
4365 (mult:<VWIDE>
4366 (sign_extend:<VWIDE>
4367 (match_operand:SD_HSI 2 "register_operand" "w"))
4368 (sign_extend:<VWIDE>
4369 (vec_select:<VEL>
4370 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4371 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4372 )
4373 (const_int 1))))]
4374 "TARGET_SIMD"
4375 {
4376 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4377 return
4378 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4379 }
4380 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4381 )
4382
4383 ;; vqdml[sa]l_n
4384
4385 (define_insn "aarch64_sqdmlsl_n<mode>"
4386 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4387 (ss_minus:<VWIDE>
4388 (match_operand:<VWIDE> 1 "register_operand" "0")
4389 (ss_ashift:<VWIDE>
4390 (mult:<VWIDE>
4391 (sign_extend:<VWIDE>
4392 (match_operand:VD_HSI 2 "register_operand" "w"))
4393 (sign_extend:<VWIDE>
4394 (vec_duplicate:VD_HSI
4395 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4396 (const_int 1))))]
4397 "TARGET_SIMD"
4398 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4399 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4400 )
4401
4402 (define_insn "aarch64_sqdmlal_n<mode>"
4403 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4404 (ss_plus:<VWIDE>
4405 (ss_ashift:<VWIDE>
4406 (mult:<VWIDE>
4407 (sign_extend:<VWIDE>
4408 (match_operand:VD_HSI 2 "register_operand" "w"))
4409 (sign_extend:<VWIDE>
4410 (vec_duplicate:VD_HSI
4411 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4412 (const_int 1))
4413 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4414 "TARGET_SIMD"
4415 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4416 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4417 )
4418
4420 ;; sqdml[as]l2
4421
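;; The *2 forms operate on the high vector halves, selected in the
;; _internal patterns through a vect_par_cnst_hi_half parallel that the
;; expanders below construct with aarch64_simd_vect_par_cnst_half.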
4422 (define_insn "aarch64_sqdmlal2<mode>_internal"
4423 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4424 (ss_plus:<VWIDE>
4425 (ss_ashift:<VWIDE>
4426 (mult:<VWIDE>
4427 (sign_extend:<VWIDE>
4428 (vec_select:<VHALF>
4429 (match_operand:VQ_HSI 2 "register_operand" "w")
4430 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4431 (sign_extend:<VWIDE>
4432 (vec_select:<VHALF>
4433 (match_operand:VQ_HSI 3 "register_operand" "w")
4434 (match_dup 4))))
4435 (const_int 1))
4436 (match_operand:<VWIDE> 1 "register_operand" "0")))]
4437 "TARGET_SIMD"
4438 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4439 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4440 )
4441
4442 (define_insn "aarch64_sqdmlsl2<mode>_internal"
4443 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4444 (ss_minus:<VWIDE>
4445 (match_operand:<VWIDE> 1 "register_operand" "0")
4446 (ss_ashift:<VWIDE>
4447 (mult:<VWIDE>
4448 (sign_extend:<VWIDE>
4449 (vec_select:<VHALF>
4450 (match_operand:VQ_HSI 2 "register_operand" "w")
4451 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4452 (sign_extend:<VWIDE>
4453 (vec_select:<VHALF>
4454 (match_operand:VQ_HSI 3 "register_operand" "w")
4455 (match_dup 4))))
4456 (const_int 1))))]
4457 "TARGET_SIMD"
4458 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4459 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4460 )
4461
4462 (define_expand "aarch64_sqdmlal2<mode>"
4463 [(match_operand:<VWIDE> 0 "register_operand")
4464 (match_operand:<VWIDE> 1 "register_operand")
4465 (match_operand:VQ_HSI 2 "register_operand")
4466 (match_operand:VQ_HSI 3 "register_operand")]
4467 "TARGET_SIMD"
4468 {
4469 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4470 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4471 operands[2], operands[3], p));
4472 DONE;
4473 })
4474
4475 (define_expand "aarch64_sqdmlsl2<mode>"
4476 [(match_operand:<VWIDE> 0 "register_operand")
4477 (match_operand:<VWIDE> 1 "register_operand")
4478 (match_operand:VQ_HSI 2 "register_operand")
4479 (match_operand:VQ_HSI 3 "register_operand")]
4480 "TARGET_SIMD"
4481 {
4482 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4483 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4484 operands[2], operands[3], p));
4485 DONE;
4486 })
4487
4488 ;; vqdml[sa]l2_lane
4489
4490 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4491 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4492 (SBINQOPS:<VWIDE>
4493 (match_operand:<VWIDE> 1 "register_operand" "0")
4494 (ss_ashift:<VWIDE>
4495 (mult:<VWIDE>
4496 (sign_extend:<VWIDE>
4497 (vec_select:<VHALF>
4498 (match_operand:VQ_HSI 2 "register_operand" "w")
4499 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4500 (sign_extend:<VWIDE>
4501 (vec_duplicate:<VHALF>
4502 (vec_select:<VEL>
4503 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4504 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4505 ))))
4506 (const_int 1))))]
4507 "TARGET_SIMD"
4508 {
4509 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4510 return
4511 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4512 }
4513 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4514 )
4515
4516 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4517 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4518 (SBINQOPS:<VWIDE>
4519 (match_operand:<VWIDE> 1 "register_operand" "0")
4520 (ss_ashift:<VWIDE>
4521 (mult:<VWIDE>
4522 (sign_extend:<VWIDE>
4523 (vec_select:<VHALF>
4524 (match_operand:VQ_HSI 2 "register_operand" "w")
4525 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4526 (sign_extend:<VWIDE>
4527 (vec_duplicate:<VHALF>
4528 (vec_select:<VEL>
4529 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4530 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4531 ))))
4532 (const_int 1))))]
4533 "TARGET_SIMD"
4534 {
4535 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4536 return
4537 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4538 }
4539 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4540 )
4541
4542 (define_expand "aarch64_sqdmlal2_lane<mode>"
4543 [(match_operand:<VWIDE> 0 "register_operand")
4544 (match_operand:<VWIDE> 1 "register_operand")
4545 (match_operand:VQ_HSI 2 "register_operand")
4546 (match_operand:<VCOND> 3 "register_operand")
4547 (match_operand:SI 4 "immediate_operand")]
4548 "TARGET_SIMD"
4549 {
4550 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4551 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4552 operands[2], operands[3],
4553 operands[4], p));
4554 DONE;
4555 })
4556
4557 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4558 [(match_operand:<VWIDE> 0 "register_operand")
4559 (match_operand:<VWIDE> 1 "register_operand")
4560 (match_operand:VQ_HSI 2 "register_operand")
4561 (match_operand:<VCONQ> 3 "register_operand")
4562 (match_operand:SI 4 "immediate_operand")]
4563 "TARGET_SIMD"
4564 {
4565 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4566 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4567 operands[2], operands[3],
4568 operands[4], p));
4569 DONE;
4570 })
4571
4572 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4573 [(match_operand:<VWIDE> 0 "register_operand")
4574 (match_operand:<VWIDE> 1 "register_operand")
4575 (match_operand:VQ_HSI 2 "register_operand")
4576 (match_operand:<VCOND> 3 "register_operand")
4577 (match_operand:SI 4 "immediate_operand")]
4578 "TARGET_SIMD"
4579 {
4580 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4581 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4582 operands[2], operands[3],
4583 operands[4], p));
4584 DONE;
4585 })
4586
4587 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4588 [(match_operand:<VWIDE> 0 "register_operand")
4589 (match_operand:<VWIDE> 1 "register_operand")
4590 (match_operand:VQ_HSI 2 "register_operand")
4591 (match_operand:<VCONQ> 3 "register_operand")
4592 (match_operand:SI 4 "immediate_operand")]
4593 "TARGET_SIMD"
4594 {
4595 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4596 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4597 operands[2], operands[3],
4598 operands[4], p));
4599 DONE;
4600 })
4601
4602 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4603 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4604 (SBINQOPS:<VWIDE>
4605 (match_operand:<VWIDE> 1 "register_operand" "0")
4606 (ss_ashift:<VWIDE>
4607 (mult:<VWIDE>
4608 (sign_extend:<VWIDE>
4609 (vec_select:<VHALF>
4610 (match_operand:VQ_HSI 2 "register_operand" "w")
4611 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4612 (sign_extend:<VWIDE>
4613 (vec_duplicate:<VHALF>
4614 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4615 (const_int 1))))]
4616 "TARGET_SIMD"
4617 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4618 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4619 )
4620
4621 (define_expand "aarch64_sqdmlal2_n<mode>"
4622 [(match_operand:<VWIDE> 0 "register_operand")
4623 (match_operand:<VWIDE> 1 "register_operand")
4624 (match_operand:VQ_HSI 2 "register_operand")
4625 (match_operand:<VEL> 3 "register_operand")]
4626 "TARGET_SIMD"
4627 {
4628 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4629 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4630 operands[2], operands[3],
4631 p));
4632 DONE;
4633 })
4634
4635 (define_expand "aarch64_sqdmlsl2_n<mode>"
4636 [(match_operand:<VWIDE> 0 "register_operand")
4637 (match_operand:<VWIDE> 1 "register_operand")
4638 (match_operand:VQ_HSI 2 "register_operand")
4639 (match_operand:<VEL> 3 "register_operand")]
4640 "TARGET_SIMD"
4641 {
4642 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4643 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4644 operands[2], operands[3],
4645 p));
4646 DONE;
4647 })
4648
4649 ;; vqdmull
4650
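;; Saturating doubling multiply long: sat (2 * a * b), widening each
;; element to twice its width, e.g. sqdmull v0.4s, v1.4h, v2.4h.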
4651 (define_insn "aarch64_sqdmull<mode>"
4652 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4653 (ss_ashift:<VWIDE>
4654 (mult:<VWIDE>
4655 (sign_extend:<VWIDE>
4656 (match_operand:VSD_HSI 1 "register_operand" "w"))
4657 (sign_extend:<VWIDE>
4658 (match_operand:VSD_HSI 2 "register_operand" "w")))
4659 (const_int 1)))]
4660 "TARGET_SIMD"
4661 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4662 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4663 )
4664
4665 ;; vqdmull_lane
4666
4667 (define_insn "aarch64_sqdmull_lane<mode>"
4668 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4669 (ss_ashift:<VWIDE>
4670 (mult:<VWIDE>
4671 (sign_extend:<VWIDE>
4672 (match_operand:VD_HSI 1 "register_operand" "w"))
4673 (sign_extend:<VWIDE>
4674 (vec_duplicate:VD_HSI
4675 (vec_select:<VEL>
4676 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4677 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4678 ))
4679 (const_int 1)))]
4680 "TARGET_SIMD"
4681 {
4682 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4683 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4684 }
4685 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4686 )
4687
4688 (define_insn "aarch64_sqdmull_laneq<mode>"
4689 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4690 (ss_ashift:<VWIDE>
4691 (mult:<VWIDE>
4692 (sign_extend:<VWIDE>
4693 (match_operand:VD_HSI 1 "register_operand" "w"))
4694 (sign_extend:<VWIDE>
4695 (vec_duplicate:VD_HSI
4696 (vec_select:<VEL>
4697 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4698 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4699 ))
4700 (const_int 1)))]
4701 "TARGET_SIMD"
4702 {
4703 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4704 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4705 }
4706 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4707 )
4708
4709 (define_insn "aarch64_sqdmull_lane<mode>"
4710 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4711 (ss_ashift:<VWIDE>
4712 (mult:<VWIDE>
4713 (sign_extend:<VWIDE>
4714 (match_operand:SD_HSI 1 "register_operand" "w"))
4715 (sign_extend:<VWIDE>
4716 (vec_select:<VEL>
4717 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4718 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4719 ))
4720 (const_int 1)))]
4721 "TARGET_SIMD"
4722 {
4723 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4724 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4725 }
4726 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4727 )
4728
4729 (define_insn "aarch64_sqdmull_laneq<mode>"
4730 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4731 (ss_ashift:<VWIDE>
4732 (mult:<VWIDE>
4733 (sign_extend:<VWIDE>
4734 (match_operand:SD_HSI 1 "register_operand" "w"))
4735 (sign_extend:<VWIDE>
4736 (vec_select:<VEL>
4737 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4738 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4739 ))
4740 (const_int 1)))]
4741 "TARGET_SIMD"
4742 {
4743 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4744 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4745 }
4746 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4747 )
4748
4749 ;; vqdmull_n
4750
4751 (define_insn "aarch64_sqdmull_n<mode>"
4752 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4753 (ss_ashift:<VWIDE>
4754 (mult:<VWIDE>
4755 (sign_extend:<VWIDE>
4756 (match_operand:VD_HSI 1 "register_operand" "w"))
4757 (sign_extend:<VWIDE>
4758 (vec_duplicate:VD_HSI
4759 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4760 )
4761 (const_int 1)))]
4762 "TARGET_SIMD"
4763 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4764 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4765 )
4766
4767 ;; vqdmull2
4768
4771 (define_insn "aarch64_sqdmull2<mode>_internal"
4772 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4773 (ss_ashift:<VWIDE>
4774 (mult:<VWIDE>
4775 (sign_extend:<VWIDE>
4776 (vec_select:<VHALF>
4777 (match_operand:VQ_HSI 1 "register_operand" "w")
4778 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4779 (sign_extend:<VWIDE>
4780 (vec_select:<VHALF>
4781 (match_operand:VQ_HSI 2 "register_operand" "w")
4782 (match_dup 3)))
4783 )
4784 (const_int 1)))]
4785 "TARGET_SIMD"
4786 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4787 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4788 )
4789
4790 (define_expand "aarch64_sqdmull2<mode>"
4791 [(match_operand:<VWIDE> 0 "register_operand")
4792 (match_operand:VQ_HSI 1 "register_operand")
4793 (match_operand:VQ_HSI 2 "register_operand")]
4794 "TARGET_SIMD"
4795 {
4796 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4797 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4798 operands[2], p));
4799 DONE;
4800 })
4801
4802 ;; vqdmull2_lane
4803
4804 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4805 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4806 (ss_ashift:<VWIDE>
4807 (mult:<VWIDE>
4808 (sign_extend:<VWIDE>
4809 (vec_select:<VHALF>
4810 (match_operand:VQ_HSI 1 "register_operand" "w")
4811 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4812 (sign_extend:<VWIDE>
4813 (vec_duplicate:<VHALF>
4814 (vec_select:<VEL>
4815 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4816 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4817 ))
4818 (const_int 1)))]
4819 "TARGET_SIMD"
4820 {
4821 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4822 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4823 }
4824 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4825 )
4826
4827 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4828 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4829 (ss_ashift:<VWIDE>
4830 (mult:<VWIDE>
4831 (sign_extend:<VWIDE>
4832 (vec_select:<VHALF>
4833 (match_operand:VQ_HSI 1 "register_operand" "w")
4834 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4835 (sign_extend:<VWIDE>
4836 (vec_duplicate:<VHALF>
4837 (vec_select:<VEL>
4838 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4839 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4840 ))
4841 (const_int 1)))]
4842 "TARGET_SIMD"
4843 {
4844 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4845 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4846 }
4847 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4848 )
4849
4850 (define_expand "aarch64_sqdmull2_lane<mode>"
4851 [(match_operand:<VWIDE> 0 "register_operand")
4852 (match_operand:VQ_HSI 1 "register_operand")
4853 (match_operand:<VCOND> 2 "register_operand")
4854 (match_operand:SI 3 "immediate_operand")]
4855 "TARGET_SIMD"
4856 {
4857 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4858 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4859 operands[2], operands[3],
4860 p));
4861 DONE;
4862 })
4863
4864 (define_expand "aarch64_sqdmull2_laneq<mode>"
4865 [(match_operand:<VWIDE> 0 "register_operand")
4866 (match_operand:VQ_HSI 1 "register_operand")
4867 (match_operand:<VCONQ> 2 "register_operand")
4868 (match_operand:SI 3 "immediate_operand")]
4869 "TARGET_SIMD"
4870 {
4871 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4872 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4873 operands[2], operands[3],
4874 p));
4875 DONE;
4876 })
4877
4878 ;; vqdmull2_n
4879
4880 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4881 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4882 (ss_ashift:<VWIDE>
4883 (mult:<VWIDE>
4884 (sign_extend:<VWIDE>
4885 (vec_select:<VHALF>
4886 (match_operand:VQ_HSI 1 "register_operand" "w")
4887 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4888 (sign_extend:<VWIDE>
4889 (vec_duplicate:<VHALF>
4890 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4891 )
4892 (const_int 1)))]
4893 "TARGET_SIMD"
4894 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4895 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4896 )
4897
4898 (define_expand "aarch64_sqdmull2_n<mode>"
4899 [(match_operand:<VWIDE> 0 "register_operand")
4900 (match_operand:VQ_HSI 1 "register_operand")
4901 (match_operand:<VEL> 2 "register_operand")]
4902 "TARGET_SIMD"
4903 {
4904 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4905 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4906 operands[2], p));
4907 DONE;
4908 })
4909
4910 ;; vshl
4911
4912 (define_insn "aarch64_<sur>shl<mode>"
4913 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4914 (unspec:VSDQ_I_DI
4915 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4916 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4917 VSHL))]
4918 "TARGET_SIMD"
4919 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4920 [(set_attr "type" "neon_shift_reg<q>")]
4921 )
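
;; A small intrinsics example that should reach this pattern (illustrative
;; function name; the assembly comment is the expected instruction):
;;
;;   #include <arm_neon.h>
;;
;;   int32x2_t
;;   f (int32x2_t x, int32x2_t n)
;;   {
;;     return vshl_s32 (x, n);   /* sshl; negative lanes of n shift right */
;;   }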
4922
4923
4924 ;; vqshl
4925
4926 (define_insn "aarch64_<sur>q<r>shl<mode>"
4927 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4928 (unspec:VSDQ_I
4929 [(match_operand:VSDQ_I 1 "register_operand" "w")
4930 (match_operand:VSDQ_I 2 "register_operand" "w")]
4931 VQSHL))]
4932 "TARGET_SIMD"
4933 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4934 [(set_attr "type" "neon_sat_shift_reg<q>")]
4935 )
4936
4937 (define_expand "vec_widen_<sur>shiftl_lo_<mode>"
4938 [(set (match_operand:<VWIDE> 0 "register_operand")
4939 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
4940 (match_operand:SI 2
4941 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
4942 VSHLL))]
4943 "TARGET_SIMD"
4944 {
4945 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4946 emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
4947 p, operands[2]));
4948 DONE;
4949 }
4950 )
4951
4952 (define_expand "vec_widen_<sur>shiftl_hi_<mode>"
4953 [(set (match_operand:<VWIDE> 0 "register_operand")
4954 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
4955 (match_operand:SI 2
4956 "immediate_operand")]
4957 VSHLL))]
4958 "TARGET_SIMD"
4959 {
4960 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4961 emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
4962 p, operands[2]));
4963 DONE;
4964 }
4965 )
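
;; A loop of the shape below may be auto-vectorized through these two
;; expanders, pairing SSHLL with SSHLL2 (whether it is depends on the
;; vectorizer's idiom recognition and cost model):
;;
;;   #include <stdint.h>
;;
;;   void
;;   f (int32_t *restrict d, const int16_t *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = (int32_t) s[i] << 3;   /* widen, then shift left */
;;   }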
4966
4967 ;; vshll_n
4968
4969 (define_insn "aarch64_<sur>shll<mode>_internal"
4970 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4971 (unspec:<VWIDE> [(vec_select:<VHALF>
4972 (match_operand:VQW 1 "register_operand" "w")
4973 (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
4974 (match_operand:SI 3
4975 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4976 VSHLL))]
4977 "TARGET_SIMD"
4978 {
4979 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4980 return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
4981 else
4982 return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
4983 }
4984 [(set_attr "type" "neon_shift_imm_long")]
4985 )
4986
4987 (define_insn "aarch64_<sur>shll2<mode>_internal"
4988 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4989 (unspec:<VWIDE> [(vec_select:<VHALF>
4990 (match_operand:VQW 1 "register_operand" "w")
4991 (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
4992 (match_operand:SI 3
4993 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4994 VSHLL))]
4995 "TARGET_SIMD"
4996 {
4997 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4998 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
4999 else
5000 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
5001 }
5002 [(set_attr "type" "neon_shift_imm_long")]
5003 )
5004
5005 (define_insn "aarch64_<sur>shll_n<mode>"
5006 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5007 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
5008 (match_operand:SI 2
5009 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
5010 VSHLL))]
5011 "TARGET_SIMD"
5012 {
5013 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
5014 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
5015 else
5016 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
5017 }
5018 [(set_attr "type" "neon_shift_imm_long")]
5019 )
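
;; The shift amount may equal the element width, which selects the plain
;; SHLL form handled above.  A hedged intrinsics example:
;;
;;   #include <arm_neon.h>
;;
;;   int16x8_t
;;   f (int8x8_t x)
;;   {
;;     return vshll_n_s8 (x, 8);   /* shift == element width -> SHLL */
;;   }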
5020
5021 ;; vshll_high_n
5022
5023 (define_insn "aarch64_<sur>shll2_n<mode>"
5024 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5025 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
5026 (match_operand:SI 2 "immediate_operand" "i")]
5027 VSHLL))]
5028 "TARGET_SIMD"
5029 {
5030 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
5031 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
5032 else
5033 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
5034 }
5035 [(set_attr "type" "neon_shift_imm_long")]
5036 )
5037
5038 ;; vrshr_n
5039
5040 (define_insn "aarch64_<sur>shr_n<mode>"
5041 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5042 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
5043 (match_operand:SI 2
5044 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5045 VRSHR_N))]
5046 "TARGET_SIMD"
5047 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
5048 [(set_attr "type" "neon_sat_shift_imm<q>")]
5049 )
5050
5051 ;; v(r)sra_n
5052
5053 (define_insn "aarch64_<sur>sra_n<mode>"
5054 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5055 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
5056 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
5057 (match_operand:SI 3
5058 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5059 VSRA))]
5060 "TARGET_SIMD"
5061 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
5062 [(set_attr "type" "neon_shift_acc<q>")]
5063 )
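
;; A minimal intrinsics example for the accumulating form (illustrative
;; function name; expected instruction shown in the comment):
;;
;;   #include <arm_neon.h>
;;
;;   int32x2_t
;;   f (int32x2_t acc, int32x2_t x)
;;   {
;;     return vsra_n_s32 (acc, x, 3);   /* ssra: acc + (x >> 3) per lane */
;;   }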
5064
5065 ;; vs<lr>i_n
5066
5067 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
5068 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
5069 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
5070 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
5071 (match_operand:SI 3
5072 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
5073 VSLRI))]
5074 "TARGET_SIMD"
5075 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
5076 [(set_attr "type" "neon_shift_imm<q>")]
5077 )
5078
5079 ;; vqshl(u)
5080
5081 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
5082 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5083 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
5084 (match_operand:SI 2
5085 "aarch64_simd_shift_imm_<ve_mode>" "i")]
5086 VQSHL_N))]
5087 "TARGET_SIMD"
5088 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
5089 [(set_attr "type" "neon_sat_shift_imm<q>")]
5090 )
5091
5092
5093 ;; vq(r)shr(u)n_n
5094
5095 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
5096 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5097 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
5098 (match_operand:SI 2
5099 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5100 VQSHRN_N))]
5101 "TARGET_SIMD"
5102 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
5103 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5104 )
5105
5106 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>"
5107 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5108 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
5109 (match_operand:VQN 2 "register_operand" "w")
5110 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
5111 VQSHRN_N))]
5112 "TARGET_SIMD"
5113 "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
5114 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5115 )
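
;; A hedged intrinsics example of the rounding, saturating, narrowing
;; shift handled by the first pattern above:
;;
;;   #include <arm_neon.h>
;;
;;   int16x4_t
;;   f (int32x4_t x)
;;   {
;;     return vqrshrn_n_s32 (x, 5);   /* round, shift right, saturate, narrow */
;;   }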
5116
5117
5118 ;; cm(eq|ge|gt|lt|le)
5119 ;; Note: we have constraints for both Dz and Z because different expanders
5120 ;; have different ideas of what should be passed to this pattern.
5121
5122 (define_insn "aarch64_cm<optab><mode>"
5123 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
5124 (neg:<V_INT_EQUIV>
5125 (COMPARISONS:<V_INT_EQUIV>
5126 (match_operand:VDQ_I 1 "register_operand" "w,w")
5127 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
5128 )))]
5129 "TARGET_SIMD"
5130 "@
5131 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
5132 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
5133 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
5134 )
5135
5136 (define_insn_and_split "aarch64_cm<optab>di"
5137 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
5138 (neg:DI
5139 (COMPARISONS:DI
5140 (match_operand:DI 1 "register_operand" "w,w,r")
5141 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
5142 )))
5143 (clobber (reg:CC CC_REGNUM))]
5144 "TARGET_SIMD"
5145 "#"
5146 "&& reload_completed"
5147 [(set (match_operand:DI 0 "register_operand")
5148 (neg:DI
5149 (COMPARISONS:DI
5150 (match_operand:DI 1 "register_operand")
5151 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
5152 )))]
5153 {
5154 /* If we are in the general purpose register file,
5155 we split to a sequence of comparison and store. */
5156 if (GP_REGNUM_P (REGNO (operands[0]))
5157 && GP_REGNUM_P (REGNO (operands[1])))
5158 {
5159 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
5160 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
5161 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
5162 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5163 DONE;
5164 }
5165 /* Otherwise, we expand to a similar pattern which does not
5166 clobber CC_REGNUM. */
5167 }
5168 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
5169 )
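
;; When both operands end up in general registers, the split above emits a
;; compare plus a negated cset.  A C sketch (the assembly comment is the
;; expected shape under typical codegen, not verified output):
;;
;;   #include <stdint.h>
;;
;;   uint64_t
;;   f (int64_t a, int64_t b)
;;   {
;;     return a > b ? UINT64_MAX : 0;   /* cmp x0, x1; csetm x0, gt */
;;   }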
5170
5171 (define_insn "*aarch64_cm<optab>di"
5172 [(set (match_operand:DI 0 "register_operand" "=w,w")
5173 (neg:DI
5174 (COMPARISONS:DI
5175 (match_operand:DI 1 "register_operand" "w,w")
5176 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
5177 )))]
5178 "TARGET_SIMD && reload_completed"
5179 "@
5180 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
5181 cm<optab>\t%d0, %d1, #0"
5182 [(set_attr "type" "neon_compare, neon_compare_zero")]
5183 )
5184
5185 ;; cm(hs|hi)
5186
5187 (define_insn "aarch64_cm<optab><mode>"
5188 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5189 (neg:<V_INT_EQUIV>
5190 (UCOMPARISONS:<V_INT_EQUIV>
5191 (match_operand:VDQ_I 1 "register_operand" "w")
5192 (match_operand:VDQ_I 2 "register_operand" "w")
5193 )))]
5194 "TARGET_SIMD"
5195 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
5196 [(set_attr "type" "neon_compare<q>")]
5197 )
5198
5199 (define_insn_and_split "aarch64_cm<optab>di"
5200 [(set (match_operand:DI 0 "register_operand" "=w,r")
5201 (neg:DI
5202 (UCOMPARISONS:DI
5203 (match_operand:DI 1 "register_operand" "w,r")
5204 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
5205 )))
5206 (clobber (reg:CC CC_REGNUM))]
5207 "TARGET_SIMD"
5208 "#"
5209 "&& reload_completed"
5210 [(set (match_operand:DI 0 "register_operand")
5211 (neg:DI
5212 (UCOMPARISONS:DI
5213 (match_operand:DI 1 "register_operand")
5214 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
5215 )))]
5216 {
5217 /* If we are in the general purpose register file,
5218 we split to a sequence of comparison and store. */
5219 if (GP_REGNUM_P (REGNO (operands[0]))
5220 && GP_REGNUM_P (REGNO (operands[1])))
5221 {
5222 machine_mode mode = CCmode;
5223 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
5224 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
5225 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5226 DONE;
5227 }
5228 /* Otherwise, we expand to a similar pattern which does not
5229 clobber CC_REGNUM. */
5230 }
5231 [(set_attr "type" "neon_compare,multiple")]
5232 )
5233
5234 (define_insn "*aarch64_cm<optab>di"
5235 [(set (match_operand:DI 0 "register_operand" "=w")
5236 (neg:DI
5237 (UCOMPARISONS:DI
5238 (match_operand:DI 1 "register_operand" "w")
5239 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
5240 )))]
5241 "TARGET_SIMD && reload_completed"
5242 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
5243 [(set_attr "type" "neon_compare")]
5244 )
5245
5246 ;; cmtst
5247
5248 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
5249 ;; we don't have any insns using ne, and aarch64_vcond outputs
5250 ;; not (neg (eq (and x y) 0))
5251 ;; which is rewritten by simplify_rtx as
5252 ;; plus (eq (and x y) 0) -1.
5253
5254 (define_insn "aarch64_cmtst<mode>"
5255 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5256 (plus:<V_INT_EQUIV>
5257 (eq:<V_INT_EQUIV>
5258 (and:VDQ_I
5259 (match_operand:VDQ_I 1 "register_operand" "w")
5260 (match_operand:VDQ_I 2 "register_operand" "w"))
5261 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
5262 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
5263 ]
5264 "TARGET_SIMD"
5265 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5266 [(set_attr "type" "neon_tst<q>")]
5267 )
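
;; The rewrite relies on the bitwise identity ~(-y) == y - 1, which turns
;; not (neg (eq ...)) into plus (eq ...) -1.  A hedged intrinsics example
;; that should match the pattern above:
;;
;;   #include <arm_neon.h>
;;
;;   uint32x2_t
;;   f (int32x2_t a, int32x2_t b)
;;   {
;;     return vtst_s32 (a, b);   /* lane = (a & b) != 0 ? -1 : 0 */
;;   }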
5268
5269 (define_insn_and_split "aarch64_cmtstdi"
5270 [(set (match_operand:DI 0 "register_operand" "=w,r")
5271 (neg:DI
5272 (ne:DI
5273 (and:DI
5274 (match_operand:DI 1 "register_operand" "w,r")
5275 (match_operand:DI 2 "register_operand" "w,r"))
5276 (const_int 0))))
5277 (clobber (reg:CC CC_REGNUM))]
5278 "TARGET_SIMD"
5279 "#"
5280 "&& reload_completed"
5281 [(set (match_operand:DI 0 "register_operand")
5282 (neg:DI
5283 (ne:DI
5284 (and:DI
5285 (match_operand:DI 1 "register_operand")
5286 (match_operand:DI 2 "register_operand"))
5287 (const_int 0))))]
5288 {
5289 /* If we are in the general purpose register file,
5290 we split to a sequence of comparison and store. */
5291 if (GP_REGNUM_P (REGNO (operands[0]))
5292 && GP_REGNUM_P (REGNO (operands[1])))
5293 {
5294 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
5295 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
5296 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
5297 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
5298 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
5299 DONE;
5300 }
5301 /* Otherwise, we expand to a similar pattern which does not
5302 clobber CC_REGNUM. */
5303 }
5304 [(set_attr "type" "neon_tst,multiple")]
5305 )
5306
5307 (define_insn "*aarch64_cmtstdi"
5308 [(set (match_operand:DI 0 "register_operand" "=w")
5309 (neg:DI
5310 (ne:DI
5311 (and:DI
5312 (match_operand:DI 1 "register_operand" "w")
5313 (match_operand:DI 2 "register_operand" "w"))
5314 (const_int 0))))]
5315 "TARGET_SIMD"
5316 "cmtst\t%d0, %d1, %d2"
5317 [(set_attr "type" "neon_tst")]
5318 )
5319
5320 ;; fcm(eq|ge|gt|le|lt)
5321
5322 (define_insn "aarch64_cm<optab><mode>"
5323 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
5324 (neg:<V_INT_EQUIV>
5325 (COMPARISONS:<V_INT_EQUIV>
5326 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
5327 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
5328 )))]
5329 "TARGET_SIMD"
5330 "@
5331 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
5332 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
5333 [(set_attr "type" "neon_fp_compare_<stype><q>")]
5334 )
5335
5336 ;; fac(ge|gt)
5337 ;; Note we can also handle what would be fac(le|lt) by
5338 ;; generating fac(ge|gt).
5339
5340 (define_insn "aarch64_fac<optab><mode>"
5341 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
5342 (neg:<V_INT_EQUIV>
5343 (FAC_COMPARISONS:<V_INT_EQUIV>
5344 (abs:VHSDF_HSDF
5345 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
5346 (abs:VHSDF_HSDF
5347 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
5348 )))]
5349 "TARGET_SIMD"
5350 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
5351 [(set_attr "type" "neon_fp_compare_<stype><q>")]
5352 )
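
;; The le/lt forms are obtained by swapping the operands, e.g. (expected
;; mapping, not verified output):
;;
;;   #include <arm_neon.h>
;;
;;   uint32x2_t
;;   f (float32x2_t a, float32x2_t b)
;;   {
;;     return vcale_f32 (a, b);   /* |a| <= |b| emitted as facge b, a */
;;   }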
5353
5354 ;; addp
5355
5356 (define_insn "aarch64_addp<mode>"
5357 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
5358 (unspec:VD_BHSI
5359 [(match_operand:VD_BHSI 1 "register_operand" "w")
5360 (match_operand:VD_BHSI 2 "register_operand" "w")]
5361 UNSPEC_ADDP))]
5362 "TARGET_SIMD"
5363 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5364 [(set_attr "type" "neon_reduc_add<q>")]
5365 )
5366
5367 (define_insn "aarch64_addpdi"
5368 [(set (match_operand:DI 0 "register_operand" "=w")
5369 (unspec:DI
5370 [(match_operand:V2DI 1 "register_operand" "w")]
5371 UNSPEC_ADDP))]
5372 "TARGET_SIMD"
5373 "addp\t%d0, %1.2d"
5374 [(set_attr "type" "neon_reduc_add")]
5375 )
5376
5377 ;; sqrt
5378
5379 (define_expand "sqrt<mode>2"
5380 [(set (match_operand:VHSDF 0 "register_operand")
5381 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
5382 "TARGET_SIMD"
5383 {
5384 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
5385 DONE;
5386 })
5387
5388 (define_insn "*sqrt<mode>2"
5389 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5390 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
5391 "TARGET_SIMD"
5392 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
5393 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
5394 )
5395
5396 ;; Patterns for vector struct loads and stores.
5397
5398 (define_insn "aarch64_simd_ld2<mode>"
5399 [(set (match_operand:OI 0 "register_operand" "=w")
5400 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5401 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5402 UNSPEC_LD2))]
5403 "TARGET_SIMD"
5404 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5405 [(set_attr "type" "neon_load2_2reg<q>")]
5406 )
5407
5408 (define_insn "aarch64_simd_ld2r<mode>"
5409 [(set (match_operand:OI 0 "register_operand" "=w")
5410 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5411 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5412 UNSPEC_LD2_DUP))]
5413 "TARGET_SIMD"
5414 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5415 [(set_attr "type" "neon_load2_all_lanes<q>")]
5416 )
5417
5418 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
5419 [(set (match_operand:OI 0 "register_operand" "=w")
5420 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5421 (match_operand:OI 2 "register_operand" "0")
5422 (match_operand:SI 3 "immediate_operand" "i")
5423 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5424 UNSPEC_LD2_LANE))]
5425 "TARGET_SIMD"
5426 {
5427 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5428 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
5429 }
5430 [(set_attr "type" "neon_load2_one_lane")]
5431 )
5432
5433 (define_expand "vec_load_lanesoi<mode>"
5434 [(set (match_operand:OI 0 "register_operand")
5435 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
5436 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5437 UNSPEC_LD2))]
5438 "TARGET_SIMD"
5439 {
5440 if (BYTES_BIG_ENDIAN)
5441 {
5442 rtx tmp = gen_reg_rtx (OImode);
5443 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5444 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
5445 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
5446 }
5447 else
5448 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
5449 DONE;
5450 })
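
;; This standard name serves the auto-vectorizer; a de-interleaving loop of
;; the shape below may be vectorized with LD2 through this expander
;; (subject to the cost model), with the extra TBL fixup on big-endian:
;;
;;   #include <stdint.h>
;;
;;   void
;;   f (int32_t *restrict a, int32_t *restrict b,
;;      const int32_t *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       {
;;         a[i] = s[2 * i];       /* even elements */
;;         b[i] = s[2 * i + 1];   /* odd elements */
;;       }
;;   }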
5451
5452 (define_insn "aarch64_simd_st2<mode>"
5453 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5454 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5455 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5456 UNSPEC_ST2))]
5457 "TARGET_SIMD"
5458 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5459 [(set_attr "type" "neon_store2_2reg<q>")]
5460 )
5461
5462 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5463 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
5464 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5465 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5466 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5467 (match_operand:SI 2 "immediate_operand" "i")]
5468 UNSPEC_ST2_LANE))]
5469 "TARGET_SIMD"
5470 {
5471 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5472 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5473 }
5474 [(set_attr "type" "neon_store2_one_lane<q>")]
5475 )
5476
5477 (define_expand "vec_store_lanesoi<mode>"
5478 [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5479 (unspec:OI [(match_operand:OI 1 "register_operand")
5480 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5481 UNSPEC_ST2))]
5482 "TARGET_SIMD"
5483 {
5484 if (BYTES_BIG_ENDIAN)
5485 {
5486 rtx tmp = gen_reg_rtx (OImode);
5487 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5488 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5489 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5490 }
5491 else
5492 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5493 DONE;
5494 })
5495
5496 (define_insn "aarch64_simd_ld3<mode>"
5497 [(set (match_operand:CI 0 "register_operand" "=w")
5498 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5499 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5500 UNSPEC_LD3))]
5501 "TARGET_SIMD"
5502 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5503 [(set_attr "type" "neon_load3_3reg<q>")]
5504 )
5505
5506 (define_insn "aarch64_simd_ld3r<mode>"
5507 [(set (match_operand:CI 0 "register_operand" "=w")
5508 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5509 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5510 UNSPEC_LD3_DUP))]
5511 "TARGET_SIMD"
5512 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5513 [(set_attr "type" "neon_load3_all_lanes<q>")]
5514 )
5515
5516 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5517 [(set (match_operand:CI 0 "register_operand" "=w")
5518 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5519 (match_operand:CI 2 "register_operand" "0")
5520 (match_operand:SI 3 "immediate_operand" "i")
5521 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5522 UNSPEC_LD3_LANE))]
5523 "TARGET_SIMD"
5524 {
5525 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5526 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5527 }
5528 [(set_attr "type" "neon_load3_one_lane")]
5529 )
5530
5531 (define_expand "vec_load_lanesci<mode>"
5532 [(set (match_operand:CI 0 "register_operand")
5533 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5534 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5535 UNSPEC_LD3))]
5536 "TARGET_SIMD"
5537 {
5538 if (BYTES_BIG_ENDIAN)
5539 {
5540 rtx tmp = gen_reg_rtx (CImode);
5541 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5542 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5543 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5544 }
5545 else
5546 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5547 DONE;
5548 })
5549
5550 (define_insn "aarch64_simd_st3<mode>"
5551 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5552 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5553 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5554 UNSPEC_ST3))]
5555 "TARGET_SIMD"
5556 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5557 [(set_attr "type" "neon_store3_3reg<q>")]
5558 )
5559
5560 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5561 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5562 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5563 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5564 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5565 (match_operand:SI 2 "immediate_operand" "i")]
5566 UNSPEC_ST3_LANE))]
5567 "TARGET_SIMD"
5568 {
5569 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5570 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5571 }
5572 [(set_attr "type" "neon_store3_one_lane<q>")]
5573 )
5574
5575 (define_expand "vec_store_lanesci<mode>"
5576 [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5577 (unspec:CI [(match_operand:CI 1 "register_operand")
5578 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5579 UNSPEC_ST3))]
5580 "TARGET_SIMD"
5581 {
5582 if (BYTES_BIG_ENDIAN)
5583 {
5584 rtx tmp = gen_reg_rtx (CImode);
5585 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5586 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5587 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5588 }
5589 else
5590 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5591 DONE;
5592 })
5593
5594 (define_insn "aarch64_simd_ld4<mode>"
5595 [(set (match_operand:XI 0 "register_operand" "=w")
5596 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5597 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5598 UNSPEC_LD4))]
5599 "TARGET_SIMD"
5600 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5601 [(set_attr "type" "neon_load4_4reg<q>")]
5602 )
5603
5604 (define_insn "aarch64_simd_ld4r<mode>"
5605 [(set (match_operand:XI 0 "register_operand" "=w")
5606 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5607 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5608 UNSPEC_LD4_DUP))]
5609 "TARGET_SIMD"
5610 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5611 [(set_attr "type" "neon_load4_all_lanes<q>")]
5612 )
5613
5614 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5615 [(set (match_operand:XI 0 "register_operand" "=w")
5616 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5617 (match_operand:XI 2 "register_operand" "0")
5618 (match_operand:SI 3 "immediate_operand" "i")
5619 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5620 UNSPEC_LD4_LANE))]
5621 "TARGET_SIMD"
5622 {
5623 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5624 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5625 }
5626 [(set_attr "type" "neon_load4_one_lane")]
5627 )
5628
5629 (define_expand "vec_load_lanesxi<mode>"
5630 [(set (match_operand:XI 0 "register_operand")
5631 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5632 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5633 UNSPEC_LD4))]
5634 "TARGET_SIMD"
5635 {
5636 if (BYTES_BIG_ENDIAN)
5637 {
5638 rtx tmp = gen_reg_rtx (XImode);
5639 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5640 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5641 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5642 }
5643 else
5644 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5645 DONE;
5646 })
5647
5648 (define_insn "aarch64_simd_st4<mode>"
5649 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5650 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5651 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5652 UNSPEC_ST4))]
5653 "TARGET_SIMD"
5654 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5655 [(set_attr "type" "neon_store4_4reg<q>")]
5656 )
5657
5658 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5659 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5660 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5661 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5662 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5663 (match_operand:SI 2 "immediate_operand" "i")]
5664 UNSPEC_ST4_LANE))]
5665 "TARGET_SIMD"
5666 {
5667 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5668 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5669 }
5670 [(set_attr "type" "neon_store4_one_lane<q>")]
5671 )
5672
5673 (define_expand "vec_store_lanesxi<mode>"
5674 [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5675 (unspec:XI [(match_operand:XI 1 "register_operand")
5676 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5677 UNSPEC_ST4))]
5678 "TARGET_SIMD"
5679 {
5680 if (BYTES_BIG_ENDIAN)
5681 {
5682 rtx tmp = gen_reg_rtx (XImode);
5683 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5684 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5685 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5686 }
5687 else
5688 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5689 DONE;
5690 })
5691
5692 (define_insn_and_split "aarch64_rev_reglist<mode>"
5693 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5694 (unspec:VSTRUCT
5695 [(match_operand:VSTRUCT 1 "register_operand" "w")
5696 (match_operand:V16QI 2 "register_operand" "w")]
5697 UNSPEC_REV_REGLIST))]
5698 "TARGET_SIMD"
5699 "#"
5700 "&& reload_completed"
5701 [(const_int 0)]
5702 {
5703 int i;
5704 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5705 for (i = 0; i < nregs; i++)
5706 {
5707 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5708 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5709 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5710 }
5711 DONE;
5712 }
5713 [(set_attr "type" "neon_tbl1_q")
5714 (set_attr "length" "<insn_count>")]
5715 )
5716
5717 ;; Reload patterns for AdvSIMD register list operands.
5718
5719 (define_expand "mov<mode>"
5720 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5721 (match_operand:VSTRUCT 1 "general_operand"))]
5722 "TARGET_SIMD"
5723 {
5724 if (can_create_pseudo_p ())
5725 {
5726 if (GET_CODE (operands[0]) != REG)
5727 operands[1] = force_reg (<MODE>mode, operands[1]);
5728 }
5729 })
5730
5731
5732 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5733 [(match_operand:CI 0 "register_operand")
5734 (match_operand:DI 1 "register_operand")
5735 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5736 "TARGET_SIMD"
5737 {
5738 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5739 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5740 DONE;
5741 })
5742
5743 (define_insn "aarch64_ld1_x3_<mode>"
5744 [(set (match_operand:CI 0 "register_operand" "=w")
5745 (unspec:CI
5746 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5747 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5748 "TARGET_SIMD"
5749 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5750 [(set_attr "type" "neon_load1_3reg<q>")]
5751 )
5752
5753 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5754 (match_operand:XI 0 "register_operand")
5755 (match_operand:DI 1 "register_operand")
5756 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5757 "TARGET_SIMD"
5758 {
5759 rtx mem = gen_rtx_MEM (XImode, operands[1]);
5760 emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5761 DONE;
5762 })
5763
5764 (define_insn "aarch64_ld1_x4_<mode>"
5765 [(set (match_operand:XI 0 "register_operand" "=w")
5766 (unspec:XI
5767 [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5768 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5769 UNSPEC_LD1))]
5770 "TARGET_SIMD"
5771 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5772 [(set_attr "type" "neon_load1_4reg<q>")]
5773 )
5774
5775 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5776 [(match_operand:DI 0 "register_operand")
5777 (match_operand:OI 1 "register_operand")
5778 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5779 "TARGET_SIMD"
5780 {
5781 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5782 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5783 DONE;
5784 })
5785
5786 (define_insn "aarch64_st1_x2_<mode>"
5787 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5788 (unspec:OI
5789 [(match_operand:OI 1 "register_operand" "w")
5790 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5791 "TARGET_SIMD"
5792 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5793 [(set_attr "type" "neon_store1_2reg<q>")]
5794 )
5795
5796 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5797 [(match_operand:DI 0 "register_operand")
5798 (match_operand:CI 1 "register_operand")
5799 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5800 "TARGET_SIMD"
5801 {
5802 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5803 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5804 DONE;
5805 })
5806
5807 (define_insn "aarch64_st1_x3_<mode>"
5808 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5809 (unspec:CI
5810 [(match_operand:CI 1 "register_operand" "w")
5811 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5812 "TARGET_SIMD"
5813 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5814 [(set_attr "type" "neon_store1_3reg<q>")]
5815 )
5816
5817 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5818 (match_operand:DI 0 "register_operand")
5819 (match_operand:XI 1 "register_operand")
5820 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5821 "TARGET_SIMD"
5822 {
5823 rtx mem = gen_rtx_MEM (XImode, operands[0]);
5824 emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5825 DONE;
5826 })
5827
5828 (define_insn "aarch64_st1_x4_<mode>"
5829 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5830 (unspec:XI
5831 [(match_operand:XI 1 "register_operand" "w")
5832 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5833 UNSPEC_ST1))]
5834 "TARGET_SIMD"
5835 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5836 [(set_attr "type" "neon_store1_4reg<q>")]
5837 )
5838
5839 (define_insn "*aarch64_mov<mode>"
5840 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5841 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5842 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5843 && (register_operand (operands[0], <MODE>mode)
5844 || register_operand (operands[1], <MODE>mode))"
5845 "@
5846 #
5847 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5848 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5849 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5850 neon_load<nregs>_<nregs>reg_q")
5851 (set_attr "length" "<insn_count>,4,4")]
5852 )
5853
5854 (define_insn "aarch64_be_ld1<mode>"
5855 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5856 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5857 "aarch64_simd_struct_operand" "Utv")]
5858 UNSPEC_LD1))]
5859 "TARGET_SIMD"
5860 "ld1\\t{%0<Vmtype>}, %1"
5861 [(set_attr "type" "neon_load1_1reg<q>")]
5862 )
5863
5864 (define_insn "aarch64_be_st1<mode>"
5865 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5866 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5867 UNSPEC_ST1))]
5868 "TARGET_SIMD"
5869 "st1\\t{%1<Vmtype>}, %0"
5870 [(set_attr "type" "neon_store1_1reg<q>")]
5871 )
5872
5873 (define_insn "*aarch64_be_movoi"
5874 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5875 (match_operand:OI 1 "general_operand" " w,w,m"))]
5876 "TARGET_SIMD && BYTES_BIG_ENDIAN
5877 && (register_operand (operands[0], OImode)
5878 || register_operand (operands[1], OImode))"
5879 "@
5880 #
5881 stp\\t%q1, %R1, %0
5882 ldp\\t%q0, %R0, %1"
5883 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5884 (set_attr "length" "8,4,4")]
5885 )
5886
5887 (define_insn "*aarch64_be_movci"
5888 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5889 (match_operand:CI 1 "general_operand" " w,w,o"))]
5890 "TARGET_SIMD && BYTES_BIG_ENDIAN
5891 && (register_operand (operands[0], CImode)
5892 || register_operand (operands[1], CImode))"
5893 "#"
5894 [(set_attr "type" "multiple")
5895 (set_attr "length" "12,4,4")]
5896 )
5897
5898 (define_insn "*aarch64_be_movxi"
5899 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5900 (match_operand:XI 1 "general_operand" " w,w,o"))]
5901 "TARGET_SIMD && BYTES_BIG_ENDIAN
5902 && (register_operand (operands[0], XImode)
5903 || register_operand (operands[1], XImode))"
5904 "#"
5905 [(set_attr "type" "multiple")
5906 (set_attr "length" "16,4,4")]
5907 )
5908
5909 (define_split
5910 [(set (match_operand:OI 0 "register_operand")
5911 (match_operand:OI 1 "register_operand"))]
5912 "TARGET_SIMD && reload_completed"
5913 [(const_int 0)]
5914 {
5915 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5916 DONE;
5917 })
5918
5919 (define_split
5920 [(set (match_operand:CI 0 "nonimmediate_operand")
5921 (match_operand:CI 1 "general_operand"))]
5922 "TARGET_SIMD && reload_completed"
5923 [(const_int 0)]
5924 {
5925 if (register_operand (operands[0], CImode)
5926 && register_operand (operands[1], CImode))
5927 {
5928 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5929 DONE;
5930 }
5931 else if (BYTES_BIG_ENDIAN)
5932 {
5933 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5934 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5935 emit_move_insn (gen_lowpart (V16QImode,
5936 simplify_gen_subreg (TImode, operands[0],
5937 CImode, 32)),
5938 gen_lowpart (V16QImode,
5939 simplify_gen_subreg (TImode, operands[1],
5940 CImode, 32)));
5941 DONE;
5942 }
5943 else
5944 FAIL;
5945 })
5946
5947 (define_split
5948 [(set (match_operand:XI 0 "nonimmediate_operand")
5949 (match_operand:XI 1 "general_operand"))]
5950 "TARGET_SIMD && reload_completed"
5951 [(const_int 0)]
5952 {
5953 if (register_operand (operands[0], XImode)
5954 && register_operand (operands[1], XImode))
5955 {
5956 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5957 DONE;
5958 }
5959 else if (BYTES_BIG_ENDIAN)
5960 {
5961 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5962 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5963 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5964 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5965 DONE;
5966 }
5967 else
5968 FAIL;
5969 })
5970
5971 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5972 [(match_operand:VSTRUCT 0 "register_operand")
5973 (match_operand:DI 1 "register_operand")
5974 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5975 "TARGET_SIMD"
5976 {
5977 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5978 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5979 * <VSTRUCT:nregs>);
5980
5981 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5982 mem));
5983 DONE;
5984 })
5985
5986 (define_insn "aarch64_ld2<mode>_dreg"
5987 [(set (match_operand:OI 0 "register_operand" "=w")
5988 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5989 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5990 UNSPEC_LD2_DREG))]
5991 "TARGET_SIMD"
5992 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5993 [(set_attr "type" "neon_load2_2reg<q>")]
5994 )
5995
5996 (define_insn "aarch64_ld2<mode>_dreg"
5997 [(set (match_operand:OI 0 "register_operand" "=w")
5998 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5999 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6000 UNSPEC_LD2_DREG))]
6001 "TARGET_SIMD"
6002 "ld1\\t{%S0.1d - %T0.1d}, %1"
6003 [(set_attr "type" "neon_load1_2reg<q>")]
6004 )
6005
6006 (define_insn "aarch64_ld3<mode>_dreg"
6007 [(set (match_operand:CI 0 "register_operand" "=w")
6008 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6009 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6010 UNSPEC_LD3_DREG))]
6011 "TARGET_SIMD"
6012 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6013 [(set_attr "type" "neon_load3_3reg<q>")]
6014 )
6015
6016 (define_insn "aarch64_ld3<mode>_dreg"
6017 [(set (match_operand:CI 0 "register_operand" "=w")
6018 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6019 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6020 UNSPEC_LD3_DREG))]
6021 "TARGET_SIMD"
6022 "ld1\\t{%S0.1d - %U0.1d}, %1"
6023 [(set_attr "type" "neon_load1_3reg<q>")]
6024 )
6025
6026 (define_insn "aarch64_ld4<mode>_dreg"
6027 [(set (match_operand:XI 0 "register_operand" "=w")
6028 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6029 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6030 UNSPEC_LD4_DREG))]
6031 "TARGET_SIMD"
6032 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6033 [(set_attr "type" "neon_load4_4reg<q>")]
6034 )
6035
6036 (define_insn "aarch64_ld4<mode>_dreg"
6037 [(set (match_operand:XI 0 "register_operand" "=w")
6038 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6039 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6040 UNSPEC_LD4_DREG))]
6041 "TARGET_SIMD"
6042 "ld1\\t{%S0.1d - %V0.1d}, %1"
6043 [(set_attr "type" "neon_load1_4reg<q>")]
6044 )
6045
6046 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
6047 [(match_operand:VSTRUCT 0 "register_operand")
6048 (match_operand:DI 1 "register_operand")
6049 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6050 "TARGET_SIMD"
6051 {
6052 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
6053 set_mem_size (mem, <VSTRUCT:nregs> * 8);
6054
6055 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
6056 DONE;
6057 })
6058
6059 (define_expand "aarch64_ld1<VALL_F16:mode>"
6060 [(match_operand:VALL_F16 0 "register_operand")
6061 (match_operand:DI 1 "register_operand")]
6062 "TARGET_SIMD"
6063 {
6064 machine_mode mode = <VALL_F16:MODE>mode;
6065 rtx mem = gen_rtx_MEM (mode, operands[1]);
6066
6067 if (BYTES_BIG_ENDIAN)
6068 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
6069 else
6070 emit_move_insn (operands[0], mem);
6071 DONE;
6072 })
6073
6074 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
6075 [(match_operand:VSTRUCT 0 "register_operand")
6076 (match_operand:DI 1 "register_operand")
6077 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6078 "TARGET_SIMD"
6079 {
6080 machine_mode mode = <VSTRUCT:MODE>mode;
6081 rtx mem = gen_rtx_MEM (mode, operands[1]);
6082
6083 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
6084 DONE;
6085 })
6086
6087 (define_expand "aarch64_ld1x2<VQ:mode>"
6088 [(match_operand:OI 0 "register_operand")
6089 (match_operand:DI 1 "register_operand")
6090 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6091 "TARGET_SIMD"
6092 {
6093 machine_mode mode = OImode;
6094 rtx mem = gen_rtx_MEM (mode, operands[1]);
6095
6096 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
6097 DONE;
6098 })
6099
6100 (define_expand "aarch64_ld1x2<VDC:mode>"
6101 [(match_operand:OI 0 "register_operand")
6102 (match_operand:DI 1 "register_operand")
6103 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6104 "TARGET_SIMD"
6105 {
6106 machine_mode mode = OImode;
6107 rtx mem = gen_rtx_MEM (mode, operands[1]);
6108
6109 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
6110 DONE;
6111 })
6112
6113
6114 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6115 [(match_operand:VSTRUCT 0 "register_operand")
6116 (match_operand:DI 1 "register_operand")
6117 (match_operand:VSTRUCT 2 "register_operand")
6118 (match_operand:SI 3 "immediate_operand")
6119 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6120 "TARGET_SIMD"
6121 {
6122 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
6123 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6124 * <VSTRUCT:nregs>);
6125
6126 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
6127 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6128 operands[0], mem, operands[2], operands[3]));
6129 DONE;
6130 })
6131
6132 ;; Expanders for builtins to extract vector registers from large
6133 ;; opaque integer modes.
6134
6135 ;; D-register list.
6136
6137 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
6138 [(match_operand:VDC 0 "register_operand")
6139 (match_operand:VSTRUCT 1 "register_operand")
6140 (match_operand:SI 2 "immediate_operand")]
6141 "TARGET_SIMD"
6142 {
6143 int part = INTVAL (operands[2]);
6144 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
6145 int offset = part * 16;
6146
6147 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
6148 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
6149 DONE;
6150 })
6151
6152 ;; Q-register list.
6153
6154 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
6155 [(match_operand:VQ 0 "register_operand")
6156 (match_operand:VSTRUCT 1 "register_operand")
6157 (match_operand:SI 2 "immediate_operand")]
6158 "TARGET_SIMD"
6159 {
6160 int part = INTVAL (operands[2]);
6161 int offset = part * 16;
6162
6163 emit_move_insn (operands[0],
6164 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
6165 DONE;
6166 })
6167
6168 ;; Permuted-store expanders for neon intrinsics.
6169
6170 ;; Permute instructions
6171
6172 ;; vec_perm support
6173
6174 (define_expand "vec_perm<mode>"
6175 [(match_operand:VB 0 "register_operand")
6176 (match_operand:VB 1 "register_operand")
6177 (match_operand:VB 2 "register_operand")
6178 (match_operand:VB 3 "register_operand")]
6179 "TARGET_SIMD"
6180 {
6181 aarch64_expand_vec_perm (operands[0], operands[1],
6182 operands[2], operands[3], <nunits>);
6183 DONE;
6184 })
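
;; GCC's generic two-input vector shuffles funnel into this expander.  A
;; minimal example with a variable selector (expected to expand via TBL):
;;
;;   #include <arm_neon.h>
;;
;;   int8x16_t
;;   f (int8x16_t a, int8x16_t b, uint8x16_t sel)
;;   {
;;     return __builtin_shuffle (a, b, sel);
;;   }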
6185
6186 (define_insn "aarch64_tbl1<mode>"
6187 [(set (match_operand:VB 0 "register_operand" "=w")
6188 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
6189 (match_operand:VB 2 "register_operand" "w")]
6190 UNSPEC_TBL))]
6191 "TARGET_SIMD"
6192 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
6193 [(set_attr "type" "neon_tbl1<q>")]
6194 )
6195
6196 ;; Two source registers.
6197
6198 (define_insn "aarch64_tbl2v16qi"
6199 [(set (match_operand:V16QI 0 "register_operand" "=w")
6200 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
6201 (match_operand:V16QI 2 "register_operand" "w")]
6202 UNSPEC_TBL))]
6203 "TARGET_SIMD"
6204 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
6205 [(set_attr "type" "neon_tbl2_q")]
6206 )
6207
6208 (define_insn "aarch64_tbl3<mode>"
6209 [(set (match_operand:VB 0 "register_operand" "=w")
6210 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
6211 (match_operand:VB 2 "register_operand" "w")]
6212 UNSPEC_TBL))]
6213 "TARGET_SIMD"
6214 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
6215 [(set_attr "type" "neon_tbl3")]
6216 )
6217
6218 (define_insn "aarch64_tbx4<mode>"
6219 [(set (match_operand:VB 0 "register_operand" "=w")
6220 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6221 (match_operand:OI 2 "register_operand" "w")
6222 (match_operand:VB 3 "register_operand" "w")]
6223 UNSPEC_TBX))]
6224 "TARGET_SIMD"
6225 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
6226 [(set_attr "type" "neon_tbl4")]
6227 )
6228
6229 ;; Three source registers.
6230
6231 (define_insn "aarch64_qtbl3<mode>"
6232 [(set (match_operand:VB 0 "register_operand" "=w")
6233 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
6234 (match_operand:VB 2 "register_operand" "w")]
6235 UNSPEC_TBL))]
6236 "TARGET_SIMD"
6237 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
6238 [(set_attr "type" "neon_tbl3")]
6239 )
6240
6241 (define_insn "aarch64_qtbx3<mode>"
6242 [(set (match_operand:VB 0 "register_operand" "=w")
6243 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6244 (match_operand:CI 2 "register_operand" "w")
6245 (match_operand:VB 3 "register_operand" "w")]
6246 UNSPEC_TBX))]
6247 "TARGET_SIMD"
6248 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
6249 [(set_attr "type" "neon_tbl3")]
6250 )
6251
6252 ;; Four source registers.
6253
6254 (define_insn "aarch64_qtbl4<mode>"
6255 [(set (match_operand:VB 0 "register_operand" "=w")
6256 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
6257 (match_operand:VB 2 "register_operand" "w")]
6258 UNSPEC_TBL))]
6259 "TARGET_SIMD"
6260 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
6261 [(set_attr "type" "neon_tbl4")]
6262 )
6263
6264 (define_insn "aarch64_qtbx4<mode>"
6265 [(set (match_operand:VB 0 "register_operand" "=w")
6266 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
6267 (match_operand:XI 2 "register_operand" "w")
6268 (match_operand:VB 3 "register_operand" "w")]
6269 UNSPEC_TBX))]
6270 "TARGET_SIMD"
6271 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
6272 [(set_attr "type" "neon_tbl4")]
6273 )
6274
6275 (define_insn_and_split "aarch64_combinev16qi"
6276 [(set (match_operand:OI 0 "register_operand" "=w")
6277 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
6278 (match_operand:V16QI 2 "register_operand" "w")]
6279 UNSPEC_CONCAT))]
6280 "TARGET_SIMD"
6281 "#"
6282 "&& reload_completed"
6283 [(const_int 0)]
6284 {
6285 aarch64_split_combinev16qi (operands);
6286 DONE;
6287 }
6288 [(set_attr "type" "multiple")]
6289 )
6290
6291 ;; This instruction's pattern is generated directly by
6292 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6293 ;; need corresponding changes there.
6294 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
6295 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6296 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
6297 (match_operand:VALL_F16 2 "register_operand" "w")]
6298 PERMUTE))]
6299 "TARGET_SIMD"
6300 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
6301 [(set_attr "type" "neon_permute<q>")]
6302 )
6303
6304 ;; This instruction's pattern is generated directly by
6305 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6306 ;; need corresponding changes there. Note that the immediate (third)
6307 ;; operand is a lane index not a byte index.
6308 (define_insn "aarch64_ext<mode>"
6309 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6310 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
6311 (match_operand:VALL_F16 2 "register_operand" "w")
6312 (match_operand:SI 3 "immediate_operand" "i")]
6313 UNSPEC_EXT))]
6314 "TARGET_SIMD"
6315 {
6316 operands[3] = GEN_INT (INTVAL (operands[3])
6317 * GET_MODE_UNIT_SIZE (<MODE>mode));
6318 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
6319 }
6320 [(set_attr "type" "neon_ext<q>")]
6321 )
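
;; The lane index is scaled to a byte count above, e.g. lane 1 of a .4s
;; vector becomes #4 (expected mapping, not verified output):
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   f (int32x4_t a, int32x4_t b)
;;   {
;;     return vextq_s32 (a, b, 1);   /* ext v0.16b, v0.16b, v1.16b, #4 */
;;   }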
6322
6323 ;; This instruction's pattern is generated directly by
6324 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
6325 ;; need corresponding changes there.
6326 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
6327 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6328 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
6329 REVERSE))]
6330 "TARGET_SIMD"
6331 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
6332 [(set_attr "type" "neon_rev<q>")]
6333 )
6334
6335 (define_insn "aarch64_st2<mode>_dreg"
6336 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6337 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
6338 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6339 UNSPEC_ST2))]
6340 "TARGET_SIMD"
6341 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
6342 [(set_attr "type" "neon_store2_2reg")]
6343 )
6344
6345 (define_insn "aarch64_st2<mode>_dreg"
6346 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6347 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
6348 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6349 UNSPEC_ST2))]
6350 "TARGET_SIMD"
6351 "st1\\t{%S1.1d - %T1.1d}, %0"
6352 [(set_attr "type" "neon_store1_2reg")]
6353 )
6354
6355 (define_insn "aarch64_st3<mode>_dreg"
6356 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6357 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6358 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6359 UNSPEC_ST3))]
6360 "TARGET_SIMD"
6361 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6362 [(set_attr "type" "neon_store3_3reg")]
6363 )
6364
6365 (define_insn "aarch64_st3<mode>_dreg"
6366 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6367 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6368 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6369 UNSPEC_ST3))]
6370 "TARGET_SIMD"
6371 "st1\\t{%S1.1d - %U1.1d}, %0"
6372 [(set_attr "type" "neon_store1_3reg")]
6373 )
6374
6375 (define_insn "aarch64_st4<mode>_dreg"
6376 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6377 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6378 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6379 UNSPEC_ST4))]
6380 "TARGET_SIMD"
6381 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
6382 [(set_attr "type" "neon_store4_4reg")]
6383 )
6384
6385 (define_insn "aarch64_st4<mode>_dreg"
6386 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6387 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6388 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6389 UNSPEC_ST4))]
6390 "TARGET_SIMD"
6391 "st1\\t{%S1.1d - %V1.1d}, %0"
6392 [(set_attr "type" "neon_store1_4reg")]
6393 )
6394
6395 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
6396 [(match_operand:DI 0 "register_operand")
6397 (match_operand:VSTRUCT 1 "register_operand")
6398 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6399 "TARGET_SIMD"
6400 {
6401 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6402 set_mem_size (mem, <VSTRUCT:nregs> * 8);
6403
6404 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
6405 DONE;
6406 })
6407
6408 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
6409 [(match_operand:DI 0 "register_operand")
6410 (match_operand:VSTRUCT 1 "register_operand")
6411 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6412 "TARGET_SIMD"
6413 {
6414 machine_mode mode = <VSTRUCT:MODE>mode;
6415 rtx mem = gen_rtx_MEM (mode, operands[0]);
6416
6417 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
6418 DONE;
6419 })
6420
6421 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6422 [(match_operand:DI 0 "register_operand")
6423 (match_operand:VSTRUCT 1 "register_operand")
6424 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
6425 (match_operand:SI 2 "immediate_operand")]
6426 "TARGET_SIMD"
6427 {
6428 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6429 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6430 * <VSTRUCT:nregs>);
6431
6432 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6433 mem, operands[1], operands[2]));
6434 DONE;
6435 })
6436
6437 (define_expand "aarch64_st1<VALL_F16:mode>"
6438 [(match_operand:DI 0 "register_operand")
6439 (match_operand:VALL_F16 1 "register_operand")]
6440 "TARGET_SIMD"
6441 {
6442 machine_mode mode = <VALL_F16:MODE>mode;
6443 rtx mem = gen_rtx_MEM (mode, operands[0]);
6444
6445 if (BYTES_BIG_ENDIAN)
6446 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
6447 else
6448 emit_move_insn (mem, operands[1]);
6449 DONE;
6450 })
6451
6452 ;; Expander for builtins to insert vector registers into large
6453 ;; opaque integer modes.
6454
6455 ;; Q-register list. We don't need a D-reg inserter, as we zero-extend
6456 ;; D-regs in arm_neon.h and insert the resulting Q-regs.
6457
6458 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
6459 [(match_operand:VSTRUCT 0 "register_operand")
6460 (match_operand:VSTRUCT 1 "register_operand")
6461 (match_operand:VQ 2 "register_operand")
6462 (match_operand:SI 3 "immediate_operand")]
6463 "TARGET_SIMD"
6464 {
6465 int part = INTVAL (operands[3]);
6466 int offset = part * 16;
6467
6468 emit_move_insn (operands[0], operands[1]);
6469 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
6470 operands[2]);
6471 DONE;
6472 })

;; Standard pattern name vec_init<mode><Vel>.

(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

(define_expand "vec_init<mode><Vhalf>"
  [(match_operand:VQ_NO2E 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)
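
;; A broadcasting load matches this pattern; an illustrative sketch
;; (function name is ours):
;;
;;   #include <arm_neon.h>
;;   float32x4_t bcast (const float *p) { return vld1q_dup_f32 (p); }
;;
;; which emits "ld1r {v0.4s}, [x0]".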

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "@aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF
          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
          UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

(define_insn "aarch64_frecpx<mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
        (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
                        UNSPEC_FRECPX))]
  "TARGET_SIMD"
  "frecpx\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)

(define_insn "@aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF
          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
          UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)
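
;; FRECPE/FRECPS together implement Newton-Raphson reciprocal refinement:
;; frecpe gives an initial estimate x of 1/d, and frecps computes
;; 2.0 - d * x, so each step x' = x * frecps (d, x) roughly doubles the
;; number of correct bits.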

(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                       UNSPEC_URECPE))]
  "TARGET_SIMD"
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.

(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  emit_insn
    (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
  DONE;
})

;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV_NO2E 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int start = INTVAL (operands[2]);
  if (start != 0 && start != <nunits> / 2)
    FAIL;
  rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
  DONE;
})
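
;; The two valid start positions correspond to the vget_low/vget_high
;; view of a Q register; illustratively (function names are ours):
;;
;;   #include <arm_neon.h>
;;   int32x2_t lo (int32x4_t v) { return vget_low_s32 (v); }   /* start 0 */
;;   int32x2_t hi (int32x4_t v) { return vget_high_s32 (v); }  /* start 2 */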

;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extractv2dfv1df"
  [(match_operand:V1DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* V1DF is rarely used by other patterns, so it is better to hide it
     behind a subreg destination of a normal DF op.  */
  rtx scalar0 = gen_lowpart (DFmode, operands[0]);
  emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
  DONE;
})

;; aes

(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI
          [(xor:V16QI
             (match_operand:V16QI 1 "register_operand" "%0")
             (match_operand:V16QI 2 "register_operand" "w"))]
          CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
                      CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.

(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI
          [(unspec:V16QI
             [(xor:V16QI
                (match_operand:V16QI 1 "register_operand" "%0")
                (match_operand:V16QI 2 "register_operand" "w"))]
             UNSPEC_AESE)]
          UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
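
;; For example, one AES round written with arm_neon.h intrinsics produces
;; exactly the pair this pattern keeps fused (sketch; names are ours):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t round (uint8x16_t data, uint8x16_t key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (data, key));
;;   }
;;
;; i.e. "aese v0.16b, v1.16b; aesmc v0.16b, v0.16b".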

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.

(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI
          [(unspec:V16QI
             [(xor:V16QI
                (match_operand:V16QI 1 "register_operand" "%0")
                (match_operand:V16QI 2 "register_operand" "w"))]
             UNSPEC_AESD)]
          UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
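
;; The decryption pair fuses the same way; illustratively,
;; vaesimcq_u8 (vaesdq_u8 (data, key)) becomes "aesd; aesimc".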

;; sha1

(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1 "register_operand" "w")]
                   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
                                   (parallel [(const_int 0)]))]
                   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
                                   (parallel [(const_int 3)]))]
                   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; sha512

(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
                     CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")]
                     UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
                     UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; sha3

(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
          (xor:VQ_I
            (match_operand:VQ_I 2 "register_operand" "w")
            (match_operand:VQ_I 3 "register_operand" "w"))
          (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (xor:V2DI
          (rotate:V2DI
            (match_operand:V2DI 2 "register_operand" "w")
            (const_int 1))
          (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (rotatert:V2DI
          (xor:V2DI
            (match_operand:V2DI 1 "register_operand" "%w")
            (match_operand:V2DI 2 "register_operand" "w"))
          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
          (and:VQ_I
            (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
            (match_operand:VQ_I 2 "register_operand" "w"))
          (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; SM3

(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")
                      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
                     CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

;; SM4

(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; fp16fml

(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
        (unspec:VDQSF
          [(match_operand:VDQSF 1 "register_operand")
           (match_operand:<VFMLA_W> 2 "register_operand")
           (match_operand:<VFMLA_W> 3 "register_operand")]
          VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
                                            <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
                                            <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
                                                                operands[1],
                                                                operands[2],
                                                                operands[3],
                                                                p1, p2));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
        (unspec:VDQSF
          [(match_operand:VDQSF 1 "register_operand")
           (match_operand:<VFMLA_W> 2 "register_operand")
           (match_operand:<VFMLA_W> 3 "register_operand")]
          VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
                                                                 operands[1],
                                                                 operands[2],
                                                                 operands[3],
                                                                 p1, p2));
  DONE;
})

(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 2 "register_operand" "w")
              (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
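
;; Illustratively, the ACLE intrinsic behind this pattern (sketch; the
;; function name is ours):
;;
;;   #include <arm_neon.h>
;;   float32x2_t mla (float32x2_t acc, float16x4_t a, float16x4_t b)
;;   {
;;     return vfmlal_low_f16 (acc, a, b);  /* fmlal v0.2s, v1.2h, v2.2h */
;;   }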

(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (neg:<VFMLA_SEL_W>
              (vec_select:<VFMLA_SEL_W>
                (match_operand:<VFMLA_W> 2 "register_operand" "w")
                (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 2 "register_operand" "w")
              (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (neg:<VFMLA_SEL_W>
              (vec_select:<VFMLA_SEL_W>
                (match_operand:<VFMLA_W> 2 "register_operand" "w")
                (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
                                                          operands[1],
                                                          operands[2],
                                                          operands[3],
                                                          p1, lane));
  DONE;
}
)

(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
                                                             operands[1],
                                                             operands[2],
                                                             operands[3],
                                                             p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
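
;; Illustratively, the carry-less multiply intrinsics map here (sketch;
;; the function name is ours):
;;
;;   #include <arm_neon.h>
;;   poly128_t clmul (poly64_t a, poly64_t b) { return vmull_p64 (a, b); }
;;
;; giving "pmull v0.1q, v0.1d, v1.1d"; vmull_high_p64 likewise maps to
;; pmull2 on the upper doublewords.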

;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
        (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  [(set_attr "type" "neon_shift_imm_long")]
)
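
;; e.g. the widening-move intrinsics match this pattern (sketch; the
;; function name is ours):
;;
;;   #include <arm_neon.h>
;;   int32x4_t widen (int16x4_t v) { return vmovl_s16 (v); }
;;
;; assembles to "sxtl v0.4s, v0.4h" (uxtl for the unsigned variants).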

(define_expand "aarch64_<su>xtl<mode>"
  [(set (match_operand:VQN 0 "register_operand" "=w")
        (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)

(define_expand "aarch64_xtn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)

;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
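
;; e.g. the narrowing-move intrinsics match this pattern (sketch; the
;; function name is ours):
;;
;;   #include <arm_neon.h>
;;   int16x4_t narrow (int32x4_t v) { return vmovn_s32 (v); }
;;
;; assembles to "xtn v0.4h, v0.4s".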

(define_insn "aarch64_xtn2<mode>_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_xtn2<mode>_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_xtn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
                                          operands[2]));
  else
    emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
                                          operands[2]));
  DONE;
}
)
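
;; e.g. vmovn_high_s32, which narrows into the upper half while keeping
;; the existing low half (sketch; the function name is ours):
;;
;;   #include <arm_neon.h>
;;   int16x8_t narrow2 (int16x4_t lo, int32x4_t v)
;;   {
;;     return vmovn_high_s32 (lo, v);  /* xtn2 v0.8h, v1.4s */
;;   }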

(define_insn "aarch64_bfdot<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (plus:VDQSF
          (unspec:VDQSF
            [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
             (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
            UNSPEC_BFDOT)
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
  "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
  [(set_attr "type" "neon_dot<q>")]
)
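
;; Illustratively, the bfloat16 dot-product intrinsic (sketch; the
;; function name is ours):
;;
;;   #include <arm_neon.h>
;;   float32x4_t dot (float32x4_t acc, bfloat16x8_t a, bfloat16x8_t b)
;;   {
;;     return vbfdotq_f32 (acc, a, b);  /* bfdot v0.4s, v1.8h, v2.8h */
;;   }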

(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (plus:VDQSF
          (unspec:VDQSF
            [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
             (match_operand:VBF 3 "register_operand" "w")
             (match_operand:SI 4 "const_int_operand" "n")]
            UNSPEC_BFDOT)
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
{
  int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
  return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
  [(set_attr "type" "neon_dot<VDQSF:q>")]
)

;; vget_low/high_bf16
(define_expand "aarch64_vget_lo_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

(define_expand "aarch64_vget_hi_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})
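
;; These back the vget_low_bf16/vget_high_bf16 intrinsics, e.g.
;; (illustratively) bfloat16x4_t lo = vget_low_bf16 (v) for a
;; bfloat16x8_t v.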

;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:V8BF 3 "register_operand" "w")]
                                UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; bfmlal<bt>
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:V8BF 3 "register_operand" "w")]
                                BF_MLA)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:VBF 3 "register_operand" "w")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                                BF_MLA)))]
  "TARGET_BF16_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)

;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (plus:V4SI
          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
          (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)

;; bfcvtn
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
                           UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "register_operand" "w")]
                     UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
        (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
                   UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)

;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
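;; This works because bfloat16 is the upper 16 bits of an IEEE binary32
;; value, so widening to SF is an exact left shift by 16 into the high
;; half of each 32-bit lane.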
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
                     UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
                     UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
        (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
                   UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)