;; Source: gcc.git commit 42d4c447182b6143324a5dccf2c126ba29a26e08
;; File: gcc/config/i386/sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23
24 ;; SSE2
25 UNSPEC_MOVDI_TO_SSE
26
27 ;; SSE3
28 UNSPEC_LDDQU
29
30 ;; SSSE3
31 UNSPEC_PSHUFB
32 UNSPEC_PSIGN
33 UNSPEC_PALIGNR
34
35 ;; For SSE4A support
36 UNSPEC_EXTRQI
37 UNSPEC_EXTRQ
38 UNSPEC_INSERTQI
39 UNSPEC_INSERTQ
40
41 ;; For SSE4.1 support
42 UNSPEC_BLENDV
43 UNSPEC_INSERTPS
44 UNSPEC_DP
45 UNSPEC_MOVNTDQA
46 UNSPEC_MPSADBW
47 UNSPEC_PHMINPOSUW
48 UNSPEC_PTEST
49
50 ;; For SSE4.2 support
51 UNSPEC_PCMPESTR
52 UNSPEC_PCMPISTR
53
54 ;; For FMA4 support
55 UNSPEC_FMADDSUB
56 UNSPEC_XOP_UNSIGNED_CMP
57 UNSPEC_XOP_TRUEFALSE
58 UNSPEC_XOP_PERMUTE
59 UNSPEC_FRCZ
60
61 ;; For AES support
62 UNSPEC_AESENC
63 UNSPEC_AESENCLAST
64 UNSPEC_AESDEC
65 UNSPEC_AESDECLAST
66 UNSPEC_AESIMC
67 UNSPEC_AESKEYGENASSIST
68
69 ;; For PCLMUL support
70 UNSPEC_PCLMUL
71
72 ;; For AVX support
73 UNSPEC_PCMP
74 UNSPEC_VPERMIL
75 UNSPEC_VPERMIL2
76 UNSPEC_VPERMIL2F128
77 UNSPEC_CAST
78 UNSPEC_VTESTP
79 UNSPEC_VCVTPH2PS
80 UNSPEC_VCVTPS2PH
81
82 ;; For AVX2 support
83 UNSPEC_VPERMVAR
84 UNSPEC_VPERMTI
85 UNSPEC_GATHER
86 UNSPEC_VSIBADDR
87
88 ;; For AVX512F support
89 UNSPEC_VPERMT2
90 UNSPEC_UNSIGNED_FIX_NOTRUNC
91 UNSPEC_UNSIGNED_PCMP
92 UNSPEC_TESTM
93 UNSPEC_TESTNM
94 UNSPEC_SCATTER
95 UNSPEC_RCP14
96 UNSPEC_RSQRT14
97 UNSPEC_FIXUPIMM
98 UNSPEC_SCALEF
99 UNSPEC_VTERNLOG
100 UNSPEC_GETEXP
101 UNSPEC_GETMANT
102 UNSPEC_ALIGN
103 UNSPEC_CONFLICT
104 UNSPEC_COMPRESS
105 UNSPEC_COMPRESS_STORE
106 UNSPEC_EXPAND
107 UNSPEC_MASKED_EQ
108 UNSPEC_MASKED_GT
109
110 ;; Mask operations
111 UNSPEC_MASKOP
112 UNSPEC_KORTEST
113 UNSPEC_KTEST
114 ;; Mask load
115 UNSPEC_MASKLOAD
116
117 ;; For embed. rounding feature
118 UNSPEC_EMBEDDED_ROUNDING
119
120 ;; For AVX512PF support
121 UNSPEC_GATHER_PREFETCH
122 UNSPEC_SCATTER_PREFETCH
123
124 ;; For AVX512ER support
125 UNSPEC_EXP2
126 UNSPEC_RCP28
127 UNSPEC_RSQRT28
128
129 ;; For SHA support
130 UNSPEC_SHA1MSG1
131 UNSPEC_SHA1MSG2
132 UNSPEC_SHA1NEXTE
133 UNSPEC_SHA1RNDS4
134 UNSPEC_SHA256MSG1
135 UNSPEC_SHA256MSG2
136 UNSPEC_SHA256RNDS2
137
138 ;; For AVX512BW support
139 UNSPEC_DBPSADBW
140 UNSPEC_PMADDUBSW512
141 UNSPEC_PMADDWD512
142 UNSPEC_PSHUFHW
143 UNSPEC_PSHUFLW
144 UNSPEC_CVTINT2MASK
145
146 ;; For AVX512DQ support
147 UNSPEC_REDUCE
148 UNSPEC_FPCLASS
149 UNSPEC_RANGE
150
151 ;; For AVX512IFMA support
152 UNSPEC_VPMADD52LUQ
153 UNSPEC_VPMADD52HUQ
154
155 ;; For AVX512VBMI support
156 UNSPEC_VPMULTISHIFT
157
158 ;; For AVX5124FMAPS/AVX5124VNNIW support
159 UNSPEC_VP4FMADD
160 UNSPEC_VP4FNMADD
161 UNSPEC_VP4DPWSSD
162 UNSPEC_VP4DPWSSDS
163
164 ;; For GFNI support
165 UNSPEC_GF2P8AFFINEINV
166 UNSPEC_GF2P8AFFINE
167 UNSPEC_GF2P8MUL
168
169 ;; For AVX512VBMI2 support
170 UNSPEC_VPSHLD
171 UNSPEC_VPSHRD
172 UNSPEC_VPSHRDV
173 UNSPEC_VPSHLDV
174
175 ;; For AVX512VNNI support
176 UNSPEC_VPMADDUBSWACCD
177 UNSPEC_VPMADDUBSWACCSSD
178 UNSPEC_VPMADDWDACCD
179 UNSPEC_VPMADDWDACCSSD
180
181 ;; For VAES support
182 UNSPEC_VAESDEC
183 UNSPEC_VAESDECLAST
184 UNSPEC_VAESENC
185 UNSPEC_VAESENCLAST
186
187 ;; For VPCLMULQDQ support
188 UNSPEC_VPCLMULQDQ
189
190 ;; For AVX512BITALG support
191 UNSPEC_VPSHUFBIT
192
193 ;; For VP2INTERSECT support
194 UNSPEC_VP2INTERSECT
195
196 ;; For AVX512BF16 support
197 UNSPEC_VCVTNE2PS2BF16
198 UNSPEC_VCVTNEPS2BF16
199 UNSPEC_VDPBF16PS
200 ])
201
202 (define_c_enum "unspecv" [
203 UNSPECV_LDMXCSR
204 UNSPECV_STMXCSR
205 UNSPECV_CLFLUSH
206 UNSPECV_MONITOR
207 UNSPECV_MWAIT
208 UNSPECV_VZEROALL
209 UNSPECV_VZEROUPPER
210
211 ;; For KEYLOCKER
212 UNSPECV_LOADIWKEY
213 UNSPECV_AESDEC128KLU8
214 UNSPECV_AESENC128KLU8
215 UNSPECV_AESDEC256KLU8
216 UNSPECV_AESENC256KLU8
217 UNSPECV_AESDECWIDE128KLU8
218 UNSPECV_AESENCWIDE128KLU8
219 UNSPECV_AESDECWIDE256KLU8
220 UNSPECV_AESENCWIDE256KLU8
221 UNSPECV_ENCODEKEY128U32
222 UNSPECV_ENCODEKEY256U32
223 ])
224
225 ;; All vector modes including V?TImode, used in move patterns.
226 (define_mode_iterator VMOVE
227 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
228 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
229 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
230 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
231 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
232 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
233 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
234
235 ;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
236 (define_mode_iterator V48_AVX512VL
237 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
238 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
239 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
240 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
241
242 ;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
243 (define_mode_iterator VI12_AVX512VL
244 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
245 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
246
247 ;; Same iterator, but without supposed TARGET_AVX512BW
248 (define_mode_iterator VI12_AVX512VLBW
249 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
250 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
251 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
252
253 (define_mode_iterator VI1_AVX512VL
254 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
255
256 ;; All vector modes
257 (define_mode_iterator V
258 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
259 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
260 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
261 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
262 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
263 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
264
265 ;; All 128bit vector modes
266 (define_mode_iterator V_128
267 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
268
269 ;; All 256bit vector modes
270 (define_mode_iterator V_256
271 [V32QI V16HI V8SI V4DI V8SF V4DF])
272
273 ;; All 128bit and 256bit vector modes
274 (define_mode_iterator V_128_256
275 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
276
277 ;; All 512bit vector modes
278 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
279
280 ;; All 256bit and 512bit vector modes
281 (define_mode_iterator V_256_512
282 [V32QI V16HI V8SI V4DI V8SF V4DF
283 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
284 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
285
286 ;; All vector float modes
287 (define_mode_iterator VF
288 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
289 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
290
291 ;; 128- and 256-bit float vector modes
292 (define_mode_iterator VF_128_256
293 [(V8SF "TARGET_AVX") V4SF
294 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
295
296 ;; All SFmode vector float modes
297 (define_mode_iterator VF1
298 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
299
300 (define_mode_iterator VF1_AVX2
301 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
302
303 ;; 128- and 256-bit SF vector modes
304 (define_mode_iterator VF1_128_256
305 [(V8SF "TARGET_AVX") V4SF])
306
307 (define_mode_iterator VF1_128_256VL
308 [V8SF (V4SF "TARGET_AVX512VL")])
309
310 ;; All DFmode vector float modes
311 (define_mode_iterator VF2
312 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
313
314 ;; 128- and 256-bit DF vector modes
315 (define_mode_iterator VF2_128_256
316 [(V4DF "TARGET_AVX") V2DF])
317
318 (define_mode_iterator VF2_512_256
319 [(V8DF "TARGET_AVX512F") V4DF])
320
321 (define_mode_iterator VF2_512_256VL
322 [V8DF (V4DF "TARGET_AVX512VL")])
323
324 ;; All 128bit vector float modes
325 (define_mode_iterator VF_128
326 [V4SF (V2DF "TARGET_SSE2")])
327
328 ;; All 256bit vector float modes
329 (define_mode_iterator VF_256
330 [V8SF V4DF])
331
332 ;; All 512bit vector float modes
333 (define_mode_iterator VF_512
334 [V16SF V8DF])
335
;; 4- and 8-byte element integer vector modes with AVX-512 masking:
;; 512-bit modes are the baseline, the 128/256-bit variants require
;; TARGET_AVX512VL.
(define_mode_iterator VI48_AVX512VL
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
339
;; All vector float modes with AVX-512 masking: 512-bit modes are the
;; baseline, the 128/256-bit variants require TARGET_AVX512VL.
(define_mode_iterator VF_AVX512VL
  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
343
344 ;; AVX512ER SF plus 128- and 256-bit SF vector modes
345 (define_mode_iterator VF1_AVX512ER_128_256
346 [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
347
348 (define_mode_iterator VF2_AVX512VL
349 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
350
351 (define_mode_iterator VF1_AVX512VL
352 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
353
354 ;; All vector integer modes
355 (define_mode_iterator VI
356 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
357 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
358 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
359 (V8SI "TARGET_AVX") V4SI
360 (V4DI "TARGET_AVX") V2DI])
361
;; All vector integer modes, but with the 256-bit variants gated on
;; TARGET_AVX2 (rather than TARGET_AVX as in VI) and the 512-bit
;; variants on TARGET_AVX512F resp. TARGET_AVX512BW for QI/HI elements.
(define_mode_iterator VI_AVX2
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
367
368 ;; All QImode vector integer modes
369 (define_mode_iterator VI1
370 [(V32QI "TARGET_AVX") V16QI])
371
;; All 128-bit vector modes, plus their 256-bit counterparts under AVX.
;; (The old comment here, "All DImode vector integer modes", described
;; VI8 below, not this iterator, which mixes integer and float modes.)
(define_mode_iterator V_AVX
  [V16QI V8HI V4SI V2DI V4SF V2DF
   (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
378
379 (define_mode_iterator VI48_AVX
380 [V4SI V2DI
381 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
382
383 (define_mode_iterator VI8
384 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
385
386 (define_mode_iterator VI8_FVL
387 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
388
389 (define_mode_iterator VI8_AVX512VL
390 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
391
392 (define_mode_iterator VI8_256_512
393 [V8DI (V4DI "TARGET_AVX512VL")])
394
395 (define_mode_iterator VI1_AVX2
396 [(V32QI "TARGET_AVX2") V16QI])
397
398 (define_mode_iterator VI1_AVX512
399 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
400
401 (define_mode_iterator VI1_AVX512F
402 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
403
404 (define_mode_iterator VI2_AVX2
405 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
406
407 (define_mode_iterator VI2_AVX512F
408 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
409
410 (define_mode_iterator VI4_AVX
411 [(V8SI "TARGET_AVX") V4SI])
412
413 (define_mode_iterator VI4_AVX2
414 [(V8SI "TARGET_AVX2") V4SI])
415
416 (define_mode_iterator VI4_AVX512F
417 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
418
419 (define_mode_iterator VI4_AVX512VL
420 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
421
422 (define_mode_iterator VI48_AVX512F_AVX512VL
423 [V4SI V8SI (V16SI "TARGET_AVX512F")
424 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
425
426 (define_mode_iterator VI2_AVX512VL
427 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
428
429 (define_mode_iterator VI1_AVX512VL_F
430 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
431
432 (define_mode_iterator VI8_AVX2_AVX512BW
433 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
434
435 (define_mode_iterator VI8_AVX2
436 [(V4DI "TARGET_AVX2") V2DI])
437
438 (define_mode_iterator VI8_AVX2_AVX512F
439 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
440
441 (define_mode_iterator VI8_AVX_AVX512F
442 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
443
444 (define_mode_iterator VI4_128_8_256
445 [V4SI V4DI])
446
447 ;; All V8D* modes
448 (define_mode_iterator V8FI
449 [V8DF V8DI])
450
451 ;; All V16S* modes
452 (define_mode_iterator V16FI
453 [V16SF V16SI])
454
455 ;; ??? We should probably use TImode instead.
456 (define_mode_iterator VIMAX_AVX2_AVX512BW
457 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
458
459 ;; Suppose TARGET_AVX512BW as baseline
460 (define_mode_iterator VIMAX_AVX512VL
461 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
462
463 (define_mode_iterator VIMAX_AVX2
464 [(V2TI "TARGET_AVX2") V1TI])
465
466 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
467 (define_mode_iterator SSESCALARMODE
468 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
469
470 (define_mode_iterator VI12_AVX2
471 [(V32QI "TARGET_AVX2") V16QI
472 (V16HI "TARGET_AVX2") V8HI])
473
474 (define_mode_iterator VI12_AVX2_AVX512BW
475 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
476 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
477
478 (define_mode_iterator VI24_AVX2
479 [(V16HI "TARGET_AVX2") V8HI
480 (V8SI "TARGET_AVX2") V4SI])
481
482 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
483 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
484 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
485 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
486
487 (define_mode_iterator VI124_AVX2
488 [(V32QI "TARGET_AVX2") V16QI
489 (V16HI "TARGET_AVX2") V8HI
490 (V8SI "TARGET_AVX2") V4SI])
491
492 (define_mode_iterator VI2_AVX2_AVX512BW
493 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
494
495 (define_mode_iterator VI248_AVX512VL
496 [V32HI V16SI V8DI
497 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
498 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
499 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
500
501 (define_mode_iterator VI48_AVX2
502 [(V8SI "TARGET_AVX2") V4SI
503 (V4DI "TARGET_AVX2") V2DI])
504
505 (define_mode_iterator VI248_AVX2
506 [(V16HI "TARGET_AVX2") V8HI
507 (V8SI "TARGET_AVX2") V4SI
508 (V4DI "TARGET_AVX2") V2DI])
509
510 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
511 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
512 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
513 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
514
515 (define_mode_iterator VI248_AVX512BW
516 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
517
518 (define_mode_iterator VI248_AVX512BW_AVX512VL
519 [(V32HI "TARGET_AVX512BW")
520 (V4DI "TARGET_AVX512VL") V16SI V8DI])
521
522 ;; Suppose TARGET_AVX512VL as baseline
523 (define_mode_iterator VI248_AVX512BW_1
524 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
525 V8SI V4SI
526 V2DI])
527
528 (define_mode_iterator VI248_AVX512BW_2
529 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
530 V8SI V4SI
531 V4DI V2DI])
532
533 (define_mode_iterator VI48_AVX512F
534 [(V16SI "TARGET_AVX512F") V8SI V4SI
535 (V8DI "TARGET_AVX512F") V4DI V2DI])
536
537 (define_mode_iterator VI48_AVX_AVX512F
538 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
539 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
540
541 (define_mode_iterator VI12_AVX_AVX512F
542 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
543 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
544
545 (define_mode_iterator V48_AVX2
546 [V4SF V2DF
547 V8SF V4DF
548 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
549 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
550
551 (define_mode_iterator VI1_AVX512VLBW
552 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
553 (V16QI "TARGET_AVX512VL")])
554
;; Map each vector mode to the name of the minimum AVX-512 extension
;; providing it: avx512vl for 128/256-bit modes, avx512bw for 512-bit
;; modes with QI/HI elements, avx512f for the remaining 512-bit modes.
;; NOTE(review): appears to be spliced into pattern names / ISA
;; attributes by users of this attr — confirm at each use site.
(define_mode_attr avx512
  [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
   (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
   (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
   (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
   (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
562
563 (define_mode_attr sse2_avx_avx512f
564 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
565 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
566 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
567 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
568 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
569 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
570
;; ISA name per mode width: sse2 for 128-bit modes, avx2 for 256-bit
;; modes, avx512f/avx512bw for 512-bit modes (bw for QI/HI/TI-element
;; variants).
(define_mode_attr sse2_avx2
  [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
   (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
   (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
   (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
   (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
577
578 (define_mode_attr ssse3_avx2
579 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
580 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
581 (V4SI "ssse3") (V8SI "avx2")
582 (V2DI "ssse3") (V4DI "avx2")
583 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
584
585 (define_mode_attr sse4_1_avx2
586 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
587 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
588 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
589 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
590
591 (define_mode_attr avx_avx2
592 [(V4SF "avx") (V2DF "avx")
593 (V8SF "avx") (V4DF "avx")
594 (V4SI "avx2") (V2DI "avx2")
595 (V8SI "avx2") (V4DI "avx2")])
596
;; "vec" for the 128-bit integer modes, "avx2" for their 256-bit
;; counterparts.
(define_mode_attr vec_avx2
  [(V16QI "vec") (V32QI "avx2")
   (V8HI "vec") (V16HI "avx2")
   (V4SI "vec") (V8SI "avx2")
   (V2DI "vec") (V4DI "avx2")])
602
;; ISA name per mode: avx2 for 128/256-bit SI/DI/SF/DF modes, avx512f
;; for their 512-bit variants; HI-element modes always need an AVX-512
;; extension (avx512vl resp. avx512bw).
(define_mode_attr avx2_avx512
  [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
   (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
   (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
   (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
609
;; "f" for float vector modes, "i" for integer (and TI) vector modes —
;; selects between the floating-point and integer flavor of an insn
;; mnemonic at the use sites of this attr.
(define_mode_attr shuffletype
  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
   (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
   (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
   (V32HI "i") (V16HI "i") (V8HI "i")
   (V64QI "i") (V32QI "i") (V16QI "i")
   (V4TI "i") (V2TI "i") (V1TI "i")])
617
;; Map a 512-bit vector mode to the mode of one quarter of it
;; (a single 128-bit lane's worth of elements).
(define_mode_attr ssequartermode
  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
620
;; Like ssequartermode, but the integer quarters use the generic
;; 128-bit insn mode TI instead of a vector mode.
(define_mode_attr ssequarterinsnmode
  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
623
624 (define_mode_attr vecmemsuffix
625 [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
626 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
627
628 (define_mode_attr ssedoublemodelower
629 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
630 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
631 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
632
633 (define_mode_attr ssedoublemode
634 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
635 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
636 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
637 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
638 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
639 (V4DI "V8DI") (V8DI "V16DI")])
640
641 (define_mode_attr ssebytemode
642 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
643 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
644
645 ;; All 128bit vector integer modes
646 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
647
648 ;; All 256bit vector integer modes
649 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
650
651 ;; Various 128bit vector integer mode combinations
652 (define_mode_iterator VI12_128 [V16QI V8HI])
653 (define_mode_iterator VI14_128 [V16QI V4SI])
654 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
655 (define_mode_iterator VI24_128 [V8HI V4SI])
656 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
657 (define_mode_iterator VI48_128 [V4SI V2DI])
658
;; Various 256bit and 512bit vector integer mode combinations
660 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
661 (define_mode_iterator VI124_256_AVX512F_AVX512BW
662 [V32QI V16HI V8SI
663 (V64QI "TARGET_AVX512BW")
664 (V32HI "TARGET_AVX512BW")
665 (V16SI "TARGET_AVX512F")])
666 (define_mode_iterator VI48_256 [V8SI V4DI])
667 (define_mode_iterator VI48_512 [V16SI V8DI])
668 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
669 (define_mode_iterator VI_AVX512BW
670 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
671
672 ;; Int-float size matches
673 (define_mode_iterator VI4F_128 [V4SI V4SF])
674 (define_mode_iterator VI8F_128 [V2DI V2DF])
675 (define_mode_iterator VI4F_256 [V8SI V8SF])
676 (define_mode_iterator VI8F_256 [V4DI V4DF])
677 (define_mode_iterator VI4F_256_512
678 [V8SI V8SF
679 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
680 (define_mode_iterator VI48F_256_512
681 [V8SI V8SF
682 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
683 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
684 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
685 (define_mode_iterator VF48_I1248
686 [V16SI V16SF V8DI V8DF V32HI V64QI])
687 (define_mode_iterator VI48F
688 [V16SI V16SF V8DI V8DF
689 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
690 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
691 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
692 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
693 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
694
695 (define_mode_iterator VF_AVX512
696 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
697 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
698 V16SF V8DF])
699
700 (define_mode_attr avx512bcst
701 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
702 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
703 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
704 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
705 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
706 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
707
708 ;; Mapping from float mode to required SSE level
709 (define_mode_attr sse
710 [(SF "sse") (DF "sse2")
711 (V4SF "sse") (V2DF "sse2")
712 (V16SF "avx512f") (V8SF "avx")
713 (V8DF "avx512f") (V4DF "avx")])
714
715 (define_mode_attr sse2
716 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
717 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
718
719 (define_mode_attr sse3
720 [(V16QI "sse3") (V32QI "avx")])
721
722 (define_mode_attr sse4_1
723 [(V4SF "sse4_1") (V2DF "sse4_1")
724 (V8SF "avx") (V4DF "avx")
725 (V8DF "avx512f")
726 (V4DI "avx") (V2DI "sse4_1")
727 (V8SI "avx") (V4SI "sse4_1")
728 (V16QI "sse4_1") (V32QI "avx")
729 (V8HI "sse4_1") (V16HI "avx")])
730
731 (define_mode_attr avxsizesuffix
732 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
733 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
734 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
735 (V16SF "512") (V8DF "512")
736 (V8SF "256") (V4DF "256")
737 (V4SF "") (V2DF "")])
738
739 ;; SSE instruction mode
740 (define_mode_attr sseinsnmode
741 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
742 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
743 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
744 (V16SF "V16SF") (V8DF "V8DF")
745 (V8SF "V8SF") (V4DF "V4DF")
746 (V4SF "V4SF") (V2DF "V2DF")
747 (TI "TI")])
748
749 ;; Mapping of vector modes to corresponding mask size
750 (define_mode_attr avx512fmaskmode
751 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
752 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
753 (V16SI "HI") (V8SI "QI") (V4SI "QI")
754 (V8DI "QI") (V4DI "QI") (V2DI "QI")
755 (V16SF "HI") (V8SF "QI") (V4SF "QI")
756 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
757
758 ;; Mapping of vector modes to corresponding mask size
759 (define_mode_attr avx512fmaskmodelower
760 [(V64QI "di") (V32QI "si") (V16QI "hi")
761 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
762 (V16SI "hi") (V8SI "qi") (V4SI "qi")
763 (V8DI "qi") (V4DI "qi") (V2DI "qi")
764 (V16SF "hi") (V8SF "qi") (V4SF "qi")
765 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
766
767 ;; Mapping of vector modes to corresponding mask half size
768 (define_mode_attr avx512fmaskhalfmode
769 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
770 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
771 (V16SI "QI") (V8SI "QI") (V4SI "QI")
772 (V8DI "QI") (V4DI "QI") (V2DI "QI")
773 (V16SF "QI") (V8SF "QI") (V4SF "QI")
774 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
775
776 ;; Mapping of vector float modes to an integer mode of the same size
777 (define_mode_attr sseintvecmode
778 [(V16SF "V16SI") (V8DF "V8DI")
779 (V8SF "V8SI") (V4DF "V4DI")
780 (V4SF "V4SI") (V2DF "V2DI")
781 (V16SI "V16SI") (V8DI "V8DI")
782 (V8SI "V8SI") (V4DI "V4DI")
783 (V4SI "V4SI") (V2DI "V2DI")
784 (V16HI "V16HI") (V8HI "V8HI")
785 (V32HI "V32HI") (V64QI "V64QI")
786 (V32QI "V32QI") (V16QI "V16QI")])
787
788 (define_mode_attr sseintvecmode2
789 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
790 (V8SF "OI") (V4SF "TI")])
791
792 (define_mode_attr sseintvecmodelower
793 [(V16SF "v16si") (V8DF "v8di")
794 (V8SF "v8si") (V4DF "v4di")
795 (V4SF "v4si") (V2DF "v2di")
796 (V8SI "v8si") (V4DI "v4di")
797 (V4SI "v4si") (V2DI "v2di")
798 (V16HI "v16hi") (V8HI "v8hi")
799 (V32QI "v32qi") (V16QI "v16qi")])
800
801 ;; Mapping of vector modes to a vector mode of double size
802 (define_mode_attr ssedoublevecmode
803 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
804 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
805 (V8SF "V16SF") (V4DF "V8DF")
806 (V4SF "V8SF") (V2DF "V4DF")])
807
808 ;; Mapping of vector modes to a vector mode of half size
809 (define_mode_attr ssehalfvecmode
810 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
811 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
812 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
813 (V16SF "V8SF") (V8DF "V4DF")
814 (V8SF "V4SF") (V4DF "V2DF")
815 (V4SF "V2SF")])
816
817 (define_mode_attr ssehalfvecmodelower
818 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
819 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
820 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
821 (V16SF "v8sf") (V8DF "v4df")
822 (V8SF "v4sf") (V4DF "v2df")
823 (V4SF "v2sf")])
824
;; Mapping of vector modes to packed single mode of the same size
826 (define_mode_attr ssePSmode
827 [(V16SI "V16SF") (V8DF "V16SF")
828 (V16SF "V16SF") (V8DI "V16SF")
829 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
830 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
831 (V8SI "V8SF") (V4SI "V4SF")
832 (V4DI "V8SF") (V2DI "V4SF")
833 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
834 (V8SF "V8SF") (V4SF "V4SF")
835 (V4DF "V8SF") (V2DF "V4SF")])
836
837 (define_mode_attr ssePSmode2
838 [(V8DI "V8SF") (V4DI "V4SF")])
839
840 ;; Mapping of vector modes back to the scalar modes
841 (define_mode_attr ssescalarmode
842 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
843 (V32HI "HI") (V16HI "HI") (V8HI "HI")
844 (V16SI "SI") (V8SI "SI") (V4SI "SI")
845 (V8DI "DI") (V4DI "DI") (V2DI "DI")
846 (V16SF "SF") (V8SF "SF") (V4SF "SF")
847 (V8DF "DF") (V4DF "DF") (V2DF "DF")
848 (V4TI "TI") (V2TI "TI")])
849
850 ;; Mapping of vector modes back to the scalar modes
851 (define_mode_attr ssescalarmodelower
852 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
853 (V32HI "hi") (V16HI "hi") (V8HI "hi")
854 (V16SI "si") (V8SI "si") (V4SI "si")
855 (V8DI "di") (V4DI "di") (V2DI "di")
856 (V16SF "sf") (V8SF "sf") (V4SF "sf")
857 (V8DF "df") (V4DF "df") (V2DF "df")
858 (V4TI "ti") (V2TI "ti")])
859
860 ;; Mapping of vector modes to the 128bit modes
861 (define_mode_attr ssexmmmode
862 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
863 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
864 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
865 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
866 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
867 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
868
869 ;; Pointer size override for scalar modes (Intel asm dialect)
870 (define_mode_attr iptr
871 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
872 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
873 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
874 (V16SF "k") (V8DF "q")
875 (V8SF "k") (V4DF "q")
876 (V4SF "k") (V2DF "q")
877 (SF "k") (DF "q")])
878
879 ;; Mapping of vector modes to VPTERNLOG suffix
880 (define_mode_attr ternlogsuffix
881 [(V8DI "q") (V4DI "q") (V2DI "q")
882 (V16SI "d") (V8SI "d") (V4SI "d")
883 (V32HI "d") (V16HI "d") (V8HI "d")
884 (V64QI "d") (V32QI "d") (V16QI "d")])
885
886 ;; Number of scalar elements in each vector type
887 (define_mode_attr ssescalarnum
888 [(V64QI "64") (V16SI "16") (V8DI "8")
889 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
890 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
891 (V16SF "16") (V8DF "8")
892 (V8SF "8") (V4DF "4")
893 (V4SF "4") (V2DF "2")])
894
895 ;; Mask of scalar elements in each vector type
896 (define_mode_attr ssescalarnummask
897 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
898 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
899 (V8SF "7") (V4DF "3")
900 (V4SF "3") (V2DF "1")])
901
902 (define_mode_attr ssescalarsize
903 [(V4TI "64") (V2TI "64") (V1TI "64")
904 (V8DI "64") (V4DI "64") (V2DI "64")
905 (V64QI "8") (V32QI "8") (V16QI "8")
906 (V32HI "16") (V16HI "16") (V8HI "16")
907 (V16SI "32") (V8SI "32") (V4SI "32")
908 (V16SF "32") (V8SF "32") (V4SF "32")
909 (V8DF "64") (V4DF "64") (V2DF "64")])
910
911 ;; SSE prefix for integer vector modes
912 (define_mode_attr sseintprefix
913 [(V2DI "p") (V2DF "")
914 (V4DI "p") (V4DF "")
915 (V8DI "p") (V8DF "")
916 (V4SI "p") (V4SF "")
917 (V8SI "p") (V8SF "")
918 (V16SI "p") (V16SF "")
919 (V16QI "p") (V8HI "p")
920 (V32QI "p") (V16HI "p")
921 (V64QI "p") (V32HI "p")])
922
923 ;; SSE scalar suffix for vector modes
924 (define_mode_attr ssescalarmodesuffix
925 [(SF "ss") (DF "sd")
926 (V16SF "ss") (V8DF "sd")
927 (V8SF "ss") (V4DF "sd")
928 (V4SF "ss") (V2DF "sd")
929 (V16SI "d") (V8DI "q")
930 (V8SI "d") (V4DI "q")
931 (V4SI "d") (V2DI "q")])
932
933 ;; Pack/unpack vector modes
934 (define_mode_attr sseunpackmode
935 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
936 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
937 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
938
939 (define_mode_attr ssepackmode
940 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
941 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
942 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
943
944 ;; Mapping of the max integer size for xop rotate immediate constraint
945 (define_mode_attr sserotatemax
946 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
947
948 ;; Mapping of mode to cast intrinsic name
949 (define_mode_attr castmode
950 [(V8SI "si") (V8SF "ps") (V4DF "pd")
951 (V16SI "si") (V16SF "ps") (V8DF "pd")])
952
953 ;; Instruction suffix for sign and zero extensions.
954 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
955
956 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
957 ;; i64x4 or f64x4 for 512bit modes.
958 (define_mode_attr i128
959 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
960 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
961 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
962
963 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
964 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
965 (define_mode_attr i128vldq
966 [(V8SF "f32x4") (V4DF "f64x2")
967 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
968
969 ;; Mix-n-match
970 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
971 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
972
973 ;; Mapping for dbpsabbw modes
974 (define_mode_attr dbpsadbwmode
975 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
976
;; Mapping suffixes for broadcast (element-size suffix of vpbroadcast*/
;; vbroadcast*).
(define_mode_attr bcstscalarsuff
  [(V64QI "b")  (V32QI "b") (V16QI "b")
   (V32HI "w")  (V16HI "w") (V8HI "w")
   (V16SI "d")  (V8SI "d")  (V4SI "d")
   (V8DI "q")   (V4DI "q")  (V2DI "q")
   (V16SF "ss") (V8SF "ss") (V4SF "ss")
   (V8DF "sd")  (V4DF "sd") (V2DF "sd")])

;; Tie mode of assembler operand to mode iterator
;; (x = 128-bit xmm, t = 256-bit ymm, g = 512-bit zmm).
(define_mode_attr xtg_mode
  [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
   (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])

;; Half mask mode for unpacks
(define_mode_attr HALFMASKMODE
  [(DI "SI") (SI "HI")])

;; Double mask mode for packs
(define_mode_attr DOUBLEMASKMODE
  [(HI "SI") (SI "DI")])
999
1000
1001 ;; Include define_subst patterns for instructions with mask
1002 (include "subst.md")
1003
1004 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
1005
1006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1007 ;;
1008 ;; Move patterns
1009 ;;
1010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1011
1012 ;; All of these patterns are enabled for SSE1 as well as SSE2.
1013 ;; This is essential for maintaining stable calling conventions.
1014
;; Generic vector move expander; all the real work (memory/register
;; legitimization, constant handling) is done in ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
	(match_operand:VMOVE 1 "nonimmediate_operand"))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
1023
;; Vector move insn.  Alternatives: 0 = load of all-zeros constant,
;; 1 = load of all-ones constant (needs SSE2/AVX2 depending on width),
;; 2 = reg/mem -> reg move, 3 = reg -> mem store.  At least one operand
;; must be a register (mem->mem is rejected by the condition).
(define_insn "mov<mode>_internal"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand"
	 "=v,v ,v ,m")
	(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
	 " C,BC,vm,v"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      /* Constant alternatives (0 and 1): emit pxor/pcmpeq etc.  */
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex")
   ;; Mode selection implements several tuning workarounds: prefer V4SF
   ;; (movaps/movups) when SSE2 is absent, when optimizing for size, or
   ;; on targets where typeless/packed-single moves are faster.
   (set (attr "mode")
	(cond [(match_test "TARGET_AVX")
		 (const_string "<sseinsnmode>")
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	       (and (match_test "<MODE>mode == V2DFmode")
		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
		 (const_string "V4SF")
	       (and (eq_attr "alternative" "3")
		    (match_test "TARGET_SSE_TYPELESS_STORES"))
		 (const_string "V4SF")
	       (and (eq_attr "alternative" "0")
		    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
		 (const_string "TI")
	      ]
	      (const_string "<sseinsnmode>")))
   ;; All-ones constants need pcmpeq: SSE2 for 128-bit, AVX2 for 256-bit.
   (set (attr "enabled")
	(cond [(and (match_test "<MODE_SIZE> == 16")
		    (eq_attr "alternative" "1"))
		 (symbol_ref "TARGET_SSE2")
	       (and (match_test "<MODE_SIZE> == 32")
		    (eq_attr "alternative" "1"))
		 (symbol_ref "TARGET_AVX2")
	      ]
	      (symbol_ref "true")))])
1073
;; If mem_addr points to a memory region with less than whole vector size
;; bytes of accessible memory and k is a mask that would prevent reading
;; the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
;; it from being transformed to vpblendd.  See PR97642.
(define_expand "<avx512>_load<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "register_operand")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
	  (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
	  (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
  "TARGET_AVX512F"
{
  /* A constant (all-ones) mask selects every element of operand 1,
     so the masked load degenerates into a plain vector move.  */
  if (CONST_INT_P (operands[3]))
    {
      emit_insn (gen_rtx_SET (operands[0], operands[1]));
      DONE;
    }
  else if (MEM_P (operands[1]))
    operands[1] = gen_rtx_UNSPEC (<MODE>mode,
				  gen_rtvec(1, operands[1]),
				  UNSPEC_MASKLOAD);
})
1096
;; Masked load of a 32/64-bit-element vector; the UNSPEC_MASKLOAD wrapper
;; keeps the optimizers from converting the access into an unmasked one
;; (see the expander above).  Operand 2 is the merge source (tied or zero).
(define_insn "*<avx512>_load<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
	(vec_merge:V48_AVX512VL
	  (unspec:V48_AVX512VL
	    [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
	    UNSPEC_MASKLOAD)
	  (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  /* Pick float vs integer mnemonic and aligned vs unaligned form.  */
  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
    {
      if (misaligned_operand (operands[1], <MODE>mode))
	return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
      else
	return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
    }
  else
    {
      if (misaligned_operand (operands[1], <MODE>mode))
	return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
      else
	return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
    }
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1125
;; Strip a bare UNSPEC_MASKLOAD (no mask left after simplification) back
;; into an ordinary load.
(define_insn_and_split "*<avx512>_load<mode>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand")
	(unspec:V48_AVX512VL
	  [(match_operand:V48_AVX512VL 1 "memory_operand")]
	  UNSPEC_MASKLOAD))]
  "TARGET_AVX512F"
  "#"
  "&& 1"
  [(set (match_dup 0) (match_dup 1))])
1135
;; Byte/word-element variant of the masked-load expander above;
;; requires AVX512BW.  Same UNSPEC_MASKLOAD rationale (PR97642).
(define_expand "<avx512>_load<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
	(vec_merge:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
	  (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
  "TARGET_AVX512BW"
{
  /* A constant (all-ones) mask degenerates into a plain vector move.  */
  if (CONST_INT_P (operands[3]))
    {
      emit_insn (gen_rtx_SET (operands[0], operands[1]));
      DONE;
    }
  else if (MEM_P (operands[1]))
    operands[1] = gen_rtx_UNSPEC (<MODE>mode,
				  gen_rtvec(1, operands[1]),
				  UNSPEC_MASKLOAD);

})
1155
;; Masked byte/word-element load; only the unaligned form exists
;; (vmovdqu8/vmovdqu16).
(define_insn "*<avx512>_load<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI12_AVX512VL
	  (unspec:VI12_AVX512VL
	    [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
	    UNSPEC_MASKLOAD)
	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1169
;; Strip a bare UNSPEC_MASKLOAD back into an ordinary load (VI12 variant).
(define_insn_and_split "*<avx512>_load<mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(unspec:VI12_AVX512VL
	  [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
	  UNSPEC_MASKLOAD))]
  "TARGET_AVX512BW"
  "#"
  "&& 1"
  [(set (match_dup 0) (match_dup 1))])
1179
;; Masked scalar move: element 0 comes from operand 2 (or the merge
;; source 3 when mask bit 0 is clear), upper elements from operand 1.
(define_insn "avx512f_mov<ssescalarmodelower>_mask"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (match_operand:VF_128 2 "register_operand" "v")
	    (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_operand:VF_128 1 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
1194
;; Masked scalar (SF/DF) load: element 0 loaded from memory under the
;; mask, upper elements zeroed (operand 4 is materialized as zero).
(define_expand "avx512f_load<mode>_mask"
  [(set (match_operand:<ssevecmode> 0 "register_operand")
	(vec_merge:<ssevecmode>
	  (vec_merge:<ssevecmode>
	    (vec_duplicate:<ssevecmode>
	      (match_operand:MODEF 1 "memory_operand"))
	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
	    (match_operand:QI 3 "register_operand"))
	  (match_dup 4)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "operands[4] = CONST0_RTX (<ssevecmode>mode);")
1207
;; Masked scalar (SF/DF) load insn matching the expander above
;; (operand 4 is always the zero vector).
(define_insn "*avx512f_load<mode>_mask"
  [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
	(vec_merge:<ssevecmode>
	  (vec_merge:<ssevecmode>
	    (vec_duplicate:<ssevecmode>
	      (match_operand:MODEF 1 "memory_operand" "m"))
	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
	    (match_operand:QI 3 "register_operand" "Yk"))
	  (match_operand:<ssevecmode> 4 "const0_operand" "C")
	  (const_int 1)))]
  "TARGET_AVX512F"
  ;; Intel-syntax side previously read "%0%{3%}%N2" - the mask operand
  ;; was missing its '%' escape, emitting a literal "{3}" under
  ;; -masm=intel.  Both dialects now use %{%3%}.
  "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "load")
   (set_attr "mode" "<MODE>")])
1224
;; Masked scalar (SF/DF) store: write element 0 of operand 1 to memory
;; iff bit 0 of mask operand 2 is set; otherwise memory is unchanged.
(define_insn "avx512f_store<mode>_mask"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
	(if_then_else:MODEF
	  (and:QI (match_operand:QI 2 "register_operand" "Yk")
		  (const_int 1))
	  (vec_select:MODEF
	    (match_operand:<ssevecmode> 1 "register_operand" "v")
	    (parallel [(const_int 0)]))
	  (match_dup 0)))]
  "TARGET_AVX512F"
  "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<MODE>")])
1240
;; Masked blend of 32/64-bit-element vectors.  Alternative 0 ties the
;; merge source to the destination so a masked move suffices;
;; alternative 1 keeps it in a separate register and uses vblendm.
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
	  (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
{
  /* Distinct merge-source register: must emit a real blend.  */
  if (REG_P (operands[1])
      && REGNO (operands[1]) != REGNO (operands[0]))
    return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";

  /* Otherwise a masked move does the job; pick float/integer mnemonic
     and aligned/unaligned form.  */
  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
    {
      if (misaligned_operand (operands[2], <MODE>mode))
	return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
      else
	return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
    }
  else
    {
      if (misaligned_operand (operands[2], <MODE>mode))
	return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
      else
	return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
    }
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1271
;; Masked blend of byte/word-element vectors (AVX512BW): masked move
;; when the merge source is tied to the destination, vpblendm otherwise.
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
	  (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512BW"
  "@
   vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
   vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1285
;; Masked store of a 32/64-bit-element vector; unselected memory
;; locations are left untouched (match_dup 0 as merge source).
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  /* Pick float vs integer mnemonic and aligned vs unaligned form.  */
  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
    {
      if (misaligned_operand (operands[0], <MODE>mode))
	return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
      else
	return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
    }
  else
    {
      if (misaligned_operand (operands[0], <MODE>mode))
	return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
      else
	return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
    }
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1313
;; Masked store of byte/word-element vectors (AVX512BW); only the
;; unaligned form exists (vmovdqu8/vmovdqu16).
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
	(vec_merge:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1326
;; movq: copy the low DImode element and zero the high one.
(define_insn "sse2_movq128"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(vec_concat:V2DI
	  (vec_select:DI
	    (match_operand:V2DI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0)]))
	  (const_int 0)))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
1339
1340 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1341 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1342 ;; from memory, we'd prefer to load the memory directly into the %xmm
1343 ;; register. To facilitate this happy circumstance, this pattern won't
1344 ;; split until after register allocation. If the 64-bit value didn't
1345 ;; come from memory, this is the best we can do. This is much better
1346 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1347 ;; from there.
1348
;; See the comment block above: 32-bit-only pattern that moves a DImode
;; value (register pair or memory) into the low half of an xmm register,
;; deferring the split until after reload so a memory source can be
;; loaded directly.
(define_insn_and_split "movdi_to_sse"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
	(unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
		     UNSPEC_MOVDI_TO_SSE))
   (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
	 Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
				  gen_lowpart (SImode, operands[1])));
      if (TARGET_SSE4_1)
	/* SSE4.1: insert the high 32 bits with pinsrd (no scratch).  */
	emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
				      gen_highpart (SImode, operands[1]),
				      GEN_INT (2)));
      else
	{
	  /* Pre-SSE4.1: load the high half into the scratch and
	     interleave the two low dwords.  */
	  emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
				      gen_highpart (SImode, operands[1])));
	  emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
						 operands[2]));
	}
    }
  else if (memory_operand (operands[1], DImode))
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
				   operands[1], const0_rtx));
  else
    gcc_unreachable ();
  DONE;
}
  [(set_attr "isa" "sse4,*,*")])
1385
;; Rewrite a V4SF load of a zero-extended scalar as a movss-style merge
;; of the scalar into a zero vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand")
	(match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
	(vec_merge:V4SF
	  (vec_duplicate:V4SF (match_dup 1))
	  (match_dup 2)
	  (const_int 1)))]
{
  operands[1] = gen_lowpart (SFmode, operands[1]);
  operands[2] = CONST0_RTX (V4SFmode);
})
1399
;; Same as above for V2DF: scalar DF load concatenated with zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand")
	(match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = gen_lowpart (DFmode, operands[1]);
  operands[2] = CONST0_RTX (DFmode);
})
1409
;; Misaligned vector move; expansion is delegated to
;; ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
	(match_operand:VMOVE 1 "nonimmediate_operand"))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
1418
;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets:
;; a low-half load followed by a high-half load of the adjacent DF becomes
;; one unaligned V2DF load.
(define_peephole2
  [(set (match_operand:V2DF 0 "sse_reg_operand")
	(vec_concat:V2DF (match_operand:DF 1 "memory_operand")
			 (match_operand:DF 4 "const0_operand")))
   (set (match_operand:V2DF 2 "sse_reg_operand")
	(vec_concat:V2DF (vec_select:DF (match_dup 2)
					(parallel [(const_int 0)]))
			 (match_operand:DF 3 "memory_operand")))]
  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
  [(set (match_dup 2) (match_dup 5))]
  "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1432
;; Variant of the peephole above where the first insn is a scalar DF
;; load into the register later used as the low half (operand 4 must be
;; the same hard register as the V2DF destination).
(define_peephole2
  [(set (match_operand:DF 0 "sse_reg_operand")
	(match_operand:DF 1 "memory_operand"))
   (set (match_operand:V2DF 2 "sse_reg_operand")
	(vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
			 (match_operand:DF 3 "memory_operand")))]
  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
   && REGNO (operands[4]) == REGNO (operands[2])
   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
  [(set (match_dup 2) (match_dup 5))]
  "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1444
;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL
;; targets: adjacent low-half and high-half stores become one unaligned
;; V2DF store.
(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
		       (parallel [(const_int 0)])))
   (set (match_operand:DF 2 "memory_operand")
	(vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
		       (parallel [(const_int 1)])))]
  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
   && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
  [(set (match_dup 4) (match_dup 1))]
  "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1457
;; SSE3 lddqu: unaligned integer-vector load kept as an unspec so it is
;; not merged with ordinary loads (lddqu may read a wider aligned chunk).
(define_insn "<sse3>_lddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
		    UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "%vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
1478
;; Non-temporal integer store (movnti) for SImode/DImode GPR sources.
(define_insn "sse2_movnti<mode>"
  [(set (match_operand:SWI48 0 "memory_operand" "=m")
	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
		      UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "<MODE>")])
1488
;; Non-temporal float vector store (movntps/movntpd).
(define_insn "<sse>_movnt<mode>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
	(unspec:VF
	  [(match_operand:VF 1 "register_operand" "v")]
	  UNSPEC_MOVNT))]
  "TARGET_SSE"
  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
1499
;; Non-temporal integer vector store (movntdq).
(define_insn "<sse2>_movnt<mode>"
  [(set (match_operand:VI8 0 "memory_operand" "=m")
	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
		    UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "%vmovntdq\t{%1, %0|%0, %1}"
  ;; NOTE(review): type is "ssecvt" unlike the float variant's "ssemov";
  ;; presumably historical - confirm before changing scheduling attrs.
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
1514
1515 ; Expand patterns for non-temporal stores. At the moment, only those
1516 ; that directly map to insns are defined; it would be possible to
1517 ; define patterns for other modes that would expand to several insns.
1518
;; Modes handled by storent patterns: scalar and vector modes for which
;; a single non-temporal store instruction exists on the guarding ISA.
(define_mode_iterator STORENT_MODE
  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; Named expander used by the middle end for non-temporal stores;
;; matches one of the movnt insns above directly.
(define_expand "storent<mode>"
  [(set (match_operand:STORENT_MODE 0 "memory_operand")
	(unspec:STORENT_MODE
	  [(match_operand:STORENT_MODE 1 "register_operand")]
	  UNSPEC_MOVNT))]
  "TARGET_SSE")
1533
1534 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1535 ;;
1536 ;; Mask operations
1537 ;;
1538 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1539
;; All integer modes with AVX512BW/DQ.
(define_mode_iterator SWI1248_AVX512BWDQ
  [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])

;; All integer modes with AVX512BW, where HImode operation
;; can be used instead of QImode.
(define_mode_iterator SWI1248_AVX512BW
  [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])

;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
(define_mode_iterator SWI1248_AVX512BWDQ2
  [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
   (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1553
;; kmov{b,w,d,q} move expander for mask registers; at most one memory
;; operand (mem->mem is rejected).
(define_expand "kmov<mskmodesuffix>"
  [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
	(match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
  "TARGET_AVX512F
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1559
;; Mask-register logic (kand/kior/kxor).  Without AVX512DQ there is no
;; kandb etc., so QImode falls back to the HImode (word) form - hence
;; the mode attr below and the MODE_HI check in the output code.
(define_insn "k<code><mode>"
  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
	(any_logic:SWI1248_AVX512BW
	  (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
	  (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
   (unspec [(const_int 0)] UNSPEC_MASKOP)]
  "TARGET_AVX512F"
{
  if (get_attr_mode (insn) == MODE_HI)
    return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
  else
    return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "msklog")
   (set_attr "prefix" "vex")
   (set (attr "mode")
     (cond [(and (match_test "<MODE>mode == QImode")
		 (not (match_test "TARGET_AVX512DQ")))
	      (const_string "HI")
	   ]
	   (const_string "<MODE>")))])

;; After reload, rewrite a GPR-style logic insn whose operands all ended
;; up in mask registers into the UNSPEC_MASKOP form above (kand etc.
;; do not clobber the flags, so the CC clobber is dropped).
(define_split
  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
	(any_logic:SWI1248_AVX512BW
	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
	  (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_AVX512F && reload_completed"
  [(parallel
     [(set (match_dup 0)
	   (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1593
;; Mask and-not: dest = ~op1 & op2.  QImode uses the word form unless
;; AVX512DQ provides kandnb.
(define_insn "kandn<mode>"
  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
	(and:SWI1248_AVX512BW
	  (not:SWI1248_AVX512BW
	    (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
	  (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
   (unspec [(const_int 0)] UNSPEC_MASKOP)]
  "TARGET_AVX512F"
{
  if (get_attr_mode (insn) == MODE_HI)
    return "kandnw\t{%2, %1, %0|%0, %1, %2}";
  else
    return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "msklog")
   (set_attr "prefix" "vex")
   (set (attr "mode")
     (cond [(and (match_test "<MODE>mode == QImode")
		 (not (match_test "TARGET_AVX512DQ")))
	      (const_string "HI")
	   ]
	   (const_string "<MODE>")))])

;; Post-reload split: and-not in mask registers -> kandn form, dropping
;; the flags clobber.
(define_split
  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
	(and:SWI1248_AVX512BW
	  (not:SWI1248_AVX512BW
	    (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand"))
	  (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_AVX512F && reload_completed"
  [(parallel
     [(set (match_dup 0)
	   (and:SWI1248_AVX512BW
	     (not:SWI1248_AVX512BW (match_dup 1))
	     (match_dup 2)))
      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1631
;; Mask xnor: dest = ~(op1 ^ op2).  QImode uses the word form unless
;; AVX512DQ provides kxnorb.
(define_insn "kxnor<mode>"
  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
	(not:SWI1248_AVX512BW
	  (xor:SWI1248_AVX512BW
	    (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
	    (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
   (unspec [(const_int 0)] UNSPEC_MASKOP)]
  "TARGET_AVX512F"
{
  if (get_attr_mode (insn) == MODE_HI)
    return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
  else
    return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "msklog")
   (set_attr "prefix" "vex")
   (set (attr "mode")
     (cond [(and (match_test "<MODE>mode == QImode")
		 (not (match_test "TARGET_AVX512DQ")))
	      (const_string "HI")
	   ]
	   (const_string "<MODE>")))])
1654
;; Mask complement.  QImode uses the word form unless AVX512DQ provides
;; knotb.
(define_insn "knot<mode>"
  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
	(not:SWI1248_AVX512BW
	  (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
   (unspec [(const_int 0)] UNSPEC_MASKOP)]
  "TARGET_AVX512F"
{
  if (get_attr_mode (insn) == MODE_HI)
    return "knotw\t{%1, %0|%0, %1}";
  else
    return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "msklog")
   (set_attr "prefix" "vex")
   (set (attr "mode")
     (cond [(and (match_test "<MODE>mode == QImode")
		 (not (match_test "TARGET_AVX512DQ")))
	      (const_string "HI")
	   ]
	   (const_string "<MODE>")))])

;; Post-reload split: complement in a mask register -> knot form.
(define_split
  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
	(not:SWI1248_AVX512BW
	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")))]
  "TARGET_AVX512F && reload_completed"
  [(parallel
     [(set (match_dup 0)
	   (not:SWI1248_AVX512BW (match_dup 1)))
      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1685
;; knotd zero-extends its SImode result into the full 64-bit mask
;; register, so the zero_extend is free.
;; (Dropped the stray ';' that followed the output-template string -
;; it only parsed because ';' starts an md comment.)
(define_insn "*knotsi_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=k")
	(zero_extend:DI
	  (not:SI (match_operand:SI 1 "register_operand" "k"))))
   (unspec [(const_int 0)] UNSPEC_MASKOP)]
  "TARGET_AVX512BW"
  "knotd\t{%1, %0|%0, %1}"
  [(set_attr "type" "msklog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SI")])
1696
;; Post-reload split feeding the *knotsi_1_zext pattern above.
(define_split
  [(set (match_operand:DI 0 "mask_reg_operand")
	(zero_extend:DI
	  (not:SI (match_operand:SI 1 "mask_reg_operand"))))]
  "TARGET_AVX512BW && reload_completed"
  [(parallel
     [(set (match_dup 0)
	   (zero_extend:DI
	     (not:SI (match_dup 1))))
      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1707
;; Mask addition (kaddb/kaddw/kaddd/kaddq); availability per mode is
;; encoded in the SWI1248_AVX512BWDQ2 iterator.
(define_insn "kadd<mode>"
  [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
	(plus:SWI1248_AVX512BWDQ2
	  (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
	  (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
   (unspec [(const_int 0)] UNSPEC_MASKOP)]
  "TARGET_AVX512F"
  "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "msklog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; Mask variant shift mnemonics
(define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1722
;; Mask shift by immediate (kshiftl/kshiftr).
(define_insn "k<code><mode>"
  [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
	(any_lshift:SWI1248_AVX512BWDQ
	  (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
	  (match_operand 2 "const_0_to_255_operand" "n")))
   (unspec [(const_int 0)] UNSPEC_MASKOP)]
  "TARGET_AVX512F"
  "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "msklog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
1734
;; ktest: set ZF/CF from AND/ANDN of two mask registers.
(define_insn "ktest<mode>"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
	   (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
	  UNSPEC_KTEST))]
  "TARGET_AVX512F"
  "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "<MODE>")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])

;; kortest: set ZF/CF from OR of two mask registers.
(define_insn "kortest<mode>"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
	   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
	  UNSPEC_KORTEST))]
  "TARGET_AVX512F"
  "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "<MODE>")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1758
;; kunpckbw: concatenate two 8-bit masks (op1 in the high byte,
;; op2 in the low byte) into a 16-bit mask.
(define_insn "kunpckhi"
  [(set (match_operand:HI 0 "register_operand" "=k")
	(ior:HI
	  (ashift:HI
	    (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
	    (const_int 8))
	  (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
  "TARGET_AVX512F"
  "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "HI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1771
;; kunpckwd: concatenate two 16-bit masks into a 32-bit mask.
;; Added the type/prefix attributes for consistency with kunpckhi;
;; kunpckwd is likewise a VEX-encoded mask-logic instruction.
(define_insn "kunpcksi"
  [(set (match_operand:SI 0 "register_operand" "=k")
	(ior:SI
	  (ashift:SI
	    (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
	    (const_int 16))
	  (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
  "TARGET_AVX512BW"
  "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "SI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1782
;; kunpckdq: concatenate two 32-bit masks into a 64-bit mask.
;; Added the type/prefix attributes for consistency with kunpckhi.
(define_insn "kunpckdi"
  [(set (match_operand:DI 0 "register_operand" "=k")
	(ior:DI
	  (ashift:DI
	    (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
	    (const_int 32))
	  (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
  "TARGET_AVX512BW"
  "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "DI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1793
1794
1795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1796 ;;
1797 ;; Parallel floating point arithmetic
1798 ;;
1799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1800
;; abs/neg expander; ix86_expand_fp_absneg_operator builds the
;; sign-mask constant and emits the two-operand form matched below.
(define_expand "<code><mode>2"
  [(set (match_operand:VF 0 "register_operand")
	(absneg:VF
	  (match_operand:VF 1 "register_operand")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1807
;; abs/neg via bitwise op with a sign-mask constant (operand 2, carried
;; in a USE until the split).  After reload this becomes andps/xorps
;; (or the pd forms); the split canonicalizes operand order so the
;; memory operand (AVX) or the destination-tied operand (non-AVX) ends
;; up in the right position.
(define_insn_and_split "*<code><mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
	(absneg:VF
	  (match_operand:VF 1 "vector_operand" "0,xBm,v,m")))
   (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(<absneg_op>:VF (match_dup 1) (match_dup 2)))]
{
  if (TARGET_AVX)
    {
      if (MEM_P (operands[1]))
	std::swap (operands[1], operands[2]);
    }
  else
   {
     if (operands_match_p (operands[0], operands[2]))
       std::swap (operands[1], operands[2]);
   }
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
1831
;; Negated absolute value: -(|x|) is OR with the sign-bit mask
;; (operand 2).  Same operand-canonicalization logic as *<code><mode>2
;; above.
(define_insn_and_split "*nabs<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
	(neg:VF
	  (abs:VF
	    (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))))
   (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(ior:VF (match_dup 1) (match_dup 2)))]
{
  if (TARGET_AVX)
    {
      if (MEM_P (operands[1]))
	std::swap (operands[1], operands[2]);
    }
  else
   {
     if (operands_match_p (operands[0], operands[2]))
       std::swap (operands[1], operands[2]);
   }
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
1856
;; Vector FP add/sub expander, with optional masking and rounding
;; (via define_subst from subst.md).
(define_expand "<insn><mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand")
	(plusminus:VF
	  (match_operand:VF 1 "<round_nimm_predicate>")
	  (match_operand:VF 2 "<round_nimm_predicate>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1864
;; Vector FP add/sub insn: legacy two-operand SSE form (alt 0) and
;; three-operand VEX/EVEX form (alt 1), with mask/round subst variants.
(define_insn "*<insn><mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
	(plusminus:VF
	  (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
	  (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "<bcst_mask_prefix3>")
   (set_attr "mode" "<MODE>")])
1879
;; Standard scalar operation patterns which preserve the rest of the
;; vector for combiner.
;; Scalar add/sub on element 0 only: the result of the scalar op is
;; duplicated and merged back into operand 1 with mask (const_int 1),
;; so elements 1..N-1 of the destination come from operand 1 -- exactly
;; the addss/subss (etc.) semantics.
(define_insn "*<sse>_vm<insn><mode>3"
1883 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1884 (vec_merge:VF_128
1885 (vec_duplicate:VF_128
1886 (plusminus:<ssescalarmode>
1887 (vec_select:<ssescalarmode>
1888 (match_operand:VF_128 1 "register_operand" "0,v")
1889 (parallel [(const_int 0)]))
1890 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1891 (match_dup 1)
1892 (const_int 1)))]
1893 "TARGET_SSE"
1894 "@
1895 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1896 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1897 [(set_attr "isa" "noavx,avx")
1898 (set_attr "type" "sseadd")
1899 (set_attr "prefix" "orig,vex")
1900 (set_attr "mode" "<ssescalarmode>")])
1901 
;; Builtin-facing scalar add/sub: whole-vector op merged on element 0,
;; with optional scalar masking and embedded-rounding decoration.
(define_insn "<sse>_vm<insn><mode>3<mask_scalar_name><round_scalar_name>"
1903 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1904 (vec_merge:VF_128
1905 (plusminus:VF_128
1906 (match_operand:VF_128 1 "register_operand" "0,v")
1907 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1908 (match_dup 1)
1909 (const_int 1)))]
1910 "TARGET_SSE"
1911 "@
1912 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1913 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1914 [(set_attr "isa" "noavx,avx")
1915 (set_attr "type" "sseadd")
1916 (set_attr "prefix" "<round_scalar_prefix>")
1917 (set_attr "mode" "<ssescalarmode>")])
1918
;; Vector float multiply.  Same expander/insn structure as add/sub above;
;; MULT is commutative, hence the "%0" constraint on operand 1 in the
;; insn pattern.
(define_expand "mul<mode>3<mask_name><round_name>"
1920 [(set (match_operand:VF 0 "register_operand")
1921 (mult:VF
1922 (match_operand:VF 1 "<round_nimm_predicate>")
1923 (match_operand:VF 2 "<round_nimm_predicate>")))]
1924 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1925 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1926 
(define_insn "*mul<mode>3<mask_name><round_name>"
1928 [(set (match_operand:VF 0 "register_operand" "=x,v")
1929 (mult:VF
1930 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
1931 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1932 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
1933 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1934 "@
1935 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1936 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1937 [(set_attr "isa" "noavx,avx")
1938 (set_attr "type" "ssemul")
1939 (set_attr "prefix" "<bcst_mask_prefix3>")
1940 (set_attr "btver2_decode" "direct,double")
1941 (set_attr "mode" "<MODE>")])
1942
;; Standard scalar operation patterns which preserve the rest of the
;; vector for combiner.
;; Scalar mul/div on element 0 (mulss/divss etc.): the scalar result is
;; duplicated then merged into operand 1 with mask (const_int 1), so the
;; upper elements are taken unchanged from operand 1.
(define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
1946 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1947 (vec_merge:VF_128
1948 (vec_duplicate:VF_128
1949 (multdiv:<ssescalarmode>
1950 (vec_select:<ssescalarmode>
1951 (match_operand:VF_128 1 "register_operand" "0,v")
1952 (parallel [(const_int 0)]))
1953 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1954 (match_dup 1)
1955 (const_int 1)))]
1956 "TARGET_SSE"
1957 "@
1958 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1959 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1960 [(set_attr "isa" "noavx,avx")
1961 (set_attr "type" "sse<multdiv_mnemonic>")
1962 (set_attr "prefix" "orig,vex")
1963 (set_attr "btver2_decode" "direct,double")
1964 (set_attr "mode" "<ssescalarmode>")])
1965 
;; Builtin-facing scalar mul/div with optional mask/rounding decoration;
;; whole-vector op merged on element 0.
(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1967 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1968 (vec_merge:VF_128
1969 (multdiv:VF_128
1970 (match_operand:VF_128 1 "register_operand" "0,v")
1971 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1972 (match_dup 1)
1973 (const_int 1)))]
1974 "TARGET_SSE"
1975 "@
1976 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1977 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1978 [(set_attr "isa" "noavx,avx")
1979 (set_attr "type" "sse<multdiv_mnemonic>")
1980 (set_attr "prefix" "<round_scalar_prefix>")
1981 (set_attr "btver2_decode" "direct,double")
1982 (set_attr "mode" "<ssescalarmode>")])
1983
;; Double-precision vector divide: no reciprocal shortcut exists for DF,
;; so this just canonicalizes operands.
(define_expand "div<mode>3"
1985 [(set (match_operand:VF2 0 "register_operand")
1986 (div:VF2 (match_operand:VF2 1 "register_operand")
1987 (match_operand:VF2 2 "vector_operand")))]
1988 "TARGET_SSE2"
1989 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1990 
;; Single-precision vector divide: under -ffast-math style flags plus
;; TARGET_RECIP_VEC_DIV, open-code the division as a Newton-Raphson
;; refined RCPPS sequence (ix86_emit_swdivsf) instead of emitting divps.
(define_expand "div<mode>3"
1992 [(set (match_operand:VF1 0 "register_operand")
1993 (div:VF1 (match_operand:VF1 1 "register_operand")
1994 (match_operand:VF1 2 "vector_operand")))]
1995 "TARGET_SSE"
1996 {
1997 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1998 
1999 if (TARGET_SSE_MATH
2000 && TARGET_RECIP_VEC_DIV
2001 && !optimize_insn_for_size_p ()
2002 && flag_finite_math_only && !flag_trapping_math
2003 && flag_unsafe_math_optimizations)
2004 {
2005 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
2006 DONE;
2007 }
2008 })
2009 
;; The actual hardware divide insn (divps/divpd and their AVX/EVEX forms),
;; with optional masking and embedded rounding.
(define_insn "<sse>_div<mode>3<mask_name><round_name>"
2011 [(set (match_operand:VF 0 "register_operand" "=x,v")
2012 (div:VF
2013 (match_operand:VF 1 "register_operand" "0,v")
2014 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2015 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2016 "@
2017 div<ssemodesuffix>\t{%2, %0|%0, %2}
2018 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2019 [(set_attr "isa" "noavx,avx")
2020 (set_attr "type" "ssediv")
2021 (set_attr "prefix" "<bcst_mask_prefix3>")
2022 (set_attr "mode" "<MODE>")])
2023
;; Approximate reciprocal, vector form (rcpps / vrcpps), SF only.
(define_insn "<sse>_rcp<mode>2"
2025 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2026 (unspec:VF1_128_256
2027 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
2028 "TARGET_SSE"
2029 "%vrcpps\t{%1, %0|%0, %1}"
2030 [(set_attr "type" "sse")
2031 (set_attr "atom_sse_attr" "rcp")
2032 (set_attr "btver2_sse_attr" "rcp")
2033 (set_attr "prefix" "maybe_vex")
2034 (set_attr "mode" "<MODE>")])
2035 
;; Scalar rcpss: reciprocal of element 0 of operand 1 merged into
;; operand 2 (the pass-through vector).  Note %k1 prints the low-element
;; view of a (possibly memory) V4SF operand.
(define_insn "sse_vmrcpv4sf2"
2037 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2038 (vec_merge:V4SF
2039 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2040 UNSPEC_RCP)
2041 (match_operand:V4SF 2 "register_operand" "0,x")
2042 (const_int 1)))]
2043 "TARGET_SSE"
2044 "@
2045 rcpss\t{%1, %0|%0, %k1}
2046 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
2047 [(set_attr "isa" "noavx,avx")
2048 (set_attr "type" "sse")
2049 (set_attr "atom_sse_attr" "rcp")
2050 (set_attr "btver2_sse_attr" "rcp")
2051 (set_attr "prefix" "orig,vex")
2052 (set_attr "mode" "SF")])
2053 
;; Combiner variant of rcpss where the input is already a scalar SF
;; (no %k modifier needed); result duplicated and merged on element 0.
(define_insn "*sse_vmrcpv4sf2"
2055 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2056 (vec_merge:V4SF
2057 (vec_duplicate:V4SF
2058 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2059 UNSPEC_RCP))
2060 (match_operand:V4SF 2 "register_operand" "0,x")
2061 (const_int 1)))]
2062 "TARGET_SSE"
2063 "@
2064 rcpss\t{%1, %0|%0, %1}
2065 vrcpss\t{%1, %2, %0|%0, %2, %1}"
2066 [(set_attr "isa" "noavx,avx")
2067 (set_attr "type" "sse")
2068 (set_attr "atom_sse_attr" "rcp")
2069 (set_attr "btver2_sse_attr" "rcp")
2070 (set_attr "prefix" "orig,vex")
2071 (set_attr "mode" "SF")])
2072
;; AVX-512 vrcp14: reciprocal approximation, vector form, optionally
;; masked.
(define_insn "<mask_codefor>rcp14<mode><mask_name>"
2074 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2075 (unspec:VF_AVX512VL
2076 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2077 UNSPEC_RCP14))]
2078 "TARGET_AVX512F"
2079 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2080 [(set_attr "type" "sse")
2081 (set_attr "prefix" "evex")
2082 (set_attr "mode" "<MODE>")])
2083 
;; Scalar vrcp14ss/sd: approximation of element 0 of operand 1, upper
;; elements taken from operand 2.
(define_insn "srcp14<mode>"
2085 [(set (match_operand:VF_128 0 "register_operand" "=v")
2086 (vec_merge:VF_128
2087 (unspec:VF_128
2088 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2089 UNSPEC_RCP14)
2090 (match_operand:VF_128 2 "register_operand" "v")
2091 (const_int 1)))]
2092 "TARGET_AVX512F"
2093 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2094 [(set_attr "type" "sse")
2095 (set_attr "prefix" "evex")
2096 (set_attr "mode" "<MODE>")])
2097 
;; Masked scalar vrcp14: inner vec_merge applies mask operand 4
;; (with zero- or merge-masking chosen by operand 3, "0C"), outer
;; vec_merge keeps elements 1..N-1 from operand 2.
(define_insn "srcp14<mode>_mask"
2099 [(set (match_operand:VF_128 0 "register_operand" "=v")
2100 (vec_merge:VF_128
2101 (vec_merge:VF_128
2102 (unspec:VF_128
2103 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2104 UNSPEC_RCP14)
2105 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2106 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2107 (match_operand:VF_128 2 "register_operand" "v")
2108 (const_int 1)))]
2109 "TARGET_AVX512F"
2110 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2111 [(set_attr "type" "sse")
2112 (set_attr "prefix" "evex")
2113 (set_attr "mode" "<MODE>")])
2114
;; DF vector sqrt: straight to hardware, no reciprocal shortcut.
(define_expand "sqrt<mode>2"
2116 [(set (match_operand:VF2 0 "register_operand")
2117 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
2118 "TARGET_SSE2")
2119 
;; SF vector sqrt: with unsafe-math flags and TARGET_RECIP_VEC_SQRT,
;; expand to an RSQRTPS-based Newton-Raphson sequence instead
;; (ix86_emit_swsqrtsf with recip=false computes sqrt, not rsqrt).
(define_expand "sqrt<mode>2"
2121 [(set (match_operand:VF1 0 "register_operand")
2122 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2123 "TARGET_SSE"
2124 {
2125 if (TARGET_SSE_MATH
2126 && TARGET_RECIP_VEC_SQRT
2127 && !optimize_insn_for_size_p ()
2128 && flag_finite_math_only && !flag_trapping_math
2129 && flag_unsafe_math_optimizations)
2130 {
2131 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
2132 DONE;
2133 }
2134 })
2135 
;; Hardware sqrtps/sqrtpd and AVX/EVEX forms with optional
;; masking/rounding.
(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2137 [(set (match_operand:VF 0 "register_operand" "=x,v")
2138 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2139 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2140 "@
2141 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2142 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2143 [(set_attr "isa" "noavx,avx")
2144 (set_attr "type" "sse")
2145 (set_attr "atom_sse_attr" "sqrt")
2146 (set_attr "btver2_sse_attr" "sqrt")
2147 (set_attr "prefix" "maybe_vex")
2148 (set_attr "mode" "<MODE>")])
2149 
;; Scalar sqrtss/sd: sqrt of the full vector operand 1 merged on
;; element 0 into pass-through operand 2; mask/rounding optional.
(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2151 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2152 (vec_merge:VF_128
2153 (sqrt:VF_128
2154 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2155 (match_operand:VF_128 2 "register_operand" "0,v")
2156 (const_int 1)))]
2157 "TARGET_SSE"
2158 "@
2159 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2160 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2161 [(set_attr "isa" "noavx,avx")
2162 (set_attr "type" "sse")
2163 (set_attr "atom_sse_attr" "sqrt")
2164 (set_attr "prefix" "<round_scalar_prefix>")
2165 (set_attr "btver2_sse_attr" "sqrt")
2166 (set_attr "mode" "<ssescalarmode>")])
2167 
;; Combiner variant: operand 1 is a bare scalar, result duplicated and
;; merged on element 0.
(define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2169 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2170 (vec_merge:VF_128
2171 (vec_duplicate:VF_128
2172 (sqrt:<ssescalarmode>
2173 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2174 (match_operand:VF_128 2 "register_operand" "0,v")
2175 (const_int 1)))]
2176 "TARGET_SSE"
2177 "@
2178 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2179 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2180 [(set_attr "isa" "noavx,avx")
2181 (set_attr "type" "sse")
2182 (set_attr "atom_sse_attr" "sqrt")
2183 (set_attr "prefix" "<round_scalar_prefix>")
2184 (set_attr "btver2_sse_attr" "sqrt")
2185 (set_attr "mode" "<ssescalarmode>")])
2186
;; rsqrt expander: always open-codes via ix86_emit_swsqrtsf (recip=true),
;; i.e. a refined reciprocal-sqrt sequence rather than a single insn.
(define_expand "rsqrt<mode>2"
2188 [(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
2189 (unspec:VF1_AVX512ER_128_256
2190 [(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
2191 UNSPEC_RSQRT))]
2192 "TARGET_SSE && TARGET_SSE_MATH"
2193 {
2194 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
2195 DONE;
2196 })
2197 
;; Raw rsqrtps approximation insn.
(define_insn "<sse>_rsqrt<mode>2"
2199 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2200 (unspec:VF1_128_256
2201 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2202 "TARGET_SSE"
2203 "%vrsqrtps\t{%1, %0|%0, %1}"
2204 [(set_attr "type" "sse")
2205 (set_attr "prefix" "maybe_vex")
2206 (set_attr "mode" "<MODE>")])
2207 
;; AVX-512 vrsqrt14, vector form, optionally masked.
(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2209 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2210 (unspec:VF_AVX512VL
2211 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2212 UNSPEC_RSQRT14))]
2213 "TARGET_AVX512F"
2214 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2215 [(set_attr "type" "sse")
2216 (set_attr "prefix" "evex")
2217 (set_attr "mode" "<MODE>")])
2218 
;; Scalar vrsqrt14ss/sd: element 0 approximated, upper elements from
;; operand 2.
(define_insn "rsqrt14<mode>"
2220 [(set (match_operand:VF_128 0 "register_operand" "=v")
2221 (vec_merge:VF_128
2222 (unspec:VF_128
2223 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2224 UNSPEC_RSQRT14)
2225 (match_operand:VF_128 2 "register_operand" "v")
2226 (const_int 1)))]
2227 "TARGET_AVX512F"
2228 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2229 [(set_attr "type" "sse")
2230 (set_attr "prefix" "evex")
2231 (set_attr "mode" "<MODE>")])
2232 
;; Masked scalar vrsqrt14: same nested vec_merge structure as
;; srcp14<mode>_mask above (mask operand 4, merge/zero source operand 3).
(define_insn "rsqrt14_<mode>_mask"
2234 [(set (match_operand:VF_128 0 "register_operand" "=v")
2235 (vec_merge:VF_128
2236 (vec_merge:VF_128
2237 (unspec:VF_128
2238 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2239 UNSPEC_RSQRT14)
2240 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2241 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2242 (match_operand:VF_128 2 "register_operand" "v")
2243 (const_int 1)))]
2244 "TARGET_AVX512F"
2245 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2246 [(set_attr "type" "sse")
2247 (set_attr "prefix" "evex")
2248 (set_attr "mode" "<MODE>")])
2249 
;; Scalar rsqrtss: low element of operand 1 (hence %k1) merged into
;; pass-through operand 2.
(define_insn "sse_vmrsqrtv4sf2"
2251 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2252 (vec_merge:V4SF
2253 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2254 UNSPEC_RSQRT)
2255 (match_operand:V4SF 2 "register_operand" "0,x")
2256 (const_int 1)))]
2257 "TARGET_SSE"
2258 "@
2259 rsqrtss\t{%1, %0|%0, %k1}
2260 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2261 [(set_attr "isa" "noavx,avx")
2262 (set_attr "type" "sse")
2263 (set_attr "prefix" "orig,vex")
2264 (set_attr "mode" "SF")])
2265 
;; Combiner variant of rsqrtss taking a bare SF operand.
(define_insn "*sse_vmrsqrtv4sf2"
2267 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2268 (vec_merge:V4SF
2269 (vec_duplicate:V4SF
2270 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2271 UNSPEC_RSQRT))
2272 (match_operand:V4SF 2 "register_operand" "0,x")
2273 (const_int 1)))]
2274 "TARGET_SSE"
2275 "@
2276 rsqrtss\t{%1, %0|%0, %1}
2277 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2278 [(set_attr "isa" "noavx,avx")
2279 (set_attr "type" "sse")
2280 (set_attr "prefix" "orig,vex")
2281 (set_attr "mode" "SF")])
2282
;; smax/smin expander.  When IEEE semantics matter (NaNs or signed
;; zeros can be observed) we must use the non-commutative ieee_*
;; patterns below; otherwise fall through to the relaxed commutative
;; insn.
(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2284 [(set (match_operand:VF 0 "register_operand")
2285 (smaxmin:VF
2286 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2287 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2288 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2289 {
2290 if (!flag_finite_math_only || flag_signed_zeros)
2291 {
2292 operands[1] = force_reg (<MODE>mode, operands[1]);
2293 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2294 (operands[0], operands[1], operands[2]
2295 <mask_operand_arg34>
2296 <round_saeonly_mask_arg3>));
2297 DONE;
2298 }
2299 else
2300 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2301 })
2302 
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
;; are undefined in this condition, we're certain this is correct.
2307 
(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2309 [(set (match_operand:VF 0 "register_operand" "=x,v")
2310 (smaxmin:VF
2311 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2312 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2313 "TARGET_SSE
2314 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2315 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2316 "@
2317 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2318 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2319 [(set_attr "isa" "noavx,avx")
2320 (set_attr "type" "sseadd")
2321 (set_attr "btver2_sse_attr" "maxmin")
2322 (set_attr "prefix" "<mask_prefix3>")
2323 (set_attr "mode" "<MODE>")])
2324 
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
;; max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
2330 
;; Represented as an unspec because RTL SMAX/SMIN could be commuted;
;; operand order here matches the hardware maxps/minps semantics.
(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2332 [(set (match_operand:VF 0 "register_operand" "=x,v")
2333 (unspec:VF
2334 [(match_operand:VF 1 "register_operand" "0,v")
2335 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2336 IEEE_MAXMIN))]
2337 "TARGET_SSE
2338 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2339 "@
2340 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2341 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2342 [(set_attr "isa" "noavx,avx")
2343 (set_attr "type" "sseadd")
2344 (set_attr "btver2_sse_attr" "maxmin")
2345 (set_attr "prefix" "<mask_prefix3>")
2346 (set_attr "mode" "<MODE>")])
2347 
;; Standard scalar operation patterns which preserve the rest of the
;; vector for combiner.
;; Scalar IEEE max/min on element 0, upper elements preserved from
;; operand 1.
(define_insn "*ieee_<ieee_maxmin><mode>3"
2351 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2352 (vec_merge:VF_128
2353 (vec_duplicate:VF_128
2354 (unspec:<ssescalarmode>
2355 [(vec_select:<ssescalarmode>
2356 (match_operand:VF_128 1 "register_operand" "0,v")
2357 (parallel [(const_int 0)]))
2358 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2359 IEEE_MAXMIN))
2360 (match_dup 1)
2361 (const_int 1)))]
2362 "TARGET_SSE"
2363 "@
2364 <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2365 v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2366 [(set_attr "isa" "noavx,avx")
2367 (set_attr "type" "sseadd")
2368 (set_attr "btver2_sse_attr" "maxmin")
2369 (set_attr "prefix" "orig,vex")
2370 (set_attr "mode" "<ssescalarmode>")])
2371 
;; Builtin-facing scalar max/min (maxss/minss etc.) with optional
;; mask/SAE decoration; whole-vector op merged on element 0.
(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2373 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2374 (vec_merge:VF_128
2375 (smaxmin:VF_128
2376 (match_operand:VF_128 1 "register_operand" "0,v")
2377 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2378 (match_dup 1)
2379 (const_int 1)))]
2380 "TARGET_SSE"
2381 "@
2382 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2383 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2384 [(set_attr "isa" "noavx,avx")
2385 (set_attr "type" "sse")
2386 (set_attr "btver2_sse_attr" "maxmin")
2387 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2388 (set_attr "mode" "<ssescalarmode>")])
2389
;; ADDSUB family: vec_merge of (op1 - op2) and (op1 + op2) where the
;; merge mask selects subtraction in the even lanes -- mask 5 = 0b0101
;; for 4 elements, 85 = 0b01010101 for 8, 1 = 0b01 for 2.
(define_insn "avx_addsubv4df3"
2391 [(set (match_operand:V4DF 0 "register_operand" "=x")
2392 (vec_merge:V4DF
2393 (minus:V4DF
2394 (match_operand:V4DF 1 "register_operand" "x")
2395 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2396 (plus:V4DF (match_dup 1) (match_dup 2))
2397 (const_int 5)))]
2398 "TARGET_AVX"
2399 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2400 [(set_attr "type" "sseadd")
2401 (set_attr "prefix" "vex")
2402 (set_attr "mode" "V4DF")])
2403 
(define_insn "sse3_addsubv2df3"
2405 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2406 (vec_merge:V2DF
2407 (minus:V2DF
2408 (match_operand:V2DF 1 "register_operand" "0,x")
2409 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2410 (plus:V2DF (match_dup 1) (match_dup 2))
2411 (const_int 1)))]
2412 "TARGET_SSE3"
2413 "@
2414 addsubpd\t{%2, %0|%0, %2}
2415 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2416 [(set_attr "isa" "noavx,avx")
2417 (set_attr "type" "sseadd")
2418 (set_attr "atom_unit" "complex")
2419 (set_attr "prefix" "orig,vex")
2420 (set_attr "mode" "V2DF")])
2421 
(define_insn "avx_addsubv8sf3"
2423 [(set (match_operand:V8SF 0 "register_operand" "=x")
2424 (vec_merge:V8SF
2425 (minus:V8SF
2426 (match_operand:V8SF 1 "register_operand" "x")
2427 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2428 (plus:V8SF (match_dup 1) (match_dup 2))
2429 (const_int 85)))]
2430 "TARGET_AVX"
2431 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2432 [(set_attr "type" "sseadd")
2433 (set_attr "prefix" "vex")
2434 (set_attr "mode" "V8SF")])
2435 
(define_insn "sse3_addsubv4sf3"
2437 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2438 (vec_merge:V4SF
2439 (minus:V4SF
2440 (match_operand:V4SF 1 "register_operand" "0,x")
2441 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2442 (plus:V4SF (match_dup 1) (match_dup 2))
2443 (const_int 5)))]
2444 "TARGET_SSE3"
2445 "@
2446 addsubps\t{%2, %0|%0, %2}
2447 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2448 [(set_attr "isa" "noavx,avx")
2449 (set_attr "type" "sseadd")
2450 (set_attr "prefix" "orig,vex")
2451 (set_attr "prefix_rep" "1,*")
2452 (set_attr "mode" "V4SF")])
2453
;; Recognize vec_merge(minus, plus, mask) with the same operand pair in
;; both arms (in either order) and canonicalize to the addsub form
;; matched by the insns above.
(define_split
2455 [(set (match_operand:VF_128_256 0 "register_operand")
2456 (match_operator:VF_128_256 6 "addsub_vm_operator"
2457 [(minus:VF_128_256
2458 (match_operand:VF_128_256 1 "register_operand")
2459 (match_operand:VF_128_256 2 "vector_operand"))
2460 (plus:VF_128_256
2461 (match_operand:VF_128_256 3 "vector_operand")
2462 (match_operand:VF_128_256 4 "vector_operand"))
2463 (match_operand 5 "const_int_operand")]))]
2464 "TARGET_SSE3
2465 && can_create_pseudo_p ()
2466 && ((rtx_equal_p (operands[1], operands[3])
2467 && rtx_equal_p (operands[2], operands[4]))
2468 || (rtx_equal_p (operands[1], operands[4])
2469 && rtx_equal_p (operands[2], operands[3])))"
2470 [(set (match_dup 0)
2471 (vec_merge:VF_128_256
2472 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2473 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2474 (match_dup 5)))])
2475 
;; Same, but with PLUS first: the mask must be complemented (within the
;; element count) to compensate for swapping the two vec_merge arms.
(define_split
2477 [(set (match_operand:VF_128_256 0 "register_operand")
2478 (match_operator:VF_128_256 6 "addsub_vm_operator"
2479 [(plus:VF_128_256
2480 (match_operand:VF_128_256 1 "vector_operand")
2481 (match_operand:VF_128_256 2 "vector_operand"))
2482 (minus:VF_128_256
2483 (match_operand:VF_128_256 3 "register_operand")
2484 (match_operand:VF_128_256 4 "vector_operand"))
2485 (match_operand 5 "const_int_operand")]))]
2486 "TARGET_SSE3
2487 && can_create_pseudo_p ()
2488 && ((rtx_equal_p (operands[1], operands[3])
2489 && rtx_equal_p (operands[2], operands[4]))
2490 || (rtx_equal_p (operands[1], operands[4])
2491 && rtx_equal_p (operands[2], operands[3])))"
2492 [(set (match_dup 0)
2493 (vec_merge:VF_128_256
2494 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2495 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2496 (match_dup 5)))]
2497 {
2498 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
2499 operands[5]
2500 = GEN_INT (~INTVAL (operands[5])
2501 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2502 })
2503 
;; Variant matched via vec_select over a vec_concat of both arms: the
;; selection PARALLEL (operand 5) is converted to a vec_merge mask by
;; setting bit i when lane i selects from the MINUS half (index below
;; NUNITS, i.e. from the first half of the concat).
(define_split
2505 [(set (match_operand:VF_128_256 0 "register_operand")
2506 (match_operator:VF_128_256 7 "addsub_vs_operator"
2507 [(vec_concat:<ssedoublemode>
2508 (minus:VF_128_256
2509 (match_operand:VF_128_256 1 "register_operand")
2510 (match_operand:VF_128_256 2 "vector_operand"))
2511 (plus:VF_128_256
2512 (match_operand:VF_128_256 3 "vector_operand")
2513 (match_operand:VF_128_256 4 "vector_operand")))
2514 (match_parallel 5 "addsub_vs_parallel"
2515 [(match_operand 6 "const_int_operand")])]))]
2516 "TARGET_SSE3
2517 && can_create_pseudo_p ()
2518 && ((rtx_equal_p (operands[1], operands[3])
2519 && rtx_equal_p (operands[2], operands[4]))
2520 || (rtx_equal_p (operands[1], operands[4])
2521 && rtx_equal_p (operands[2], operands[3])))"
2522 [(set (match_dup 0)
2523 (vec_merge:VF_128_256
2524 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2525 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2526 (match_dup 5)))]
2527 {
2528 int i, nelt = XVECLEN (operands[5], 0);
2529 HOST_WIDE_INT ival = 0;
2530 
2531 for (i = 0; i < nelt; i++)
2532 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2533 ival |= HOST_WIDE_INT_1 << i;
2534 
2535 operands[5] = GEN_INT (ival);
2536 })
2537 
;; As above with PLUS first in the concat; a lane selects the MINUS half
;; when its index is >= NUNITS (second half of the concat).
(define_split
2539 [(set (match_operand:VF_128_256 0 "register_operand")
2540 (match_operator:VF_128_256 7 "addsub_vs_operator"
2541 [(vec_concat:<ssedoublemode>
2542 (plus:VF_128_256
2543 (match_operand:VF_128_256 1 "vector_operand")
2544 (match_operand:VF_128_256 2 "vector_operand"))
2545 (minus:VF_128_256
2546 (match_operand:VF_128_256 3 "register_operand")
2547 (match_operand:VF_128_256 4 "vector_operand")))
2548 (match_parallel 5 "addsub_vs_parallel"
2549 [(match_operand 6 "const_int_operand")])]))]
2550 "TARGET_SSE3
2551 && can_create_pseudo_p ()
2552 && ((rtx_equal_p (operands[1], operands[3])
2553 && rtx_equal_p (operands[2], operands[4]))
2554 || (rtx_equal_p (operands[1], operands[4])
2555 && rtx_equal_p (operands[2], operands[3])))"
2556 [(set (match_dup 0)
2557 (vec_merge:VF_128_256
2558 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2559 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2560 (match_dup 5)))]
2561 {
2562 int i, nelt = XVECLEN (operands[5], 0);
2563 HOST_WIDE_INT ival = 0;
2564 
2565 for (i = 0; i < nelt; i++)
2566 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2567 ival |= HOST_WIDE_INT_1 << i;
2568 
2569 operands[5] = GEN_INT (ival);
2570 })
2571
;; 256-bit horizontal add/sub of doubles (vhaddpd/vhsubpd): operates
;; per 128-bit lane -- result lanes are {op1[0]op op1[1], op2[0]op
;; op2[1], op1[2]op op1[3], op2[2]op op2[3]}.
(define_insn "avx_h<insn>v4df3"
2573 [(set (match_operand:V4DF 0 "register_operand" "=x")
2574 (vec_concat:V4DF
2575 (vec_concat:V2DF
2576 (plusminus:DF
2577 (vec_select:DF
2578 (match_operand:V4DF 1 "register_operand" "x")
2579 (parallel [(const_int 0)]))
2580 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2581 (plusminus:DF
2582 (vec_select:DF
2583 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2584 (parallel [(const_int 0)]))
2585 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2586 (vec_concat:V2DF
2587 (plusminus:DF
2588 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2589 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2590 (plusminus:DF
2591 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2592 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2593 "TARGET_AVX"
2594 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2595 [(set_attr "type" "sseadd")
2596 (set_attr "prefix" "vex")
2597 (set_attr "mode" "V4DF")])
2598
;; haddpd: result = {op1[0]+op1[1], op2[0]+op2[1]}.  The expander fixes
;; the element order; the insn below also matches either selection order
;; within each pair (addition is commutative here).
(define_expand "sse3_haddv2df3"
2600 [(set (match_operand:V2DF 0 "register_operand")
2601 (vec_concat:V2DF
2602 (plus:DF
2603 (vec_select:DF
2604 (match_operand:V2DF 1 "register_operand")
2605 (parallel [(const_int 0)]))
2606 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2607 (plus:DF
2608 (vec_select:DF
2609 (match_operand:V2DF 2 "vector_operand")
2610 (parallel [(const_int 0)]))
2611 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2612 "TARGET_SSE3")
2613 
;; Matching insn: indices within each pair are free (operands 3/4 and
;; 5/6) but must differ, so any {0,1}/{1,0} combination is accepted.
(define_insn "*sse3_haddv2df3"
2615 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2616 (vec_concat:V2DF
2617 (plus:DF
2618 (vec_select:DF
2619 (match_operand:V2DF 1 "register_operand" "0,x")
2620 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2621 (vec_select:DF
2622 (match_dup 1)
2623 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2624 (plus:DF
2625 (vec_select:DF
2626 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2627 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2628 (vec_select:DF
2629 (match_dup 2)
2630 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2631 "TARGET_SSE3
2632 && INTVAL (operands[3]) != INTVAL (operands[4])
2633 && INTVAL (operands[5]) != INTVAL (operands[6])"
2634 "@
2635 haddpd\t{%2, %0|%0, %2}
2636 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2637 [(set_attr "isa" "noavx,avx")
2638 (set_attr "type" "sseadd")
2639 (set_attr "prefix" "orig,vex")
2640 (set_attr "mode" "V2DF")])
2641
;; hsubpd: result = {op1[0]-op1[1], op2[0]-op2[1]}.  Unlike hadd, the
;; element order is fixed (subtraction is not commutative).
(define_insn "sse3_hsubv2df3"
2643 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2644 (vec_concat:V2DF
2645 (minus:DF
2646 (vec_select:DF
2647 (match_operand:V2DF 1 "register_operand" "0,x")
2648 (parallel [(const_int 0)]))
2649 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2650 (minus:DF
2651 (vec_select:DF
2652 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2653 (parallel [(const_int 0)]))
2654 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2655 "TARGET_SSE3"
2656 "@
2657 hsubpd\t{%2, %0|%0, %2}
2658 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2659 [(set_attr "isa" "noavx,avx")
2660 (set_attr "type" "sseadd")
2661 (set_attr "prefix" "orig,vex")
2662 (set_attr "mode" "V2DF")])
2663 
;; DF-only reduction: op1[0] + op1[1] computed with haddpd using the
;; same register for both sources; only the low result element is used.
(define_insn "*sse3_haddv2df3_low"
2665 [(set (match_operand:DF 0 "register_operand" "=x,x")
2666 (plus:DF
2667 (vec_select:DF
2668 (match_operand:V2DF 1 "register_operand" "0,x")
2669 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2670 (vec_select:DF
2671 (match_dup 1)
2672 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2673 "TARGET_SSE3
2674 && INTVAL (operands[2]) != INTVAL (operands[3])"
2675 "@
2676 haddpd\t{%0, %0|%0, %0}
2677 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2678 [(set_attr "isa" "noavx,avx")
2679 (set_attr "type" "sseadd1")
2680 (set_attr "prefix" "orig,vex")
2681 (set_attr "mode" "V2DF")])
2682 
;; DF-only op1[0] - op1[1] via hsubpd; element order fixed.
(define_insn "*sse3_hsubv2df3_low"
2684 [(set (match_operand:DF 0 "register_operand" "=x,x")
2685 (minus:DF
2686 (vec_select:DF
2687 (match_operand:V2DF 1 "register_operand" "0,x")
2688 (parallel [(const_int 0)]))
2689 (vec_select:DF
2690 (match_dup 1)
2691 (parallel [(const_int 1)]))))]
2692 "TARGET_SSE3"
2693 "@
2694 hsubpd\t{%0, %0|%0, %0}
2695 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2696 [(set_attr "isa" "noavx,avx")
2697 (set_attr "type" "sseadd1")
2698 (set_attr "prefix" "orig,vex")
2699 (set_attr "mode" "V2DF")])
2700
;; 256-bit horizontal add/sub of floats (vhaddps/vhsubps), per 128-bit
;; lane: low lane = {op1[0]op op1[1], op1[2]op op1[3], op2[0]op op2[1],
;; op2[2]op op2[3]}, high lane likewise with elements 4..7.
(define_insn "avx_h<insn>v8sf3"
2702 [(set (match_operand:V8SF 0 "register_operand" "=x")
2703 (vec_concat:V8SF
2704 (vec_concat:V4SF
2705 (vec_concat:V2SF
2706 (plusminus:SF
2707 (vec_select:SF
2708 (match_operand:V8SF 1 "register_operand" "x")
2709 (parallel [(const_int 0)]))
2710 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2711 (plusminus:SF
2712 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2713 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2714 (vec_concat:V2SF
2715 (plusminus:SF
2716 (vec_select:SF
2717 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2718 (parallel [(const_int 0)]))
2719 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2720 (plusminus:SF
2721 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2722 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2723 (vec_concat:V4SF
2724 (vec_concat:V2SF
2725 (plusminus:SF
2726 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2727 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2728 (plusminus:SF
2729 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2730 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2731 (vec_concat:V2SF
2732 (plusminus:SF
2733 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2734 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2735 (plusminus:SF
2736 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2737 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2738 "TARGET_AVX"
2739 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2740 [(set_attr "type" "sseadd")
2741 (set_attr "prefix" "vex")
2742 (set_attr "mode" "V8SF")])
2743
2744 (define_insn "sse3_h<insn>v4sf3"
2745 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2746 (vec_concat:V4SF
2747 (vec_concat:V2SF
2748 (plusminus:SF
2749 (vec_select:SF
2750 (match_operand:V4SF 1 "register_operand" "0,x")
2751 (parallel [(const_int 0)]))
2752 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2753 (plusminus:SF
2754 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2755 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2756 (vec_concat:V2SF
2757 (plusminus:SF
2758 (vec_select:SF
2759 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2760 (parallel [(const_int 0)]))
2761 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2762 (plusminus:SF
2763 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2764 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2765 "TARGET_SSE3"
2766 "@
2767 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2768 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2769 [(set_attr "isa" "noavx,avx")
2770 (set_attr "type" "sseadd")
2771 (set_attr "atom_unit" "complex")
2772 (set_attr "prefix" "orig,vex")
2773 (set_attr "prefix_rep" "1,*")
2774 (set_attr "mode" "V4SF")])
2775
2776 (define_mode_iterator REDUC_SSE_PLUS_MODE
2777 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2778
2779 (define_expand "reduc_plus_scal_<mode>"
2780 [(plus:REDUC_SSE_PLUS_MODE
2781 (match_operand:<ssescalarmode> 0 "register_operand")
2782 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2783 ""
2784 {
2785 rtx tmp = gen_reg_rtx (<MODE>mode);
2786 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2787 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2788 const0_rtx));
2789 DONE;
2790 })
2791
2792 (define_expand "reduc_plus_scal_v16qi"
2793 [(plus:V16QI
2794 (match_operand:QI 0 "register_operand")
2795 (match_operand:V16QI 1 "register_operand"))]
2796 "TARGET_SSE2"
2797 {
2798 rtx tmp = gen_reg_rtx (V1TImode);
2799 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
2800 GEN_INT (64)));
2801 rtx tmp2 = gen_reg_rtx (V16QImode);
2802 emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
2803 rtx tmp3 = gen_reg_rtx (V16QImode);
2804 emit_move_insn (tmp3, CONST0_RTX (V16QImode));
2805 rtx tmp4 = gen_reg_rtx (V2DImode);
2806 emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
2807 tmp4 = gen_lowpart (V16QImode, tmp4);
2808 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
2809 DONE;
2810 })
2811
2812 (define_mode_iterator REDUC_PLUS_MODE
2813 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2814 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2815 (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
2816
2817 (define_expand "reduc_plus_scal_<mode>"
2818 [(plus:REDUC_PLUS_MODE
2819 (match_operand:<ssescalarmode> 0 "register_operand")
2820 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2821 ""
2822 {
2823 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2824 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2825 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2826 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
2827 emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
2828 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2829 DONE;
2830 })
2831
2832 ;; Modes handled by reduc_sm{in,ax}* patterns.
2833 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2834 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2835 (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
2836 (V2DI "TARGET_SSE4_2")])
2837
2838 (define_expand "reduc_<code>_scal_<mode>"
2839 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2840 (match_operand:<ssescalarmode> 0 "register_operand")
2841 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2842 ""
2843 {
2844 rtx tmp = gen_reg_rtx (<MODE>mode);
2845 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2846 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2847 const0_rtx));
2848 DONE;
2849 })
2850
2851 (define_mode_iterator REDUC_SMINMAX_MODE
2852 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2853 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2854 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2855 (V64QI "TARGET_AVX512BW")
2856 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2857 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2858 (V8DF "TARGET_AVX512F")])
2859
2860 (define_expand "reduc_<code>_scal_<mode>"
2861 [(smaxmin:REDUC_SMINMAX_MODE
2862 (match_operand:<ssescalarmode> 0 "register_operand")
2863 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2864 ""
2865 {
2866 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2867 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2868 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2869 emit_insn (gen_<code><ssehalfvecmodelower>3
2870 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2871 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2872 DONE;
2873 })
2874
2875 (define_expand "reduc_<code>_scal_<mode>"
2876 [(umaxmin:VI_AVX512BW
2877 (match_operand:<ssescalarmode> 0 "register_operand")
2878 (match_operand:VI_AVX512BW 1 "register_operand"))]
2879 "TARGET_AVX512F"
2880 {
2881 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2882 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2883 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2884 emit_insn (gen_<code><ssehalfvecmodelower>3
2885 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2886 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2887 DONE;
2888 })
2889
2890 (define_expand "reduc_<code>_scal_<mode>"
2891 [(umaxmin:VI_256
2892 (match_operand:<ssescalarmode> 0 "register_operand")
2893 (match_operand:VI_256 1 "register_operand"))]
2894 "TARGET_AVX2"
2895 {
2896 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2897 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2898 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2899 emit_insn (gen_<code><ssehalfvecmodelower>3
2900 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2901 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2902 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2903 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2904 (operands[0], tmp3, const0_rtx));
2905 DONE;
2906 })
2907
2908 (define_expand "reduc_umin_scal_v8hi"
2909 [(umin:V8HI
2910 (match_operand:HI 0 "register_operand")
2911 (match_operand:V8HI 1 "register_operand"))]
2912 "TARGET_SSE4_1"
2913 {
2914 rtx tmp = gen_reg_rtx (V8HImode);
2915 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2916 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2917 DONE;
2918 })
2919
2920 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
2921 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2922 (unspec:VF_AVX512VL
2923 [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2924 (match_operand:SI 2 "const_0_to_255_operand")]
2925 UNSPEC_REDUCE))]
2926 "TARGET_AVX512DQ"
2927 "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
2928 [(set_attr "type" "sse")
2929 (set_attr "prefix" "evex")
2930 (set_attr "mode" "<MODE>")])
2931
2932 (define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
2933 [(set (match_operand:VF_128 0 "register_operand" "=v")
2934 (vec_merge:VF_128
2935 (unspec:VF_128
2936 [(match_operand:VF_128 1 "register_operand" "v")
2937 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
2938 (match_operand:SI 3 "const_0_to_255_operand")]
2939 UNSPEC_REDUCE)
2940 (match_dup 1)
2941 (const_int 1)))]
2942 "TARGET_AVX512DQ"
2943 "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
2944 [(set_attr "type" "sse")
2945 (set_attr "prefix" "evex")
2946 (set_attr "mode" "<MODE>")])
2947
2948 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2949 ;;
2950 ;; Parallel floating point comparisons
2951 ;;
2952 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2953
2954 (define_insn "avx_cmp<mode>3"
2955 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2956 (unspec:VF_128_256
2957 [(match_operand:VF_128_256 1 "register_operand" "x")
2958 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2959 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2960 UNSPEC_PCMP))]
2961 "TARGET_AVX"
2962 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2963 [(set_attr "type" "ssecmp")
2964 (set_attr "length_immediate" "1")
2965 (set_attr "prefix" "vex")
2966 (set_attr "mode" "<MODE>")])
2967
2968 (define_insn "avx_vmcmp<mode>3"
2969 [(set (match_operand:VF_128 0 "register_operand" "=x")
2970 (vec_merge:VF_128
2971 (unspec:VF_128
2972 [(match_operand:VF_128 1 "register_operand" "x")
2973 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2974 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2975 UNSPEC_PCMP)
2976 (match_dup 1)
2977 (const_int 1)))]
2978 "TARGET_AVX"
2979 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2980 [(set_attr "type" "ssecmp")
2981 (set_attr "length_immediate" "1")
2982 (set_attr "prefix" "vex")
2983 (set_attr "mode" "<ssescalarmode>")])
2984
2985 (define_insn "*<sse>_maskcmp<mode>3_comm"
2986 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2987 (match_operator:VF_128_256 3 "sse_comparison_operator"
2988 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2989 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2990 "TARGET_SSE
2991 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2992 "@
2993 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2994 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2995 [(set_attr "isa" "noavx,avx")
2996 (set_attr "type" "ssecmp")
2997 (set_attr "length_immediate" "1")
2998 (set_attr "prefix" "orig,vex")
2999 (set_attr "mode" "<MODE>")])
3000
3001 (define_insn "<sse>_maskcmp<mode>3"
3002 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3003 (match_operator:VF_128_256 3 "sse_comparison_operator"
3004 [(match_operand:VF_128_256 1 "register_operand" "0,x")
3005 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3006 "TARGET_SSE"
3007 "@
3008 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3009 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3010 [(set_attr "isa" "noavx,avx")
3011 (set_attr "type" "ssecmp")
3012 (set_attr "length_immediate" "1")
3013 (set_attr "prefix" "orig,vex")
3014 (set_attr "mode" "<MODE>")])
3015
3016 (define_insn "<sse>_vmmaskcmp<mode>3"
3017 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3018 (vec_merge:VF_128
3019 (match_operator:VF_128 3 "sse_comparison_operator"
3020 [(match_operand:VF_128 1 "register_operand" "0,x")
3021 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
3022 (match_dup 1)
3023 (const_int 1)))]
3024 "TARGET_SSE"
3025 "@
3026 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
3027 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
3028 [(set_attr "isa" "noavx,avx")
3029 (set_attr "type" "ssecmp")
3030 (set_attr "length_immediate" "1,*")
3031 (set_attr "prefix" "orig,vex")
3032 (set_attr "mode" "<ssescalarmode>")])
3033
3034 (define_mode_attr cmp_imm_predicate
3035 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
3036 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
3037 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
3038 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
3039 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
3040 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
3041 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
3042 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
3043 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
3044
3045 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
3046 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3047 (unspec:<avx512fmaskmode>
3048 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
3049 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
3050 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3051 UNSPEC_PCMP))]
3052 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
3053 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
3054 [(set_attr "type" "ssecmp")
3055 (set_attr "length_immediate" "1")
3056 (set_attr "prefix" "evex")
3057 (set_attr "mode" "<sseinsnmode>")])
3058
3059 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
3060 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3061 (unspec:<avx512fmaskmode>
3062 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3063 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3064 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3065 UNSPEC_PCMP))]
3066 "TARGET_AVX512BW"
3067 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3068 [(set_attr "type" "ssecmp")
3069 (set_attr "length_immediate" "1")
3070 (set_attr "prefix" "evex")
3071 (set_attr "mode" "<sseinsnmode>")])
3072
3073 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3074 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3075 (unspec:<avx512fmaskmode>
3076 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3077 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3078 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3079 UNSPEC_UNSIGNED_PCMP))]
3080 "TARGET_AVX512BW"
3081 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3082 [(set_attr "type" "ssecmp")
3083 (set_attr "length_immediate" "1")
3084 (set_attr "prefix" "evex")
3085 (set_attr "mode" "<sseinsnmode>")])
3086
3087 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3088 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3089 (unspec:<avx512fmaskmode>
3090 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
3091 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
3092 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3093 UNSPEC_UNSIGNED_PCMP))]
3094 "TARGET_AVX512F"
3095 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3096 [(set_attr "type" "ssecmp")
3097 (set_attr "length_immediate" "1")
3098 (set_attr "prefix" "evex")
3099 (set_attr "mode" "<sseinsnmode>")])
3100
3101 (define_int_iterator UNSPEC_PCMP_ITER
3102 [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
3103
3104 (define_int_attr pcmp_signed_mask
3105 [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")])
3106
3107 ;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw.
3108 ;; For signed comparisons, handle only EQ (predicate 0) and NEQ (predicate 4);
3109 ;; for unsigned comparisons, additionally handle LE (2) and NLE (6), which are
3109 ;; equivalent to EQ and NEQ for the subtract-compared-to-zero case.
3110
3111 (define_split
3112 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3113 (unspec:<avx512fmaskmode>
3114 [(us_minus:VI12_AVX512VL
3115 (match_operand:VI12_AVX512VL 1 "vector_operand")
3116 (match_operand:VI12_AVX512VL 2 "vector_operand"))
3117 (match_operand:VI12_AVX512VL 3 "const0_operand")
3118 (match_operand:SI 4 "const_0_to_7_operand")]
3119 UNSPEC_PCMP_ITER))]
3120 "TARGET_AVX512BW
3121 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)
3122 && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0"
3123 [(const_int 0)]
3124 {
3125 /* LE: 2, NLT: 5, NLE: 6, LT: 1 */
3126 int cmp_predicate = 2; /* LE */
3127 if (MEM_P (operands[1]))
3128 {
3129 std::swap (operands[1], operands[2]);
3130 cmp_predicate = 5; /* NLT (GE) */
3131 }
3132 if ((INTVAL (operands[4]) & 4) != 0)
3133 cmp_predicate ^= 4; /* Invert the comparison to NLE (GT) or LT. */
3134 emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],operands[2],
3135 GEN_INT (cmp_predicate)));
3136 DONE;
3137 })
3138
3139 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
3140 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3141 (and:<avx512fmaskmode>
3142 (unspec:<avx512fmaskmode>
3143 [(match_operand:VF_128 1 "register_operand" "v")
3144 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3145 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3146 UNSPEC_PCMP)
3147 (const_int 1)))]
3148 "TARGET_AVX512F"
3149 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
3150 [(set_attr "type" "ssecmp")
3151 (set_attr "length_immediate" "1")
3152 (set_attr "prefix" "evex")
3153 (set_attr "mode" "<ssescalarmode>")])
3154
3155 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
3156 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3157 (and:<avx512fmaskmode>
3158 (unspec:<avx512fmaskmode>
3159 [(match_operand:VF_128 1 "register_operand" "v")
3160 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3161 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3162 UNSPEC_PCMP)
3163 (and:<avx512fmaskmode>
3164 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
3165 (const_int 1))))]
3166 "TARGET_AVX512F"
3167 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
3168 [(set_attr "type" "ssecmp")
3169 (set_attr "length_immediate" "1")
3170 (set_attr "prefix" "evex")
3171 (set_attr "mode" "<ssescalarmode>")])
3172
3173 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
3174 [(set (reg:CCFP FLAGS_REG)
3175 (compare:CCFP
3176 (vec_select:MODEF
3177 (match_operand:<ssevecmode> 0 "register_operand" "v")
3178 (parallel [(const_int 0)]))
3179 (vec_select:MODEF
3180 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3181 (parallel [(const_int 0)]))))]
3182 "SSE_FLOAT_MODE_P (<MODE>mode)"
3183 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
3184 [(set_attr "type" "ssecomi")
3185 (set_attr "prefix" "maybe_vex")
3186 (set_attr "prefix_rep" "0")
3187 (set (attr "prefix_data16")
3188 (if_then_else (eq_attr "mode" "DF")
3189 (const_string "1")
3190 (const_string "0")))
3191 (set_attr "mode" "<MODE>")])
3192
3193 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3194 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3195 (match_operator:<avx512fmaskmode> 1 ""
3196 [(match_operand:V48_AVX512VL 2 "register_operand")
3197 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
3198 "TARGET_AVX512F"
3199 {
3200 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3201 operands[2], operands[3]);
3202 gcc_assert (ok);
3203 DONE;
3204 })
3205
3206 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3207 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3208 (match_operator:<avx512fmaskmode> 1 ""
3209 [(match_operand:VI12_AVX512VL 2 "register_operand")
3210 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3211 "TARGET_AVX512BW"
3212 {
3213 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3214 operands[2], operands[3]);
3215 gcc_assert (ok);
3216 DONE;
3217 })
3218
3219 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3220 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3221 (match_operator:<sseintvecmode> 1 ""
3222 [(match_operand:VI_256 2 "register_operand")
3223 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3224 "TARGET_AVX2"
3225 {
3226 bool ok = ix86_expand_int_vec_cmp (operands);
3227 gcc_assert (ok);
3228 DONE;
3229 })
3230
3231 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3232 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3233 (match_operator:<sseintvecmode> 1 ""
3234 [(match_operand:VI124_128 2 "register_operand")
3235 (match_operand:VI124_128 3 "vector_operand")]))]
3236 "TARGET_SSE2"
3237 {
3238 bool ok = ix86_expand_int_vec_cmp (operands);
3239 gcc_assert (ok);
3240 DONE;
3241 })
3242
3243 (define_expand "vec_cmpv2div2di"
3244 [(set (match_operand:V2DI 0 "register_operand")
3245 (match_operator:V2DI 1 ""
3246 [(match_operand:V2DI 2 "register_operand")
3247 (match_operand:V2DI 3 "vector_operand")]))]
3248 "TARGET_SSE4_2"
3249 {
3250 bool ok = ix86_expand_int_vec_cmp (operands);
3251 gcc_assert (ok);
3252 DONE;
3253 })
3254
3255 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3256 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3257 (match_operator:<sseintvecmode> 1 ""
3258 [(match_operand:VF_256 2 "register_operand")
3259 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3260 "TARGET_AVX"
3261 {
3262 bool ok = ix86_expand_fp_vec_cmp (operands);
3263 gcc_assert (ok);
3264 DONE;
3265 })
3266
3267 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3268 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3269 (match_operator:<sseintvecmode> 1 ""
3270 [(match_operand:VF_128 2 "register_operand")
3271 (match_operand:VF_128 3 "vector_operand")]))]
3272 "TARGET_SSE"
3273 {
3274 bool ok = ix86_expand_fp_vec_cmp (operands);
3275 gcc_assert (ok);
3276 DONE;
3277 })
3278
3279 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3280 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3281 (match_operator:<avx512fmaskmode> 1 ""
3282 [(match_operand:VI48_AVX512VL 2 "register_operand")
3283 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3284 "TARGET_AVX512F"
3285 {
3286 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3287 operands[2], operands[3]);
3288 gcc_assert (ok);
3289 DONE;
3290 })
3291
3292 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3293 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3294 (match_operator:<avx512fmaskmode> 1 ""
3295 [(match_operand:VI12_AVX512VL 2 "register_operand")
3296 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3297 "TARGET_AVX512BW"
3298 {
3299 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3300 operands[2], operands[3]);
3301 gcc_assert (ok);
3302 DONE;
3303 })
3304
3305 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3306 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3307 (match_operator:<sseintvecmode> 1 ""
3308 [(match_operand:VI_256 2 "register_operand")
3309 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3310 "TARGET_AVX2"
3311 {
3312 bool ok = ix86_expand_int_vec_cmp (operands);
3313 gcc_assert (ok);
3314 DONE;
3315 })
3316
3317 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3318 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3319 (match_operator:<sseintvecmode> 1 ""
3320 [(match_operand:VI124_128 2 "register_operand")
3321 (match_operand:VI124_128 3 "vector_operand")]))]
3322 "TARGET_SSE2"
3323 {
3324 bool ok = ix86_expand_int_vec_cmp (operands);
3325 gcc_assert (ok);
3326 DONE;
3327 })
3328
3329 (define_expand "vec_cmpuv2div2di"
3330 [(set (match_operand:V2DI 0 "register_operand")
3331 (match_operator:V2DI 1 ""
3332 [(match_operand:V2DI 2 "register_operand")
3333 (match_operand:V2DI 3 "vector_operand")]))]
3334 "TARGET_SSE4_2"
3335 {
3336 bool ok = ix86_expand_int_vec_cmp (operands);
3337 gcc_assert (ok);
3338 DONE;
3339 })
3340
3341 (define_expand "vec_cmpeqv2div2di"
3342 [(set (match_operand:V2DI 0 "register_operand")
3343 (match_operator:V2DI 1 ""
3344 [(match_operand:V2DI 2 "register_operand")
3345 (match_operand:V2DI 3 "vector_operand")]))]
3346 "TARGET_SSE4_1"
3347 {
3348 bool ok = ix86_expand_int_vec_cmp (operands);
3349 gcc_assert (ok);
3350 DONE;
3351 })
3352
3353 (define_expand "vcond<V_512:mode><VF_512:mode>"
3354 [(set (match_operand:V_512 0 "register_operand")
3355 (if_then_else:V_512
3356 (match_operator 3 ""
3357 [(match_operand:VF_512 4 "nonimmediate_operand")
3358 (match_operand:VF_512 5 "nonimmediate_operand")])
3359 (match_operand:V_512 1 "general_operand")
3360 (match_operand:V_512 2 "general_operand")))]
3361 "TARGET_AVX512F
3362 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3363 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3364 {
3365 bool ok = ix86_expand_fp_vcond (operands);
3366 gcc_assert (ok);
3367 DONE;
3368 })
3369
3370 (define_expand "vcond<V_256:mode><VF_256:mode>"
3371 [(set (match_operand:V_256 0 "register_operand")
3372 (if_then_else:V_256
3373 (match_operator 3 ""
3374 [(match_operand:VF_256 4 "nonimmediate_operand")
3375 (match_operand:VF_256 5 "nonimmediate_operand")])
3376 (match_operand:V_256 1 "general_operand")
3377 (match_operand:V_256 2 "general_operand")))]
3378 "TARGET_AVX
3379 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3380 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3381 {
3382 bool ok = ix86_expand_fp_vcond (operands);
3383 gcc_assert (ok);
3384 DONE;
3385 })
3386
3387 (define_expand "vcond<V_128:mode><VF_128:mode>"
3388 [(set (match_operand:V_128 0 "register_operand")
3389 (if_then_else:V_128
3390 (match_operator 3 ""
3391 [(match_operand:VF_128 4 "vector_operand")
3392 (match_operand:VF_128 5 "vector_operand")])
3393 (match_operand:V_128 1 "general_operand")
3394 (match_operand:V_128 2 "general_operand")))]
3395 "TARGET_SSE
3396 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3397 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3398 {
3399 bool ok = ix86_expand_fp_vcond (operands);
3400 gcc_assert (ok);
3401 DONE;
3402 })
3403
3404 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3405 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3406 (vec_merge:V48_AVX512VL
3407 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3408 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3409 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3410 "TARGET_AVX512F")
3411
3412 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3413 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3414 (vec_merge:VI12_AVX512VL
3415 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3416 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3417 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3418 "TARGET_AVX512BW")
3419
3420 ;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
3421 ;; and their condition can be folded late into a constant, we need to
3422 ;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
3423 (define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
3424 V8SI V4DI])
3425
3426 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3427 [(set (match_operand:VI_256_AVX2 0 "register_operand")
3428 (vec_merge:VI_256_AVX2
3429 (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
3430 (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
3431 (match_operand:<sseintvecmode> 3 "register_operand")))]
3432 "TARGET_AVX"
3433 {
3434 ix86_expand_sse_movcc (operands[0], operands[3],
3435 operands[1], operands[2]);
3436 DONE;
3437 })
3438
3439 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3440 [(set (match_operand:VI124_128 0 "register_operand")
3441 (vec_merge:VI124_128
3442 (match_operand:VI124_128 1 "vector_operand")
3443 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3444 (match_operand:<sseintvecmode> 3 "register_operand")))]
3445 "TARGET_SSE2"
3446 {
3447 ix86_expand_sse_movcc (operands[0], operands[3],
3448 operands[1], operands[2]);
3449 DONE;
3450 })
3451
3452 (define_expand "vcond_mask_v2div2di"
3453 [(set (match_operand:V2DI 0 "register_operand")
3454 (vec_merge:V2DI
3455 (match_operand:V2DI 1 "vector_operand")
3456 (match_operand:V2DI 2 "nonimm_or_0_operand")
3457 (match_operand:V2DI 3 "register_operand")))]
3458 "TARGET_SSE4_2"
3459 {
3460 ix86_expand_sse_movcc (operands[0], operands[3],
3461 operands[1], operands[2]);
3462 DONE;
3463 })
3464
3465 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3466 [(set (match_operand:VF_256 0 "register_operand")
3467 (vec_merge:VF_256
3468 (match_operand:VF_256 1 "nonimmediate_operand")
3469 (match_operand:VF_256 2 "nonimm_or_0_operand")
3470 (match_operand:<sseintvecmode> 3 "register_operand")))]
3471 "TARGET_AVX"
3472 {
3473 ix86_expand_sse_movcc (operands[0], operands[3],
3474 operands[1], operands[2]);
3475 DONE;
3476 })
3477
3478 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3479 [(set (match_operand:VF_128 0 "register_operand")
3480 (vec_merge:VF_128
3481 (match_operand:VF_128 1 "vector_operand")
3482 (match_operand:VF_128 2 "nonimm_or_0_operand")
3483 (match_operand:<sseintvecmode> 3 "register_operand")))]
3484 "TARGET_SSE"
3485 {
3486 ix86_expand_sse_movcc (operands[0], operands[3],
3487 operands[1], operands[2]);
3488 DONE;
3489 })
3490
3491 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3492 ;;
3493 ;; Parallel floating point logical operations
3494 ;;
3495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3496
;; 128/256-bit FP ANDNOT: op0 = ~op1 & op2.  Alternative 0 is the
;; two-operand SSE form; 1-3 are VEX/EVEX three-operand forms.  When the
;; insn mode is an integer vector mode (OI/TI), there is no vandnp[sd],
;; so vpandn[qd] is emitted instead.
(define_insn "<sse>_andnot<mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
	(and:VF_128_256
	  (not:VF_128_256
	    (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
	  (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
  "TARGET_SSE && <mask_avx512vl_condition>"
{
  char buf[128];
  const char *ops;
  const char *suffix;

  switch (which_alternative)
    {
    case 0:
      /* Non-AVX destructive two-operand form.  */
      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
    case 2:
    case 3:
      ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  switch (get_attr_mode (insn))
    {
    case MODE_V8SF:
    case MODE_V4SF:
      suffix = "ps";
      break;
    case MODE_OI:
    case MODE_TI:
      /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      break;
    default:
      suffix = "<ssemodesuffix>";
    }

  snprintf (buf, sizeof (buf), ops, suffix);
  output_asm_insn (buf, operands);
  return "";
}
  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,maybe_vex,evex,evex")
   (set (attr "mode")
	(cond [(and (match_test "<mask_applied>")
		    (and (eq_attr "alternative" "1")
			 (match_test "!TARGET_AVX512DQ")))
		 (const_string "<sseintvecmode2>")
	       (eq_attr "alternative" "3")
		 (const_string "<sseintvecmode2>")
	       (match_test "TARGET_AVX")
		 (const_string "<MODE>")
	       (match_test "optimize_function_for_size_p (cfun)")
		 (const_string "V4SF")
	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
		 (const_string "V4SF")
	      ]
	      (const_string "<MODE>")))])
3561
;; 512-bit FP ANDNOT.  Without AVX512DQ there is no 512-bit vandnp[sd],
;; so the integer vpandn[qd] is used and the insn mode is XI.
(define_insn "<sse>_andnot<mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(and:VF_512
	  (not:VF_512
	    (match_operand:VF_512 1 "register_operand" "v"))
	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
{
  char buf[128];
  const char *ops;
  const char *suffix;

  suffix = "<ssemodesuffix>";
  ops = "";

  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
  if (!TARGET_AVX512DQ)
    {
      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      ops = "p";
    }

  snprintf (buf, sizeof (buf),
	    "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
	    ops, suffix);
  output_asm_insn (buf, operands);
  return "";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set (attr "mode")
	(if_then_else (match_test "TARGET_AVX512DQ")
		      (const_string "<sseinsnmode>")
		      (const_string "XI")))])
3596
;; Expanders for FP and/ior/xor (any_logic).  The fixup call
;; canonicalizes the operands so the insn predicates/constraints below
;; are satisfiable (at most one memory operand).
(define_expand "<code><mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand")
	(any_logic:VF_128_256
	  (match_operand:VF_128_256 1 "vector_operand")
	  (match_operand:VF_128_256 2 "vector_operand")))]
  "TARGET_SSE && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

(define_expand "<code><mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand")
	(any_logic:VF_512
	  (match_operand:VF_512 1 "nonimmediate_operand")
	  (match_operand:VF_512 2 "nonimmediate_operand")))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3612
;; 128/256-bit FP and/ior/xor insn.  Mirrors the andnot pattern above:
;; alternative 0 is the two-operand SSE form, 1-3 are three-operand
;; VEX/EVEX forms, and integer-vector insn modes switch to vp<logic>[qd].
(define_insn "*<code><mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
	(any_logic:VF_128_256
	  (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
	  (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
  "TARGET_SSE && <mask_avx512vl_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  char buf[128];
  const char *ops;
  const char *suffix;

  switch (which_alternative)
    {
    case 0:
      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
    case 2:
    case 3:
      ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  switch (get_attr_mode (insn))
    {
    case MODE_V8SF:
    case MODE_V4SF:
      suffix = "ps";
      break;
    case MODE_OI:
    case MODE_TI:
      /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[qd].  */
      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      break;
    default:
      suffix = "<ssemodesuffix>";
    }

  snprintf (buf, sizeof (buf), ops, suffix);
  output_asm_insn (buf, operands);
  return "";
}
  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,maybe_evex,evex,evex")
   (set (attr "mode")
	(cond [(and (match_test "<mask_applied>")
		    (and (eq_attr "alternative" "1")
			 (match_test "!TARGET_AVX512DQ")))
		 (const_string "<sseintvecmode2>")
	       (eq_attr "alternative" "3")
		 (const_string "<sseintvecmode2>")
	       (match_test "TARGET_AVX")
		 (const_string "<MODE>")
	       (match_test "optimize_function_for_size_p (cfun)")
		 (const_string "V4SF")
	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
		 (const_string "V4SF")
	      ]
	      (const_string "<MODE>")))])
3677
;; 512-bit FP and/ior/xor.  Falls back to the integer vp<logic>[qd]
;; form (mode XI) when AVX512DQ is not available.
(define_insn "*<code><mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(any_logic:VF_512
	  (match_operand:VF_512 1 "nonimmediate_operand" "%v")
	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  char buf[128];
  const char *ops;
  const char *suffix;

  suffix = "<ssemodesuffix>";
  ops = "";

  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
  if (!TARGET_AVX512DQ)
    {
      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      ops = "p";
    }

  snprintf (buf, sizeof (buf),
	    "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
	    ops, suffix);
  output_asm_insn (buf, operands);
  return "";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set (attr "mode")
	(if_then_else (match_test "TARGET_AVX512DQ")
		      (const_string "<sseinsnmode>")
		      (const_string "XI")))])
3711
;; copysign: op0 = (op1 & ~signmask) | (op2 & signmask), with the
;; sign-bit mask (operand 3) built by ix86_build_signbit_mask.
(define_expand "copysign<mode>3"
  [(set (match_dup 4)
	(and:VF
	  (not:VF (match_dup 3))
	  (match_operand:VF 1 "vector_operand")))
   (set (match_dup 5)
	(and:VF (match_dup 3)
		(match_operand:VF 2 "vector_operand")))
   (set (match_operand:VF 0 "register_operand")
	(ior:VF (match_dup 4) (match_dup 5)))]
  "TARGET_SSE"
{
  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);
})

;; xorsign: op0 = op1 ^ (op2 & signmask) -- flip op1's sign bit
;; wherever op2's sign bit is set.
(define_expand "xorsign<mode>3"
  [(set (match_dup 4)
	(and:VF (match_dup 3)
		(match_operand:VF 2 "vector_operand")))
   (set (match_operand:VF 0 "register_operand")
	(xor:VF (match_dup 4)
		(match_operand:VF 1 "vector_operand")))]
  "TARGET_SSE"
{
  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  operands[4] = gen_reg_rtx (<MODE>mode);
})
3743
;; signbit: view the FP vector as an integer vector and logically shift
;; right by (element bit width - 1), leaving 0 or 1 per element.
(define_expand "signbit<mode>2"
  [(set (match_operand:<sseintvecmode> 0 "register_operand")
	(lshiftrt:<sseintvecmode>
	  (subreg:<sseintvecmode>
	    (match_operand:VF1_AVX2 1 "register_operand") 0)
	  (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
3752
3753 ;; Also define scalar versions. These are used for abs, neg, and
3754 ;; conditional move. Using subregs into vector modes causes register
3755 ;; allocation lossage. These patterns do not allow memory operands
3756 ;; because the native instructions read the full 128-bits.
3757
;; Scalar (SF/DF) ANDNOT performed with full-width vector instructions.
;; Alternative 3 has no AVX512VL, so it operates on the containing
;; 512-bit registers via the %g operand modifiers.
(define_insn "*andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
	(and:MODEF
	  (not:MODEF
	    (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
	  (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  char buf[128];
  const char *ops;
  const char *suffix
    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";

  switch (which_alternative)
    {
    case 0:
      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    case 2:
      /* Without AVX512DQ fall back to the integer vpandn[qd] form.  */
      if (TARGET_AVX512DQ)
	ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      else
	{
	  suffix = <MODE>mode == DFmode ? "q" : "d";
	  ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
	}
      break;
    case 3:
      if (TARGET_AVX512DQ)
	ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
      else
	{
	  suffix = <MODE>mode == DFmode ? "q" : "d";
	  ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
	}
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), ops, suffix);
  output_asm_insn (buf, operands);
  return "";
}
  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex,evex,evex")
   (set (attr "mode")
	(cond [(eq_attr "alternative" "2")
		 (if_then_else (match_test "TARGET_AVX512DQ")
			       (const_string "<ssevecmode>")
			       (const_string "TI"))
	       (eq_attr "alternative" "3")
		 (if_then_else (match_test "TARGET_AVX512DQ")
			       (const_string "<avx512fvecmode>")
			       (const_string "XI"))
	       (match_test "TARGET_AVX")
		 (const_string "<ssevecmode>")
	       (match_test "optimize_function_for_size_p (cfun)")
		 (const_string "V4SF")
	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
		 (const_string "V4SF")
	      ]
	      (const_string "<ssevecmode>")))])
3825
;; TFmode (128-bit) ANDNOT.  EVEX alternatives always use the integer
;; pandnq form; alternative 3 (no AVX512VL) works on the 512-bit
;; registers via %g, hence mode XI.
(define_insn "*andnottf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
	(and:TF
	  (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
  "TARGET_SSE"
{
  char buf[128];
  const char *ops;
  const char *tmp
    = (which_alternative >= 2 ? "pandnq"
       : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");

  switch (which_alternative)
    {
    case 0:
      ops = "%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
    case 2:
      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    case 3:
      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), ops, tmp);
  output_asm_insn (buf, operands);
  return "";
}
  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex,evex,evex")
   (set (attr "mode")
	(cond [(eq_attr "alternative" "2")
		 (const_string "TI")
	       (eq_attr "alternative" "3")
		 (const_string "XI")
	       (match_test "TARGET_AVX")
		 (const_string "TI")
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
		 (const_string "V4SF")
	      ]
	      (const_string "TI")))])
3882
;; Scalar (SF/DF) and/ior/xor performed with full-width vector
;; instructions; same alternative layout as *andnot<mode>3 above.
(define_insn "*<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
	(any_logic:MODEF
	  (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
	  (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  char buf[128];
  const char *ops;
  const char *suffix
    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";

  switch (which_alternative)
    {
    case 0:
      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 2:
      /* Without AVX512DQ use the integer vp<logic>[qd] form.  */
      if (!TARGET_AVX512DQ)
	{
	  suffix = <MODE>mode == DFmode ? "q" : "d";
	  ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
	  break;
	}
      /* FALLTHRU */
    case 1:
      ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    case 3:
      if (TARGET_AVX512DQ)
	ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
      else
	{
	  suffix = <MODE>mode == DFmode ? "q" : "d";
	  ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
	}
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), ops, suffix);
  output_asm_insn (buf, operands);
  return "";
}
  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex,evex,evex")
   (set (attr "mode")
	(cond [(eq_attr "alternative" "2")
		 (if_then_else (match_test "TARGET_AVX512DQ")
			       (const_string "<ssevecmode>")
			       (const_string "TI"))
	       (eq_attr "alternative" "3")
		 (if_then_else (match_test "TARGET_AVX512DQ")
			       (const_string "<avx512fvecmode>")
			       (const_string "XI"))
	       (match_test "TARGET_AVX")
		 (const_string "<ssevecmode>")
	       (match_test "optimize_function_for_size_p (cfun)")
		 (const_string "V4SF")
	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
		 (const_string "V4SF")
	      ]
	      (const_string "<ssevecmode>")))])
3948
;; Expander for TFmode bitwise and/ior/xor; canonicalizes the operands
;; for the *<code>tf3 insn below.
(define_expand "<code>tf3"
  [(set (match_operand:TF 0 "register_operand")
	(any_logic:TF
	  (match_operand:TF 1 "vector_operand")
	  (match_operand:TF 2 "vector_operand")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3956
;; TFmode (128-bit) and/ior/xor.  EVEX alternatives use the integer
;; p<logic>q form; alternative 3 has no AVX512VL and therefore operates
;; on the containing 512-bit registers via the %g operand modifiers.
;; Accordingly its insn mode is XI (512-bit), matching *andnottf3 above;
;; it was previously misstated as the scalar byte mode QI.
(define_insn "*<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
	(any_logic:TF
	  (match_operand:TF 1 "vector_operand" "%0,x,v,v")
	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  char buf[128];
  const char *ops;
  const char *tmp
    = (which_alternative >= 2 ? "p<logic>q"
       : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");

  switch (which_alternative)
    {
    case 0:
      ops = "%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
    case 2:
      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    case 3:
      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), ops, tmp);
  output_asm_insn (buf, operands);
  return "";
}
  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex,evex,evex")
   (set (attr "mode")
	(cond [(eq_attr "alternative" "2")
		 (const_string "TI")
	       (eq_attr "alternative" "3")
		 (const_string "XI")
	       (match_test "TARGET_AVX")
		 (const_string "TI")
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
		 (const_string "V4SF")
	      ]
	      (const_string "TI")))])
4013
4014 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4015 ;;
4016 ;; FMA floating point multiply/accumulate instructions. These include
4017 ;; scalar versions of the instructions as well as vector versions.
4018 ;;
4019 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4020
4021 ;; The standard names for scalar FMA are only available with SSE math enabled.
4022 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
4023 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
4024 ;; and TARGET_FMA4 are both false.
4025 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
4026 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
4027 ;; GAS to allow proper prefix selection. However, for the moment all hardware
4028 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named FMA patterns; scalar modes additionally
;; require SSE math (see the comment block above).
(define_mode_iterator FMAMODEM
  [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
   (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
   (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")])

;; Standard-named expanders; the four sign combinations of
;; fma (+-op1 * op2 +- op3).
(define_expand "fma<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
	(fma:FMAMODEM
	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])

(define_expand "fms<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
	(fma:FMAMODEM
	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])

(define_expand "fnma<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
	(fma:FMAMODEM
	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])

(define_expand "fnms<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
	(fma:FMAMODEM
	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
4066
;; The builtins for intrinsics are not constrained by SSE math enabled.
(define_mode_iterator FMAMODE_AVX512
 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
  (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
  (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
  (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
  (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
  (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
  (V16SF "TARGET_AVX512F")
  (V8DF "TARGET_AVX512F")])

;; Unconditional modes for the FMA/FMA4-only insn patterns below.
(define_mode_iterator FMAMODE
  [SF DF V4SF V2DF V8SF V4DF])

;; Builtin-level expanders for the four fma sign combinations.
(define_expand "fma4i_fmadd_<mode>"
  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
	(fma:FMAMODE_AVX512
	  (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
	  (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])

(define_expand "fma4i_fmsub_<mode>"
  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
	(fma:FMAMODE_AVX512
	  (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
	  (neg:FMAMODE_AVX512
	    (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])

(define_expand "fma4i_fnmadd_<mode>"
  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
	(fma:FMAMODE_AVX512
	  (neg:FMAMODE_AVX512
	    (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
	  (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])

(define_expand "fma4i_fnmsub_<mode>"
  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
	(fma:FMAMODE_AVX512
	  (neg:FMAMODE_AVX512
	    (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
	  (neg:FMAMODE_AVX512
	    (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
4112
;; Zero-masked fmadd: expands to the maskz_1 pattern with a zero vector
;; as the merge source.
(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F && <round_mode512bit_condition>"
{
  emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})
4126
;; FMA/FMA4 fmadd: op0 = op1 * op2 + op3.  Alternatives 0-2 are the
;; FMA3 132/213/231 forms (destination overlaps one input); 3-4 are the
;; four-operand FMA4 form.
(define_insn "*fma_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
	(fma:FMAMODE
	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
	  (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4143
;; Suppose AVX-512F as baseline
(define_mode_iterator VF_SF_AVX512VL
  [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

;; EVEX-encoded fmadd with optional zero-masking / rounding substs.
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(fma:VF_SF_AVX512VL
	  (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
	  (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
	  (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4162
;; Merge-masked fmadd: masked-off elements keep op1 (the multiplicand,
;; which is also the destination).
(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	    (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Merge-masked fmadd, mask3 variant: masked-off elements keep op3 (the
;; addend, which is the destination), so only the 231 form applies.
(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4192
;; FMA/FMA4 fmsub: op0 = op1 * op2 - op3.
(define_insn "*fma_fmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
	(fma:FMAMODE
	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
	  (neg:FMAMODE
	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4210
;; Zero-masked fmsub expander (merge source is the zero vector).
(define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F && <round_mode512bit_condition>"
{
  emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})

;; EVEX-encoded fmsub with optional zero-masking / rounding substs.
(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(fma:VF_SF_AVX512VL
	  (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
	  (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
	  (neg:VF_SF_AVX512VL
	    (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4239
;; Merge-masked fmsub: masked-off elements keep op1.
(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "@
   vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Merge-masked fmsub, mask3 variant: masked-off elements keep op3,
;; so only the 231 form applies.
(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4271
;; FMA/FMA4 fnmadd: op0 = -(op1 * op2) + op3.
(define_insn "*fma_fnmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
	(fma:FMAMODE
	  (neg:FMAMODE
	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
	  (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4289
;; Zero-masked fnmadd expander (merge source is the zero vector).
(define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F && <round_mode512bit_condition>"
{
  emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})

;; EVEX-encoded fnmadd with optional zero-masking / rounding substs.
(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(fma:VF_SF_AVX512VL
	  (neg:VF_SF_AVX512VL
	    (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
	  (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
	  (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4318
;; Merge-masked fnmadd: masked-off elements keep op1.
(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	    (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Merge-masked fnmadd, mask3 variant: masked-off elements keep op3,
;; so only the 231 form applies.
(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4350
;; FMA/FMA4 fnmsub: op0 = -(op1 * op2) - op3.  This is the plain
;; TARGET_FMA/TARGET_FMA4 pattern: it has no masking or rounding subst
;; context, so the output templates must use bare operands.  The
;; <round_sd_mask_op4>/<sd_mask_op4> substitutions that had crept into
;; the first three templates belong only to the AVX-512
;; <sd_mask_codefor>fma_fnmsub pattern below (cf. *fma_fmadd_<mode>,
;; *fma_fmsub_<mode> and *fma_fnmadd_<mode>).
(define_insn "*fma_fnmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
	(fma:FMAMODE
	  (neg:FMAMODE
	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
	  (neg:FMAMODE
	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4369
;; Zero-masking FNMS expander: forwards to the maskz_1 insn with a zero
;; vector as the merge source (elements with a clear mask bit become 0.0).
(define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F && <round_mode512bit_condition>"
{
  emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})
4383
;; FNMS insn shared by the plain, zero-masked and rounding variants via the
;; sd_maskz/round substs.  Three alternatives allow the memory (or embedded
;; broadcast) operand to be op2 or op3, or tie op3 to the destination.
(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(fma:VF_SF_AVX512VL
	  (neg:VF_SF_AVX512VL
	    (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
	  (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
	  (neg:VF_SF_AVX512VL
	    (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4399
;; Merge-masking FNMS with the merge source tied to the multiplicand
;; (operand 1): dest = mask ? (-op1 * op2 - op3) : op1.  Only the 132/213
;; forms keep op1 in the destination, hence two alternatives.
(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4417
;; Merge-masking FNMS with the merge source tied to the addend (operand 3):
;; dest = mask ? (-op1 * op2 - op3) : op3.  Emitted as vfnmsub231.
;; The enable condition needs <round_mode512bit_condition> like the sibling
;; <avx512>_fnmadd_<mode>_mask3 and <avx512>_fnmsub_<mode>_mask patterns;
;; otherwise rounding variants would be generated for 128/256-bit modes,
;; where embedded rounding does not exist.
(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (neg:VF_AVX512VL
	      ;; Multiplication is commutative, so operand 1 is marked "%".
	      (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4433
4434 ;; FMA parallel floating point multiply addsub and subadd operations.
4435
4436 ;; It would be possible to represent these without the UNSPEC as
4437 ;;
4438 ;; (vec_merge
4439 ;; (fma op1 op2 op3)
4440 ;; (fma op1 op2 (neg op3))
4441 ;; (merge-const))
4442 ;;
4443 ;; But this doesn't seem useful in practice.
4444
;; Generic fmaddsub expander: even elements get a*b-c, odd elements a*b+c.
;; Kept as an UNSPEC (see the comment above); matched by the FMA/FMA4 and
;; AVX512F insns below.
(define_expand "fmaddsub_<mode>"
  [(set (match_operand:VF 0 "register_operand")
	(unspec:VF
	  [(match_operand:VF 1 "nonimmediate_operand")
	   (match_operand:VF 2 "nonimmediate_operand")
	   (match_operand:VF 3 "nonimmediate_operand")]
	  UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4453
;; Zero-masking fmaddsub expander: forwards to the maskz_1 insn with a zero
;; vector as the merge source.
(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})
4467
;; fmaddsub for FMA3 (alternatives 0-2) and FMA4 (alternatives 3-4) on
;; 128/256-bit modes.  Plain templates: no round/mask substs apply here.
(define_insn "*fma_fmaddsub_<mode>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
	   (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
	   (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
	  UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4485
;; fmaddsub insn shared by the plain, zero-masked and rounding variants via
;; the sd_maskz/round substs.
(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(unspec:VF_SF_AVX512VL
	  [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
	   (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
	   (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
	  UNSPEC_FMADDSUB))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4500
;; Merge-masking fmaddsub, merge source tied to the multiplicand
;; (operand 1); only the 132/213 forms keep op1 in the destination.
(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
	     (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	     (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
	    UNSPEC_FMADDSUB)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "@
   vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4517
;; Merge-masking fmaddsub, merge source tied to the addend (operand 3);
;; emitted as vfmaddsub231 which overwrites the accumulator ("0").
(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	     (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	     (match_operand:VF_AVX512VL 3 "register_operand" "0")]
	    UNSPEC_FMADDSUB)
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4532
;; fmsubadd (even elements a*b+c, odd a*b-c), expressed as fmaddsub of the
;; negated addend.  FMA3 alternatives 0-2, FMA4 alternatives 3-4.
(define_insn "*fma_fmsubadd_<mode>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
	   (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
	   (neg:VF_128_256
	     (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
	  UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4551
;; fmsubadd insn shared by the plain, zero-masked and rounding variants via
;; the sd_maskz/round substs.
(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(unspec:VF_SF_AVX512VL
	  [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
	   (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
	   (neg:VF_SF_AVX512VL
	     (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
	  UNSPEC_FMADDSUB))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4567
;; Merge-masking fmsubadd, merge source tied to the multiplicand
;; (operand 1); only the 132/213 forms keep op1 in the destination.
(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
	     (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	     (neg:VF_AVX512VL
	       (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
	    UNSPEC_FMADDSUB)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "@
   vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4585
;; Merge-masking fmsubadd, merge source tied to the addend (operand 3);
;; emitted as vfmsubadd231 which overwrites the accumulator ("0").
(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	     (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	     (neg:VF_AVX512VL
	       (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
	    UNSPEC_FMADDSUB)
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4601
4602 ;; FMA3 floating point scalar intrinsics. These merge result with
4603 ;; high-order elements from the destination register.
4604
;; Scalar FMA intrinsic expander: lowest element gets op1*op2+op3, the
;; upper elements are taken from op1 (const_int 1 selects element 0 only).
(define_expand "fmai_vmfmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "register_operand")
	    (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
	    (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")
4615
;; Scalar FMS intrinsic expander: lowest element op1*op2-op3, upper
;; elements from op1.
(define_expand "fmai_vmfmsub_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "register_operand")
	    (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
	    (neg:VF_128
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")
4627
;; Scalar FNMA intrinsic expander: lowest element -op2*op1+op3, upper
;; elements from op1 (the negation is placed on op2 so op1 stays the
;; merge source).
(define_expand "fmai_vmfnmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
	    (match_operand:VF_128 1 "register_operand")
	    (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")
4639
;; Scalar FNMS intrinsic expander: lowest element -op2*op1-op3, upper
;; elements from op1.
(define_expand "fmai_vmfnmsub_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
	    (match_operand:VF_128 1 "register_operand")
	    (neg:VF_128
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")
4652
;; Scalar FMA insn.  The pattern uses the round subst attributes
;; (<round_constraint>, <round_op4>), so its name must carry <round_name>
;; to give the substituted rounding variant a distinct name, matching the
;; sibling *fmai_fnmadd_<mode><round_name> / *fmai_fnmsub_<mode><round_name>.
(define_insn "*fmai_fmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "register_operand" "0,0")
	    (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
	    (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4668
;; Scalar FMS insn.  As with *fmai_fmadd, the name must carry <round_name>
;; because the body references the round subst attributes; keeps the
;; substituted rounding variant distinctly named.
(define_insn "*fmai_fmsub_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "register_operand" "0,0")
	    (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	    (neg:VF_128
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4685
;; Scalar FNMA insn: lowest element -op2*op1+op3, upper elements from op1.
(define_insn "*fmai_fnmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	    (match_operand:VF_128 1 "register_operand" "0,0")
	    (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4702
;; Scalar FNMS insn: lowest element -op2*op1-op3, upper elements from op1.
(define_insn "*fmai_fnmsub_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	    (match_operand:VF_128 1 "register_operand" "0,0")
	    (neg:VF_128
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4720
;; Masked scalar FMA, merge source tied to op1: lowest element is
;; mask ? op1*op2+op3 : op1; upper elements from op1 (outer vec_merge).
(define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	    (match_dup 1)
	    (match_operand:QI 4 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
   vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4739
;; Masked scalar FMA, merge source tied to the addend (operand 3); emitted
;; as vfmadd231 which overwrites the accumulator ("0" on operand 3).
;; Intel-syntax operands must mirror the AT&T half: AT&T "%2, %1, %0" is
;; Intel "%0, %1, %2".  Printing %3 there would repeat the destination
;; register (operand 3 is tied to operand 0) instead of the multiplicand,
;; so the second Intel operand is %<iptr>1, not %<iptr>3.
(define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
	      (match_operand:VF_128 3 "register_operand" "0"))
	    (match_dup 3)
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4756
;; Zero-masking scalar FMA expander: forwards to maskz_1 with a zero
;; vector merge source.
(define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_128 0 "register_operand")
   (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})
4770
;; Zero-masking scalar FMA: lowest element is mask ? op1*op2+op3 : 0.0;
;; upper elements from op1.
(define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	    (match_operand:VF_128 4 "const0_operand" "C,C")
	    (match_operand:QI 5 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
   vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4789
;; Masked scalar FMS, merge source tied to op1: lowest element is
;; mask ? op1*op2-op3 : op1; upper elements from op1.
(define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	      (neg:VF_128
		(match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	    (match_dup 1)
	    (match_operand:QI 4 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
   vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4809
;; Masked scalar FMS, merge source tied to the (negated) addend, operand 3;
;; emitted as vfmsub231 which overwrites the accumulator ("0").
;; Intel-syntax operands mirror the AT&T half ("%2, %1, %0" -> "%0, %1, %2");
;; printing %3 would repeat the destination register (operand 3 is tied to
;; operand 0), so the second Intel operand is %<iptr>1.
(define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
	      (neg:VF_128
		(match_operand:VF_128 3 "register_operand" "0")))
	    (match_dup 3)
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4827
;; Zero-masking scalar FMS: lowest element is mask ? op1*op2-op3 : 0.0;
;; upper elements from op1.
(define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	      (neg:VF_128
		(match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	    (match_operand:VF_128 4 "const0_operand" "C,C")
	    (match_operand:QI 5 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
   vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4847
;; Masked scalar FNMA, merge source tied to op1: lowest element is
;; mask ? -op2*op1+op3 : op1; upper elements from op1.
(define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	    (match_dup 1)
	    (match_operand:QI 4 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
   vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4867
;; Masked scalar FNMA, merge source tied to the addend (operand 3); emitted
;; as vfnmadd231 which overwrites the accumulator ("0").
;; Intel-syntax operands mirror the AT&T half ("%2, %1, %0" -> "%0, %1, %2");
;; printing %3 would repeat the destination register (operand 3 is tied to
;; operand 0), so the second Intel operand is %<iptr>1.
(define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
	      (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
	      (match_operand:VF_128 3 "register_operand" "0"))
	    (match_dup 3)
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4885
;; Zero-masking scalar FNMA: lowest element is mask ? -op2*op1+op3 : 0.0;
;; upper elements from op1.
(define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	    (match_operand:VF_128 4 "const0_operand" "C,C")
	    (match_operand:QI 5 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
   vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4905
;; Masked scalar FNMS, merge source tied to op1: lowest element is
;; mask ? -op2*op1-op3 : op1; upper elements from op1.
(define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (neg:VF_128
		(match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	    (match_dup 1)
	    (match_operand:QI 4 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
   vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4926
;; Masked scalar FNMS, merge source tied to the (negated) addend, operand 3;
;; emitted as vfnmsub231 which overwrites the accumulator ("0").
;; Intel-syntax operands mirror the AT&T half ("%2, %1, %0" -> "%0, %1, %2");
;; printing %3 would repeat the destination register (operand 3 is tied to
;; operand 0), so the second Intel operand is %<iptr>1.
(define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
	      (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
	      (neg:VF_128
		(match_operand:VF_128 3 "register_operand" "0")))
	    (match_dup 3)
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4945
;; Zero-masking scalar FNMS: lowest element is mask ? -op2*op1-op3 : 0.0;
;; upper elements from op1.
(define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (neg:VF_128
		(match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	    (match_operand:VF_128 4 "const0_operand" "C,C")
	    (match_operand:QI 5 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
   vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4966
4967 ;; FMA4 floating point scalar intrinsics. These write the
4968 ;; entire destination register, with the high-order elements zeroed.
4969
;; FMA4 scalar FMA expander: lowest element op1*op2+op3, upper elements
;; zeroed (operand 4 is filled with a zero vector by the preparation code).
(define_expand "fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand")
	    (match_operand:VF_128 2 "nonimmediate_operand")
	    (match_operand:VF_128 3 "nonimmediate_operand"))
	  (match_dup 4)
	  (const_int 1)))]
  "TARGET_FMA4"
  "operands[4] = CONST0_RTX (<MODE>mode);")
4981
;; FMA4 scalar FMA insn: 4-operand form, upper elements zeroed.  Two
;; alternatives allow the memory operand to be either op3 or op2.
(define_insn "*fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
	  (match_operand:VF_128 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
4995
;; FMA4 scalar FMS insn: lowest element op1*op2-op3, upper elements zeroed.
(define_insn "*fma4i_vmfmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (neg:VF_128
	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
	  (match_operand:VF_128 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
5010
;; FMA4 scalar FNMA insn: lowest element -op1*op2+op3, upper elements zeroed.
(define_insn "*fma4i_vmfnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
	  (match_operand:VF_128 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
5025
;; FMA4 scalar FNMS insn: lowest element -op1*op2-op3, upper elements zeroed.
(define_insn "*fma4i_vmfnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (neg:VF_128
	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
	  (match_operand:VF_128 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
5041
5042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5043 ;;
5044 ;; Parallel single-precision floating point conversion operations
5045 ;;
5046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5047
;; cvtpi2ps: convert V2SI (MMX or SSE reg) to the low two elements of a
;; V4SF, keeping the high two elements of operand 1.  When the input ends
;; up in an SSE register, split into cvtdq2ps plus a merge sequence.
(define_insn_and_split "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
	  (match_operand:V4SF 1 "register_operand" "0,0,Yv")
	  ;; Mask 3 = replace elements 0 and 1, keep 2 and 3 from op1.
	  (const_int 3)))
   (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
  "@
   cvtpi2ps\t{%2, %0|%0, %2}
   #
   #"
  "TARGET_SSE2 && reload_completed
   && SSE_REG_P (operands[2])"
  [(const_int 0)]
{
  ;; Paradoxical subreg: reinterpret the V2SI SSE register as V4SI so the
  ;; full-width vector conversion can be used.
  rtx op2 = lowpart_subreg (V4SImode, operands[2],
			    GET_MODE (operands[2]));
  /* Generate SSE2 cvtdq2ps.  */
  emit_insn (gen_floatv4siv4sf2 (operands[3], op2));

  /* Merge operands[3] with operands[0].  */
  rtx mask, op1;
  if (TARGET_AVX)
    {
      ;; vperm2f128/vshufps-style select: low half from the converted
      ;; value, high half from op1.
      mask = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (4, GEN_INT (0), GEN_INT (1),
					  GEN_INT (6), GEN_INT (7)));
      op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
      op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
      emit_insn (gen_rtx_SET (operands[0], op2));
    }
  else
    {
      /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
      mask = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
					  GEN_INT (4), GEN_INT (5)));
      op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
      op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
      emit_insn (gen_rtx_SET (operands[0], op2));

      /* Swap bits 0:63 with bits 64:127.  */
      mask = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
					  GEN_INT (0), GEN_INT (1)));
      rtx dest = lowpart_subreg (V4SImode, operands[0],
				 GET_MODE (operands[0]));
      op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
      emit_insn (gen_rtx_SET (dest, op1));
    }
  DONE;
}
  [(set_attr "mmx_isa" "native,sse_noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
5105
;; cvtps2pi: convert the low two V4SF elements to V2SI with the current
;; rounding mode (UNSPEC_FIX_NOTRUNC).  When the destination is an SSE
;; register, split into a zero-extended load plus cvtps2dq.
(define_insn_and_split "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
	(vec_select:V2SI
	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
		       UNSPEC_FIX_NOTRUNC)
	  (parallel [(const_int 0) (const_int 1)])))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
  "@
   cvtps2pi\t{%1, %0|%0, %q1}
   #"
  "TARGET_SSE2 && reload_completed
   && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  ;; Widen the low 64 bits of op1 to a full V4SF with zeroed upper half,
  ;; then run the full-width conversion; only the low V2SI part of the
  ;; result is meaningful.
  rtx op1 = lowpart_subreg (V2SFmode, operands[1],
			    GET_MODE (operands[1]));
  rtx tmp = lowpart_subreg (V4SFmode, operands[0],
			    GET_MODE (operands[0]));

  op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
  emit_insn (gen_rtx_SET (tmp, op1));

  rtx dest = lowpart_subreg (V4SImode, operands[0],
			     GET_MODE (operands[0]));
  emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
  DONE;
}
  [(set_attr "isa" "*,sse2")
   (set_attr "mmx_isa" "native,*")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "mode" "DI")])
5138
;; Truncating variant of the pattern above: convert the two low SF
;; elements of a V4SF to V2SI with truncation (fix).  Split after
;; reload, when the destination is an SSE register, into a
;; zero-extending move plus cvttps2dq on the full vector.
(define_insn_and_split "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
	(vec_select:V2SI
	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
	  (parallel [(const_int 0) (const_int 1)])))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
  "@
   cvttps2pi\t{%1, %0|%0, %q1}
   #"
  "TARGET_SSE2 && reload_completed
   && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  /* Same shape as the sse_cvtps2pi split, but the final conversion
     is the truncating fix_truncv4sfv4si2.  */
  rtx op1 = lowpart_subreg (V2SFmode, operands[1],
			    GET_MODE (operands[1]));
  rtx tmp = lowpart_subreg (V4SFmode, operands[0],
			    GET_MODE (operands[0]));

  op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
  emit_insn (gen_rtx_SET (tmp, op1));

  rtx dest = lowpart_subreg (V4SImode, operands[0],
			     GET_MODE (operands[0]));
  emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
  DONE;
}
  [(set_attr "isa" "*,sse2")
   (set_attr "mmx_isa" "native,*")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "prefix_rep" "0")
   (set_attr "mode" "SF")])
5171
;; cvtsi2ss: convert a 32- or 64-bit integer (SWI48) to SF and merge it
;; into the low element of operand 1; the remaining V4SF lanes pass
;; through.  Alternatives: SSE reg form, SSE mem form, and the
;; three-operand AVX/EVEX form with optional rounding control.
(define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
	  (match_operand:V4SF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
   cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
   vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "znver1_decode" "double,double,double")
   ;; DImode source in the VEX alternative needs REX.W, hence length 4.
   (set (attr "length_vex")
     (if_then_else
       (and (match_test "<MODE>mode == DImode")
	    (eq_attr "alternative" "2"))
       (const_string "4")
       (const_string "*")))
   ;; Legacy-encoded alternatives carry a REX prefix for DImode.
   (set (attr "prefix_rex")
     (if_then_else
       (and (match_test "<MODE>mode == DImode")
	    (eq_attr "alternative" "0,1"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "SF")])
5205
;; cvtss2si: low SF element of a V4SF to a 32/64-bit integer with
;; rounding (UNSPEC_FIX_NOTRUNC), optional rounding control.
(define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48
	  [(vec_select:SF
	     (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Same conversion applied directly to a scalar SF operand (e.g. one
;; loaded from memory) rather than a vector's low element.
(define_insn "sse_cvtss2si<rex64namesuffix>_2"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
		      UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Truncating variant (cvttss2si), with optional SAE.
(define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(fix:SWI48
	  (vec_select:SF
	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
5251
;; AVX512F vcvtusi2ss/sd: unsigned 32-bit integer to the low scalar
;; element of a VF_128 vector; upper lanes come from operand 1.
(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_duplicate:VF_128
	    (unsigned_float:<ssescalarmode>
	      (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
	  (match_operand:VF_128 1 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512F && <round_modev4sf_condition>"
  "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])

;; 64-bit source variant; only valid when DImode is a native word size.
(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_duplicate:VF_128
	    (unsigned_float:<ssescalarmode>
	      (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
	  (match_operand:VF_128 1 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512F && TARGET_64BIT"
  "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
5279
;; Signed vector int -> float (cvtdq2ps), with optional masking and
;; rounding in the AVX512 form.
(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF1 0 "register_operand" "=x,v")
	(float:VF1
	  (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   cvtdq2ps\t{%1, %0|%0, %1}
   vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Unsigned vector int -> float (vcvtudq2ps), AVX512 only.
(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(unsigned_float:VF1_AVX512VL
	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
  "TARGET_AVX512F"
  "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; Expander for unsigned int -> float: use the native AVX512
;; instruction where available, otherwise fall back to the generic
;; two-step expansion in ix86_expand_vector_convert_uns_vsivsf.
(define_expand "floatuns<sseintvecmodelower><mode>2"
  [(match_operand:VF1 0 "register_operand")
   (match_operand:<sseintvecmode> 1 "register_operand")]
  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
  if (<MODE>mode == V16SFmode)
    emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
  else
    if (TARGET_AVX512VL)
      {
	if (<MODE>mode == V4SFmode)
	  emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
	else
	  emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
      }
  else
    ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);

  DONE;
})
5323
5324
;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
(define_mode_attr sf2simodelower
  [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])

;; Rounding (non-truncating) float -> signed int vector conversion,
;; cvtps2dq, for 128/256-bit modes with optional AVX512 masking.
(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
  [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
	(unspec:VI4_AVX
	  [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && <mask_mode512bit_condition>"
  "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   ;; Legacy (non-AVX) encoding uses the 0x66 data16 prefix.
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; 512-bit form with rounding control.
(define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Unsigned counterpart, vcvtps2udq.
(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
5366
;; AVX512DQ float -> 64-bit integer conversions (vcvtps2qq family),
;; rounding (non-truncating), signed and unsigned, 256/512-bit plus
;; the V2DI form that reads only the two low SF elements.
(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
			    UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; V2DI result from the two low SF lanes of a V4SF.
(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(unspec:V2DI
	  [(vec_select:V2SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
	     (parallel [(const_int 0) (const_int 1)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Unsigned 256/512-bit form.
(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
			    UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Unsigned V2DI form from the two low SF lanes.
(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(unspec:V2DI
	  [(vec_select:V2SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
	     (parallel [(const_int 0) (const_int 1)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
5412
;; Truncating float -> int conversions (vcvttps2dq / vcvttps2udq).
;; any_fix covers both signed and unsigned via <fixsuffix>.
(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_fix:V16SI
	  (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512F"
  "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; 256-bit signed truncating conversion.
(define_insn "fix_truncv8sfv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
5431
;; 128-bit signed truncating conversion (cvttps2dq), with optional
;; AVX512VL masking.
;; NB: the original pattern specified the prefix_data16 attribute
;; twice -- once conditionally on TARGET_AVX and once unconditionally
;; as "0".  The unconditional duplicate is redundant (and contradicts
;; the conditional for TARGET_AVX); drop it, matching the sibling
;; <sse2_avx_avx512f>_fix_notrunc pattern above.
(define_insn "fix_truncv4sfv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   ;; Legacy encoding uses the F3 (rep) prefix; irrelevant under AVX.
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   ;; Legacy encoding takes no 0x66 data16 prefix.
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "0")))
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "TI")])
5451
;; Unsigned truncating float -> int expander.  V16SF has a native
;; instruction; smaller modes are emulated: adjust the input so the
;; signed conversion is safe, convert, then XOR in the correction
;; mask produced by ix86_expand_adjust_ufix_to_sfix_si.
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1 1 "register_operand")]
  "TARGET_SSE2"
{
  if (<MODE>mode == V16SFmode)
    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
					  operands[1]));
  else
    {
      rtx tmp[3];
      tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
      tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
      emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
      emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
    }
  DONE;
})
5470
5471 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5472 ;;
5473 ;; Parallel double-precision floating point conversion operations
5474 ;;
5475 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5476
;; V2SI -> V2DF conversion: SSE cvtdq2pd on an SSE register, or the
;; legacy MMX cvtpi2pd.
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=v,?!x")
	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,yBm")))]
  "TARGET_SSE2"
  "@
   %vcvtdq2pd\t{%1, %0|%0, %1}
   cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "mmx_isa" "*,native")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "*,mmx")
   (set_attr "prefix_data16" "*,1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])

;; Standard-named expander mapping onto the insn above when MMX
;; registers are emulated with SSE.
(define_expand "floatv2siv2df2"
  [(set (match_operand:V2DF 0 "register_operand")
	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand")))]
  "TARGET_MMX_WITH_SSE")

;; Unsigned V2SI -> V2DF, AVX512VL only (vcvtudq2pd).
(define_insn "floatunsv2siv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
	(unsigned_float:V2DF
	  (match_operand:V2SI 1 "nonimmediate_operand" "vm")))]
  "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
  "vcvtudq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V2DF")])
5505
;; V2DF -> V2SI with rounding: SSE cvtpd2dq (vcvtpd2dqx under AVX to
;; force the 128-bit form) or legacy MMX cvtpd2pi.
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
	(unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "@
   * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
   cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "mmx_isa" "*,native")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "*,mmx")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "*,1")
   (set_attr "prefix" "maybe_vex,*")
   (set_attr "mode" "TI")])

;; Truncating variant: cvttpd2dq / cvttpd2pi.
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
	(fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
  "TARGET_SSE2"
  "@
   * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
   cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "mmx_isa" "*,native")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "*,mmx")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "*,1")
   (set_attr "prefix" "maybe_vex,*")
   (set_attr "mode" "TI")])

;; Standard-named expander for the truncating form under MMX-with-SSE.
(define_expand "fix_truncv2dfv2si2"
  [(set (match_operand:V2SI 0 "register_operand")
	(fix:V2SI (match_operand:V2DF 1 "vector_operand")))]
  "TARGET_MMX_WITH_SSE")

;; Unsigned truncating V2DF -> V2SI, AVX512VL only.
(define_insn "fixuns_truncv2dfv2si2"
  [(set (match_operand:V2SI 0 "register_operand" "=v")
	(unsigned_fix:V2SI
	  (match_operand:V2DF 1 "nonimmediate_operand" "vm")))]
  "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
  "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
5555
;; cvtsi2sd: 32-bit integer to DF merged into the low element of a
;; V2DF; the high lane passes through from operand 1.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
	  (match_operand:V2DF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsi2sd{l}\t{%2, %0|%0, %2}
   cvtsi2sd{l}\t{%2, %0|%0, %2}
   vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "znver1_decode" "double,double,double")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "DF")])

;; 64-bit source variant (cvtsi2sdq) with optional rounding control;
;; 64-bit target only.
(define_insn "sse2_cvtsi2sdq<round_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
	  (match_operand:V2DF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "@
   cvtsi2sd{q}\t{%2, %0|%0, %2}
   cvtsi2sd{q}\t{%2, %0|%0, %2}
   vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "DF")])
5599
;; AVX512F scalar float -> unsigned integer conversions.  Rounding
;; forms use UNSPEC_UNSIGNED_FIX_NOTRUNC; truncating forms use
;; unsigned_fix with optional SAE.
(define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unspec:SWI48
	  [(vec_select:SF
	     (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; Truncating SF -> unsigned int.
(define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unsigned_fix:SWI48
	  (vec_select:SF
	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; Rounding DF -> unsigned int.
(define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unspec:SWI48
	  [(vec_select:DF
	     (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; Truncating DF -> unsigned int.
(define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unsigned_fix:SWI48
	  (vec_select:DF
	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
5649
;; cvtsd2si: low DF element of a V2DF to a 32/64-bit integer with
;; rounding, optional rounding control.
(define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48
	  [(vec_select:DF
	     (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "btver2_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Same conversion applied directly to a scalar DF operand.
(define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
		      UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Truncating variant (cvttsd2si), with optional SAE.
(define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(fix:SWI48
	  (vec_select:DF
	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "btver2_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
5697
;; For float<si2dfmode><mode>2 insn pattern
(define_mode_attr si2dfmode
  [(V8DF "V8SI") (V4DF "V4SI")])
(define_mode_attr si2dfmodelower
  [(V8DF "v8si") (V4DF "v4si")])

;; Signed int vector -> double vector (vcvtdq2pd), 256/512-bit,
;; optional AVX512 masking.
(define_insn "float<si2dfmodelower><mode>2<mask_name>"
  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
	(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
  "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; 64-bit int vector -> double vector, signed or unsigned
;; (vcvtqq2pd / vcvtuqq2pd), AVX512DQ.
(define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
	(any_float:VF2_AVX512VL
	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
  "TARGET_AVX512DQ"
  "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
5722
;; For float<floatunssuffix><sselondveclower><mode> insn patterns
(define_mode_attr qq2pssuff
  [(V8SF "") (V4SF "{y}")])

(define_mode_attr sselongvecmode
  [(V8SF "V8DI") (V4SF "V4DI")])

(define_mode_attr sselongvecmodelower
  [(V8SF "v8di") (V4SF "v4di")])

(define_mode_attr sseintvecmode3
  [(V8SF "XI") (V4SF "OI")
   (V8DF "OI") (V4DF "TI")])

;; 64-bit int vector -> float vector (vcvtqq2ps / vcvtuqq2ps); the
;; {y} suffix disambiguates the narrowing V4DI form.
(define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
	(any_float:VF1_128_256VL
	  (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
  "TARGET_AVX512DQ && <round_modev8sf_condition>"
  "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
5746
;; V2DI -> V2SF conversions; the hardware result is a V4SF whose
;; upper half is zero, so the expander materializes the zero vector.
(define_expand "avx512dq_float<floatunssuffix>v2div2sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_concat:V4SF
	  (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
	  (match_dup 2)))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "operands[2] = CONST0_RTX (V2SFmode);")

;; Matching insn: vcvtqq2ps/vcvtuqq2ps, 128-bit ({x}) form.
(define_insn "*avx512dq_float<floatunssuffix>v2div2sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_concat:V4SF
	  (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
	  (match_operand:V2SF 2 "const0_operand" "C")))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])

;; Standard-named V2SF expander: retarget the V2SF destination as the
;; low half of a V4SF subreg and use the pattern above.
(define_expand "float<floatunssuffix>v2div2sf2"
  [(set (match_operand:V2SF 0 "register_operand")
	(any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
{
  operands[0] = simplify_gen_subreg (V4SFmode, operands[0], V2SFmode, 0);
  emit_insn (gen_avx512dq_float<floatunssuffix>v2div2sf2
	     (operands[0], operands[1]));
  DONE;
})
5776
;; Helper mode attrs for vec_pack<floatprefix>_float_<mode>.
(define_mode_attr vpckfloat_concat_mode
  [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
(define_mode_attr vpckfloat_temp_mode
  [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
(define_mode_attr vpckfloat_op_mode
  [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])

;; Convert two 64-bit-int vectors to floats and pack the results into
;; one wider float vector.  V2DI uses the zero-extending v2div2sf
;; pattern and combines halves with movlhps; wider modes use the
;; plain conversions plus vec_concat.
(define_expand "vec_pack<floatprefix>_float_<mode>"
  [(match_operand:<ssePSmode> 0 "register_operand")
   (any_float:<ssePSmode>
     (match_operand:VI8_AVX512VL 1 "register_operand"))
   (match_operand:VI8_AVX512VL 2 "register_operand")]
  "TARGET_AVX512DQ"
{
  rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
  rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
  rtx (*gen) (rtx, rtx);

  if (<MODE>mode == V2DImode)
    gen = gen_avx512dq_float<floatunssuffix>v2div2sf2;
  else
    gen = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
  emit_insn (gen (r1, operands[1]));
  emit_insn (gen (r2, operands[2]));
  if (<MODE>mode == V2DImode)
    emit_insn (gen_sse_movlhps (operands[0], r1, r2));
  else
    emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
							  r1, r2));
  DONE;
})
5808
;; Masked V2DI -> V2SF conversions.  Merge-masking form: masked-off
;; lanes come from the low half of operand 2; the upper V2SF half of
;; the V4SF result is zero.
(define_expand "float<floatunssuffix>v2div2sf2_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_concat:V4SF
	  (vec_merge:V2SF
	    (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
	    (vec_select:V2SF
	      (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
	      (parallel [(const_int 0) (const_int 1)]))
	    (match_operand:QI 3 "register_operand" "Yk"))
	  (match_dup 4)))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "operands[4] = CONST0_RTX (V2SFmode);")

;; Matching insn for the merge-masking expand above.
(define_insn "*float<floatunssuffix>v2div2sf2_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_concat:V4SF
	  (vec_merge:V2SF
	    (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
	    (vec_select:V2SF
	      (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
	      (parallel [(const_int 0) (const_int 1)]))
	    (match_operand:QI 3 "register_operand" "Yk"))
	  (match_operand:V2SF 4 "const0_operand" "C")))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])

;; Zero-masking form: masked-off lanes are zeroed ({z}).
(define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_concat:V4SF
	  (vec_merge:V2SF
	    (any_float:V2SF (match_operand:V2DI 1
			     "nonimmediate_operand" "vm"))
	    (match_operand:V2SF 3 "const0_operand" "C")
	    (match_operand:QI 2 "register_operand" "Yk"))
	  (match_operand:V2SF 4 "const0_operand" "C")))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])
5852
;; Unsigned int vector -> double vector (vcvtudq2pd), 256/512-bit.
(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
  [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
	(unsigned_float:VF2_512_256VL
	  (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; 128-bit form reading the two low SI lanes of a V4SI.
(define_insn "ufloatv2siv2df2<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
	(unsigned_float:V2DF
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_AVX512VL"
  "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V2DF")])
5874
;; vcvtdq2pd variants whose source is the low half of a wider integer
;; vector (the "_2" forms): V16SI->V8DF, V8SI->V4DF, V4SI->V2DF.
(define_insn "avx512f_cvtdq2pd512_2"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
	(float:V8DF
	  (vec_select:V8SI
	    (match_operand:V16SI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vcvtdq2pd\t{%t1, %0|%0, %t1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8DF")])

;; 256-bit form: low V4SI half of a V8SI.
(define_insn "avx_cvtdq2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=v")
	(float:V4DF
	  (vec_select:V4SI
	    (match_operand:V8SI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4DF")])

;; 128-bit form: low V2SI half of a V4SI, optional masking.
(define_insn "sse2_cvtdq2pd<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
	(float:V2DF
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])
5914
;; Rounding double -> int conversions, narrowing (vcvtpd2dq).
(define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(unspec:V8SI
	  [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "OI")])

;; 256-bit source; the {y} suffix selects the V4DF form.
(define_insn "avx_cvtpd2dq256<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])

;; "_2" form producing a V8SI whose upper half is zero.
(define_expand "avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand")
	(vec_concat:V8SI
	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")

;; Matching insn: writing the xmm low half zeroes the upper lanes.
(define_insn "*avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_concat:V8SI
	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_operand:V4SI 2 "const0_operand")))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
5957
;; 128-bit non-truncating pd->dq; the hardware zeroes the high 64 bits of
;; the destination, modeled here by concatenating a V2SI zero vector.
5958 (define_insn "sse2_cvtpd2dq"
5959 [(set (match_operand:V4SI 0 "register_operand" "=v")
5960 (vec_concat:V4SI
5961 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5962 UNSPEC_FIX_NOTRUNC)
5963 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5964 "TARGET_SSE2"
5965 {
5966 if (TARGET_AVX)
5967 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
5968 else
5969 return "cvtpd2dq\t{%1, %0|%0, %1}";
5970 }
5971 [(set_attr "type" "ssecvt")
5972 (set_attr "prefix_rep" "1")
5973 (set_attr "prefix_data16" "0")
5974 (set_attr "prefix" "maybe_vex")
5975 (set_attr "mode" "TI")
5976 (set_attr "amdfam10_decode" "double")
5977 (set_attr "athlon_decode" "vector")
5978 (set_attr "bdver1_decode" "double")])
5979
;; Merge-masking variant: unconverted lanes come from the low half of
;; operand 2 ("0C" allows either the old destination or zero-masking).
5980 (define_insn "sse2_cvtpd2dq_mask"
5981 [(set (match_operand:V4SI 0 "register_operand" "=v")
5982 (vec_concat:V4SI
5983 (vec_merge:V2SI
5984 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5985 UNSPEC_FIX_NOTRUNC)
5986 (vec_select:V2SI
5987 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
5988 (parallel [(const_int 0) (const_int 1)]))
5989 (match_operand:QI 3 "register_operand" "Yk"))
5990 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5991 "TARGET_AVX512VL"
5992 "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5993 [(set_attr "type" "ssecvt")
5994 (set_attr "prefix" "evex")
5995 (set_attr "mode" "TI")])
5996
;; Explicit zero-masking variant (unselected lanes forced to zero, %{z%}).
5997 (define_insn "*sse2_cvtpd2dq_mask_1"
5998 [(set (match_operand:V4SI 0 "register_operand" "=v")
5999 (vec_concat:V4SI
6000 (vec_merge:V2SI
6001 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6002 UNSPEC_FIX_NOTRUNC)
6003 (const_vector:V2SI [(const_int 0) (const_int 0)])
6004 (match_operand:QI 2 "register_operand" "Yk"))
6005 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6006 "TARGET_AVX512VL"
6007 "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6008 [(set_attr "type" "ssecvt")
6009 (set_attr "prefix" "evex")
6010 (set_attr "mode" "TI")])
6011
6012 ;; For ufix_notrunc* insn patterns
;; Suffix selecting the {y} size marker for the 256-bit (V4DF) form only.
6013 (define_mode_attr pd2udqsuff
6014 [(V8DF "") (V4DF "{y}")])
6015
;; Non-truncating double -> unsigned int conversions (vcvtpd2udq),
;; 512/256-bit sources via the VF2_512_256VL iterator.
6016 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
6017 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
6018 (unspec:<si2dfmode>
6019 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
6020 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6021 "TARGET_AVX512F"
6022 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6023 [(set_attr "type" "ssecvt")
6024 (set_attr "prefix" "evex")
6025 (set_attr "mode" "<sseinsnmode>")])
6026
;; 128-bit form: result occupies the low V2SI half, high half zeroed.
6027 (define_insn "ufix_notruncv2dfv2si2"
6028 [(set (match_operand:V4SI 0 "register_operand" "=v")
6029 (vec_concat:V4SI
6030 (unspec:V2SI
6031 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6032 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6033 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6034 "TARGET_AVX512VL"
6035 "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
6036 [(set_attr "type" "ssecvt")
6037 (set_attr "prefix" "evex")
6038 (set_attr "mode" "TI")])
6039
;; Merge-masked 128-bit form; masked-off lanes taken from operand 2.
6040 (define_insn "ufix_notruncv2dfv2si2_mask"
6041 [(set (match_operand:V4SI 0 "register_operand" "=v")
6042 (vec_concat:V4SI
6043 (vec_merge:V2SI
6044 (unspec:V2SI
6045 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6046 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6047 (vec_select:V2SI
6048 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6049 (parallel [(const_int 0) (const_int 1)]))
6050 (match_operand:QI 3 "register_operand" "Yk"))
6051 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6052 "TARGET_AVX512VL"
6053 "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6054 [(set_attr "type" "ssecvt")
6055 (set_attr "prefix" "evex")
6056 (set_attr "mode" "TI")])
6057
;; Zero-masked 128-bit form.
6058 (define_insn "*ufix_notruncv2dfv2si2_mask_1"
6059 [(set (match_operand:V4SI 0 "register_operand" "=v")
6060 (vec_concat:V4SI
6061 (vec_merge:V2SI
6062 (unspec:V2SI
6063 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6064 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6065 (const_vector:V2SI [(const_int 0) (const_int 0)])
6066 (match_operand:QI 2 "register_operand" "Yk"))
6067 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6068 "TARGET_AVX512VL"
6069 "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6070 [(set_attr "type" "ssecvt")
6071 (set_attr "prefix" "evex")
6072 (set_attr "mode" "TI")])
6073
;; Truncating double -> (un)signed int conversions (vcvttpd2(u)dq).
;; any_fix iterates over signed fix and unsigned_fix; <fixsuffix> adds "u".
6074 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
6075 [(set (match_operand:V8SI 0 "register_operand" "=v")
6076 (any_fix:V8SI
6077 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6078 "TARGET_AVX512F"
6079 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6080 [(set_attr "type" "ssecvt")
6081 (set_attr "prefix" "evex")
6082 (set_attr "mode" "OI")])
6083
;; 128-bit unsigned truncating form; high half of destination zeroed.
6084 (define_insn "ufix_truncv2dfv2si2"
6085 [(set (match_operand:V4SI 0 "register_operand" "=v")
6086 (vec_concat:V4SI
6087 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6088 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6089 "TARGET_AVX512VL"
6090 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
6091 [(set_attr "type" "ssecvt")
6092 (set_attr "prefix" "evex")
6093 (set_attr "mode" "TI")])
6094
;; Merge-masked variant of the above.
6095 (define_insn "ufix_truncv2dfv2si2_mask"
6096 [(set (match_operand:V4SI 0 "register_operand" "=v")
6097 (vec_concat:V4SI
6098 (vec_merge:V2SI
6099 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6100 (vec_select:V2SI
6101 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6102 (parallel [(const_int 0) (const_int 1)]))
6103 (match_operand:QI 3 "register_operand" "Yk"))
6104 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6105 "TARGET_AVX512VL"
6106 "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6107 [(set_attr "type" "ssecvt")
6108 (set_attr "prefix" "evex")
6109 (set_attr "mode" "TI")])
6110
;; Zero-masked variant.
6111 (define_insn "*ufix_truncv2dfv2si2_mask_1"
6112 [(set (match_operand:V4SI 0 "register_operand" "=v")
6113 (vec_concat:V4SI
6114 (vec_merge:V2SI
6115 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6116 (const_vector:V2SI [(const_int 0) (const_int 0)])
6117 (match_operand:QI 2 "register_operand" "Yk"))
6118 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6119 "TARGET_AVX512VL"
6120 "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6121 [(set_attr "type" "ssecvt")
6122 (set_attr "prefix" "evex")
6123 (set_attr "mode" "TI")])
6124
;; 256-bit signed truncating form; available with plain AVX too.
6125 (define_insn "fix_truncv4dfv4si2<mask_name>"
6126 [(set (match_operand:V4SI 0 "register_operand" "=v")
6127 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6128 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
6129 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6130 [(set_attr "type" "ssecvt")
6131 (set_attr "prefix" "maybe_evex")
6132 (set_attr "mode" "OI")])
6133
;; 256-bit unsigned truncating form; AVX512VL only.
6134 (define_insn "ufix_truncv4dfv4si2<mask_name>"
6135 [(set (match_operand:V4SI 0 "register_operand" "=v")
6136 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6137 "TARGET_AVX512VL && TARGET_AVX512F"
6138 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6139 [(set_attr "type" "ssecvt")
6140 (set_attr "prefix" "maybe_evex")
6141 (set_attr "mode" "OI")])
6142
;; AVX512DQ 64-bit-element conversions: double/float <-> (u)int64
;; (vcvt(t)pd2(u)qq, vcvt(t)ps2(u)qq).
6143 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
6144 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6145 (any_fix:<sseintvecmode>
6146 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6147 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
6148 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6149 [(set_attr "type" "ssecvt")
6150 (set_attr "prefix" "evex")
6151 (set_attr "mode" "<sseintvecmode2>")])
6152
;; Non-truncating signed pd -> qq.
6153 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6154 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6155 (unspec:<sseintvecmode>
6156 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
6157 UNSPEC_FIX_NOTRUNC))]
6158 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6159 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6160 [(set_attr "type" "ssecvt")
6161 (set_attr "prefix" "evex")
6162 (set_attr "mode" "<sseintvecmode2>")])
6163
;; Non-truncating unsigned pd -> uqq.
6164 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6165 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6166 (unspec:<sseintvecmode>
6167 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
6168 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6169 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6170 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6171 [(set_attr "type" "ssecvt")
6172 (set_attr "prefix" "evex")
6173 (set_attr "mode" "<sseintvecmode2>")])
6174
;; Truncating float -> (u)int64, widening element count 1:2.
6175 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
6176 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
6177 (any_fix:<sselongvecmode>
6178 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6179 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
6180 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6181 [(set_attr "type" "ssecvt")
6182 (set_attr "prefix" "evex")
6183 (set_attr "mode" "<sseintvecmode3>")])
6184
;; Low two SFmode elements of a V4SF -> V2DI.
6185 (define_insn "avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
6186 [(set (match_operand:V2DI 0 "register_operand" "=v")
6187 (any_fix:V2DI
6188 (vec_select:V2SF
6189 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6190 (parallel [(const_int 0) (const_int 1)]))))]
6191 "TARGET_AVX512DQ && TARGET_AVX512VL"
6192 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6193 [(set_attr "type" "ssecvt")
6194 (set_attr "prefix" "evex")
6195 (set_attr "mode" "TI")])
6196
;; V2SF entry point: widen the V2SF subreg to V4SF and reuse the pattern
;; above (the upper two lanes are don't-care).
6197 (define_expand "fix<fixunssuffix>_truncv2sfv2di2"
6198 [(set (match_operand:V2DI 0 "register_operand")
6199 (any_fix:V2DI
6200 (match_operand:V2SF 1 "register_operand")))]
6201 "TARGET_AVX512DQ && TARGET_AVX512VL"
6202 {
6203 operands[1] = simplify_gen_subreg (V4SFmode, operands[1], V2SFmode, 0);
6204 emit_insn (gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2
6205 (operands[0], operands[1]));
6206 DONE;
6207 })
6208
;; Helper mode attributes for the vec_unpack_*fix_trunc expanders below:
;; result mode, its lowercase spelling, and the mode used for the
;; half-vector extraction of the source.
6209 (define_mode_attr vunpckfixt_mode
6210 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
6211 (define_mode_attr vunpckfixt_model
6212 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
6213 (define_mode_attr vunpckfixt_extract_mode
6214 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
6215
;; Convert the low half of a float vector to a double-width integer
;; vector.  For V4SF the dedicated low-half insn is used directly;
;; otherwise the low half is extracted first, then converted.
6216 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
6217 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6218 (any_fix:<vunpckfixt_mode>
6219 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6220 "TARGET_AVX512DQ"
6221 {
6222 rtx tem = operands[1];
6223 rtx (*gen) (rtx, rtx);
6224
6225 if (<MODE>mode != V4SFmode)
6226 {
6227 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6228 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
6229 operands[1]));
6230 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6231 }
6232 else
6233 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6234
6235 emit_insn (gen (operands[0], tem));
6236 DONE;
6237 })
6238
;; Same for the high half.  The V4SF case has no high-half extract insn,
;; so vpermilps with selector 0x4e swaps the halves first.
6239 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6240 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6241 (any_fix:<vunpckfixt_mode>
6242 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6243 "TARGET_AVX512DQ"
6244 {
6245 rtx tem;
6246 rtx (*gen) (rtx, rtx);
6247
6248 if (<MODE>mode != V4SFmode)
6249 {
6250 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6251 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6252 operands[1]));
6253 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6254 }
6255 else
6256 {
6257 tem = gen_reg_rtx (V4SFmode);
6258 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6259 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6260 }
6261
6262 emit_insn (gen (operands[0], tem));
6263 DONE;
6264 })
6265
;; Truncating float -> unsigned int (vcvttps2udq), 128/256-bit sources.
6266 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6267 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6268 (unsigned_fix:<sseintvecmode>
6269 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6270 "TARGET_AVX512VL"
6271 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6272 [(set_attr "type" "ssecvt")
6273 (set_attr "prefix" "evex")
6274 (set_attr "mode" "<sseintvecmode2>")])
6275
;; Expander forming a V8SI from a truncating V4DF conversion plus a
;; zeroed high half.
6276 (define_expand "avx_cvttpd2dq256_2"
6277 [(set (match_operand:V8SI 0 "register_operand")
6278 (vec_concat:V8SI
6279 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6280 (match_dup 2)))]
6281 "TARGET_AVX"
6282 "operands[2] = CONST0_RTX (V4SImode);")
6283
;; 128-bit truncating pd -> dq (cvttpd2dq); destination high half zeroed.
6284 (define_insn "sse2_cvttpd2dq"
6285 [(set (match_operand:V4SI 0 "register_operand" "=v")
6286 (vec_concat:V4SI
6287 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6288 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6289 "TARGET_SSE2"
6290 {
6291 if (TARGET_AVX)
6292 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
6293 else
6294 return "cvttpd2dq\t{%1, %0|%0, %1}";
6295 }
6296 [(set_attr "type" "ssecvt")
6297 (set_attr "amdfam10_decode" "double")
6298 (set_attr "athlon_decode" "vector")
6299 (set_attr "bdver1_decode" "double")
6300 (set_attr "prefix" "maybe_vex")
6301 (set_attr "mode" "TI")])
6302
;; Merge-masked variant; masked-off lanes from the low half of operand 2.
6303 (define_insn "sse2_cvttpd2dq_mask"
6304 [(set (match_operand:V4SI 0 "register_operand" "=v")
6305 (vec_concat:V4SI
6306 (vec_merge:V2SI
6307 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6308 (vec_select:V2SI
6309 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6310 (parallel [(const_int 0) (const_int 1)]))
6311 (match_operand:QI 3 "register_operand" "Yk"))
6312 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6313 "TARGET_AVX512VL"
6314 "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6315 [(set_attr "type" "ssecvt")
6316 (set_attr "prefix" "evex")
6317 (set_attr "mode" "TI")])
6318
;; Zero-masked variant.
6319 (define_insn "*sse2_cvttpd2dq_mask_1"
6320 [(set (match_operand:V4SI 0 "register_operand" "=v")
6321 (vec_concat:V4SI
6322 (vec_merge:V2SI
6323 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6324 (const_vector:V2SI [(const_int 0) (const_int 0)])
6325 (match_operand:QI 2 "register_operand" "Yk"))
6326 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6327 "TARGET_AVX512VL"
6328 "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6329 [(set_attr "type" "ssecvt")
6330 (set_attr "prefix" "evex")
6331 (set_attr "mode" "TI")])
6332
;; Scalar double <-> single conversions.  Result element 0 carries the
;; converted value; remaining elements pass through from operand 1
;; (vec_merge with mask (const_int 1)).
6333 (define_insn "sse2_cvtsd2ss<mask_name><round_name>"
6334 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6335 (vec_merge:V4SF
6336 (vec_duplicate:V4SF
6337 (float_truncate:V2SF
6338 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6339 (match_operand:V4SF 1 "register_operand" "0,0,v")
6340 (const_int 1)))]
6341 "TARGET_SSE2"
6342 "@
6343 cvtsd2ss\t{%2, %0|%0, %2}
6344 cvtsd2ss\t{%2, %0|%0, %q2}
6345 vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}"
6346 [(set_attr "isa" "noavx,noavx,avx")
6347 (set_attr "type" "ssecvt")
6348 (set_attr "athlon_decode" "vector,double,*")
6349 (set_attr "amdfam10_decode" "vector,double,*")
6350 (set_attr "bdver1_decode" "direct,direct,*")
6351 (set_attr "btver2_decode" "double,double,double")
6352 (set_attr "prefix" "orig,orig,<round_prefix>")
6353 (set_attr "mode" "SF")])
6354
;; Variant matching a scalar DFmode source directly.
6355 (define_insn "*sse2_vd_cvtsd2ss"
6356 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6357 (vec_merge:V4SF
6358 (vec_duplicate:V4SF
6359 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6360 (match_operand:V4SF 1 "register_operand" "0,0,v")
6361 (const_int 1)))]
6362 "TARGET_SSE2"
6363 "@
6364 cvtsd2ss\t{%2, %0|%0, %2}
6365 cvtsd2ss\t{%2, %0|%0, %2}
6366 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6367 [(set_attr "isa" "noavx,noavx,avx")
6368 (set_attr "type" "ssecvt")
6369 (set_attr "athlon_decode" "vector,double,*")
6370 (set_attr "amdfam10_decode" "vector,double,*")
6371 (set_attr "bdver1_decode" "direct,direct,*")
6372 (set_attr "btver2_decode" "double,double,double")
6373 (set_attr "prefix" "orig,orig,vex")
6374 (set_attr "mode" "SF")])
6375
;; Single -> double scalar conversion (cvtss2sd), same merge scheme.
6376 (define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>"
6377 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6378 (vec_merge:V2DF
6379 (float_extend:V2DF
6380 (vec_select:V2SF
6381 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6382 (parallel [(const_int 0) (const_int 1)])))
6383 (match_operand:V2DF 1 "register_operand" "0,0,v")
6384 (const_int 1)))]
6385 "TARGET_SSE2"
6386 "@
6387 cvtss2sd\t{%2, %0|%0, %2}
6388 cvtss2sd\t{%2, %0|%0, %k2}
6389 vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}"
6390 [(set_attr "isa" "noavx,noavx,avx")
6391 (set_attr "type" "ssecvt")
6392 (set_attr "amdfam10_decode" "vector,double,*")
6393 (set_attr "athlon_decode" "direct,direct,*")
6394 (set_attr "bdver1_decode" "direct,direct,*")
6395 (set_attr "btver2_decode" "double,double,double")
6396 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6397 (set_attr "mode" "DF")])
6398
;; Variant matching a scalar SFmode source directly.
6399 (define_insn "*sse2_vd_cvtss2sd"
6400 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6401 (vec_merge:V2DF
6402 (vec_duplicate:V2DF
6403 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6404 (match_operand:V2DF 1 "register_operand" "0,0,v")
6405 (const_int 1)))]
6406 "TARGET_SSE2"
6407 "@
6408 cvtss2sd\t{%2, %0|%0, %2}
6409 cvtss2sd\t{%2, %0|%0, %2}
6410 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6411 [(set_attr "isa" "noavx,noavx,avx")
6412 (set_attr "type" "ssecvt")
6413 (set_attr "amdfam10_decode" "vector,double,*")
6414 (set_attr "athlon_decode" "direct,direct,*")
6415 (set_attr "bdver1_decode" "direct,direct,*")
6416 (set_attr "btver2_decode" "double,double,double")
6417 (set_attr "prefix" "orig,orig,vex")
6418 (set_attr "mode" "DF")])
6419
;; Packed double -> single truncating conversions (vcvtpd2ps).
6420 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6421 [(set (match_operand:V8SF 0 "register_operand" "=v")
6422 (float_truncate:V8SF
6423 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6424 "TARGET_AVX512F"
6425 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6426 [(set_attr "type" "ssecvt")
6427 (set_attr "prefix" "evex")
6428 (set_attr "mode" "V8SF")])
6429
;; 256-bit source; {y} disambiguates operand size in AT&T syntax.
6430 (define_insn "avx_cvtpd2ps256<mask_name>"
6431 [(set (match_operand:V4SF 0 "register_operand" "=v")
6432 (float_truncate:V4SF
6433 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6434 "TARGET_AVX && <mask_avx512vl_condition>"
6435 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6436 [(set_attr "type" "ssecvt")
6437 (set_attr "prefix" "maybe_evex")
6438 (set_attr "btver2_decode" "vector")
6439 (set_attr "mode" "V4SF")])
6440
;; 128-bit expander: converted V2SF pair goes to the low half, high half
;; zeroed (matches the hardware's zeroing of the upper 64 bits).
6441 (define_expand "sse2_cvtpd2ps"
6442 [(set (match_operand:V4SF 0 "register_operand")
6443 (vec_concat:V4SF
6444 (float_truncate:V2SF
6445 (match_operand:V2DF 1 "vector_operand"))
6446 (match_dup 2)))]
6447 "TARGET_SSE2"
6448 "operands[2] = CONST0_RTX (V2SFmode);")
6449
;; Masked form of the same expander.
6450 (define_expand "sse2_cvtpd2ps_mask"
6451 [(set (match_operand:V4SF 0 "register_operand")
6452 (vec_concat:V4SF
6453 (vec_merge:V2SF
6454 (float_truncate:V2SF
6455 (match_operand:V2DF 1 "vector_operand"))
6456 (vec_select:V2SF
6457 (match_operand:V4SF 2 "nonimm_or_0_operand")
6458 (parallel [(const_int 0) (const_int 1)]))
6459 (match_operand:QI 3 "register_operand"))
6460 (match_dup 4)))]
6461 "TARGET_SSE2"
6462 "operands[4] = CONST0_RTX (V2SFmode);")
6463
;; Insn matched by the sse2_cvtpd2ps expander above.
6464 (define_insn "*sse2_cvtpd2ps"
6465 [(set (match_operand:V4SF 0 "register_operand" "=v")
6466 (vec_concat:V4SF
6467 (float_truncate:V2SF
6468 (match_operand:V2DF 1 "vector_operand" "vBm"))
6469 (match_operand:V2SF 2 "const0_operand" "C")))]
6470 "TARGET_SSE2"
6471 {
6472 if (TARGET_AVX)
6473 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6474 else
6475 return "cvtpd2ps\t{%1, %0|%0, %1}";
6476 }
6477 [(set_attr "type" "ssecvt")
6478 (set_attr "amdfam10_decode" "double")
6479 (set_attr "athlon_decode" "vector")
6480 (set_attr "bdver1_decode" "double")
6481 (set_attr "prefix_data16" "1")
6482 (set_attr "prefix" "maybe_vex")
6483 (set_attr "mode" "V4SF")])
6484
;; V2DF -> V2SF for 64-bit vectors mapped onto SSE registers
;; (TARGET_MMX_WITH_SSE); same cvtpd2ps instruction as above.
6485 (define_insn "truncv2dfv2sf2"
6486 [(set (match_operand:V2SF 0 "register_operand" "=v")
6487 (float_truncate:V2SF
6488 (match_operand:V2DF 1 "vector_operand" "vBm")))]
6489 "TARGET_MMX_WITH_SSE"
6490 {
6491 if (TARGET_AVX)
6492 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6493 else
6494 return "cvtpd2ps\t{%1, %0|%0, %1}";
6495 }
6496 [(set_attr "type" "ssecvt")
6497 (set_attr "amdfam10_decode" "double")
6498 (set_attr "athlon_decode" "vector")
6499 (set_attr "bdver1_decode" "double")
6500 (set_attr "prefix_data16" "1")
6501 (set_attr "prefix" "maybe_vex")
6502 (set_attr "mode" "V4SF")])
6503
;; Merge-masked 128-bit pd -> ps.
6504 (define_insn "*sse2_cvtpd2ps_mask"
6505 [(set (match_operand:V4SF 0 "register_operand" "=v")
6506 (vec_concat:V4SF
6507 (vec_merge:V2SF
6508 (float_truncate:V2SF
6509 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6510 (vec_select:V2SF
6511 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6512 (parallel [(const_int 0) (const_int 1)]))
6513 (match_operand:QI 3 "register_operand" "Yk"))
6514 (match_operand:V2SF 4 "const0_operand" "C")))]
6515 "TARGET_AVX512VL"
6516 "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6517 [(set_attr "type" "ssecvt")
6518 (set_attr "prefix" "evex")
6519 (set_attr "mode" "V4SF")])
6520
;; Zero-masked 128-bit pd -> ps.
6521 (define_insn "*sse2_cvtpd2ps_mask_1"
6522 [(set (match_operand:V4SF 0 "register_operand" "=v")
6523 (vec_concat:V4SF
6524 (vec_merge:V2SF
6525 (float_truncate:V2SF
6526 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6527 (match_operand:V2SF 3 "const0_operand" "C")
6528 (match_operand:QI 2 "register_operand" "Yk"))
6529 (match_operand:V2SF 4 "const0_operand" "C")))]
6530 "TARGET_AVX512VL"
6531 "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6532 [(set_attr "type" "ssecvt")
6533 (set_attr "prefix" "evex")
6534 (set_attr "mode" "V4SF")])
6535
6536 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Single-float mode of the same element count as each double mode.
6537 (define_mode_attr sf2dfmode
6538 [(V8DF "V8SF") (V4DF "V4SF")])
6539 (define_mode_attr sf2dfmode_lower
6540 [(V8DF "v8sf") (V4DF "v4sf")])
6541
;; Standard-named truncation/extension expanders for 256/512-bit doubles.
6542 (define_expand "trunc<mode><sf2dfmode_lower>2"
6543 [(set (match_operand:<sf2dfmode> 0 "register_operand")
6544 (float_truncate:<sf2dfmode>
6545 (match_operand:VF2_512_256 1 "vector_operand")))]
6546 "TARGET_AVX")
6547
6548 (define_expand "extend<sf2dfmode_lower><mode>2"
6549 [(set (match_operand:VF2_512_256 0 "register_operand")
6550 (float_extend:VF2_512_256
6551 (match_operand:<sf2dfmode> 1 "vector_operand")))]
6552 "TARGET_AVX")
6553
;; Packed single -> double widening (vcvtps2pd), 256/512-bit results.
6554 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6555 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6556 (float_extend:VF2_512_256
6557 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6558 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6559 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6560 [(set_attr "type" "ssecvt")
6561 (set_attr "prefix" "maybe_vex")
6562 (set_attr "mode" "<MODE>")])
6563
;; Widen the low 4 SFmode elements of a V8SF to V4DF.
6564 (define_insn "*avx_cvtps2pd256_2"
6565 [(set (match_operand:V4DF 0 "register_operand" "=v")
6566 (float_extend:V4DF
6567 (vec_select:V4SF
6568 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6569 (parallel [(const_int 0) (const_int 1)
6570 (const_int 2) (const_int 3)]))))]
6571 "TARGET_AVX"
6572 "vcvtps2pd\t{%x1, %0|%0, %x1}"
6573 [(set_attr "type" "ssecvt")
6574 (set_attr "prefix" "vex")
6575 (set_attr "mode" "V4DF")])
6576
;; Widen the low 8 SFmode elements of a V16SF to V8DF.
6577 (define_insn "vec_unpacks_lo_v16sf"
6578 [(set (match_operand:V8DF 0 "register_operand" "=v")
6579 (float_extend:V8DF
6580 (vec_select:V8SF
6581 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6582 (parallel [(const_int 0) (const_int 1)
6583 (const_int 2) (const_int 3)
6584 (const_int 4) (const_int 5)
6585 (const_int 6) (const_int 7)]))))]
6586 "TARGET_AVX512F"
6587 "vcvtps2pd\t{%t1, %0|%0, %t1}"
6588 [(set_attr "type" "ssecvt")
6589 (set_attr "prefix" "evex")
6590 (set_attr "mode" "V8DF")])
6591
;; Vector -> mask-register conversions (vpmov{b,w,d,q}2m): each element's
;; sign bit becomes one mask bit.  Two copies of the pattern because the
;; byte/word forms need AVX512BW while dword/qword forms need AVX512DQ.
6592 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6593 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6594 (unspec:<avx512fmaskmode>
6595 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6596 UNSPEC_CVTINT2MASK))]
6597 "TARGET_AVX512BW"
6598 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6599 [(set_attr "prefix" "evex")
6600 (set_attr "mode" "<sseinsnmode>")])
6601
6602 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6603 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6604 (unspec:<avx512fmaskmode>
6605 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6606 UNSPEC_CVTINT2MASK))]
6607 "TARGET_AVX512DQ"
6608 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6609 [(set_attr "prefix" "evex")
6610 (set_attr "mode" "<sseinsnmode>")])
6611
;; Mask -> vector: expand to a vec_merge of all-ones and zero vectors
;; selected by the mask (matches vpmovm2* semantics).
6612 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6613 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6614 (vec_merge:VI12_AVX512VL
6615 (match_dup 2)
6616 (match_dup 3)
6617 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6618 "TARGET_AVX512BW"
6619 {
6620 operands[2] = CONSTM1_RTX (<MODE>mode);
6621 operands[3] = CONST0_RTX (<MODE>mode);
6622 })
6623
6624 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6625 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6626 (vec_merge:VI12_AVX512VL
6627 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6628 (match_operand:VI12_AVX512VL 3 "const0_operand")
6629 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6630 "TARGET_AVX512BW"
6631 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6632 [(set_attr "prefix" "evex")
6633 (set_attr "mode" "<sseinsnmode>")])
6634
;; Mask -> dword/qword vector: same vec_merge scheme as the VI12 expander
;; above.  Preparation statement uses the brace-block form for consistency
;; with that sibling pattern (the quoted-string form is equivalent legacy
;; syntax).
6635 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6636 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6637 (vec_merge:VI48_AVX512VL
6638 (match_dup 2)
6639 (match_dup 3)
6640 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6641 "TARGET_AVX512F"
6642 {
6643 operands[2] = CONSTM1_RTX (<MODE>mode);
6644 operands[3] = CONST0_RTX (<MODE>mode);
6645 })
6646
;; Insn for the VI48 mask -> vector expand.  Alternative 0 (AVX512DQ)
;; emits vpmovm2{d,q}; alternative 1 falls back to a zero-masked
;; vpternlog with immediate 0x81 when only AVX512F is available.
6647 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6648 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
6649 (vec_merge:VI48_AVX512VL
6650 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6651 (match_operand:VI48_AVX512VL 3 "const0_operand")
6652 (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
6653 "TARGET_AVX512F"
6654 "@
6655 vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
6656 vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
6657 [(set_attr "isa" "avx512dq,*")
6658 (set_attr "length_immediate" "0,1")
6659 (set_attr "prefix" "evex")
6660 (set_attr "mode" "<sseinsnmode>")])
6661
;; 128-bit single -> double widening of the low two SFmode elements.
6662 (define_insn "sse2_cvtps2pd<mask_name>"
6663 [(set (match_operand:V2DF 0 "register_operand" "=v")
6664 (float_extend:V2DF
6665 (vec_select:V2SF
6666 (match_operand:V4SF 1 "vector_operand" "vm")
6667 (parallel [(const_int 0) (const_int 1)]))))]
6668 "TARGET_SSE2 && <mask_avx512vl_condition>"
6669 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6670 [(set_attr "type" "ssecvt")
6671 (set_attr "amdfam10_decode" "direct")
6672 (set_attr "athlon_decode" "double")
6673 (set_attr "bdver1_decode" "double")
6674 (set_attr "prefix_data16" "0")
6675 (set_attr "prefix" "maybe_vex")
6676 (set_attr "mode" "V2DF")])
6677
;; V2SF -> V2DF for 64-bit vectors in SSE registers.
6678 (define_insn "extendv2sfv2df2"
6679 [(set (match_operand:V2DF 0 "register_operand" "=v")
6680 (float_extend:V2DF
6681 (match_operand:V2SF 1 "register_operand" "v")))]
6682 "TARGET_MMX_WITH_SSE"
6683 "%vcvtps2pd\t{%1, %0|%0, %1}"
6684 [(set_attr "type" "ssecvt")
6685 (set_attr "amdfam10_decode" "direct")
6686 (set_attr "athlon_decode" "double")
6687 (set_attr "bdver1_decode" "double")
6688 (set_attr "prefix_data16" "0")
6689 (set_attr "prefix" "maybe_vex")
6690 (set_attr "mode" "V2DF")])
6691
;; Widen the high half of a V4SF: first shuffle elements 6,7 (i.e. the
;; high half of operand 1 within the V8SF concat) into the low positions
;; of a scratch, then extend that scratch's low V2SF.
6692 (define_expand "vec_unpacks_hi_v4sf"
6693 [(set (match_dup 2)
6694 (vec_select:V4SF
6695 (vec_concat:V8SF
6696 (match_dup 2)
6697 (match_operand:V4SF 1 "vector_operand"))
6698 (parallel [(const_int 6) (const_int 7)
6699 (const_int 2) (const_int 3)])))
6700 (set (match_operand:V2DF 0 "register_operand")
6701 (float_extend:V2DF
6702 (vec_select:V2SF
6703 (match_dup 2)
6704 (parallel [(const_int 0) (const_int 1)]))))]
6705 "TARGET_SSE2"
6706 "operands[2] = gen_reg_rtx (V4SFmode);")
6707
;; High-half float widening for 256/512-bit sources: extract the upper
;; half into a scratch, then float_extend it.
6708 (define_expand "vec_unpacks_hi_v8sf"
6709 [(set (match_dup 2)
6710 (vec_select:V4SF
6711 (match_operand:V8SF 1 "register_operand")
6712 (parallel [(const_int 4) (const_int 5)
6713 (const_int 6) (const_int 7)])))
6714 (set (match_operand:V4DF 0 "register_operand")
6715 (float_extend:V4DF
6716 (match_dup 2)))]
6717 "TARGET_AVX"
6718 "operands[2] = gen_reg_rtx (V4SFmode);")
6719
6720 (define_expand "vec_unpacks_hi_v16sf"
6721 [(set (match_dup 2)
6722 (vec_select:V8SF
6723 (match_operand:V16SF 1 "register_operand")
6724 (parallel [(const_int 8) (const_int 9)
6725 (const_int 10) (const_int 11)
6726 (const_int 12) (const_int 13)
6727 (const_int 14) (const_int 15)])))
6728 (set (match_operand:V8DF 0 "register_operand")
6729 (float_extend:V8DF
6730 (match_dup 2)))]
6731 "TARGET_AVX512F"
6732 "operands[2] = gen_reg_rtx (V8SFmode);")
6733
;; Low-half widening needs no shuffle: extend the low elements directly.
6734 (define_expand "vec_unpacks_lo_v4sf"
6735 [(set (match_operand:V2DF 0 "register_operand")
6736 (float_extend:V2DF
6737 (vec_select:V2SF
6738 (match_operand:V4SF 1 "vector_operand")
6739 (parallel [(const_int 0) (const_int 1)]))))]
6740 "TARGET_SSE2")
6741
6742 (define_expand "vec_unpacks_lo_v8sf"
6743 [(set (match_operand:V4DF 0 "register_operand")
6744 (float_extend:V4DF
6745 (vec_select:V4SF
6746 (match_operand:V8SF 1 "nonimmediate_operand")
6747 (parallel [(const_int 0) (const_int 1)
6748 (const_int 2) (const_int 3)]))))]
6749 "TARGET_AVX")
6750
6751 (define_mode_attr sseunpackfltmode
6752 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
6753 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
6754
;; Sign-extend the high half of an integer vector into a temporary,
;; then convert that temporary to floating point.
6755 (define_expand "vec_unpacks_float_hi_<mode>"
6756 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6757 (match_operand:VI2_AVX512F 1 "register_operand")]
6758 "TARGET_SSE2"
6759 {
6760 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6761
6762 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6763 emit_insn (gen_rtx_SET (operands[0],
6764 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6765 DONE;
6766 })
6767
;; Sign-extend the low half of an integer vector into a temporary,
;; then convert that temporary to floating point.
6768 (define_expand "vec_unpacks_float_lo_<mode>"
6769 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6770 (match_operand:VI2_AVX512F 1 "register_operand")]
6771 "TARGET_SSE2"
6772 {
6773 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6774
6775 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6776 emit_insn (gen_rtx_SET (operands[0],
6777 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6778 DONE;
6779 })
6780
;; Zero-extend the high half of an integer vector into a temporary,
;; then convert that temporary to floating point.  FLOAT (signed
;; convert) is safe because the zero-extended values are non-negative.
6781 (define_expand "vec_unpacku_float_hi_<mode>"
6782 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6783 (match_operand:VI2_AVX512F 1 "register_operand")]
6784 "TARGET_SSE2"
6785 {
6786 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6787
6788 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6789 emit_insn (gen_rtx_SET (operands[0],
6790 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6791 DONE;
6792 })
6793
;; Zero-extend the low half of an integer vector into a temporary,
;; then convert that temporary to floating point.
6794 (define_expand "vec_unpacku_float_lo_<mode>"
6795 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6796 (match_operand:VI2_AVX512F 1 "register_operand")]
6797 "TARGET_SSE2"
6798 {
6799 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6800
6801 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6802 emit_insn (gen_rtx_SET (operands[0],
6803 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6804 DONE;
6805 })
6806
;; Convert the high two SI elements of a V4SI to V2DF: first shuffle
;; elements 2,3 into the low positions of a V4SI temporary (the
;; duplicated 2,3 indices just fill the unused high half), then
;; signed-convert its low half.
6807 (define_expand "vec_unpacks_float_hi_v4si"
6808 [(set (match_dup 2)
6809 (vec_select:V4SI
6810 (match_operand:V4SI 1 "vector_operand")
6811 (parallel [(const_int 2) (const_int 3)
6812 (const_int 2) (const_int 3)])))
6813 (set (match_operand:V2DF 0 "register_operand")
6814 (float:V2DF
6815 (vec_select:V2SI
6816 (match_dup 2)
6817 (parallel [(const_int 0) (const_int 1)]))))]
6818 "TARGET_SSE2"
6819 "operands[2] = gen_reg_rtx (V4SImode);")
6820
;; Signed-convert the low two SI elements of a V4SI directly to V2DF.
6821 (define_expand "vec_unpacks_float_lo_v4si"
6822 [(set (match_operand:V2DF 0 "register_operand")
6823 (float:V2DF
6824 (vec_select:V2SI
6825 (match_operand:V4SI 1 "vector_operand")
6826 (parallel [(const_int 0) (const_int 1)]))))]
6827 "TARGET_SSE2")
6828
;; Signed-convert the high four SI elements of a V8SI to V4DF via a
;; V4SI temporary holding elements 4..7.
6829 (define_expand "vec_unpacks_float_hi_v8si"
6830 [(set (match_dup 2)
6831 (vec_select:V4SI
6832 (match_operand:V8SI 1 "vector_operand")
6833 (parallel [(const_int 4) (const_int 5)
6834 (const_int 6) (const_int 7)])))
6835 (set (match_operand:V4DF 0 "register_operand")
6836 (float:V4DF
6837 (match_dup 2)))]
6838 "TARGET_AVX"
6839 "operands[2] = gen_reg_rtx (V4SImode);")
6840
;; Signed-convert the low four SI elements of a V8SI directly to V4DF.
6841 (define_expand "vec_unpacks_float_lo_v8si"
6842 [(set (match_operand:V4DF 0 "register_operand")
6843 (float:V4DF
6844 (vec_select:V4SI
6845 (match_operand:V8SI 1 "nonimmediate_operand")
6846 (parallel [(const_int 0) (const_int 1)
6847 (const_int 2) (const_int 3)]))))]
6848 "TARGET_AVX")
6849
;; Signed-convert the high eight SI elements of a V16SI to V8DF via a
;; V8SI temporary holding elements 8..15.
6850 (define_expand "vec_unpacks_float_hi_v16si"
6851 [(set (match_dup 2)
6852 (vec_select:V8SI
6853 (match_operand:V16SI 1 "nonimmediate_operand")
6854 (parallel [(const_int 8) (const_int 9)
6855 (const_int 10) (const_int 11)
6856 (const_int 12) (const_int 13)
6857 (const_int 14) (const_int 15)])))
6858 (set (match_operand:V8DF 0 "register_operand")
6859 (float:V8DF
6860 (match_dup 2)))]
6861 "TARGET_AVX512F"
6862 "operands[2] = gen_reg_rtx (V8SImode);")
6863
;; Signed-convert the low eight SI elements of a V16SI directly to V8DF.
6864 (define_expand "vec_unpacks_float_lo_v16si"
6865 [(set (match_operand:V8DF 0 "register_operand")
6866 (float:V8DF
6867 (vec_select:V8SI
6868 (match_operand:V16SI 1 "nonimmediate_operand")
6869 (parallel [(const_int 0) (const_int 1)
6870 (const_int 2) (const_int 3)
6871 (const_int 4) (const_int 5)
6872 (const_int 6) (const_int 7)]))))]
6873 "TARGET_AVX512F")
6874
;; Unsigned SI -> DF conversion of the high two elements of a V4SI.
;; There is no SSE2 unsigned convert, so do a signed convert and then
;; fix it up: where the signed result came out negative (i.e. the
;; source had bit 31 set), add 2^32 (operand 4).  Operand 3 is a zero
;; vector for the LT compare; operands 5..8 are scratch registers.
6875 (define_expand "vec_unpacku_float_hi_v4si"
6876 [(set (match_dup 5)
6877 (vec_select:V4SI
6878 (match_operand:V4SI 1 "vector_operand")
6879 (parallel [(const_int 2) (const_int 3)
6880 (const_int 2) (const_int 3)])))
6881 (set (match_dup 6)
6882 (float:V2DF
6883 (vec_select:V2SI
6884 (match_dup 5)
6885 (parallel [(const_int 0) (const_int 1)]))))
6886 (set (match_dup 7)
6887 (lt:V2DF (match_dup 6) (match_dup 3)))
6888 (set (match_dup 8)
6889 (and:V2DF (match_dup 7) (match_dup 4)))
6890 (set (match_operand:V2DF 0 "register_operand")
6891 (plus:V2DF (match_dup 6) (match_dup 8)))]
6892 "TARGET_SSE2"
6893 {
6894 REAL_VALUE_TYPE TWO32r;
6895 rtx x;
6896 int i;
6897
6898 real_ldexp (&TWO32r, &dconst1, 32);
6899 x = const_double_from_real_value (TWO32r, DFmode);
6900
6901 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6902 operands[4] = force_reg (V2DFmode,
6903 ix86_build_const_vector (V2DFmode, 1, x));
6904
6905 operands[5] = gen_reg_rtx (V4SImode);
6906
6907 for (i = 6; i < 9; i++)
6908 operands[i] = gen_reg_rtx (V2DFmode);
6909 })
6910
;; Unsigned SI -> DF conversion of the low two elements of a V4SI:
;; signed convert, then add 2^32 (operand 4) wherever the result is
;; negative, selected by an LT-against-zero mask (operand 3 is zero).
6911 (define_expand "vec_unpacku_float_lo_v4si"
6912 [(set (match_dup 5)
6913 (float:V2DF
6914 (vec_select:V2SI
6915 (match_operand:V4SI 1 "vector_operand")
6916 (parallel [(const_int 0) (const_int 1)]))))
6917 (set (match_dup 6)
6918 (lt:V2DF (match_dup 5) (match_dup 3)))
6919 (set (match_dup 7)
6920 (and:V2DF (match_dup 6) (match_dup 4)))
6921 (set (match_operand:V2DF 0 "register_operand")
6922 (plus:V2DF (match_dup 5) (match_dup 7)))]
6923 "TARGET_SSE2"
6924 {
6925 REAL_VALUE_TYPE TWO32r;
6926 rtx x;
6927 int i;
6928
6929 real_ldexp (&TWO32r, &dconst1, 32);
6930 x = const_double_from_real_value (TWO32r, DFmode);
6931
6932 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6933 operands[4] = force_reg (V2DFmode,
6934 ix86_build_const_vector (V2DFmode, 1, x));
6935
6936 for (i = 5; i < 8; i++)
6937 operands[i] = gen_reg_rtx (V2DFmode);
6938 })
6939
;; Unsigned SI -> DF conversion of the high half of a V8SI: extract
;; the high four elements, signed-convert to V4DF, then add 2^32
;; (tmp[1]) where the signed result is negative (LT-against-zero mask
;; in tmp[3], ANDed with the 2^32 vector in tmp[4]).
6940 (define_expand "vec_unpacku_float_hi_v8si"
6941 [(match_operand:V4DF 0 "register_operand")
6942 (match_operand:V8SI 1 "register_operand")]
6943 "TARGET_AVX"
6944 {
6945 REAL_VALUE_TYPE TWO32r;
6946 rtx x, tmp[6];
6947 int i;
6948
6949 real_ldexp (&TWO32r, &dconst1, 32);
6950 x = const_double_from_real_value (TWO32r, DFmode);
6951
6952 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6953 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6954 tmp[5] = gen_reg_rtx (V4SImode);
6955
6956 for (i = 2; i < 5; i++)
6957 tmp[i] = gen_reg_rtx (V4DFmode);
6958 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
6959 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
6960 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6961 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6962 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6963 DONE;
6964 })
6965
;; Unsigned SI -> DF conversion of the high half of a V16SI using
;; AVX512F mask registers: signed convert, compare < 0 into mask k,
;; then do a masked add of 2^32 (tmp[1]) only in the negative lanes.
6966 (define_expand "vec_unpacku_float_hi_v16si"
6967 [(match_operand:V8DF 0 "register_operand")
6968 (match_operand:V16SI 1 "register_operand")]
6969 "TARGET_AVX512F"
6970 {
6971 REAL_VALUE_TYPE TWO32r;
6972 rtx k, x, tmp[4];
6973
6974 real_ldexp (&TWO32r, &dconst1, 32);
6975 x = const_double_from_real_value (TWO32r, DFmode);
6976
6977 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6978 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6979 tmp[2] = gen_reg_rtx (V8DFmode);
6980 tmp[3] = gen_reg_rtx (V8SImode);
6981 k = gen_reg_rtx (QImode);
6982
6983 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
6984 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
6985 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
6986 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6987 emit_move_insn (operands[0], tmp[2]);
6988 DONE;
6989 })
6990
;; Unsigned SI -> DF conversion of the low half of a V8SI: convert the
;; low four elements with vcvtdq2pd, then add 2^32 (tmp[1]) where the
;; signed result is negative (LT mask ANDed with the 2^32 vector).
6991 (define_expand "vec_unpacku_float_lo_v8si"
6992 [(match_operand:V4DF 0 "register_operand")
6993 (match_operand:V8SI 1 "nonimmediate_operand")]
6994 "TARGET_AVX"
6995 {
6996 REAL_VALUE_TYPE TWO32r;
6997 rtx x, tmp[5];
6998 int i;
6999
7000 real_ldexp (&TWO32r, &dconst1, 32);
7001 x = const_double_from_real_value (TWO32r, DFmode);
7002
7003 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7004 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7005
7006 for (i = 2; i < 5; i++)
7007 tmp[i] = gen_reg_rtx (V4DFmode);
7008 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
7009 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7010 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7011 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
7012 DONE;
7013 })
7014
;; Unsigned SI -> DF conversion of the low half of a V16SI using
;; AVX512F masking: convert, compare < 0 into mask k, masked-add 2^32.
7015 (define_expand "vec_unpacku_float_lo_v16si"
7016 [(match_operand:V8DF 0 "register_operand")
7017 (match_operand:V16SI 1 "nonimmediate_operand")]
7018 "TARGET_AVX512F"
7019 {
7020 REAL_VALUE_TYPE TWO32r;
7021 rtx k, x, tmp[3];
7022
7023 real_ldexp (&TWO32r, &dconst1, 32);
7024 x = const_double_from_real_value (TWO32r, DFmode);
7025
7026 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7027 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7028 tmp[2] = gen_reg_rtx (V8DFmode);
7029 k = gen_reg_rtx (QImode);
7030
7031 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
7032 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7033 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7034 emit_move_insn (operands[0], tmp[2]);
7035 DONE;
7036 })
7037
;; Narrow two DF vectors to SF and concatenate the results: truncate
;; each input into a temporary of the half-width SF mode, then
;; vec_concat the temporaries into the full PS destination.
7038 (define_expand "vec_pack_trunc_<mode>"
7039 [(set (match_dup 3)
7040 (float_truncate:<sf2dfmode>
7041 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
7042 (set (match_dup 4)
7043 (float_truncate:<sf2dfmode>
7044 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
7045 (set (match_operand:<ssePSmode> 0 "register_operand")
7046 (vec_concat:<ssePSmode>
7047 (match_dup 3)
7048 (match_dup 4)))]
7049 "TARGET_AVX"
7050 {
7051 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
7052 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
7053 })
7054
;; Pack two V2DF vectors into one V4SF.  With AVX (unless 128-bit is
;; preferred) concatenate to V4DF and do one 256-bit cvtpd2ps;
;; otherwise convert each half with cvtpd2ps and merge via movlhps.
7055 (define_expand "vec_pack_trunc_v2df"
7056 [(match_operand:V4SF 0 "register_operand")
7057 (match_operand:V2DF 1 "vector_operand")
7058 (match_operand:V2DF 2 "vector_operand")]
7059 "TARGET_SSE2"
7060 {
7061 rtx tmp0, tmp1;
7062
7063 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7064 {
7065 tmp0 = gen_reg_rtx (V4DFmode);
7066 tmp1 = force_reg (V2DFmode, operands[1]);
7067
7068 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7069 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
7070 }
7071 else
7072 {
7073 tmp0 = gen_reg_rtx (V4SFmode);
7074 tmp1 = gen_reg_rtx (V4SFmode);
7075
7076 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
7077 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
7078 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
7079 }
7080 DONE;
7081 })
7082
;; Truncate-convert two V8DF vectors to signed SI and concatenate the
;; two V8SI results into one V16SI.
7083 (define_expand "vec_pack_sfix_trunc_v8df"
7084 [(match_operand:V16SI 0 "register_operand")
7085 (match_operand:V8DF 1 "nonimmediate_operand")
7086 (match_operand:V8DF 2 "nonimmediate_operand")]
7087 "TARGET_AVX512F"
7088 {
7089 rtx r1, r2;
7090
7091 r1 = gen_reg_rtx (V8SImode);
7092 r2 = gen_reg_rtx (V8SImode);
7093
7094 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
7095 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
7096 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7097 DONE;
7098 })
7099
;; Truncate-convert two V4DF vectors to signed SI and concatenate the
;; two V4SI results into one V8SI.
7100 (define_expand "vec_pack_sfix_trunc_v4df"
7101 [(match_operand:V8SI 0 "register_operand")
7102 (match_operand:V4DF 1 "nonimmediate_operand")
7103 (match_operand:V4DF 2 "nonimmediate_operand")]
7104 "TARGET_AVX"
7105 {
7106 rtx r1, r2;
7107
7108 r1 = gen_reg_rtx (V4SImode);
7109 r2 = gen_reg_rtx (V4SImode);
7110
7111 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
7112 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
7113 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7114 DONE;
7115 })
7116
;; Truncate-convert two V2DF vectors to signed SI packed into a V4SI.
;; AVX path: concatenate to V4DF and do one 256-bit cvttpd2dq.
;; SSE2 path: cvttpd2dq each half (results land in the low 64 bits)
;; and interleave the two low quadwords via punpcklqdq.
7117 (define_expand "vec_pack_sfix_trunc_v2df"
7118 [(match_operand:V4SI 0 "register_operand")
7119 (match_operand:V2DF 1 "vector_operand")
7120 (match_operand:V2DF 2 "vector_operand")]
7121 "TARGET_SSE2"
7122 {
7123 rtx tmp0, tmp1, tmp2;
7124
7125 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7126 {
7127 tmp0 = gen_reg_rtx (V4DFmode);
7128 tmp1 = force_reg (V2DFmode, operands[1]);
7129
7130 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7131 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
7132 }
7133 else
7134 {
7135 tmp0 = gen_reg_rtx (V4SImode);
7136 tmp1 = gen_reg_rtx (V4SImode);
7137 tmp2 = gen_reg_rtx (V2DImode);
7138
7139 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
7140 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
7141 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7142 gen_lowpart (V2DImode, tmp0),
7143 gen_lowpart (V2DImode, tmp1)));
7144 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7145 }
7146 DONE;
7147 })
7148
;; For a DF vector mode, the SI vector mode holding the packed
;; conversion results of two such vectors.
7149 (define_mode_attr ssepackfltmode
7150 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
7151
;; Truncate-convert two DF vectors to unsigned SI and pack the results.
;; V8DF has a native unsigned convert (fixuns_trunc).  Otherwise the
;; inputs are first adjusted into signed-convertible range
;; (ix86_expand_adjust_ufix_to_sfix_si also returns per-element XOR
;; correction values in tmp[2]/tmp[3]); the signed pack result is then
;; fixed up by XORing with the interleaved even elements of those
;; corrections.  Without AVX2 the even/odd extraction must be done in
;; V8SF because no 256-bit integer shuffle exists.
7152 (define_expand "vec_pack_ufix_trunc_<mode>"
7153 [(match_operand:<ssepackfltmode> 0 "register_operand")
7154 (match_operand:VF2 1 "register_operand")
7155 (match_operand:VF2 2 "register_operand")]
7156 "TARGET_SSE2"
7157 {
7158 if (<MODE>mode == V8DFmode)
7159 {
7160 rtx r1, r2;
7161
7162 r1 = gen_reg_rtx (V8SImode);
7163 r2 = gen_reg_rtx (V8SImode);
7164
7165 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
7166 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
7167 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7168 }
7169 else
7170 {
7171 rtx tmp[7];
7172 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
7173 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
7174 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
7175 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
7176 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
7177 {
7178 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
7179 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
7180 }
7181 else
7182 {
7183 tmp[5] = gen_reg_rtx (V8SFmode);
7184 ix86_expand_vec_extract_even_odd (tmp[5],
7185 gen_lowpart (V8SFmode, tmp[2]),
7186 gen_lowpart (V8SFmode, tmp[3]), 0);
7187 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
7188 }
7189 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
7190 operands[0], 0, OPTAB_DIRECT);
7191 if (tmp[6] != operands[0])
7192 emit_move_insn (operands[0], tmp[6]);
7193 }
7194
7195 DONE;
7196 })
7197
;; Round-convert (cvtpd2dq, current rounding mode) two V8DF vectors to
;; signed SI and concatenate the two V8SI results into one V16SI.
7198 (define_expand "avx512f_vec_pack_sfix_v8df"
7199 [(match_operand:V16SI 0 "register_operand")
7200 (match_operand:V8DF 1 "nonimmediate_operand")
7201 (match_operand:V8DF 2 "nonimmediate_operand")]
7202 "TARGET_AVX512F"
7203 {
7204 rtx r1, r2;
7205
7206 r1 = gen_reg_rtx (V8SImode);
7207 r2 = gen_reg_rtx (V8SImode);
7208
7209 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
7210 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
7211 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7212 DONE;
7213 })
7214
;; Round-convert (cvtpd2dq) two V4DF vectors to signed SI and
;; concatenate the two V4SI results into one V8SI.
7215 (define_expand "vec_pack_sfix_v4df"
7216 [(match_operand:V8SI 0 "register_operand")
7217 (match_operand:V4DF 1 "nonimmediate_operand")
7218 (match_operand:V4DF 2 "nonimmediate_operand")]
7219 "TARGET_AVX"
7220 {
7221 rtx r1, r2;
7222
7223 r1 = gen_reg_rtx (V4SImode);
7224 r2 = gen_reg_rtx (V4SImode);
7225
7226 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
7227 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
7228 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7229 DONE;
7230 })
7231
;; Round-convert two V2DF vectors to signed SI packed into a V4SI.
;; AVX path: concatenate to V4DF and one 256-bit cvtpd2dq.  SSE2 path:
;; cvtpd2dq each half and interleave the low quadwords.
7232 (define_expand "vec_pack_sfix_v2df"
7233 [(match_operand:V4SI 0 "register_operand")
7234 (match_operand:V2DF 1 "vector_operand")
7235 (match_operand:V2DF 2 "vector_operand")]
7236 "TARGET_SSE2"
7237 {
7238 rtx tmp0, tmp1, tmp2;
7239
7240 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7241 {
7242 tmp0 = gen_reg_rtx (V4DFmode);
7243 tmp1 = force_reg (V2DFmode, operands[1]);
7244
7245 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7246 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
7247 }
7248 else
7249 {
7250 tmp0 = gen_reg_rtx (V4SImode);
7251 tmp1 = gen_reg_rtx (V4SImode);
7252 tmp2 = gen_reg_rtx (V2DImode);
7253
7254 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
7255 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
7256 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7257 gen_lowpart (V2DImode, tmp0),
7258 gen_lowpart (V2DImode, tmp1)));
7259 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7260 }
7261 DONE;
7262 })
7263
7264 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7265 ;;
7266 ;; Parallel single-precision floating point element swizzling
7267 ;;
7268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7269
;; Expander wrapper for movhlps that legitimizes the binary-operation
;; operands first (at most one memory operand) and copies the result
;; back if a different destination register had to be used.
7270 (define_expand "sse_movhlps_exp"
7271 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7272 (vec_select:V4SF
7273 (vec_concat:V8SF
7274 (match_operand:V4SF 1 "nonimmediate_operand")
7275 (match_operand:V4SF 2 "nonimmediate_operand"))
7276 (parallel [(const_int 6)
7277 (const_int 7)
7278 (const_int 2)
7279 (const_int 3)])))]
7280 "TARGET_SSE"
7281 {
7282 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7283
7284 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
7285
7286 /* Fix up the destination if needed.  */
7287 if (dst != operands[0])
7288 emit_move_insn (operands[0], dst);
7289
7290 DONE;
7291 })
7292
;; movhlps: result = { high half of op2, low half of op1 } (indices
;; 6,7 select op2's high pair; 2,3 keep op1's high pair in place).
;; Memory alternatives use movlps/movhps on the matching half instead.
7293 (define_insn "sse_movhlps"
7294 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7295 (vec_select:V4SF
7296 (vec_concat:V8SF
7297 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7298 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
7299 (parallel [(const_int 6)
7300 (const_int 7)
7301 (const_int 2)
7302 (const_int 3)])))]
7303 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7304 "@
7305 movhlps\t{%2, %0|%0, %2}
7306 vmovhlps\t{%2, %1, %0|%0, %1, %2}
7307 movlps\t{%H2, %0|%0, %H2}
7308 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
7309 %vmovhps\t{%2, %0|%q0, %2}"
7310 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7311 (set_attr "type" "ssemov")
7312 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7313 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7314
;; Expander wrapper for movlhps that legitimizes the binary-operation
;; operands first and copies the result back if a different
;; destination register had to be used.
7315 (define_expand "sse_movlhps_exp"
7316 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7317 (vec_select:V4SF
7318 (vec_concat:V8SF
7319 (match_operand:V4SF 1 "nonimmediate_operand")
7320 (match_operand:V4SF 2 "nonimmediate_operand"))
7321 (parallel [(const_int 0)
7322 (const_int 1)
7323 (const_int 4)
7324 (const_int 5)])))]
7325 "TARGET_SSE"
7326 {
7327 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7328
7329 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
7330
7331 /* Fix up the destination if needed.  */
7332 if (dst != operands[0])
7333 emit_move_insn (operands[0], dst);
7334
7335 DONE;
7336 })
7337
;; movlhps: result = { low half of op1, low half of op2 } (indices
;; 0,1 keep op1's low pair; 4,5 select op2's low pair).  Memory
;; alternatives use movhps/movlps on the matching half instead.
7338 (define_insn "sse_movlhps"
7339 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7340 (vec_select:V4SF
7341 (vec_concat:V8SF
7342 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7343 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
7344 (parallel [(const_int 0)
7345 (const_int 1)
7346 (const_int 4)
7347 (const_int 5)])))]
7348 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
7349 "@
7350 movlhps\t{%2, %0|%0, %2}
7351 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7352 movhps\t{%2, %0|%0, %q2}
7353 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7354 %vmovlps\t{%2, %H0|%H0, %2}"
7355 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7356 (set_attr "type" "ssemov")
7357 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7358 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7359
;; vunpckhps on 512-bit vectors: interleave the high pair of each
;; 128-bit lane of op1 with the corresponding pair of op2.
7360 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
7361 [(set (match_operand:V16SF 0 "register_operand" "=v")
7362 (vec_select:V16SF
7363 (vec_concat:V32SF
7364 (match_operand:V16SF 1 "register_operand" "v")
7365 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7366 (parallel [(const_int 2) (const_int 18)
7367 (const_int 3) (const_int 19)
7368 (const_int 6) (const_int 22)
7369 (const_int 7) (const_int 23)
7370 (const_int 10) (const_int 26)
7371 (const_int 11) (const_int 27)
7372 (const_int 14) (const_int 30)
7373 (const_int 15) (const_int 31)])))]
7374 "TARGET_AVX512F"
7375 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7376 [(set_attr "type" "sselog")
7377 (set_attr "prefix" "evex")
7378 (set_attr "mode" "V16SF")])
7379
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on 256-bit vectors: interleave the high pair of each
;; 128-bit lane of op1 with the corresponding pair of op2.
7381 (define_insn "avx_unpckhps256<mask_name>"
7382 [(set (match_operand:V8SF 0 "register_operand" "=v")
7383 (vec_select:V8SF
7384 (vec_concat:V16SF
7385 (match_operand:V8SF 1 "register_operand" "v")
7386 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7387 (parallel [(const_int 2) (const_int 10)
7388 (const_int 3) (const_int 11)
7389 (const_int 6) (const_int 14)
7390 (const_int 7) (const_int 15)])))]
7391 "TARGET_AVX && <mask_avx512vl_condition>"
7392 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7393 [(set_attr "type" "sselog")
7394 (set_attr "prefix" "vex")
7395 (set_attr "mode" "V8SF")])
7396
;; True cross-lane high interleave for V8SF.  Since the 256-bit unpck
;; insns work per-lane, build both the lane-local low and high unpck
;; results (operands 3 and 4), then select the high halves of each to
;; form the globally-interleaved high result.
7397 (define_expand "vec_interleave_highv8sf"
7398 [(set (match_dup 3)
7399 (vec_select:V8SF
7400 (vec_concat:V16SF
7401 (match_operand:V8SF 1 "register_operand")
7402 (match_operand:V8SF 2 "nonimmediate_operand"))
7403 (parallel [(const_int 0) (const_int 8)
7404 (const_int 1) (const_int 9)
7405 (const_int 4) (const_int 12)
7406 (const_int 5) (const_int 13)])))
7407 (set (match_dup 4)
7408 (vec_select:V8SF
7409 (vec_concat:V16SF
7410 (match_dup 1)
7411 (match_dup 2))
7412 (parallel [(const_int 2) (const_int 10)
7413 (const_int 3) (const_int 11)
7414 (const_int 6) (const_int 14)
7415 (const_int 7) (const_int 15)])))
7416 (set (match_operand:V8SF 0 "register_operand")
7417 (vec_select:V8SF
7418 (vec_concat:V16SF
7419 (match_dup 3)
7420 (match_dup 4))
7421 (parallel [(const_int 4) (const_int 5)
7422 (const_int 6) (const_int 7)
7423 (const_int 12) (const_int 13)
7424 (const_int 14) (const_int 15)])))]
7425 "TARGET_AVX"
7426 {
7427 operands[3] = gen_reg_rtx (V8SFmode);
7428 operands[4] = gen_reg_rtx (V8SFmode);
7429 })
7430
;; unpckhps: interleave the high two elements of op1 and op2.
7431 (define_insn "vec_interleave_highv4sf<mask_name>"
7432 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7433 (vec_select:V4SF
7434 (vec_concat:V8SF
7435 (match_operand:V4SF 1 "register_operand" "0,v")
7436 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7437 (parallel [(const_int 2) (const_int 6)
7438 (const_int 3) (const_int 7)])))]
7439 "TARGET_SSE && <mask_avx512vl_condition>"
7440 "@
7441 unpckhps\t{%2, %0|%0, %2}
7442 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7443 [(set_attr "isa" "noavx,avx")
7444 (set_attr "type" "sselog")
7445 (set_attr "prefix" "orig,vex")
7446 (set_attr "mode" "V4SF")])
7447
;; vunpcklps on 512-bit vectors: interleave the low pair of each
;; 128-bit lane of op1 with the corresponding pair of op2.
7448 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7449 [(set (match_operand:V16SF 0 "register_operand" "=v")
7450 (vec_select:V16SF
7451 (vec_concat:V32SF
7452 (match_operand:V16SF 1 "register_operand" "v")
7453 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7454 (parallel [(const_int 0) (const_int 16)
7455 (const_int 1) (const_int 17)
7456 (const_int 4) (const_int 20)
7457 (const_int 5) (const_int 21)
7458 (const_int 8) (const_int 24)
7459 (const_int 9) (const_int 25)
7460 (const_int 12) (const_int 28)
7461 (const_int 13) (const_int 29)])))]
7462 "TARGET_AVX512F"
7463 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7464 [(set_attr "type" "sselog")
7465 (set_attr "prefix" "evex")
7466 (set_attr "mode" "V16SF")])
7467
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on 256-bit vectors: interleave the low pair of each
;; 128-bit lane of op1 with the corresponding pair of op2.
7469 (define_insn "avx_unpcklps256<mask_name>"
7470 [(set (match_operand:V8SF 0 "register_operand" "=v")
7471 (vec_select:V8SF
7472 (vec_concat:V16SF
7473 (match_operand:V8SF 1 "register_operand" "v")
7474 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7475 (parallel [(const_int 0) (const_int 8)
7476 (const_int 1) (const_int 9)
7477 (const_int 4) (const_int 12)
7478 (const_int 5) (const_int 13)])))
7479 "TARGET_AVX && <mask_avx512vl_condition>"
7480 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7481 [(set_attr "type" "sselog")
7482 (set_attr "prefix" "vex")
7483 (set_attr "mode" "V8SF")])
7484
;; Masked 128-bit vunpcklps: low-element interleave of op1/op2 merged
;; with op3 (or zero) under mask register op4.
7485 (define_insn "unpcklps128_mask"
7486 [(set (match_operand:V4SF 0 "register_operand" "=v")
7487 (vec_merge:V4SF
7488 (vec_select:V4SF
7489 (vec_concat:V8SF
7490 (match_operand:V4SF 1 "register_operand" "v")
7491 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7492 (parallel [(const_int 0) (const_int 4)
7493 (const_int 1) (const_int 5)]))
7494 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7495 (match_operand:QI 4 "register_operand" "Yk")))]
7496 "TARGET_AVX512VL"
7497 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7498 [(set_attr "type" "sselog")
7499 (set_attr "prefix" "evex")
7500 (set_attr "mode" "V4SF")])
7501
;; True cross-lane low interleave for V8SF: build the lane-local low
;; and high unpck results (operands 3 and 4), then select the low
;; halves of each to form the globally-interleaved low result.
7502 (define_expand "vec_interleave_lowv8sf"
7503 [(set (match_dup 3)
7504 (vec_select:V8SF
7505 (vec_concat:V16SF
7506 (match_operand:V8SF 1 "register_operand")
7507 (match_operand:V8SF 2 "nonimmediate_operand"))
7508 (parallel [(const_int 0) (const_int 8)
7509 (const_int 1) (const_int 9)
7510 (const_int 4) (const_int 12)
7511 (const_int 5) (const_int 13)])))
7512 (set (match_dup 4)
7513 (vec_select:V8SF
7514 (vec_concat:V16SF
7515 (match_dup 1)
7516 (match_dup 2))
7517 (parallel [(const_int 2) (const_int 10)
7518 (const_int 3) (const_int 11)
7519 (const_int 6) (const_int 14)
7520 (const_int 7) (const_int 15)])))
7521 (set (match_operand:V8SF 0 "register_operand")
7522 (vec_select:V8SF
7523 (vec_concat:V16SF
7524 (match_dup 3)
7525 (match_dup 4))
7526 (parallel [(const_int 0) (const_int 1)
7527 (const_int 2) (const_int 3)
7528 (const_int 8) (const_int 9)
7529 (const_int 10) (const_int 11)])))]
7530 "TARGET_AVX"
7531 {
7532 operands[3] = gen_reg_rtx (V8SFmode);
7533 operands[4] = gen_reg_rtx (V8SFmode);
7534 })
7535
;; unpcklps: interleave the low two elements of op1 and op2.
7536 (define_insn "vec_interleave_lowv4sf"
7537 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7538 (vec_select:V4SF
7539 (vec_concat:V8SF
7540 (match_operand:V4SF 1 "register_operand" "0,v")
7541 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7542 (parallel [(const_int 0) (const_int 4)
7543 (const_int 1) (const_int 5)])))]
7544 "TARGET_SSE"
7545 "@
7546 unpcklps\t{%2, %0|%0, %2}
7547 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7548 [(set_attr "isa" "noavx,avx")
7549 (set_attr "type" "sselog")
7550 (set_attr "prefix" "orig,maybe_evex")
7551 (set_attr "mode" "V4SF")])
7552
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
;; vmovshdup (256-bit): duplicate each odd-indexed element downward.
7555 (define_insn "avx_movshdup256<mask_name>"
7556 [(set (match_operand:V8SF 0 "register_operand" "=v")
7557 (vec_select:V8SF
7558 (vec_concat:V16SF
7559 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7560 (match_dup 1))
7561 (parallel [(const_int 1) (const_int 1)
7562 (const_int 3) (const_int 3)
7563 (const_int 5) (const_int 5)
7564 (const_int 7) (const_int 7)])))]
7565 "TARGET_AVX && <mask_avx512vl_condition>"
7566 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7567 [(set_attr "type" "sse")
7568 (set_attr "prefix" "vex")
7569 (set_attr "mode" "V8SF")])
7570
;; movshdup: duplicate elements 1 and 3 downward (indices 1,1,7,7 of
;; the doubled operand pick out odd elements of op1).
7571 (define_insn "sse3_movshdup<mask_name>"
7572 [(set (match_operand:V4SF 0 "register_operand" "=v")
7573 (vec_select:V4SF
7574 (vec_concat:V8SF
7575 (match_operand:V4SF 1 "vector_operand" "vBm")
7576 (match_dup 1))
7577 (parallel [(const_int 1)
7578 (const_int 1)
7579 (const_int 7)
7580 (const_int 7)])))]
7581 "TARGET_SSE3 && <mask_avx512vl_condition>"
7582 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7583 [(set_attr "type" "sse")
7584 (set_attr "prefix_rep" "1")
7585 (set_attr "prefix" "maybe_vex")
7586 (set_attr "mode" "V4SF")])
7587
;; vmovshdup (512-bit): duplicate each odd-indexed element downward.
7588 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7589 [(set (match_operand:V16SF 0 "register_operand" "=v")
7590 (vec_select:V16SF
7591 (vec_concat:V32SF
7592 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7593 (match_dup 1))
7594 (parallel [(const_int 1) (const_int 1)
7595 (const_int 3) (const_int 3)
7596 (const_int 5) (const_int 5)
7597 (const_int 7) (const_int 7)
7598 (const_int 9) (const_int 9)
7599 (const_int 11) (const_int 11)
7600 (const_int 13) (const_int 13)
7601 (const_int 15) (const_int 15)])))]
7602 "TARGET_AVX512F"
7603 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7604 [(set_attr "type" "sse")
7605 (set_attr "prefix" "evex")
7606 (set_attr "mode" "V16SF")])
7607
;; vmovsldup (256-bit): duplicate each even-indexed element upward.
7608 (define_insn "avx_movsldup256<mask_name>"
7609 [(set (match_operand:V8SF 0 "register_operand" "=v")
7610 (vec_select:V8SF
7611 (vec_concat:V16SF
7612 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7613 (match_dup 1))
7614 (parallel [(const_int 0) (const_int 0)
7615 (const_int 2) (const_int 2)
7616 (const_int 4) (const_int 4)
7617 (const_int 6) (const_int 6)])))]
7618 "TARGET_AVX && <mask_avx512vl_condition>"
7619 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7620 [(set_attr "type" "sse")
7621 (set_attr "prefix" "vex")
7622 (set_attr "mode" "V8SF")])
7623
;; movsldup: duplicate elements 0 and 2 upward (indices 0,0,6,6 of
;; the doubled operand pick out even elements of op1).
7624 (define_insn "sse3_movsldup<mask_name>"
7625 [(set (match_operand:V4SF 0 "register_operand" "=v")
7626 (vec_select:V4SF
7627 (vec_concat:V8SF
7628 (match_operand:V4SF 1 "vector_operand" "vBm")
7629 (match_dup 1))
7630 (parallel [(const_int 0)
7631 (const_int 0)
7632 (const_int 6)
7633 (const_int 6)])))]
7634 "TARGET_SSE3 && <mask_avx512vl_condition>"
7635 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7636 [(set_attr "type" "sse")
7637 (set_attr "prefix_rep" "1")
7638 (set_attr "prefix" "maybe_vex")
7639 (set_attr "mode" "V4SF")])
7640
;; vmovsldup (512-bit): duplicate each even-indexed element upward.
7641 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7642 [(set (match_operand:V16SF 0 "register_operand" "=v")
7643 (vec_select:V16SF
7644 (vec_concat:V32SF
7645 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7646 (match_dup 1))
7647 (parallel [(const_int 0) (const_int 0)
7648 (const_int 2) (const_int 2)
7649 (const_int 4) (const_int 4)
7650 (const_int 6) (const_int 6)
7651 (const_int 8) (const_int 8)
7652 (const_int 10) (const_int 10)
7653 (const_int 12) (const_int 12)
7654 (const_int 14) (const_int 14)])))]
7655 "TARGET_AVX512F"
7656 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7657 [(set_attr "type" "sse")
7658 (set_attr "prefix" "evex")
7659 (set_attr "mode" "V16SF")])
7660
;; Expand a vshufps256 with an 8-bit immediate: decode the four 2-bit
;; fields of the mask into explicit per-element selector constants for
;; avx_shufps256_1 (the same field selects in both 128-bit lanes,
;; hence the +8/+4/+12 offsets for the lane-1 copies).
7661 (define_expand "avx_shufps256<mask_expand4_name>"
7662 [(match_operand:V8SF 0 "register_operand")
7663 (match_operand:V8SF 1 "register_operand")
7664 (match_operand:V8SF 2 "nonimmediate_operand")
7665 (match_operand:SI 3 "const_int_operand")]
7666 "TARGET_AVX"
7667 {
7668 int mask = INTVAL (operands[3]);
7669 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7670 operands[1],
7671 operands[2],
7672 GEN_INT ((mask >> 0) & 3),
7673 GEN_INT ((mask >> 2) & 3),
7674 GEN_INT (((mask >> 4) & 3) + 8),
7675 GEN_INT (((mask >> 6) & 3) + 8),
7676 GEN_INT (((mask >> 0) & 3) + 4),
7677 GEN_INT (((mask >> 2) & 3) + 4),
7678 GEN_INT (((mask >> 4) & 3) + 12),
7679 GEN_INT (((mask >> 6) & 3) + 12)
7680 <mask_expand4_args>));
7681 DONE;
7682 })
7683
;; One bit in mask selects 2 elements.
;; vshufps256 with explicit per-element selectors.  The insn condition
;; checks that lane 1's selectors mirror lane 0's (each is lane 0's
;; value plus 4), since the hardware applies one immediate to both
;; lanes; the output code re-encodes the selectors into that immediate.
7685 (define_insn "avx_shufps256_1<mask_name>"
7686 [(set (match_operand:V8SF 0 "register_operand" "=v")
7687 (vec_select:V8SF
7688 (vec_concat:V16SF
7689 (match_operand:V8SF 1 "register_operand" "v")
7690 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7691 (parallel [(match_operand 3 "const_0_to_3_operand" )
7692 (match_operand 4 "const_0_to_3_operand" )
7693 (match_operand 5 "const_8_to_11_operand" )
7694 (match_operand 6 "const_8_to_11_operand" )
7695 (match_operand 7 "const_4_to_7_operand" )
7696 (match_operand 8 "const_4_to_7_operand" )
7697 (match_operand 9 "const_12_to_15_operand")
7698 (match_operand 10 "const_12_to_15_operand")])))]
7699 "TARGET_AVX
7700 && <mask_avx512vl_condition>
7701 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7702 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7703 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7704 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7705 {
7706 int mask;
7707 mask = INTVAL (operands[3]);
7708 mask |= INTVAL (operands[4]) << 2;
7709 mask |= (INTVAL (operands[5]) - 8) << 4;
7710 mask |= (INTVAL (operands[6]) - 8) << 6;
7711 operands[3] = GEN_INT (mask);
7712
7713 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7714 }
7715 [(set_attr "type" "sseshuf")
7716 (set_attr "length_immediate" "1")
7717 (set_attr "prefix" "<mask_prefix>")
7718 (set_attr "mode" "V8SF")])
7719
;; Expand the 128-bit SHUFPS builtin: decode the 8-bit immediate into the
;; four element selectors of sse_shufps_v4sf.  The high two selectors pick
;; from operand 2, hence the +4 offset into the concatenated V8SF.
(define_expand "sse_shufps<mask_expand4_name>"
  [(match_operand:V4SF 0 "register_operand")
   (match_operand:V4SF 1 "register_operand")
   (match_operand:V4SF 2 "vector_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_SSE"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
						     operands[1],
						     operands[2],
						     GEN_INT ((mask >> 0) & 3),
						     GEN_INT ((mask >> 2) & 3),
						     GEN_INT (((mask >> 4) & 3) + 4),
						     GEN_INT (((mask >> 6) & 3) + 4)
						     <mask_expand4_args>));
  DONE;
})
7738
;; Masked variant of 128-bit SHUFPS (AVX512VL): the vec_merge with a mask
;; register (Yk) and zero-or-old operand 7 expresses the EVEX write mask.
(define_insn "sse_shufps_v4sf_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (vec_select:V4SF
	    (vec_concat:V8SF
	      (match_operand:V4SF 1 "register_operand" "v")
	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
	    (parallel [(match_operand 3 "const_0_to_3_operand")
		       (match_operand 4 "const_0_to_3_operand")
		       (match_operand 5 "const_4_to_7_operand")
		       (match_operand 6 "const_4_to_7_operand")]))
	  (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
	  (match_operand:QI 8 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
{
  /* Re-encode the four selectors into the vshufps immediate; selectors
     5/6 index into operand 2's half of the concatenation, so drop 4.  */
  int mask = 0;
  mask |= INTVAL (operands[3]) << 0;
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 4) << 4;
  mask |= (INTVAL (operands[6]) - 4) << 6;
  operands[3] = GEN_INT (mask);

  return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])
7767
;; Unmasked 128-bit SHUFPS/PSHUFD-style shuffle for V4SF/V4SI.
;; Alternative 0 is the two-operand legacy SSE form; alternative 1 is the
;; three-operand VEX/EVEX form.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
	(vec_select:VI4F_128
	  (vec_concat:<ssedoublevecmode>
	    (match_operand:VI4F_128 1 "register_operand" "0,v")
	    (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
	  (parallel [(match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_4_to_7_operand")
		     (match_operand 6 "const_4_to_7_operand")])))]
  "TARGET_SSE"
{
  /* Fold the four 2-bit selectors back into one immediate byte.  */
  int mask = 0;
  mask |= INTVAL (operands[3]) << 0;
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 4) << 4;
  mask |= (INTVAL (operands[6]) - 4) << 6;
  operands[3] = GEN_INT (mask);

  switch (which_alternative)
    {
    case 0:
      return "shufps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "V4SF")])
7802
;; Store/extract the high two SF elements of a V4SF: to memory (movhps),
;; reg-to-reg (movhlps), or load-from-memory via movlps of the high half.
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
	(vec_select:V2SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
	  (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%q0, %1}
   %vmovhlps\t{%1, %d0|%d0, %1}
   %vmovlps\t{%H1, %d0|%d0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
7816
;; Expander wrapper for sse_loadhps that legitimizes the operands
;; (at most one memory) and copies through a temporary if the fixed-up
;; destination differs from operand 0.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
	(vec_concat:V4SF
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand")
	    (parallel [(const_int 0) (const_int 1)]))
	  (match_operand:V2SF 2 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})
7836
;; Replace the high two SF elements of operand 1 with operand 2:
;; movhps (mem source), movlhps (reg source), or movlps into the high
;; half of a memory destination.
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
	(vec_concat:V4SF
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
	    (parallel [(const_int 0) (const_int 1)]))
	  (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %q2}
   vmovhps\t{%2, %1, %0|%0, %1, %q2}
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
7855
;; Store/extract the low two SF elements of a V4SF: movlps to memory,
;; full-register movaps (low half is already in place), or movlps load.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
	(vec_select:V2SF
	  (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovlps\t{%1, %0|%q0, %1}
   %vmovaps\t{%1, %0|%0, %1}
   %vmovlps\t{%1, %d0|%d0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
7869
;; Expander wrapper for sse_loadlps; same legitimization strategy as
;; sse_loadhps_exp (fix up operands, copy through a temp if needed).
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
	(vec_concat:V4SF
	  (match_operand:V2SF 2 "nonimmediate_operand")
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand")
	    (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})
7889
;; Replace the low two SF elements of operand 1 with operand 2.
;; Reg-to-reg uses shufps with immediate 0xe4 (identity selector for the
;; kept high half); memory source/destination uses movlps.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
	(vec_concat:V4SF
	  (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
	    (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   movlps\t{%2, %0|%0, %q2}
   vmovlps\t{%2, %1, %0|%0, %1, %q2}
   %vmovlps\t{%2, %0|%q0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "0,1")
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7912
;; MOVSS: merge element 0 of operand 2 into operand 1 (vec_merge mask 1).
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
	(vec_merge:V4SF
	  (match_operand:V4SF 2 "register_operand" " x,v")
	  (match_operand:V4SF 1 "register_operand" " 0,v")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "SF")])
7927
;; Broadcast element 0 of a V4SF register to all elements of a
;; 128/256-bit SF vector (AVX2 register-source vbroadcastss).
(define_insn "avx2_vec_dup<mode>"
  [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
	(vec_duplicate:VF1_128_256
	  (vec_select:SF
	    (match_operand:V4SF 1 "register_operand" "v")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<MODE>")])
7939
;; Broadcast element 0 of a V8SF register to all eight elements; %x1
;; prints the low 128-bit (xmm) name of the source register.
(define_insn "avx2_vec_dupv8sf_1"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
	(vec_duplicate:V8SF
	  (vec_select:SF
	    (match_operand:V8SF 1 "register_operand" "v")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V8SF")])
7951
;; Broadcast element 0 of a 512-bit FP vector register to all elements
;; (vbroadcastss/vbroadcastsd chosen via <bcstscalarsuff>).
(define_insn "avx512f_vec_dup<mode>_1"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(vec_duplicate:VF_512
	  (vec_select:<ssescalarmode>
	    (match_operand:VF_512 1 "register_operand" "v")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
7963
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
;; Build a V2SF from two scalars.  Alternatives cover SSE unpcklps,
;; SSE4.1 insertps (memory source, immediate 0x10 = insert into slot 1),
;; movss/movd for a zero high element, and MMX punpckldq.
(define_insn "*vec_concatv2sf_sse4_1"
  [(set (match_operand:V2SF 0 "register_operand"
	  "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
	(vec_concat:V2SF
	  (match_operand:SF 1 "nonimmediate_operand"
	  "  0, 0,Yv, 0,0, v,m, 0 , m")
	  (match_operand:SF 2 "nonimm_or_0_operand"
	  " Yr,*x,Yv, m,m, m,C,*ym, C")))]
  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklps\t{%2, %0|%0, %2}
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
   %vmovss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1,3,4")
	      (const_string "noavx")
	    (eq_attr "alternative" "2,5")
	      (const_string "avx")
	   ]
	   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "6")
	      (const_string "ssemov")
	    (eq_attr "alternative" "7")
	      (const_string "mmxcvt")
	    (eq_attr "alternative" "8")
	      (const_string "mmxmov")
	   ]
	   (const_string "sselog")))
   (set (attr "mmx_isa")
     (if_then_else (eq_attr "alternative" "7,8")
       (const_string "native")
       (const_string "*")))
   (set (attr "prefix_data16")
     (if_then_else (eq_attr "alternative" "3,4")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix_extra")
     (if_then_else (eq_attr "alternative" "3,4,5")
       (const_string "1")
       (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "3,4,5")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "2,5")
	      (const_string "maybe_evex")
	    (eq_attr "alternative" "6")
	      (const_string "maybe_vex")
	   ]
	   (const_string "orig")))
   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
8025
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; vector_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 fallback for building a V2SF from two scalars.
(define_insn "*vec_concatv2sf_sse"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
	(vec_concat:V2SF
	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "mmx_isa" "*,*,native,native")
   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])
8043
;; Concatenate two V2SF halves into a V4SF: movlhps for a register high
;; half, movhps for a memory high half.
(define_insn "*vec_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand"       "=x,v,x,v")
	(vec_concat:V4SF
	  (match_operand:V2SF 1 "register_operand"     " 0,v,0,v")
	  (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %q2}
   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
  [(set_attr "isa" "noavx,avx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
8059
;; V2SF extended with a zero high half: a single movq (zero-extends the
;; low 64 bits), hence the TARGET_SSE2 requirement.
(define_insn "*vec_concatv4sf_0"
  [(set (match_operand:V4SF 0 "register_operand"       "=v")
	(vec_concat:V4SF
	  (match_operand:V2SF 1 "nonimmediate_operand" "vm")
	  (match_operand:V2SF 2 "const0_operand"       " C")))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DF")])
8070
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 128-bit 4-element vector.  With operand 1 == 0 the
;; upper elements are zeroed (insertps $0xe / scalar moves); with a
;; register operand 1 the upper elements are preserved (movss/pinsrd).
;; The trailing memory alternatives split after reload (see define_split
;; below).
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
	  "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
	(vec_merge:VI4F_128
	  (vec_duplicate:VI4F_128
	    (match_operand:<ssescalarmode> 2 "general_operand"
	  " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
	  (match_operand:VI4F_128 1 "nonimm_or_0_operand"
	  " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
   vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
   #
   #
   #"
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1,8,9")
	      (const_string "sse4_noavx")
	    (eq_attr "alternative" "2,7,10")
	      (const_string "avx")
	    (eq_attr "alternative" "3,4")
	      (const_string "sse2")
	    (eq_attr "alternative" "5,6")
	      (const_string "noavx")
	   ]
	   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,2,8,9,10")
	      (const_string "sselog")
	    (eq_attr "alternative" "12")
	      (const_string "imov")
	    (eq_attr "alternative" "13")
	      (const_string "fmov")
	   ]
	   (const_string "ssemov")))
   (set (attr "prefix_extra")
     (if_then_else (eq_attr "alternative" "8,9,10")
       (const_string "1")
       (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "8,9,10")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "0,1,5,6,8,9")
	      (const_string "orig")
	    (eq_attr "alternative" "2")
	      (const_string "maybe_evex")
	    (eq_attr "alternative" "3,4")
	      (const_string "maybe_vex")
	    (eq_attr "alternative" "7,10")
	      (const_string "vex")
	   ]
	   (const_string "*")))
   (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
   ;; Alternative 4 moves a GPR into a vector register; only prefer it
   ;; when such inter-unit moves are fast on the target.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "4")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
	   (symbol_ref "true")))])
8144
;; A subset is vec_setv4sf.
;; SSE4.1 insertps form of vec_set: operand 3 is a one-hot vec_merge mask;
;; its log2 becomes the destination-slot field (bits 4-5) of the insertps
;; immediate.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
	  (match_operand:V4SF 1 "register_operand" "0,0,v")
	  (match_operand:SI 3 "const_int_operand")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
  switch (which_alternative)
    {
    case 0:
    case 1:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 2:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "V4SF")])
8176
;; All of vinsertps, vmovss, vmovd clear also the higher bits.
;; Set element 0 of a 256/512-bit vector with the rest zeroed; the insn
;; operates on the low 128-bit part (%x0) and relies on the implicit
;; upper-bit clearing of the VEX/EVEX encodings.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
	(vec_merge:VI4F_256_512
	  (vec_duplicate:VI4F_256_512
	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
	  (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
	  (const_int 1)))]
  "TARGET_AVX"
  "@
   vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
   vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
   vmovd\t{%2, %x0|%x0, %2}"
  [(set (attr "type")
     (if_then_else (eq_attr "alternative" "0")
       (const_string "sselog")
       (const_string "ssemov")))
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "SF,<ssescalarmode>,SI")
   ;; GPR-to-vector alternative: only when inter-unit moves are fast.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "2")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
	   (symbol_ref "true")))])
8201
;; INSERTPS builtin.  For a memory source the count_s field (immediate
;; bits 6-7) selects which element would be read from a register source;
;; since a memory operand supplies a single scalar, fold count_s into the
;; load address and clear those bits.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
		      (match_operand:V4SF 1 "register_operand" "0,0,v")
		      (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
		     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      unsigned count_s = INTVAL (operands[3]) >> 6;
      if (count_s)
	operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
    }
  switch (which_alternative)
    {
    case 0:
    case 1:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 2:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "V4SF")])
8235
;; After reload, setting element 0 of a vector that lives in memory is
;; just a scalar store to its first element.
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand")
	(vec_merge:VI4F_128
	  (vec_duplicate:VI4F_128
	    (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
	  (match_dup 0)
	  (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8246
;; Standard scalar operation patterns which preserve the rest of the
;; vector for combiner.
;; Set element 0 of a V2DF keeping element 1: movsd for a register
;; source, movlpd for a memory source.
(define_insn "vec_setv2df_0"
  [(set (match_operand:V2DF 0 "register_operand"       "=x,v,x,v")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
	  (match_operand:V2DF 1 "register_operand"     " 0,v,0,v")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   movlpd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "mode" "DF")])
8265
;; Generic vec_set entry point: constant element index goes through
;; ix86_expand_vector_set, variable index through the _var helper.
(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand")
   (match_operand:<ssescalarmode> 1 "register_operand")
   (match_operand 2 "vec_setm_operand")]
  "TARGET_SSE"
{
  if (CONST_INT_P (operands[2]))
    ix86_expand_vector_set (false, operands[0], operands[1],
			    INTVAL (operands[2]));
  else
    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
  DONE;
})
8279
;; Extracting element 0 of a V4SF is a no-op subreg: split after reload
;; into a plain SF move.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
	(vec_select:SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = gen_lowpart (SFmode, operands[1]);")
8290
;; EXTRACTPS to a GPR or memory; for an SSE-register destination, split
;; after reload into a shufps (elements 1/3) or unpckhps (element 2)
;; that moves the wanted element into slot 0 instead.
(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
	(vec_select:SF
	  (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   extractps\t{%2, %1, %0|%0, %1, %2}
   extractps\t{%2, %1, %0|%0, %1, %2}
   vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
  switch (INTVAL (operands[2]))
    {
    case 1:
    case 3:
      /* shufps of the vector with itself; the +4 selectors just repeat
	 the element from the second (identical) source.  */
      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
				      operands[2], operands[2],
				      GEN_INT (INTVAL (operands[2]) + 4),
				      GEN_INT (INTVAL (operands[2]) + 4)));
      break;
    case 2:
      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
      break;
    default:
      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
      gcc_unreachable ();
    }
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
   (set_attr "type" "sselog,sselog,sselog,*,*")
   (set_attr "prefix_data16" "1,1,1,*,*")
   (set_attr "prefix_extra" "1,1,1,*,*")
   (set_attr "length_immediate" "1,1,1,*,*")
   (set_attr "prefix" "orig,orig,maybe_evex,*,*")
   (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
8332
;; Extract any element of a V4SF in memory: split after reload into a
;; scalar load at offset index * 4.
(define_insn_and_split "*vec_extractv4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
	(vec_select:SF
	  (match_operand:V4SF 1 "memory_operand" "o,o,o")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
})
8345
;; ISA prefix for the 128-bit vextract patterns below: 32x4 extracts are
;; AVX512F, 64x2 extracts are AVX512DQ.
(define_mode_attr extract_type
  [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])

;; Mnemonic width suffix matching extract_type.
(define_mode_attr extract_suf
  [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])

;; 512-bit modes usable as source of a 128-bit extract; the 8-element
;; modes additionally require AVX512DQ.
(define_mode_iterator AVX512_VEC
  [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
8354
;; Masked 128-bit extract from a 512-bit vector.  Expands the quarter
;; index into explicit element selectors (x4 for 32-bit, x2 for 64-bit
;; elements).  A memory destination that differs from the merge source
;; (operand 3) goes through a register temporary, since the insn requires
;; them to match for a masked store.
(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
  [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
   (match_operand:AVX512_VEC 1 "register_operand")
   (match_operand:SI 2 "const_0_to_3_operand")
   (match_operand:<ssequartermode> 3 "nonimmediate_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  int mask;
  mask = INTVAL (operands[2]);
  rtx dest = operands[0];

  if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
    dest = gen_reg_rtx (<ssequartermode>mode);

  if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
    emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
	operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
	GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
	operands[4]));
  else
    emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
	operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
	operands[4]));
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
8383
;; Masked vextract{f,i}64x2: the two selectors must form an aligned,
;; consecutive pair; halving the first selector recovers the quarter
;; index for the immediate.  Masked stores require operand 4 == operand 0.
(define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
	(vec_merge:<ssequartermode>
	  (vec_select:<ssequartermode>
	    (match_operand:V8FI 1 "register_operand" "v,v")
	    (parallel [(match_operand 2 "const_0_to_7_operand")
		       (match_operand 3 "const_0_to_7_operand")]))
	  (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
	  (match_operand:QI 5 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512DQ
   && INTVAL (operands[2]) % 2 == 0
   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8406
;; Unmasked vextract{f,i}64x2; same aligned-pair selector requirement as
;; the masked pattern above.
(define_insn "*avx512dq_vextract<shuffletype>64x2_1"
  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
	(vec_select:<ssequartermode>
	  (match_operand:V8FI 1 "register_operand" "v")
	  (parallel [(match_operand 2 "const_0_to_7_operand")
		     (match_operand 3 "const_0_to_7_operand")])))]
  "TARGET_AVX512DQ
   && INTVAL (operands[2]) % 2 == 0
   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
  return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8425
;; Extracting the lowest 128 bits of a 512-bit vector is a subreg move
;; after reload — unless the source is an upper-16 (EXT_REX) register
;; without AVX512VL, in which case the 128-bit mode is not directly
;; accessible and we instead widen the destination so a full-width move
;; can be used.
(define_split
  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
	(vec_select:<ssequartermode>
	  (match_operand:V8FI 1 "register_operand")
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX512DQ
   && reload_completed
   && (TARGET_AVX512VL
       || REG_P (operands[0])
       || !EXT_REX_SSE_REG_P (operands[1]))"
  [(set (match_dup 0) (match_dup 1))]
{
  if (!TARGET_AVX512VL
      && REG_P (operands[0])
      && EXT_REX_SSE_REG_P (operands[1]))
    operands[0]
      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
  else
    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
})
8446
;; Masked vextract{f,i}32x4: the four selectors must be an aligned,
;; consecutive quad; the first selector / 4 is the immediate quarter
;; index.  Masked stores require operand 6 == operand 0.
(define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
	(vec_merge:<ssequartermode>
	  (vec_select:<ssequartermode>
	    (match_operand:V16FI 1 "register_operand" "v,v")
	    (parallel [(match_operand 2 "const_0_to_15_operand")
		       (match_operand 3 "const_0_to_15_operand")
		       (match_operand 4 "const_0_to_15_operand")
		       (match_operand 5 "const_0_to_15_operand")]))
	  (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
	  (match_operand:QI 7 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F
   && INTVAL (operands[2]) % 4 == 0
   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8473
;; Unmasked vextract{f,i}32x4; same aligned-quad selector requirement as
;; the masked pattern above.
(define_insn "*avx512f_vextract<shuffletype>32x4_1"
  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
	(vec_select:<ssequartermode>
	  (match_operand:V16FI 1 "register_operand" "v")
	  (parallel [(match_operand 2 "const_0_to_15_operand")
		     (match_operand 3 "const_0_to_15_operand")
		     (match_operand 4 "const_0_to_15_operand")
		     (match_operand 5 "const_0_to_15_operand")])))]
  "TARGET_AVX512F
   && INTVAL (operands[2]) % 4 == 0
   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
  return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8496
;; Low-quarter extract of a 16-element 512-bit vector is a subreg move
;; after reload; widen the destination instead when the source is an
;; upper-16 register and AVX512VL is unavailable (see the 64x2 split
;; above for the same trick).
(define_split
  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
	(vec_select:<ssequartermode>
	  (match_operand:V16FI 1 "register_operand")
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F
   && reload_completed
   && (TARGET_AVX512VL
       || REG_P (operands[0])
       || !EXT_REX_SSE_REG_P (operands[1]))"
  [(set (match_dup 0) (match_dup 1))]
{
  if (!TARGET_AVX512VL
      && REG_P (operands[0])
      && EXT_REX_SSE_REG_P (operands[1]))
    operands[0]
      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
  else
    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
})
8518
;; ISA prefix for the 256-bit vextract patterns: 32x8 extracts are
;; AVX512DQ, 64x4 extracts are AVX512F.
(define_mode_attr extract_type_2
  [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])

;; Mnemonic width suffix matching extract_type_2.
(define_mode_attr extract_suf_2
  [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])

;; 512-bit modes usable as source of a 256-bit extract; the 16-element
;; modes additionally require AVX512DQ.
(define_mode_iterator AVX512_VEC_2
  [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
8527
;; Masked 256-bit extract from a 512-bit vector: dispatch on the half
;; index to the lo/hi insns.  As in the 128-bit expander, a memory
;; destination that differs from the merge source goes through a
;; register temporary.
(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")
   (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  rtx dest = operands[0];

  if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
    dest = gen_reg_rtx (<ssehalfvecmode>mode);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>_mask;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>_mask;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (dest, operands[1], operands[3], operands[4]));
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
8559
;; Low-half extract of an 8-element 512-bit vector becomes a plain
;; 256-bit subreg move after reload (when the half mode is accessible).
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
	(vec_select:<ssehalfvecmode>
	  (match_operand:V8FI 1 "nonimmediate_operand")
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && reload_completed
   && (TARGET_AVX512VL
       || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8572
;; Masked extract of the low 256 bits (vextract*64x4 $0); masked stores
;; require the merge source (operand 2) to equal the destination.
(define_insn "vec_extract_lo_<mode>_mask"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V8FI 1 "register_operand" "v,v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
	  (match_operand:QI 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F
   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8591
;; Unmasked low-half extract.  Emits a real vextract only when the plain
;; 256-bit move is unavailable (no AVX512VL, register source); all other
;; cases are "#" and handled by the define_split above.
(define_insn "vec_extract_lo_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
	(vec_select:<ssehalfvecmode>
	  (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (!TARGET_AVX512VL && !MEM_P (operands[1]))
    return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
  else
    return "#";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store,load")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8611
;; Masked extract of the high 256 bits of a 512-bit V8D[IF] vector
;; (vextract{f,i}64x4, imm 1).  Memory destination is allowed only when
;; it matches the merge source, as in the _lo_ variant above.
8612 (define_insn "vec_extract_hi_<mode>_mask"
8613 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8614 (vec_merge:<ssehalfvecmode>
8615 (vec_select:<ssehalfvecmode>
8616 (match_operand:V8FI 1 "register_operand" "v,v")
8617 (parallel [(const_int 4) (const_int 5)
8618 (const_int 6) (const_int 7)]))
8619 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8620 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8621 "TARGET_AVX512F
8622 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8623 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8624 [(set_attr "type" "sselog1")
8625 (set_attr "prefix_extra" "1")
8626 (set_attr "length_immediate" "1")
8627 (set_attr "prefix" "evex")
8628 (set_attr "mode" "<sseinsnmode>")])
8629
;; Unmasked high-half extract from a 512-bit V8D[IF] vector; always an
;; explicit vextract*64x4 with imm 1 (the high half has no subreg form).
8630 (define_insn "vec_extract_hi_<mode>"
8631 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
8632 (vec_select:<ssehalfvecmode>
8633 (match_operand:V8FI 1 "register_operand" "v")
8634 (parallel [(const_int 4) (const_int 5)
8635 (const_int 6) (const_int 7)])))]
8636 "TARGET_AVX512F"
8637 "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8638 [(set_attr "type" "sselog1")
8639 (set_attr "prefix_extra" "1")
8640 (set_attr "length_immediate" "1")
8641 (set_attr "prefix" "evex")
8642 (set_attr "mode" "<sseinsnmode>")])
8643
;; Masked extract of the high 256 bits of a 512-bit V16S[IF] vector with
;; 32-bit granularity masking; vextract{f,i}32x8 requires AVX512DQ.
8644 (define_insn "vec_extract_hi_<mode>_mask"
8645 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8646 (vec_merge:<ssehalfvecmode>
8647 (vec_select:<ssehalfvecmode>
8648 (match_operand:V16FI 1 "register_operand" "v,v")
8649 (parallel [(const_int 8) (const_int 9)
8650 (const_int 10) (const_int 11)
8651 (const_int 12) (const_int 13)
8652 (const_int 14) (const_int 15)]))
8653 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8654 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8655 "TARGET_AVX512DQ
8656 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8657 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8658 [(set_attr "type" "sselog1")
8659 (set_attr "prefix_extra" "1")
8660 (set_attr "length_immediate" "1")
8661 (set_attr "prefix" "evex")
8662 (set_attr "mode" "<sseinsnmode>")])
8663
;; Unmasked high-half extract from a 512-bit V16S[IF] vector.  With
;; AVX512DQ use the natural 32x8 form; otherwise fall back to the
;; equivalent vextracti64x4 (same bits, different element granularity).
8664 (define_insn "vec_extract_hi_<mode>"
8665 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
8666 (vec_select:<ssehalfvecmode>
8667 (match_operand:V16FI 1 "register_operand" "v,v")
8668 (parallel [(const_int 8) (const_int 9)
8669 (const_int 10) (const_int 11)
8670 (const_int 12) (const_int 13)
8671 (const_int 14) (const_int 15)])))]
8672 "TARGET_AVX512F"
8673 "@
8674 vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
8675 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8676 [(set_attr "type" "sselog1")
8677 (set_attr "prefix_extra" "1")
8678 (set_attr "isa" "avx512dq,noavx512dq")
8679 (set_attr "length_immediate" "1")
8680 (set_attr "prefix" "evex")
8681 (set_attr "mode" "<sseinsnmode>")])
8682
;; 256-bit modes for the masked vextractf128 expander below; the 64-bit
;; element modes additionally require AVX512DQ (for vextract*64x2).
8683 (define_mode_iterator VI48F_256_DQ
8684 [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
8685
;; Expand the masked 128-bit extract builtin.  Operand 2 selects lo/hi
;; half, operand 3 is the merge source, operand 4 the mask register.
;; When the chosen insn pattern cannot accept this particular memory
;; destination (see the per-case comments below), extract into a fresh
;; register first and move to memory afterwards.
8686 (define_expand "avx512vl_vextractf128<mode>"
8687 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8688 (match_operand:VI48F_256_DQ 1 "register_operand")
8689 (match_operand:SI 2 "const_0_to_1_operand")
8690 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8691 (match_operand:QI 4 "register_operand")]
8692 "TARGET_AVX512VL"
8693 {
8694 rtx (*insn)(rtx, rtx, rtx, rtx);
8695 rtx dest = operands[0];
8696 
8697 if (MEM_P (dest)
8698 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8699 /* For V8S[IF]mode there are maskm insns with =m and 0
8700 constraints.  */
8701 ? !rtx_equal_p (dest, operands[3])
8702 /* For V4D[IF]mode, hi insns don't allow memory, and
8703 lo insns have =m and 0C constraints.  */
8704 : (operands[2] != const0_rtx
8705 || (!rtx_equal_p (dest, operands[3])
8706 && GET_CODE (operands[3]) != CONST_VECTOR))))
8707 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8708 switch (INTVAL (operands[2]))
8709 {
8710 case 0:
8711 insn = gen_vec_extract_lo_<mode>_mask;
8712 break;
8713 case 1:
8714 insn = gen_vec_extract_hi_<mode>_mask;
8715 break;
8716 default:
8717 gcc_unreachable ();
8718 }
8719 
8720 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8721 if (dest != operands[0])
8722 emit_move_insn (operands[0], dest);
8723 DONE;
8724 })
8725
;; Expand the plain (unmasked) AVX vextractf128 builtin: dispatch to the
;; lo or hi half-extract pattern based on the immediate selector.
8726 (define_expand "avx_vextractf128<mode>"
8727 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8728 (match_operand:V_256 1 "register_operand")
8729 (match_operand:SI 2 "const_0_to_1_operand")]
8730 "TARGET_AVX"
8731 {
8732 rtx (*insn)(rtx, rtx);
8733 
8734 switch (INTVAL (operands[2]))
8735 {
8736 case 0:
8737 insn = gen_vec_extract_lo_<mode>;
8738 break;
8739 case 1:
8740 insn = gen_vec_extract_hi_<mode>;
8741 break;
8742 default:
8743 gcc_unreachable ();
8744 }
8745 
8746 emit_insn (insn (operands[0], operands[1]));
8747 DONE;
8748 })
8749
;; Masked extract of the low 256 bits of a 512-bit V16S[IF] vector via
;; vextract{f,i}32x8 (AVX512DQ).  Memory destination only when it equals
;; the merge source.
8750 (define_insn "vec_extract_lo_<mode>_mask"
8751 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8752 (vec_merge:<ssehalfvecmode>
8753 (vec_select:<ssehalfvecmode>
8754 (match_operand:V16FI 1 "register_operand" "v,v")
8755 (parallel [(const_int 0) (const_int 1)
8756 (const_int 2) (const_int 3)
8757 (const_int 4) (const_int 5)
8758 (const_int 6) (const_int 7)]))
8759 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8760 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8761 "TARGET_AVX512DQ
8762 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8763 "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8764 [(set_attr "type" "sselog1")
8765 (set_attr "prefix_extra" "1")
8766 (set_attr "length_immediate" "1")
8767 (set_attr "memory" "none,store")
8768 (set_attr "prefix" "evex")
8769 (set_attr "mode" "<sseinsnmode>")])
8770
;; Unmasked low-half extract from a 512-bit V16S[IF] vector.  Only the
;; case "no AVX512VL, non-register destination, source in xmm16+" needs a
;; real vextract instruction; everything else emits "#" and is split into
;; a plain move by the define_split that follows.
8771 (define_insn "vec_extract_lo_<mode>"
8772 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
8773 (vec_select:<ssehalfvecmode>
8774 (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
8775 (parallel [(const_int 0) (const_int 1)
8776 (const_int 2) (const_int 3)
8777 (const_int 4) (const_int 5)
8778 (const_int 6) (const_int 7)])))]
8779 "TARGET_AVX512F
8780 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8781 {
8782 if (!TARGET_AVX512VL
8783 && !REG_P (operands[0])
8784 && EXT_REX_SSE_REG_P (operands[1]))
8785 {
8786 if (TARGET_AVX512DQ)
8787 return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
8788 else
8789 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8790 }
8791 else
8792 return "#";
8793 }
8794 [(set_attr "type" "sselog1")
8795 (set_attr "prefix_extra" "1")
8796 (set_attr "length_immediate" "1")
8797 (set_attr "memory" "none,load,store")
8798 (set_attr "prefix" "evex")
8799 (set_attr "mode" "<sseinsnmode>")])
8800
;; Post-reload split of a V16S[IF] low-half extract into a plain move.
;; If the destination is a register but the source lives in xmm16-xmm31
;; without AVX512VL, the 256-bit halves of that register are not directly
;; accessible, so widen the destination to the full 512-bit mode instead
;; (the move then writes the full register; the high half is don't-care).
8801 (define_split
8802 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8803 (vec_select:<ssehalfvecmode>
8804 (match_operand:V16FI 1 "nonimmediate_operand")
8805 (parallel [(const_int 0) (const_int 1)
8806 (const_int 2) (const_int 3)
8807 (const_int 4) (const_int 5)
8808 (const_int 6) (const_int 7)])))]
8809 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8810 && reload_completed
8811 && (TARGET_AVX512VL
8812 || REG_P (operands[0])
8813 || !EXT_REX_SSE_REG_P (operands[1]))"
8814 [(set (match_dup 0) (match_dup 1))]
8815 {
8816 if (!TARGET_AVX512VL
8817 && REG_P (operands[0])
8818 && EXT_REX_SSE_REG_P (operands[1]))
8819 operands[0]
8820 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8821 else
8822 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
8823 })
8824
;; Masked extract of the low 128 bits of a 256-bit V4D[IF] vector via
;; vextract{f,i}64x2 imm 0 (AVX512DQ + AVX512VL, EVEX-encoded).
;; NOTE(review): the "mode" attribute says "XI" although this pattern
;; operates on 256-bit vectors; sibling patterns use <sseinsnmode> —
;; looks suspicious, verify intent.
8825 (define_insn "vec_extract_lo_<mode>_mask"
8826 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8827 (vec_merge:<ssehalfvecmode>
8828 (vec_select:<ssehalfvecmode>
8829 (match_operand:VI8F_256 1 "register_operand" "v,v")
8830 (parallel [(const_int 0) (const_int 1)]))
8831 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8832 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8833 "TARGET_AVX512DQ
8834 && TARGET_AVX512VL
8835 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8836 "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8837 [(set_attr "type" "sselog1")
8838 (set_attr "prefix_extra" "1")
8839 (set_attr "length_immediate" "1")
8840 (set_attr "memory" "none,store")
8841 (set_attr "prefix" "evex")
8842 (set_attr "mode" "XI")])
8843
;; Unmasked low-half extract from a 256-bit V4D[IF] vector.  Always emits
;; "#": the following define_split turns it into a plain 128-bit move.
8844 (define_insn "vec_extract_lo_<mode>"
8845 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
8846 (vec_select:<ssehalfvecmode>
8847 (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
8848 (parallel [(const_int 0) (const_int 1)])))]
8849 "TARGET_AVX
8850 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8851 "#")
8852
;; Post-reload split of the VI8F_256 low-half extract into a plain move
;; of the low 128-bit subreg.
8853 (define_split
8854 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8855 (vec_select:<ssehalfvecmode>
8856 (match_operand:VI8F_256 1 "nonimmediate_operand")
8857 (parallel [(const_int 0) (const_int 1)])))]
8858 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8859 && reload_completed"
8860 [(set (match_dup 0) (match_dup 1))]
8861 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8862
;; Masked extract of the high 128 bits of a 256-bit V4D[IF] vector via
;; vextract{f,i}64x2 imm 1 (AVX512DQ + AVX512VL).  Memory destination is
;; only allowed when it equals the merge source (operand 2).
;; Fix: vextract*64x2 with masking is EVEX-encoded only, so the "prefix"
;; attribute must be "evex" (it wrongly said "vex"; the matching _lo_
;; pattern above already uses "evex").
8863 (define_insn "vec_extract_hi_<mode>_mask"
8864 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8865 (vec_merge:<ssehalfvecmode>
8866 (vec_select:<ssehalfvecmode>
8867 (match_operand:VI8F_256 1 "register_operand" "v,v")
8868 (parallel [(const_int 2) (const_int 3)]))
8869 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8870 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8871 "TARGET_AVX512DQ
8872 && TARGET_AVX512VL
8873 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8874 "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8875 [(set_attr "type" "sselog1")
8876 (set_attr "prefix_extra" "1")
8877 (set_attr "length_immediate" "1")
8878 (set_attr "prefix" "evex")
8879 (set_attr "mode" "<sseinsnmode>")])
8880
;; Unmasked high-half extract from a 256-bit V4D[IF] vector.  Prefers the
;; EVEX 64x2 (DQ) or 32x4 forms under AVX512VL so xmm16+ registers can be
;; used; otherwise the classic VEX vextract[fi]128.
;; NOTE(review): the "prefix" attribute is "vex" even though the AVX512VL
;; paths emit EVEX-only instructions — verify whether "maybe_evex" was
;; intended.
8881 (define_insn "vec_extract_hi_<mode>"
8882 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
8883 (vec_select:<ssehalfvecmode>
8884 (match_operand:VI8F_256 1 "register_operand" "v")
8885 (parallel [(const_int 2) (const_int 3)])))]
8886 "TARGET_AVX"
8887 {
8888 if (TARGET_AVX512VL)
8889 {
8890 if (TARGET_AVX512DQ)
8891 return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
8892 else
8893 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
8894 }
8895 else
8896 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
8897 }
8898 [(set_attr "type" "sselog1")
8899 (set_attr "prefix_extra" "1")
8900 (set_attr "length_immediate" "1")
8901 (set_attr "prefix" "vex")
8902 (set_attr "mode" "<sseinsnmode>")])
8903
;; Post-reload split of a VI4F_256 low-half extract into a plain move of
;; the low 128-bit subreg.
8904 (define_split
8905 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8906 (vec_select:<ssehalfvecmode>
8907 (match_operand:VI4F_256 1 "nonimmediate_operand")
8908 (parallel [(const_int 0) (const_int 1)
8909 (const_int 2) (const_int 3)])))]
8910 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8911 && reload_completed"
8912 [(set (match_dup 0) (match_dup 1))]
8913 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8914
;; Masked extract of the low 128 bits of a 256-bit V8S[IF] vector via
;; vextract{f,i}32x4 imm 0 (AVX512VL).  Memory destination only when it
;; equals the merge source.
8915 (define_insn "vec_extract_lo_<mode>_mask"
8916 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8917 (vec_merge:<ssehalfvecmode>
8918 (vec_select:<ssehalfvecmode>
8919 (match_operand:VI4F_256 1 "register_operand" "v,v")
8920 (parallel [(const_int 0) (const_int 1)
8921 (const_int 2) (const_int 3)]))
8922 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8923 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8924 "TARGET_AVX512VL
8925 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8926 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8927 [(set_attr "type" "sselog1")
8928 (set_attr "prefix_extra" "1")
8929 (set_attr "length_immediate" "1")
8930 (set_attr "prefix" "evex")
8931 (set_attr "mode" "<sseinsnmode>")])
8932
;; Unmasked low-half extract from a 256-bit V8S[IF] vector; always "#",
;; turned into a plain 128-bit move by the define_split above.
8933 (define_insn "vec_extract_lo_<mode>"
8934 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
8935 (vec_select:<ssehalfvecmode>
8936 (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
8937 (parallel [(const_int 0) (const_int 1)
8938 (const_int 2) (const_int 3)])))]
8939 "TARGET_AVX
8940 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8941 "#"
8942 [(set_attr "type" "sselog1")
8943 (set_attr "prefix_extra" "1")
8944 (set_attr "length_immediate" "1")
8945 (set_attr "prefix" "evex")
8946 (set_attr "mode" "<sseinsnmode>")])
8947
;; Masked extract of the high 128 bits of a 256-bit V8S[IF] vector via
;; vextract{f,i}32x4 imm 1 (AVX512VL).  Memory destination only when it
;; equals the merge source (operand 2).
;; Fix: operand 0's predicate was "register_operand", contradicting its
;; "=v,m" constraints and making the MEM_P check in the insn condition
;; unreachable; every sibling _mask extract pattern uses
;; "nonimmediate_operand", so use it here too.
8948 (define_insn "vec_extract_hi_<mode>_mask"
8949 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8950 (vec_merge:<ssehalfvecmode>
8951 (vec_select:<ssehalfvecmode>
8952 (match_operand:VI4F_256 1 "register_operand" "v,v")
8953 (parallel [(const_int 4) (const_int 5)
8954 (const_int 6) (const_int 7)]))
8955 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8956 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8957 "TARGET_AVX512VL
8958 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8959 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8960 [(set_attr "type" "sselog1")
8961 (set_attr "length_immediate" "1")
8962 (set_attr "prefix" "evex")
8963 (set_attr "mode" "<sseinsnmode>")])
8964
;; Unmasked high-half extract from a 256-bit V8S[IF] vector: VEX
;; vextract[fi]128 normally, or EVEX vextract*32x4 under AVX512VL (which
;; also permits xmm16+ source registers).
8965 (define_insn "vec_extract_hi_<mode>"
8966 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
8967 (vec_select:<ssehalfvecmode>
8968 (match_operand:VI4F_256 1 "register_operand" "x, v")
8969 (parallel [(const_int 4) (const_int 5)
8970 (const_int 6) (const_int 7)])))]
8971 "TARGET_AVX"
8972 "@
8973 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
8974 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8975 [(set_attr "isa" "*, avx512vl")
8976 (set_attr "prefix" "vex, evex")
8977 (set_attr "type" "sselog1")
8978 (set_attr "length_immediate" "1")
8979 (set_attr "mode" "<sseinsnmode>")])
8980
;; Low-half extract of V32HI.  Needs a real vextracti64x4 only when the
;; source is in xmm16-xmm31 without AVX512VL and the destination is not a
;; register; otherwise "#", split after reload into a plain move (with
;; the same full-register subreg trick as the V16FI split above when the
;; destination register's 256-bit half is not directly accessible).
8981 (define_insn_and_split "vec_extract_lo_v32hi"
8982 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
8983 (vec_select:V16HI
8984 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
8985 (parallel [(const_int 0) (const_int 1)
8986 (const_int 2) (const_int 3)
8987 (const_int 4) (const_int 5)
8988 (const_int 6) (const_int 7)
8989 (const_int 8) (const_int 9)
8990 (const_int 10) (const_int 11)
8991 (const_int 12) (const_int 13)
8992 (const_int 14) (const_int 15)])))]
8993 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8994 {
8995 if (TARGET_AVX512VL
8996 || REG_P (operands[0])
8997 || !EXT_REX_SSE_REG_P (operands[1]))
8998 return "#";
8999 else
9000 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9001 }
9002 "&& reload_completed
9003 && (TARGET_AVX512VL
9004 || REG_P (operands[0])
9005 || !EXT_REX_SSE_REG_P (operands[1]))"
9006 [(set (match_dup 0) (match_dup 1))]
9007 {
9008 if (!TARGET_AVX512VL
9009 && REG_P (operands[0])
9010 && EXT_REX_SSE_REG_P (operands[1]))
9011 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
9012 else
9013 operands[1] = gen_lowpart (V16HImode, operands[1]);
9014 }
9015 [(set_attr "type" "sselog1")
9016 (set_attr "prefix_extra" "1")
9017 (set_attr "length_immediate" "1")
9018 (set_attr "memory" "none,load,store")
9019 (set_attr "prefix" "evex")
9020 (set_attr "mode" "XI")])
9021
;; High-half extract of V32HI via vextracti64x4 imm 1 (bit-identical to a
;; 64-bit-element extract of the same 256 bits).
9022 (define_insn "vec_extract_hi_v32hi"
9023 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
9024 (vec_select:V16HI
9025 (match_operand:V32HI 1 "register_operand" "v")
9026 (parallel [(const_int 16) (const_int 17)
9027 (const_int 18) (const_int 19)
9028 (const_int 20) (const_int 21)
9029 (const_int 22) (const_int 23)
9030 (const_int 24) (const_int 25)
9031 (const_int 26) (const_int 27)
9032 (const_int 28) (const_int 29)
9033 (const_int 30) (const_int 31)])))]
9034 "TARGET_AVX512F"
9035 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9036 [(set_attr "type" "sselog1")
9037 (set_attr "prefix_extra" "1")
9038 (set_attr "length_immediate" "1")
9039 (set_attr "prefix" "evex")
9040 (set_attr "mode" "XI")])
9041
;; Low-half extract of V16HI: never a real instruction, always split
;; after reload into a plain V8HI move of the low subreg.
9042 (define_insn_and_split "vec_extract_lo_v16hi"
9043 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
9044 (vec_select:V8HI
9045 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
9046 (parallel [(const_int 0) (const_int 1)
9047 (const_int 2) (const_int 3)
9048 (const_int 4) (const_int 5)
9049 (const_int 6) (const_int 7)])))]
9050 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9051 "#"
9052 "&& reload_completed"
9053 [(set (match_dup 0) (match_dup 1))]
9054 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
9055
;; High-half extract of V16HI.  VEX vextract[fi]128 normally; under
;; AVX512DQ the EVEX vextracti32x4; under plain AVX512F the same insn but
;; applied to the containing 512-bit register (%g1) since the 256-bit
;; EVEX form needs VL.
9056 (define_insn "vec_extract_hi_v16hi"
9057 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
9058 (vec_select:V8HI
9059 (match_operand:V16HI 1 "register_operand" "x,v,v")
9060 (parallel [(const_int 8) (const_int 9)
9061 (const_int 10) (const_int 11)
9062 (const_int 12) (const_int 13)
9063 (const_int 14) (const_int 15)])))]
9064 "TARGET_AVX"
9065 "@
9066 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9067 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9068 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9069 [(set_attr "type" "sselog1")
9070 (set_attr "prefix_extra" "1")
9071 (set_attr "length_immediate" "1")
9072 (set_attr "isa" "*,avx512dq,avx512f")
9073 (set_attr "prefix" "vex,evex,evex")
9074 (set_attr "mode" "OI")])
9075
;; Low-half extract of V64QI; same structure and splitting strategy as
;; vec_extract_lo_v32hi above.
9076 (define_insn_and_split "vec_extract_lo_v64qi"
9077 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
9078 (vec_select:V32QI
9079 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
9080 (parallel [(const_int 0) (const_int 1)
9081 (const_int 2) (const_int 3)
9082 (const_int 4) (const_int 5)
9083 (const_int 6) (const_int 7)
9084 (const_int 8) (const_int 9)
9085 (const_int 10) (const_int 11)
9086 (const_int 12) (const_int 13)
9087 (const_int 14) (const_int 15)
9088 (const_int 16) (const_int 17)
9089 (const_int 18) (const_int 19)
9090 (const_int 20) (const_int 21)
9091 (const_int 22) (const_int 23)
9092 (const_int 24) (const_int 25)
9093 (const_int 26) (const_int 27)
9094 (const_int 28) (const_int 29)
9095 (const_int 30) (const_int 31)])))]
9096 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9097 {
9098 if (TARGET_AVX512VL
9099 || REG_P (operands[0])
9100 || !EXT_REX_SSE_REG_P (operands[1]))
9101 return "#";
9102 else
9103 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9104 }
9105 "&& reload_completed
9106 && (TARGET_AVX512VL
9107 || REG_P (operands[0])
9108 || !EXT_REX_SSE_REG_P (operands[1]))"
9109 [(set (match_dup 0) (match_dup 1))]
9110 {
9111 if (!TARGET_AVX512VL
9112 && REG_P (operands[0])
9113 && EXT_REX_SSE_REG_P (operands[1]))
9114 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
9115 else
9116 operands[1] = gen_lowpart (V32QImode, operands[1]);
9117 }
9118 [(set_attr "type" "sselog1")
9119 (set_attr "prefix_extra" "1")
9120 (set_attr "length_immediate" "1")
9121 (set_attr "memory" "none,load,store")
9122 (set_attr "prefix" "evex")
9123 (set_attr "mode" "XI")])
9124
;; High-half extract of V64QI via vextracti64x4 imm 1.
9125 (define_insn "vec_extract_hi_v64qi"
9126 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
9127 (vec_select:V32QI
9128 (match_operand:V64QI 1 "register_operand" "v")
9129 (parallel [(const_int 32) (const_int 33)
9130 (const_int 34) (const_int 35)
9131 (const_int 36) (const_int 37)
9132 (const_int 38) (const_int 39)
9133 (const_int 40) (const_int 41)
9134 (const_int 42) (const_int 43)
9135 (const_int 44) (const_int 45)
9136 (const_int 46) (const_int 47)
9137 (const_int 48) (const_int 49)
9138 (const_int 50) (const_int 51)
9139 (const_int 52) (const_int 53)
9140 (const_int 54) (const_int 55)
9141 (const_int 56) (const_int 57)
9142 (const_int 58) (const_int 59)
9143 (const_int 60) (const_int 61)
9144 (const_int 62) (const_int 63)])))]
9145 "TARGET_AVX512F"
9146 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9147 [(set_attr "type" "sselog1")
9148 (set_attr "prefix_extra" "1")
9149 (set_attr "length_immediate" "1")
9150 (set_attr "prefix" "evex")
9151 (set_attr "mode" "XI")])
9152
;; Low-half extract of V32QI: always split after reload into a plain
;; V16QI move of the low subreg.
9153 (define_insn_and_split "vec_extract_lo_v32qi"
9154 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
9155 (vec_select:V16QI
9156 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
9157 (parallel [(const_int 0) (const_int 1)
9158 (const_int 2) (const_int 3)
9159 (const_int 4) (const_int 5)
9160 (const_int 6) (const_int 7)
9161 (const_int 8) (const_int 9)
9162 (const_int 10) (const_int 11)
9163 (const_int 12) (const_int 13)
9164 (const_int 14) (const_int 15)])))]
9165 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9166 "#"
9167 "&& reload_completed"
9168 [(set (match_dup 0) (match_dup 1))]
9169 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
9170
;; High-half extract of V32QI; same three alternatives as
;; vec_extract_hi_v16hi (VEX 128, EVEX 32x4 with DQ, or via the
;; containing 512-bit register %g1 with plain AVX512F).
9171 (define_insn "vec_extract_hi_v32qi"
9172 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
9173 (vec_select:V16QI
9174 (match_operand:V32QI 1 "register_operand" "x,v,v")
9175 (parallel [(const_int 16) (const_int 17)
9176 (const_int 18) (const_int 19)
9177 (const_int 20) (const_int 21)
9178 (const_int 22) (const_int 23)
9179 (const_int 24) (const_int 25)
9180 (const_int 26) (const_int 27)
9181 (const_int 28) (const_int 29)
9182 (const_int 30) (const_int 31)])))]
9183 "TARGET_AVX"
9184 "@
9185 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9186 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9187 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9188 [(set_attr "type" "sselog1")
9189 (set_attr "prefix_extra" "1")
9190 (set_attr "length_immediate" "1")
9191 (set_attr "isa" "*,avx512dq,avx512f")
9192 (set_attr "prefix" "vex,evex,evex")
9193 (set_attr "mode" "OI")])
9194
9195 ;; Modes handled by vec_extract patterns.
;; Each entry gates a source mode on the ISA needed to address it.
9196 (define_mode_iterator VEC_EXTRACT_MODE
9197 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
9198 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
9199 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
9200 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
9201 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
9202 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
9203 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
9204
;; Standard-named scalar element extract; defers entirely to
;; ix86_expand_vector_extract with the constant element index.
9205 (define_expand "vec_extract<mode><ssescalarmodelower>"
9206 [(match_operand:<ssescalarmode> 0 "register_operand")
9207 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
9208 (match_operand 2 "const_int_operand")]
9209 "TARGET_SSE"
9210 {
9211 ix86_expand_vector_extract (false, operands[0], operands[1],
9212 INTVAL (operands[2]));
9213 DONE;
9214 })
9215
;; Standard-named half-vector extract: dispatch on the 0/1 half selector
;; to the lo/hi half-extract patterns.
9216 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
9217 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9218 (match_operand:V_256_512 1 "register_operand")
9219 (match_operand 2 "const_0_to_1_operand")]
9220 "TARGET_AVX"
9221 {
9222 if (INTVAL (operands[2]))
9223 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
9224 else
9225 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
9226 DONE;
9227 })
9228
9229 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9230 ;;
9231 ;; Parallel double-precision floating point element swizzling
9232 ;;
9233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9234
;; 512-bit vunpckhpd: interleave the odd-indexed doubles of the two
;; sources within each 128-bit lane, with optional masking.
9235 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
9236 [(set (match_operand:V8DF 0 "register_operand" "=v")
9237 (vec_select:V8DF
9238 (vec_concat:V16DF
9239 (match_operand:V8DF 1 "register_operand" "v")
9240 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9241 (parallel [(const_int 1) (const_int 9)
9242 (const_int 3) (const_int 11)
9243 (const_int 5) (const_int 13)
9244 (const_int 7) (const_int 15)])))]
9245 "TARGET_AVX512F"
9246 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9247 [(set_attr "type" "sselog")
9248 (set_attr "prefix" "evex")
9249 (set_attr "mode" "V8DF")])
9250
9251 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9252 (define_insn "avx_unpckhpd256<mask_name>"
9253 [(set (match_operand:V4DF 0 "register_operand" "=v")
9254 (vec_select:V4DF
9255 (vec_concat:V8DF
9256 (match_operand:V4DF 1 "register_operand" "v")
9257 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9258 (parallel [(const_int 1) (const_int 5)
9259 (const_int 3) (const_int 7)])))]
9260 "TARGET_AVX && <mask_avx512vl_condition>"
9261 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9262 [(set_attr "type" "sselog")
9263 (set_attr "prefix" "vex")
9264 (set_attr "mode" "V4DF")])
9265
;; True cross-lane high interleave for V4DF: because vunpck*pd only works
;; within 128-bit lanes, compose it from an in-lane low unpck, an in-lane
;; high unpck, and a lane permute that picks the high lanes of each.
9266 (define_expand "vec_interleave_highv4df"
9267 [(set (match_dup 3)
9268 (vec_select:V4DF
9269 (vec_concat:V8DF
9270 (match_operand:V4DF 1 "register_operand")
9271 (match_operand:V4DF 2 "nonimmediate_operand"))
9272 (parallel [(const_int 0) (const_int 4)
9273 (const_int 2) (const_int 6)])))
9274 (set (match_dup 4)
9275 (vec_select:V4DF
9276 (vec_concat:V8DF
9277 (match_dup 1)
9278 (match_dup 2))
9279 (parallel [(const_int 1) (const_int 5)
9280 (const_int 3) (const_int 7)])))
9281 (set (match_operand:V4DF 0 "register_operand")
9282 (vec_select:V4DF
9283 (vec_concat:V8DF
9284 (match_dup 3)
9285 (match_dup 4))
9286 (parallel [(const_int 2) (const_int 3)
9287 (const_int 6) (const_int 7)])))]
9288 "TARGET_AVX"
9289 {
9290 operands[3] = gen_reg_rtx (V4DFmode);
9291 operands[4] = gen_reg_rtx (V4DFmode);
9292 })
9293
9294
;; Masked 128-bit vunpckhpd: high doubles of the two sources, merged
;; under mask operand 4 with operand 3 (register or zero).
9295 (define_insn "avx512vl_unpckhpd128_mask"
9296 [(set (match_operand:V2DF 0 "register_operand" "=v")
9297 (vec_merge:V2DF
9298 (vec_select:V2DF
9299 (vec_concat:V4DF
9300 (match_operand:V2DF 1 "register_operand" "v")
9301 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9302 (parallel [(const_int 1) (const_int 3)]))
9303 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9304 (match_operand:QI 4 "register_operand" "Yk")))]
9305 "TARGET_AVX512VL"
9306 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9307 [(set_attr "type" "sselog")
9308 (set_attr "prefix" "evex")
9309 (set_attr "mode" "V2DF")])
9310
;; V2DF high interleave.  If the operand combination is not one the insn
;; pattern accepts, force operand 2 into a register first.
9311 (define_expand "vec_interleave_highv2df"
9312 [(set (match_operand:V2DF 0 "register_operand")
9313 (vec_select:V2DF
9314 (vec_concat:V4DF
9315 (match_operand:V2DF 1 "nonimmediate_operand")
9316 (match_operand:V2DF 2 "nonimmediate_operand"))
9317 (parallel [(const_int 1)
9318 (const_int 3)])))]
9319 "TARGET_SSE2"
9320 {
9321 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
9322 operands[2] = force_reg (V2DFmode, operands[2]);
9323 })
9324
;; V2DF high interleave, six alternatives: SSE/AVX unpckhpd for the
;; register forms, and movddup/movlpd/movhpd variants that fold a memory
;; high half (%H1) or store the high half directly.
9325 (define_insn "*vec_interleave_highv2df"
9326 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
9327 (vec_select:V2DF
9328 (vec_concat:V4DF
9329 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
9330 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
9331 (parallel [(const_int 1)
9332 (const_int 3)])))]
9333 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
9334 "@
9335 unpckhpd\t{%2, %0|%0, %2}
9336 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
9337 %vmovddup\t{%H1, %0|%0, %H1}
9338 movlpd\t{%H1, %0|%0, %H1}
9339 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
9340 %vmovhpd\t{%1, %0|%q0, %1}"
9341 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9342 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9343 (set (attr "prefix_data16")
9344 (if_then_else (eq_attr "alternative" "3,5")
9345 (const_string "1")
9346 (const_string "*")))
9347 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9348 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9349
;; 512-bit movddup: duplicate each even-indexed double, expressed as an
;; unpcklpd of the operand with itself.
9350 (define_expand "avx512f_movddup512<mask_name>"
9351 [(set (match_operand:V8DF 0 "register_operand")
9352 (vec_select:V8DF
9353 (vec_concat:V16DF
9354 (match_operand:V8DF 1 "nonimmediate_operand")
9355 (match_dup 1))
9356 (parallel [(const_int 0) (const_int 8)
9357 (const_int 2) (const_int 10)
9358 (const_int 4) (const_int 12)
9359 (const_int 6) (const_int 14)])))]
9360 "TARGET_AVX512F")
9361
;; 512-bit vunpcklpd expander; matched by the *avx512f_unpcklpd512
;; insn pattern below.
9362 (define_expand "avx512f_unpcklpd512<mask_name>"
9363 [(set (match_operand:V8DF 0 "register_operand")
9364 (vec_select:V8DF
9365 (vec_concat:V16DF
9366 (match_operand:V8DF 1 "register_operand")
9367 (match_operand:V8DF 2 "nonimmediate_operand"))
9368 (parallel [(const_int 0) (const_int 8)
9369 (const_int 2) (const_int 10)
9370 (const_int 4) (const_int 12)
9371 (const_int 6) (const_int 14)])))]
9372 "TARGET_AVX512F")
9373
;; 512-bit low interleave of doubles.  When both inputs are the same
;; (operand 2 matches operand 1 via the "1" constraint) this is movddup;
;; otherwise vunpcklpd.
9374 (define_insn "*avx512f_unpcklpd512<mask_name>"
9375 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
9376 (vec_select:V8DF
9377 (vec_concat:V16DF
9378 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
9379 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
9380 (parallel [(const_int 0) (const_int 8)
9381 (const_int 2) (const_int 10)
9382 (const_int 4) (const_int 12)
9383 (const_int 6) (const_int 14)])))]
9384 "TARGET_AVX512F"
9385 "@
9386 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
9387 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9388 [(set_attr "type" "sselog")
9389 (set_attr "prefix" "evex")
9390 (set_attr "mode" "V8DF")])
9391
9392 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit movddup expander: in-lane duplication of even doubles,
;; expressed as an unpcklpd of the operand with itself.
9393 (define_expand "avx_movddup256<mask_name>"
9394 [(set (match_operand:V4DF 0 "register_operand")
9395 (vec_select:V4DF
9396 (vec_concat:V8DF
9397 (match_operand:V4DF 1 "nonimmediate_operand")
9398 (match_dup 1))
9399 (parallel [(const_int 0) (const_int 4)
9400 (const_int 2) (const_int 6)])))]
9401 "TARGET_AVX && <mask_avx512vl_condition>")
9402
;; 256-bit vunpcklpd expander; matched by *avx_unpcklpd256 below.
9403 (define_expand "avx_unpcklpd256<mask_name>"
9404 [(set (match_operand:V4DF 0 "register_operand")
9405 (vec_select:V4DF
9406 (vec_concat:V8DF
9407 (match_operand:V4DF 1 "register_operand")
9408 (match_operand:V4DF 2 "nonimmediate_operand"))
9409 (parallel [(const_int 0) (const_int 4)
9410 (const_int 2) (const_int 6)])))]
9411 "TARGET_AVX && <mask_avx512vl_condition>")
9412
;; 256-bit low interleave of doubles; the second alternative (memory
;; input duplicated into both halves, "1" tie) collapses to vmovddup.
9413 (define_insn "*avx_unpcklpd256<mask_name>"
9414 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9415 (vec_select:V4DF
9416 (vec_concat:V8DF
9417 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9418 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9419 (parallel [(const_int 0) (const_int 4)
9420 (const_int 2) (const_int 6)])))]
9421 "TARGET_AVX && <mask_avx512vl_condition>"
9422 "@
9423 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9424 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9425 [(set_attr "type" "sselog")
9426 (set_attr "prefix" "vex")
9427 (set_attr "mode" "V4DF")])
9428
;; True cross-lane low interleave for V4DF, composed from in-lane lo/hi
;; unpcks plus a lane permute selecting the low lanes (mirror of
;; vec_interleave_highv4df above).
9429 (define_expand "vec_interleave_lowv4df"
9430 [(set (match_dup 3)
9431 (vec_select:V4DF
9432 (vec_concat:V8DF
9433 (match_operand:V4DF 1 "register_operand")
9434 (match_operand:V4DF 2 "nonimmediate_operand"))
9435 (parallel [(const_int 0) (const_int 4)
9436 (const_int 2) (const_int 6)])))
9437 (set (match_dup 4)
9438 (vec_select:V4DF
9439 (vec_concat:V8DF
9440 (match_dup 1)
9441 (match_dup 2))
9442 (parallel [(const_int 1) (const_int 5)
9443 (const_int 3) (const_int 7)])))
9444 (set (match_operand:V4DF 0 "register_operand")
9445 (vec_select:V4DF
9446 (vec_concat:V8DF
9447 (match_dup 3)
9448 (match_dup 4))
9449 (parallel [(const_int 0) (const_int 1)
9450 (const_int 4) (const_int 5)])))]
9451 "TARGET_AVX"
9452 {
9453 operands[3] = gen_reg_rtx (V4DFmode);
9454 operands[4] = gen_reg_rtx (V4DFmode);
9455 })
9456
;; Masked 128-bit vunpcklpd: low doubles of the two sources, merged
;; under mask operand 4 with operand 3 (register or zero).
9457 (define_insn "avx512vl_unpcklpd128_mask"
9458 [(set (match_operand:V2DF 0 "register_operand" "=v")
9459 (vec_merge:V2DF
9460 (vec_select:V2DF
9461 (vec_concat:V4DF
9462 (match_operand:V2DF 1 "register_operand" "v")
9463 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9464 (parallel [(const_int 0) (const_int 2)]))
9465 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9466 (match_operand:QI 4 "register_operand" "Yk")))]
9467 "TARGET_AVX512VL"
9468 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9469 [(set_attr "type" "sselog")
9470 (set_attr "prefix" "evex")
9471 (set_attr "mode" "V2DF")])
9472
9473 (define_expand "vec_interleave_lowv2df"
9474 [(set (match_operand:V2DF 0 "register_operand")
9475 (vec_select:V2DF
9476 (vec_concat:V4DF
9477 (match_operand:V2DF 1 "nonimmediate_operand")
9478 (match_operand:V2DF 2 "nonimmediate_operand"))
9479 (parallel [(const_int 0)
9480 (const_int 2)])))]
9481 "TARGET_SSE2"
9482 {
9483 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9484 operands[1] = force_reg (V2DFmode, operands[1]);
9485 })
9486
9487 (define_insn "*vec_interleave_lowv2df"
9488 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9489 (vec_select:V2DF
9490 (vec_concat:V4DF
9491 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9492 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9493 (parallel [(const_int 0)
9494 (const_int 2)])))]
9495 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9496 "@
9497 unpcklpd\t{%2, %0|%0, %2}
9498 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9499 %vmovddup\t{%1, %0|%0, %q1}
9500 movhpd\t{%2, %0|%0, %q2}
9501 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9502 %vmovlpd\t{%2, %H0|%H0, %2}"
9503 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9504 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9505 (set (attr "prefix_data16")
9506 (if_then_else (eq_attr "alternative" "3,5")
9507 (const_string "1")
9508 (const_string "*")))
9509 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9510 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9511
9512 (define_split
9513 [(set (match_operand:V2DF 0 "memory_operand")
9514 (vec_select:V2DF
9515 (vec_concat:V4DF
9516 (match_operand:V2DF 1 "register_operand")
9517 (match_dup 1))
9518 (parallel [(const_int 0)
9519 (const_int 2)])))]
9520 "TARGET_SSE3 && reload_completed"
9521 [(const_int 0)]
9522 {
9523 rtx low = gen_lowpart (DFmode, operands[1]);
9524
9525 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9526 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
9527 DONE;
9528 })
9529
9530 (define_split
9531 [(set (match_operand:V2DF 0 "register_operand")
9532 (vec_select:V2DF
9533 (vec_concat:V4DF
9534 (match_operand:V2DF 1 "memory_operand")
9535 (match_dup 1))
9536 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9537 (match_operand:SI 3 "const_int_operand")])))]
9538 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9539 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9540 {
9541 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
9542 })
9543
;; Scalar vscalefss/sd: UNSPEC_SCALEF of operands 1 and 2; the vec_merge
;; with (const_int 1) keeps only the low element, upper elements coming
;; from operand 1, following the usual scalar-insn convention.
9544 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9545 [(set (match_operand:VF_128 0 "register_operand" "=v")
9546 (vec_merge:VF_128
9547 (unspec:VF_128
9548 [(match_operand:VF_128 1 "register_operand" "v")
9549 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9550 UNSPEC_SCALEF)
9551 (match_dup 1)
9552 (const_int 1)))]
9553 "TARGET_AVX512F"
9554 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9555 [(set_attr "prefix" "evex")
9556 (set_attr "mode" "<ssescalarmode>")])
9557
;; Packed vscalefps/pd over all float vector modes available under
;; AVX512F/AVX512VL, with optional masking and rounding modifiers.
9558 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9559 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9560 (unspec:VF_AVX512VL
9561 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9562 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9563 UNSPEC_SCALEF))]
9564 "TARGET_AVX512F"
9565 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9566 [(set_attr "prefix" "evex")
9567 (set_attr "mode" "<MODE>")])
9568
;; Zero-masked vpternlog expander: delegates to the _maskz_1 pattern with
;; an all-zero merge operand, so masked-off elements become zero.
9569 (define_expand "<avx512>_vternlog<mode>_maskz"
9570 [(match_operand:VI48_AVX512VL 0 "register_operand")
9571 (match_operand:VI48_AVX512VL 1 "register_operand")
9572 (match_operand:VI48_AVX512VL 2 "register_operand")
9573 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9574 (match_operand:SI 4 "const_0_to_255_operand")
9575 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9576 "TARGET_AVX512F"
9577 {
9578 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9579 operands[0], operands[1], operands[2], operands[3],
9580 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
9581 DONE;
9582 })
9583
;; vpternlog: ternary bitwise logic of operands 1/2/3 selected by the
;; 8-bit immediate truth table in operand 4.  Operand 1 is tied to the
;; destination ("0") since the instruction overwrites its first source.
9584 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9585 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9586 (unspec:VI48_AVX512VL
9587 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9588 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9589 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9590 (match_operand:SI 4 "const_0_to_255_operand")]
9591 UNSPEC_VTERNLOG))]
9592 "TARGET_AVX512F"
9593 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9594 [(set_attr "type" "sselog")
9595 (set_attr "prefix" "evex")
9596 (set_attr "mode" "<sseinsnmode>")])
9597
;; Merge-masked vpternlog: result elements merged with operand 1 (the
;; destination source) under mask operand 5.
9598 (define_insn "<avx512>_vternlog<mode>_mask"
9599 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9600 (vec_merge:VI48_AVX512VL
9601 (unspec:VI48_AVX512VL
9602 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9603 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9604 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9605 (match_operand:SI 4 "const_0_to_255_operand")]
9606 UNSPEC_VTERNLOG)
9607 (match_dup 1)
9608 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9609 "TARGET_AVX512F"
9610 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9611 [(set_attr "type" "sselog")
9612 (set_attr "prefix" "evex")
9613 (set_attr "mode" "<sseinsnmode>")])
9614
;; Packed vgetexpps/pd (UNSPEC_GETEXP) with optional mask and SAE.
;; NOTE: the stray ';' after the template string starts an empty md
;; comment and is harmless.
9615 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9616 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9617 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9618 UNSPEC_GETEXP))]
9619 "TARGET_AVX512F"
9620 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9621 [(set_attr "prefix" "evex")
9622 (set_attr "mode" "<MODE>")])
9623
;; Scalar vgetexpss/sd: low element from UNSPEC_GETEXP of operands 1/2,
;; upper elements preserved from operand 1 via vec_merge (const_int 1).
9624 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9625 [(set (match_operand:VF_128 0 "register_operand" "=v")
9626 (vec_merge:VF_128
9627 (unspec:VF_128
9628 [(match_operand:VF_128 1 "register_operand" "v")
9629 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9630 UNSPEC_GETEXP)
9631 (match_dup 1)
9632 (const_int 1)))]
9633 "TARGET_AVX512F"
9634 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9635 [(set_attr "prefix" "evex")
9636 (set_attr "mode" "<ssescalarmode>")])
9637
;; valignd/q: concatenate operands 1 and 2 and shift right by the element
;; count in immediate operand 3 (UNSPEC_ALIGN), optionally masked.
9638 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9639 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9640 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9641 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9642 (match_operand:SI 3 "const_0_to_255_operand")]
9643 UNSPEC_ALIGN))]
9644 "TARGET_AVX512F"
9645 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9646 [(set_attr "prefix" "evex")
9647 (set_attr "mode" "<sseinsnmode>")])
9648
;; Masked 512-bit shufps expander: decompose the 8-bit immediate (four
;; 2-bit selectors) into the 16 explicit element indices expected by
;; avx512f_shufps512_1_mask.  Per 128-bit lane the first two indices pick
;; from operand 1 (lane base +0/+4/+8/+12) and the last two from operand 2
;; (same bases shifted by 16 into the concatenated vector).
9649 (define_expand "avx512f_shufps512_mask"
9650 [(match_operand:V16SF 0 "register_operand")
9651 (match_operand:V16SF 1 "register_operand")
9652 (match_operand:V16SF 2 "nonimmediate_operand")
9653 (match_operand:SI 3 "const_0_to_255_operand")
9654 (match_operand:V16SF 4 "register_operand")
9655 (match_operand:HI 5 "register_operand")]
9656 "TARGET_AVX512F"
9657 {
9658 int mask = INTVAL (operands[3]);
9659 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9660 GEN_INT ((mask >> 0) & 3),
9661 GEN_INT ((mask >> 2) & 3),
9662 GEN_INT (((mask >> 4) & 3) + 16),
9663 GEN_INT (((mask >> 6) & 3) + 16),
9664 GEN_INT (((mask >> 0) & 3) + 4),
9665 GEN_INT (((mask >> 2) & 3) + 4),
9666 GEN_INT (((mask >> 4) & 3) + 20),
9667 GEN_INT (((mask >> 6) & 3) + 20),
9668 GEN_INT (((mask >> 0) & 3) + 8),
9669 GEN_INT (((mask >> 2) & 3) + 8),
9670 GEN_INT (((mask >> 4) & 3) + 24),
9671 GEN_INT (((mask >> 6) & 3) + 24),
9672 GEN_INT (((mask >> 0) & 3) + 12),
9673 GEN_INT (((mask >> 2) & 3) + 12),
9674 GEN_INT (((mask >> 4) & 3) + 28),
9675 GEN_INT (((mask >> 6) & 3) + 28),
9676 operands[4], operands[5]));
9677 DONE;
9678 })
9679
9680
;; Zero-masked packed vfixupimm expander: delegates to the _maskz_1
;; pattern with an all-zero merge operand.
9681 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9682 [(match_operand:VF_AVX512VL 0 "register_operand")
9683 (match_operand:VF_AVX512VL 1 "register_operand")
9684 (match_operand:VF_AVX512VL 2 "register_operand")
9685 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9686 (match_operand:SI 4 "const_0_to_255_operand")
9687 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9688 "TARGET_AVX512F"
9689 {
9690 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9691 operands[0], operands[1], operands[2], operands[3],
9692 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9693 <round_saeonly_expand_operand6>));
9694 DONE;
9695 })
9696
;; Packed vfixupimmps/pd: operands are dest-source 1 (tied "0"), source 2,
;; an integer-vector table operand 3 and the immediate in operand 4.
9697 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9698 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9699 (unspec:VF_AVX512VL
9700 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9701 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9702 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9703 (match_operand:SI 4 "const_0_to_255_operand")]
9704 UNSPEC_FIXUPIMM))]
9705 "TARGET_AVX512F"
9706 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9707 [(set_attr "prefix" "evex")
9708 (set_attr "mode" "<MODE>")])
9709
;; Merge-masked packed vfixupimm: masked-off elements keep operand 1.
9710 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9711 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9712 (vec_merge:VF_AVX512VL
9713 (unspec:VF_AVX512VL
9714 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9715 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9716 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9717 (match_operand:SI 4 "const_0_to_255_operand")]
9718 UNSPEC_FIXUPIMM)
9719 (match_dup 1)
9720 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9721 "TARGET_AVX512F"
9722 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9723 [(set_attr "prefix" "evex")
9724 (set_attr "mode" "<MODE>")])
9725
;; Zero-masked scalar vfixupimm expander, mirroring the packed one above.
9726 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9727 [(match_operand:VF_128 0 "register_operand")
9728 (match_operand:VF_128 1 "register_operand")
9729 (match_operand:VF_128 2 "register_operand")
9730 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9731 (match_operand:SI 4 "const_0_to_255_operand")
9732 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9733 "TARGET_AVX512F"
9734 {
9735 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9736 operands[0], operands[1], operands[2], operands[3],
9737 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9738 <round_saeonly_expand_operand6>));
9739 DONE;
9740 })
9741
;; Scalar vfixupimmss/sd: only the low element is computed, the rest come
;; from operand 1 via the outer vec_merge with (const_int 1).
9742 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9743 [(set (match_operand:VF_128 0 "register_operand" "=v")
9744 (vec_merge:VF_128
9745 (unspec:VF_128
9746 [(match_operand:VF_128 1 "register_operand" "0")
9747 (match_operand:VF_128 2 "register_operand" "v")
9748 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9749 (match_operand:SI 4 "const_0_to_255_operand")]
9750 UNSPEC_FIXUPIMM)
9751 (match_dup 1)
9752 (const_int 1)))]
9753 "TARGET_AVX512F"
9754 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9755 [(set_attr "prefix" "evex")
9756 (set_attr "mode" "<ssescalarmode>")])
9757
;; Merge-masked scalar vfixupimm: inner merge selects the low element,
;; outer merge applies mask operand 5 against operand 1.
9758 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9759 [(set (match_operand:VF_128 0 "register_operand" "=v")
9760 (vec_merge:VF_128
9761 (vec_merge:VF_128
9762 (unspec:VF_128
9763 [(match_operand:VF_128 1 "register_operand" "0")
9764 (match_operand:VF_128 2 "register_operand" "v")
9765 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9766 (match_operand:SI 4 "const_0_to_255_operand")]
9767 UNSPEC_FIXUPIMM)
9768 (match_dup 1)
9769 (const_int 1))
9770 (match_dup 1)
9771 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9772 "TARGET_AVX512F"
9773 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9774 [(set_attr "prefix" "evex")
9775 (set_attr "mode" "<ssescalarmode>")])
9776
;; Packed vrndscaleps/pd (UNSPEC_ROUND) with rounding-behavior immediate
;; in operand 2, optional mask and SAE modifier.
9777 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9778 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9779 (unspec:VF_AVX512VL
9780 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9781 (match_operand:SI 2 "const_0_to_255_operand")]
9782 UNSPEC_ROUND))]
9783 "TARGET_AVX512F"
9784 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9785 [(set_attr "length_immediate" "1")
9786 (set_attr "prefix" "evex")
9787 (set_attr "mode" "<MODE>")])
9788
;; Scalar vrndscaless/sd operating on a full low vector element; upper
;; elements are taken from operand 1 (vec_merge with const_int 1).
9789 (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
9790 [(set (match_operand:VF_128 0 "register_operand" "=v")
9791 (vec_merge:VF_128
9792 (unspec:VF_128
9793 [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
9794 (match_operand:SI 3 "const_0_to_255_operand")]
9795 UNSPEC_ROUND)
9796 (match_operand:VF_128 1 "register_operand" "v")
9797 (const_int 1)))]
9798 "TARGET_AVX512F"
9799 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
9800 [(set_attr "length_immediate" "1")
9801 (set_attr "prefix" "evex")
9802 (set_attr "mode" "<MODE>")])
9803
;; Variant matching a scalar-mode UNSPEC_ROUND broadcast into the low
;; element (vec_duplicate then merge) — still a single vrndscale.
9804 (define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
9805 [(set (match_operand:VF_128 0 "register_operand" "=v")
9806 (vec_merge:VF_128
9807 (vec_duplicate:VF_128
9808 (unspec:<ssescalarmode>
9809 [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9810 (match_operand:SI 3 "const_0_to_255_operand")]
9811 UNSPEC_ROUND))
9812 (match_operand:VF_128 1 "register_operand" "v")
9813 (const_int 1)))]
9814 "TARGET_AVX512F"
9815 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
9816 [(set_attr "length_immediate" "1")
9817 (set_attr "prefix" "evex")
9818 (set_attr "mode" "<MODE>")])
9819
9820 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps with explicit per-element indices.  The insn condition
;; enforces that the four 2-bit selectors repeat identically in all four
;; 128-bit lanes (operands 7..18 equal operands 3..6 plus the lane base),
;; so the output template can reconstruct the single 8-bit immediate.
9821 (define_insn "avx512f_shufps512_1<mask_name>"
9822 [(set (match_operand:V16SF 0 "register_operand" "=v")
9823 (vec_select:V16SF
9824 (vec_concat:V32SF
9825 (match_operand:V16SF 1 "register_operand" "v")
9826 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9827 (parallel [(match_operand 3 "const_0_to_3_operand")
9828 (match_operand 4 "const_0_to_3_operand")
9829 (match_operand 5 "const_16_to_19_operand")
9830 (match_operand 6 "const_16_to_19_operand")
9831 (match_operand 7 "const_4_to_7_operand")
9832 (match_operand 8 "const_4_to_7_operand")
9833 (match_operand 9 "const_20_to_23_operand")
9834 (match_operand 10 "const_20_to_23_operand")
9835 (match_operand 11 "const_8_to_11_operand")
9836 (match_operand 12 "const_8_to_11_operand")
9837 (match_operand 13 "const_24_to_27_operand")
9838 (match_operand 14 "const_24_to_27_operand")
9839 (match_operand 15 "const_12_to_15_operand")
9840 (match_operand 16 "const_12_to_15_operand")
9841 (match_operand 17 "const_28_to_31_operand")
9842 (match_operand 18 "const_28_to_31_operand")])))]
9843 "TARGET_AVX512F
9844 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
9845 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
9846 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
9847 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
9848 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
9849 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
9850 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
9851 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
9852 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
9853 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
9854 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
9855 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
9856 {
9857 int mask;
9858 ;; Re-pack operands 3..6 into the 8-bit vshufps immediate; operands 5/6
9859 ;; index the concatenated vector, hence the -16 bias.
9859 mask = INTVAL (operands[3]);
9860 mask |= INTVAL (operands[4]) << 2;
9861 mask |= (INTVAL (operands[5]) - 16) << 4;
9862 mask |= (INTVAL (operands[6]) - 16) << 6;
9863 operands[3] = GEN_INT (mask);
9864
9865 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9866 }
9867 [(set_attr "type" "sselog")
9868 (set_attr "length_immediate" "1")
9869 (set_attr "prefix" "evex")
9870 (set_attr "mode" "V16SF")])
9870
;; Masked 512-bit vshufpd expander: each of the 8 immediate bits selects
;; the low or high double of the corresponding source position; indices
;; 8..15 address operand 2 in the concatenated V16DF vector.
9871 (define_expand "avx512f_shufpd512_mask"
9872 [(match_operand:V8DF 0 "register_operand")
9873 (match_operand:V8DF 1 "register_operand")
9874 (match_operand:V8DF 2 "nonimmediate_operand")
9875 (match_operand:SI 3 "const_0_to_255_operand")
9876 (match_operand:V8DF 4 "register_operand")
9877 (match_operand:QI 5 "register_operand")]
9878 "TARGET_AVX512F"
9879 {
9880 int mask = INTVAL (operands[3]);
9881 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
9882 GEN_INT (mask & 1),
9883 GEN_INT (mask & 2 ? 9 : 8),
9884 GEN_INT (mask & 4 ? 3 : 2),
9885 GEN_INT (mask & 8 ? 11 : 10),
9886 GEN_INT (mask & 16 ? 5 : 4),
9887 GEN_INT (mask & 32 ? 13 : 12),
9888 GEN_INT (mask & 64 ? 7 : 6),
9889 GEN_INT (mask & 128 ? 15 : 14),
9890 operands[4], operands[5]));
9891 DONE;
9892 })
9893
;; 512-bit vshufpd with explicit indices; the output routine re-packs the
;; 8 alternating operand-1/operand-2 selectors into the 8-bit immediate.
9894 (define_insn "avx512f_shufpd512_1<mask_name>"
9895 [(set (match_operand:V8DF 0 "register_operand" "=v")
9896 (vec_select:V8DF
9897 (vec_concat:V16DF
9898 (match_operand:V8DF 1 "register_operand" "v")
9899 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9900 (parallel [(match_operand 3 "const_0_to_1_operand")
9901 (match_operand 4 "const_8_to_9_operand")
9902 (match_operand 5 "const_2_to_3_operand")
9903 (match_operand 6 "const_10_to_11_operand")
9904 (match_operand 7 "const_4_to_5_operand")
9905 (match_operand 8 "const_12_to_13_operand")
9906 (match_operand 9 "const_6_to_7_operand")
9907 (match_operand 10 "const_14_to_15_operand")])))]
9908 "TARGET_AVX512F"
9909 {
9910 int mask;
9911 mask = INTVAL (operands[3]);
9912 mask |= (INTVAL (operands[4]) - 8) << 1;
9913 mask |= (INTVAL (operands[5]) - 2) << 2;
9914 mask |= (INTVAL (operands[6]) - 10) << 3;
9915 mask |= (INTVAL (operands[7]) - 4) << 4;
9916 mask |= (INTVAL (operands[8]) - 12) << 5;
9917 mask |= (INTVAL (operands[9]) - 6) << 6;
9918 mask |= (INTVAL (operands[10]) - 14) << 7;
9919 operands[3] = GEN_INT (mask);
9920
9921 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9922 }
9923 [(set_attr "type" "sselog")
9924 (set_attr "length_immediate" "1")
9925 (set_attr "prefix" "evex")
9926 (set_attr "mode" "V8DF")])
9927
;; 256-bit vshufpd expander: low 4 immediate bits become 4 explicit
;; element indices (indices 4..7 address operand 2 in the V8DF concat).
9928 (define_expand "avx_shufpd256<mask_expand4_name>"
9929 [(match_operand:V4DF 0 "register_operand")
9930 (match_operand:V4DF 1 "register_operand")
9931 (match_operand:V4DF 2 "nonimmediate_operand")
9932 (match_operand:SI 3 "const_int_operand")]
9933 "TARGET_AVX"
9934 {
9935 int mask = INTVAL (operands[3]);
9936 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
9937 operands[1],
9938 operands[2],
9939 GEN_INT (mask & 1),
9940 GEN_INT (mask & 2 ? 5 : 4),
9941 GEN_INT (mask & 4 ? 3 : 2),
9942 GEN_INT (mask & 8 ? 7 : 6)
9943 <mask_expand4_args>));
9944 DONE;
9945 })
9946
;; 256-bit vshufpd with explicit indices; re-packs them into the 4-bit
;; immediate in the output routine.
9947 (define_insn "avx_shufpd256_1<mask_name>"
9948 [(set (match_operand:V4DF 0 "register_operand" "=v")
9949 (vec_select:V4DF
9950 (vec_concat:V8DF
9951 (match_operand:V4DF 1 "register_operand" "v")
9952 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9953 (parallel [(match_operand 3 "const_0_to_1_operand")
9954 (match_operand 4 "const_4_to_5_operand")
9955 (match_operand 5 "const_2_to_3_operand")
9956 (match_operand 6 "const_6_to_7_operand")])))]
9957 "TARGET_AVX && <mask_avx512vl_condition>"
9958 {
9959 int mask;
9960 mask = INTVAL (operands[3]);
9961 mask |= (INTVAL (operands[4]) - 4) << 1;
9962 mask |= (INTVAL (operands[5]) - 2) << 2;
9963 mask |= (INTVAL (operands[6]) - 6) << 3;
9964 operands[3] = GEN_INT (mask);
9965
9966 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
9967 }
9968 [(set_attr "type" "sseshuf")
9969 (set_attr "length_immediate" "1")
9970 (set_attr "prefix" "vex")
9971 (set_attr "mode" "V4DF")])
9972
;; 128-bit shufpd expander: 2 immediate bits become 2 explicit indices
;; (indices 2..3 address operand 2 in the V4DF concat).
9973 (define_expand "sse2_shufpd<mask_expand4_name>"
9974 [(match_operand:V2DF 0 "register_operand")
9975 (match_operand:V2DF 1 "register_operand")
9976 (match_operand:V2DF 2 "vector_operand")
9977 (match_operand:SI 3 "const_int_operand")]
9978 "TARGET_SSE2"
9979 {
9980 int mask = INTVAL (operands[3]);
9981 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
9982 operands[2], GEN_INT (mask & 1),
9983 GEN_INT (mask & 2 ? 3 : 2)
9984 <mask_expand4_args>));
9985 DONE;
9986 })
9987
;; Merge-masked 128-bit vshufpd (AVX512VL): merge with operand 5
;; (register or zero, "0C") under mask operand 6.
9988 (define_insn "sse2_shufpd_v2df_mask"
9989 [(set (match_operand:V2DF 0 "register_operand" "=v")
9990 (vec_merge:V2DF
9991 (vec_select:V2DF
9992 (vec_concat:V4DF
9993 (match_operand:V2DF 1 "register_operand" "v")
9994 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9995 (parallel [(match_operand 3 "const_0_to_1_operand")
9996 (match_operand 4 "const_2_to_3_operand")]))
9997 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
9998 (match_operand:QI 6 "register_operand" "Yk")))]
9999 "TARGET_AVX512VL"
10000 {
10001 int mask;
10002 mask = INTVAL (operands[3]);
10003 mask |= (INTVAL (operands[4]) - 2) << 1;
10004 operands[3] = GEN_INT (mask);
10005
10006 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
10007 }
10008 [(set_attr "type" "sseshuf")
10009 (set_attr "length_immediate" "1")
10010 (set_attr "prefix" "evex")
10011 (set_attr "mode" "V2DF")])
10012
10013 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit high-qword interleave (per 128-bit lane): vpunpckhqdq.
10014 (define_insn "avx2_interleave_highv4di<mask_name>"
10015 [(set (match_operand:V4DI 0 "register_operand" "=v")
10016 (vec_select:V4DI
10017 (vec_concat:V8DI
10018 (match_operand:V4DI 1 "register_operand" "v")
10019 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10020 (parallel [(const_int 1)
10021 (const_int 5)
10022 (const_int 3)
10023 (const_int 7)])))]
10024 "TARGET_AVX2 && <mask_avx512vl_condition>"
10025 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10026 [(set_attr "type" "sselog")
10027 (set_attr "prefix" "vex")
10028 (set_attr "mode" "OI")])
10029
;; 512-bit high-qword interleave, same per-lane pattern across 4 lanes.
10030 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
10031 [(set (match_operand:V8DI 0 "register_operand" "=v")
10032 (vec_select:V8DI
10033 (vec_concat:V16DI
10034 (match_operand:V8DI 1 "register_operand" "v")
10035 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10036 (parallel [(const_int 1) (const_int 9)
10037 (const_int 3) (const_int 11)
10038 (const_int 5) (const_int 13)
10039 (const_int 7) (const_int 15)])))]
10040 "TARGET_AVX512F"
10041 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10042 [(set_attr "type" "sselog")
10043 (set_attr "prefix" "evex")
10044 (set_attr "mode" "XI")])
10045
;; 128-bit high-qword interleave: SSE2 punpckhqdq or AVX vpunpckhqdq.
10046 (define_insn "vec_interleave_highv2di<mask_name>"
10047 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10048 (vec_select:V2DI
10049 (vec_concat:V4DI
10050 (match_operand:V2DI 1 "register_operand" "0,v")
10051 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10052 (parallel [(const_int 1)
10053 (const_int 3)])))]
10054 "TARGET_SSE2 && <mask_avx512vl_condition>"
10055 "@
10056 punpckhqdq\t{%2, %0|%0, %2}
10057 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10058 [(set_attr "isa" "noavx,avx")
10059 (set_attr "type" "sselog")
10060 (set_attr "prefix_data16" "1,*")
10061 (set_attr "prefix" "orig,<mask_prefix>")
10062 (set_attr "mode" "TI")])
10063
;; 256-bit low-qword interleave (per 128-bit lane): vpunpcklqdq.
10064 (define_insn "avx2_interleave_lowv4di<mask_name>"
10065 [(set (match_operand:V4DI 0 "register_operand" "=v")
10066 (vec_select:V4DI
10067 (vec_concat:V8DI
10068 (match_operand:V4DI 1 "register_operand" "v")
10069 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10070 (parallel [(const_int 0)
10071 (const_int 4)
10072 (const_int 2)
10073 (const_int 6)])))]
10074 "TARGET_AVX2 && <mask_avx512vl_condition>"
10075 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10076 [(set_attr "type" "sselog")
10077 (set_attr "prefix" "vex")
10078 (set_attr "mode" "OI")])
10079
;; 512-bit low-qword interleave, same per-lane pattern across 4 lanes.
10080 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
10081 [(set (match_operand:V8DI 0 "register_operand" "=v")
10082 (vec_select:V8DI
10083 (vec_concat:V16DI
10084 (match_operand:V8DI 1 "register_operand" "v")
10085 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10086 (parallel [(const_int 0) (const_int 8)
10087 (const_int 2) (const_int 10)
10088 (const_int 4) (const_int 12)
10089 (const_int 6) (const_int 14)])))]
10090 "TARGET_AVX512F"
10091 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10092 [(set_attr "type" "sselog")
10093 (set_attr "prefix" "evex")
10094 (set_attr "mode" "XI")])
10095
;; 128-bit low-qword interleave: SSE2 punpcklqdq or AVX vpunpcklqdq.
10096 (define_insn "vec_interleave_lowv2di<mask_name>"
10097 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10098 (vec_select:V2DI
10099 (vec_concat:V4DI
10100 (match_operand:V2DI 1 "register_operand" "0,v")
10101 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10102 (parallel [(const_int 0)
10103 (const_int 2)])))]
10104 "TARGET_SSE2 && <mask_avx512vl_condition>"
10105 "@
10106 punpcklqdq\t{%2, %0|%0, %2}
10107 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10108 [(set_attr "isa" "noavx,avx")
10109 (set_attr "type" "sselog")
10110 (set_attr "prefix_data16" "1,*")
10111 (set_attr "prefix" "orig,vex")
10112 (set_attr "mode" "TI")])
10113
;; 128-bit shufpd/vshufpd over both V2DF and V2DI (VI8F_128) with explicit
;; indices; the output routine re-packs them into the 2-bit immediate.
10114 (define_insn "sse2_shufpd_<mode>"
10115 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
10116 (vec_select:VI8F_128
10117 (vec_concat:<ssedoublevecmode>
10118 (match_operand:VI8F_128 1 "register_operand" "0,v")
10119 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
10120 (parallel [(match_operand 3 "const_0_to_1_operand")
10121 (match_operand 4 "const_2_to_3_operand")])))]
10122 "TARGET_SSE2"
10123 {
10124 int mask;
10125 mask = INTVAL (operands[3]);
10126 mask |= (INTVAL (operands[4]) - 2) << 1;
10127 operands[3] = GEN_INT (mask);
10128
10129 switch (which_alternative)
10130 {
10131 case 0:
10132 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
10133 case 1:
10134 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10135 default:
10136 gcc_unreachable ();
10137 }
10138 }
10139 [(set_attr "isa" "noavx,avx")
10140 (set_attr "type" "sseshuf")
10141 (set_attr "length_immediate" "1")
10142 (set_attr "prefix" "orig,maybe_evex")
10143 (set_attr "mode" "V2DF")])
10144
10145 ;; Avoid combining registers from different units in a single alternative,
10146 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF.  Register destinations
;; use movhpd/unpckhpd/vunpckhpd; the "#" alternatives (memory source) are
;; split away by the define_split below.
10147 (define_insn "sse2_storehpd"
10148 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
10149 (vec_select:DF
10150 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
10151 (parallel [(const_int 1)])))]
10152 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10153 "@
10154 %vmovhpd\t{%1, %0|%0, %1}
10155 unpckhpd\t%0, %0
10156 vunpckhpd\t{%d1, %0|%0, %d1}
10157 #
10158 #
10159 #"
10160 [(set_attr "isa" "*,noavx,avx,*,*,*")
10161 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
10162 (set (attr "prefix_data16")
10163 (if_then_else
10164 (and (eq_attr "alternative" "0")
10165 (not (match_test "TARGET_AVX")))
10166 (const_string "1")
10167 (const_string "*")))
10168 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
10169 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
10170
;; High-element extract from memory is just a DF load at offset 8.
10171 (define_split
10172 [(set (match_operand:DF 0 "register_operand")
10173 (vec_select:DF
10174 (match_operand:V2DF 1 "memory_operand")
10175 (parallel [(const_int 1)])))]
10176 "TARGET_SSE2 && reload_completed"
10177 [(set (match_dup 0) (match_dup 1))]
10178 "operands[1] = adjust_address (operands[1], DFmode, 8);")
10179
;; SSE1-only fallback for extracting the high V2DF element, done with
;; SF-mode moves (movhps/movhlps/movlps) since no DF insns exist.
10180 (define_insn "*vec_extractv2df_1_sse"
10181 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10182 (vec_select:DF
10183 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
10184 (parallel [(const_int 1)])))]
10185 "!TARGET_SSE2 && TARGET_SSE
10186 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10187 "@
10188 movhps\t{%1, %0|%0, %1}
10189 movhlps\t{%1, %0|%0, %1}
10190 movlps\t{%H1, %0|%0, %H1}"
10191 [(set_attr "type" "ssemov")
10192 (set_attr "mode" "V2SF,V4SF,V2SF")])
10193
10194 ;; Avoid combining registers from different units in a single alternative,
10195 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF.  Only the store form
;; emits code (movlpd); the "#" alternatives are handled by the
;; define_split below, which rewrites the access as a plain DF lowpart.
10196 (define_insn "sse2_storelpd"
10197 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
10198 (vec_select:DF
10199 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
10200 (parallel [(const_int 0)])))]
10201 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10202 "@
10203 %vmovlpd\t{%1, %0|%0, %1}
10204 #
10205 #
10206 #
10207 #"
10208 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
10209 (set (attr "prefix_data16")
10210 (if_then_else (eq_attr "alternative" "0")
10211 (const_string "1")
10212 (const_string "*")))
10213 (set_attr "prefix" "maybe_vex")
10214 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
10215
;; Low-element extract is just the DF lowpart of the operand.
10216 (define_split
10217 [(set (match_operand:DF 0 "register_operand")
10218 (vec_select:DF
10219 (match_operand:V2DF 1 "nonimmediate_operand")
10220 (parallel [(const_int 0)])))]
10221 "TARGET_SSE2 && reload_completed"
10222 [(set (match_dup 0) (match_dup 1))]
10223 "operands[1] = gen_lowpart (DFmode, operands[1]);")
10224
;; SSE1-only fallback for extracting the low V2DF element via SF moves.
10225 (define_insn "*vec_extractv2df_0_sse"
10226 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10227 (vec_select:DF
10228 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
10229 (parallel [(const_int 0)])))]
10230 "!TARGET_SSE2 && TARGET_SSE
10231 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10232 "@
10233 movlps\t{%1, %0|%0, %1}
10234 movaps\t{%1, %0|%0, %1}
10235 movlps\t{%1, %0|%0, %q1}"
10236 [(set_attr "type" "ssemov")
10237 (set_attr "mode" "V2SF,V4SF,V2SF")])
10238
;; loadhpd expander with operand fix-up: replace the high double of
;; operand 1 with scalar operand 2.  ix86_fixup_binary_operands may pick a
;; different destination, so copy back if needed.
10239 (define_expand "sse2_loadhpd_exp"
10240 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10241 (vec_concat:V2DF
10242 (vec_select:DF
10243 (match_operand:V2DF 1 "nonimmediate_operand")
10244 (parallel [(const_int 0)]))
10245 (match_operand:DF 2 "nonimmediate_operand")))]
10246 "TARGET_SSE2"
10247 {
10248 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10249
10250 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
10251
10252 /* Fix up the destination if needed. */
10253 if (dst != operands[0])
10254 emit_move_insn (operands[0], dst);
10255
10256 DONE;
10257 })
10258
10259 ;; Avoid combining registers from different units in a single alternative,
10260 ;; see comment above inline_secondary_memory_needed function in i386.c
;; loadhpd insn: keep the low double of operand 1, set the high double
;; from operand 2 (movhpd/unpcklpd and their VEX forms).  The "#"
;; memory-destination alternatives are split below into a single DF store.
10261 (define_insn "sse2_loadhpd"
10262 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10263 "=x,v,x,v ,o,o ,o")
10264 (vec_concat:V2DF
10265 (vec_select:DF
10266 (match_operand:V2DF 1 "nonimmediate_operand"
10267 " 0,v,0,v ,0,0 ,0")
10268 (parallel [(const_int 0)]))
10269 (match_operand:DF 2 "nonimmediate_operand"
10270 " m,m,x,Yv,x,*f,r")))]
10271 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10272 "@
10273 movhpd\t{%2, %0|%0, %2}
10274 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10275 unpcklpd\t{%2, %0|%0, %2}
10276 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10277 #
10278 #
10279 #"
10280 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10281 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
10282 (set (attr "prefix_data16")
10283 (if_then_else (eq_attr "alternative" "0")
10284 (const_string "1")
10285 (const_string "*")))
10286 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
10287 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
10288
;; Setting the high double of a memory V2DF in place is a DF store at
;; offset 8.
10289 (define_split
10290 [(set (match_operand:V2DF 0 "memory_operand")
10291 (vec_concat:V2DF
10292 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
10293 (match_operand:DF 1 "register_operand")))]
10294 "TARGET_SSE2 && reload_completed"
10295 [(set (match_dup 0) (match_dup 1))]
10296 "operands[0] = adjust_address (operands[0], DFmode, 8);")
10297
;; Replace the low DF element of a V2DF (operand 1) with operand 2,
;; keeping the high element.  Mirrors sse2_loadhpd_exp: legitimize the
;; operands, emit the insn, then fix up a substituted destination.
(define_expand "sse2_loadlpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand")
	(vec_concat:V2DF
	  (match_operand:DF 2 "nonimmediate_operand")
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimmediate_operand")
	    (parallel [(const_int 1)]))))]
  "TARGET_SSE2"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);

  emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;; Alternative 0 handles a zero high half with a plain (v)movq; 1-7 are
;; the register/memory merge forms; 8-10 store into the low half of a
;; memory destination and are split below.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"
	  "=v,x,v,x,v,x,x,v,m,m ,m")
	(vec_concat:V2DF
	  (match_operand:DF 2 "nonimmediate_operand"
	  "vm,m,m,x,v,0,0,v,x,*f,r")
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimm_or_0_operand"
	  " C,0,v,0,v,x,o,o,0,0 ,0")
	    (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   %vmovq\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
   #
   #
   #"
  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
   (set (attr "type")
     (cond [(eq_attr "alternative" "5")
	      (const_string "sselog")
	    (eq_attr "alternative" "9")
	      (const_string "fmov")
	    (eq_attr "alternative" "10")
	      (const_string "imov")
	   ]
	   (const_string "ssemov")))
   (set (attr "prefix_data16")
     (if_then_else (eq_attr "alternative" "1,6")
       (const_string "1")
       (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "5")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "0")
	      (const_string "maybe_vex")
	    (eq_attr "alternative" "1,3,5,6")
	      (const_string "orig")
	    (eq_attr "alternative" "2,4,7")
	      (const_string "maybe_evex")
	   ]
	   (const_string "*")))
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])

;; Split the memory-destination alternatives above: storing into the low
;; half of a V2DF in memory is just a DF store at offset 0.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand")
	(vec_concat:V2DF
	  (match_operand:DF 1 "register_operand")
	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[0] = adjust_address (operands[0], DFmode, 0);")
10380
;; Merge the low DF element of operand 2 into operand 1 (vec_merge with
;; mask 1 selects element 0 from operand 2).  Covers register merges
;; (movsd/vmovsd), low-half loads (movlpd), low-half stores, and the
;; shufpd/movhps fallbacks for mixed register/memory cases.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,v,x,v,m,x,x,v,o")
	(vec_merge:V2DF
	  (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   movlpd\t{%2, %0|%0, %q2}
   vmovlpd\t{%2, %1, %0|%0, %1, %q2}
   %vmovlpd\t{%2, %0|%q0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhps\t{%1, %H0|%H0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "5")
       (const_string "sselog")
       (const_string "ssemov")))
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "2,4")
	    (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "5")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "1,3,7")
	      (const_string "maybe_evex")
	    (eq_attr "alternative" "4,8")
	      (const_string "maybe_vex")
	   ]
	   (const_string "orig")))
   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
10422
;; Broadcast a DF scalar to both V2DF elements: unpcklpd for plain SSE2,
;; (v)movddup for SSE3/AVX, and the EVEX vmovddup form when masking is
;; requested (pattern is duplicated by the <mask_name> subst machinery).
(define_insn "vec_dupv2df<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v")
	(vec_duplicate:V2DF
	  (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   unpcklpd\t%0, %0
   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
   vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,sse3,avx512vl")
   (set_attr "type" "sselog1")
   (set_attr "prefix" "orig,maybe_vex,evex")
   (set_attr "mode" "V2DF,DF,DF")])
10436
;; Build a V2DF from two DF scalars.  Special cases: operands equal in
;; memory uses (v)movddup (alternatives 3,4, note the "1" constraint);
;; a zero high half uses (v)movq (alternative 7); alternatives 8,9 are
;; SSE1-style SF-mode fallbacks.  Two distinct memory inputs are only
;; allowed in the movddup case.
(define_insn "vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x, v,x,x")
	(vec_concat:V2DF
	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
	  (match_operand:DF 2 "nonimm_or_0_operand"  " x,x,v,1,1,m,m, C,x,m")))]
  "TARGET_SSE
   && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
       || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %1}
   vmovddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovq\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,5")
	      (const_string "sse2_noavx")
	    (eq_attr "alternative" "1,6")
	      (const_string "avx")
	    (eq_attr "alternative" "2,4")
	      (const_string "avx512vl")
	    (eq_attr "alternative" "3")
	      (const_string "sse3")
	    (eq_attr "alternative" "7")
	      (const_string "sse2")
	   ]
	   (const_string "noavx")))
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,2,3,4")
       (const_string "sselog")
       (const_string "ssemov")))
   (set (attr "prefix_data16")
     (if_then_else (eq_attr "alternative" "5")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "1,6")
	      (const_string "vex")
	    (eq_attr "alternative" "2,4")
	      (const_string "evex")
	    (eq_attr "alternative" "3,7")
	      (const_string "maybe_vex")
	   ]
	   (const_string "orig")))
   (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10488
;; vmovq clears also the higher bits.
;; Set element 0 of a zeroed 256/512-bit DF vector from a scalar:
;; a single vmovq writes the low element and zeroes everything above.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
	(vec_merge:VF2_512_256
	  (vec_duplicate:VF2_512_256
	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
	  (match_operand:VF2_512_256 1 "const0_operand" "C")
	  (const_int 1)))]
  "TARGET_AVX"
  "vmovq\t{%2, %x0|%x0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "DF")])
10502
10503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10504 ;;
10505 ;; Parallel integer down-conversion operations
10506 ;;
10507 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10508
;; 512-bit sources (V16SI/V8DI) narrowed by the AVX512F vpmov{db,dw,qd,qw}
;; instructions.  The mode attributes map each destination mode to its
;; source mode, its lower-case name (for pattern names) and the vpmov
;; mnemonic suffix.
(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
(define_mode_attr pmov_src_mode
  [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
(define_mode_attr pmov_src_lower
  [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
(define_mode_attr pmov_suff_1
  [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])

;; Standard truncation optab entry; matched by the insn below.
(define_expand "trunc<pmov_src_lower><mode>2"
  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
	(truncate:PMOV_DST_MODE_1
	  (match_operand:<pmov_src_mode> 1 "register_operand")))]
  "TARGET_AVX512F")

;; Plain/saturating truncation (any_truncate) to register or directly
;; to memory.
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
	(any_truncate:PMOV_DST_MODE_1
	  (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Masked truncation: result elements merge with operand 2 (register 0
;; or zero for the register form; the destination itself for the store
;; form) under mask operand 3.
(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
    (vec_merge:PMOV_DST_MODE_1
      (any_truncate:PMOV_DST_MODE_1
	(match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
      (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Builtin entry point for the masked store form; matches the insn above
;; with the destination memory as the merge source.
(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
  [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
    (vec_merge:PMOV_DST_MODE_1
      (any_truncate:PMOV_DST_MODE_1
	(match_operand:<pmov_src_mode> 1 "register_operand"))
      (match_dup 0)
      (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512F")
10556
;; V32HI -> V32QI truncation (vpmovwb and its saturating variants);
;; requires AVX512BW.  Same expander/insn/mask/mask_store structure as
;; the AVX512F patterns above.
(define_expand "truncv32hiv32qi2"
  [(set (match_operand:V32QI 0 "nonimmediate_operand")
	(truncate:V32QI
	  (match_operand:V32HI 1 "register_operand")))]
  "TARGET_AVX512BW")

(define_insn "avx512bw_<code>v32hiv32qi2"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
	(any_truncate:V32QI
	  (match_operand:V32HI 1 "register_operand" "v,v")))]
  "TARGET_AVX512BW"
  "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Masked variant; mask is SImode since there are 32 elements.
(define_insn "avx512bw_<code>v32hiv32qi2_mask"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
    (vec_merge:V32QI
      (any_truncate:V32QI
	(match_operand:V32HI 1 "register_operand" "v,v"))
      (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
      (match_operand:SI 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512BW"
  "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Builtin entry point for the masked store form.
(define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
  [(set (match_operand:V32QI 0 "nonimmediate_operand")
    (vec_merge:V32QI
      (any_truncate:V32QI
	(match_operand:V32HI 1 "register_operand"))
      (match_dup 0)
      (match_operand:SI 2 "register_operand")))]
  "TARGET_AVX512BW")
10596
;; 2:1 narrowing where source and destination both fit in 128/256 bits
;; (AVX512VL): V8SI->V8HI, V2DI->V2SI analogues via <ssedoublemode>.
;; V16QI needs AVX512BW for the wb form.
(define_mode_iterator PMOV_DST_MODE_2
  [V4SI V8HI (V16QI "TARGET_AVX512BW")])
(define_mode_attr pmov_suff_2
  [(V16QI "wb") (V8HI "dw") (V4SI "qd")])

;; Standard truncation optab entry.
(define_expand "trunc<ssedoublemodelower><mode>2"
  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
	(truncate:PMOV_DST_MODE_2
	  (match_operand:<ssedoublemode> 1 "register_operand")))]
  "TARGET_AVX512VL")

(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
	(any_truncate:PMOV_DST_MODE_2
	  (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Masked variant; merge source is register 0 / zero, or the memory
;; destination for the store alternative.
(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
    (vec_merge:PMOV_DST_MODE_2
      (any_truncate:PMOV_DST_MODE_2
	(match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
      (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Builtin entry point for the masked store form.
(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
    (vec_merge:PMOV_DST_MODE_2
      (any_truncate:PMOV_DST_MODE_2
	(match_operand:<ssedoublemode> 1 "register_operand"))
      (match_dup 0)
      (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512VL")
10641
;; Truncations to sub-128-bit QI vectors (V2QI/V4QI/V8QI).  The hardware
;; result is a full V16QI register whose unused upper elements are zero,
;; so the register patterns model the result as a vec_concat of the
;; truncation with a zero vector (pmov_dst_zeroed_3 gives the padding
;; mode for each source).
(define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
(define_mode_attr pmov_dst_3_lower
  [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
(define_mode_attr pmov_dst_3
  [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
(define_mode_attr pmov_dst_zeroed_3
  [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
(define_mode_attr pmov_suff_3
  [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])

;; Standard truncation optab entry: view the destination as the V16QI
;; lowpart and emit the zero-padded insn below.
(define_expand "trunc<mode><pmov_dst_3_lower>2"
  [(set (match_operand:<pmov_dst_3> 0 "register_operand")
	(truncate:<pmov_dst_3>
	  (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
  "TARGET_AVX512VL"
{
  operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0);
  emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0],
							    operands[1],
							    CONST0_RTX (<pmov_dst_zeroed_3>mode)));
  DONE;
})

;; Register form: truncated elements in the low part, zeros above.
(define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
    (vec_concat:V16QI
      (any_truncate:<pmov_dst_3>
	(match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
      (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; V2DI -> V2QI store: vpmovqb writes just two bytes to memory.
(define_insn "*avx512vl_<code>v2div2qi2_store_1"
  [(set (match_operand:V2QI 0 "memory_operand" "=m")
	(any_truncate:V2QI
	  (match_operand:V2DI 1 "register_operand" "v")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; The same store written as an HImode subreg store; rewrite the memory
;; reference to V2QI mode before reload so the pattern above matches.
(define_insn_and_split "*avx512vl_<code>v2div2qi2_store_2"
  [(set (match_operand:HI 0 "memory_operand")
	(subreg:HI
	  (any_truncate:V2QI
	    (match_operand:V2DI 1 "register_operand")) 0))]
  "TARGET_AVX512VL && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_truncate:V2QI (match_dup 1)))]
  "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
10699
;; Masked V2DI -> V2QI truncation.  Register form: the two result bytes
;; merge with the low two bytes of operand 2 under mask operand 3, and
;; the remaining 14 bytes of the V16QI destination are zero.
(define_insn "avx512vl_<code>v2div2qi2_mask"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
    (vec_concat:V16QI
      (vec_merge:V2QI
	(any_truncate:V2QI
	  (match_operand:V2DI 1 "register_operand" "v"))
	(vec_select:V2QI
	  (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
	  (parallel [(const_int 0) (const_int 1)]))
	(match_operand:QI 3 "register_operand" "Yk"))
      (const_vector:V14QI [(const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Zero-masking form: masked-off bytes become zero ({z} modifier).
(define_insn "*avx512vl_<code>v2div2qi2_mask_1"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
    (vec_concat:V16QI
      (vec_merge:V2QI
	(any_truncate:V2QI
	  (match_operand:V2DI 1 "register_operand" "v"))
	(const_vector:V2QI [(const_int 0) (const_int 0)])
	(match_operand:QI 2 "register_operand" "Yk"))
      (const_vector:V14QI [(const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Masked store: only bytes selected by the mask are written to memory.
(define_insn "*avx512vl_<code>v2div2qi2_mask_store_1"
  [(set (match_operand:V2QI 0 "memory_operand" "=m")
	(vec_merge:V2QI
	  (any_truncate:V2QI
	    (match_operand:V2DI 1 "register_operand" "v"))
	  (match_dup 0)
	  (match_operand:QI 2 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; The masked store expressed through HImode subregs (as produced by the
;; builtin expansion); rewrite the memory reference to V2QImode and match
;; the pattern above.
(define_insn_and_split "avx512vl_<code>v2div2qi2_mask_store_2"
  [(set (match_operand:HI 0 "memory_operand")
	(subreg:HI
	  (vec_merge:V2QI
	    (any_truncate:V2QI
	      (match_operand:V2DI 1 "register_operand"))
	    (vec_select:V2QI
	      (subreg:V4QI
		(vec_concat:V2HI
		  (match_dup 0)
		  (const_int 0)) 0)
	      (parallel [(const_int 0) (const_int 1)]))
	    (match_operand:QI 2 "register_operand")) 0))]
  "TARGET_AVX512VL && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(vec_merge:V2QI
	  (any_truncate:V2QI (match_dup 1))
	  (match_dup 0)
	  (match_dup 2)))]
  "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
10780
;; Truncations to V4QI from V4SI/V4DI (VI4_128_8_256): plain store,
;; subreg-store split, masked register forms and masked stores.  Same
;; structure as the v2div2qi patterns above, with four result bytes.
(define_insn "*avx512vl_<code><mode>v4qi2_store_1"
  [(set (match_operand:V4QI 0 "memory_operand" "=m")
	(any_truncate:V4QI
	  (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; SImode-subreg form of the store above; retarget the memory to V4QI.
(define_insn_and_split "*avx512vl_<code><mode>v4qi2_store_2"
  [(set (match_operand:SI 0 "memory_operand")
	(subreg:SI
	  (any_truncate:V4QI
	    (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
  "TARGET_AVX512VL && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_truncate:V4QI (match_dup 1)))]
  "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")

;; Merge-masked register form: result bytes merge with the low four
;; bytes of operand 2 under mask operand 3; upper 12 bytes are zero.
(define_insn "avx512vl_<code><mode>v4qi2_mask"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
    (vec_concat:V16QI
      (vec_merge:V4QI
	(any_truncate:V4QI
	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
	(vec_select:V4QI
	  (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)]))
	(match_operand:QI 3 "register_operand" "Yk"))
      (const_vector:V12QI [(const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Zero-masking form ({z} modifier).
(define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
    (vec_concat:V16QI
      (vec_merge:V4QI
	(any_truncate:V4QI
	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
	(const_vector:V4QI [(const_int 0) (const_int 0)
			    (const_int 0) (const_int 0)])
	(match_operand:QI 2 "register_operand" "Yk"))
      (const_vector:V12QI [(const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)
			   (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Masked store: only bytes selected by the mask are written.
(define_insn "*avx512vl_<code><mode>v4qi2_mask_store_1"
  [(set (match_operand:V4QI 0 "memory_operand" "=m")
	(vec_merge:V4QI
	  (any_truncate:V4QI
	    (match_operand:VI4_128_8_256 1 "register_operand" "v"))
	  (match_dup 0)
	  (match_operand:QI 2 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; SImode-subreg form of the masked store; retarget memory to V4QImode.
(define_insn_and_split "avx512vl_<code><mode>v4qi2_mask_store_2"
  [(set (match_operand:SI 0 "memory_operand")
	(subreg:SI
	  (vec_merge:V4QI
	    (any_truncate:V4QI
	      (match_operand:VI4_128_8_256 1 "register_operand"))
	    (vec_select:V4QI
	      (subreg:V8QI
		(vec_concat:V2SI
		  (match_dup 0)
		  (const_int 0)) 0)
	      (parallel [(const_int 0) (const_int 1)
			 (const_int 2) (const_int 3)]))
	    (match_operand:QI 2 "register_operand")) 0))]
  "TARGET_AVX512VL && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(vec_merge:V4QI
	  (any_truncate:V4QI (match_dup 1))
	  (match_dup 0)
	  (match_dup 2)))]
  "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
10885
;; Sources that truncate to V8QI: V8HI (needs AVX512BW for wb) and V8SI.
(define_mode_iterator VI2_128_BW_4_256
  [(V8HI "TARGET_AVX512BW") V8SI])

;; Plain V8QI store.
(define_insn "*avx512vl_<code><mode>v8qi2_store_1"
  [(set (match_operand:V8QI 0 "memory_operand" "=m")
	(any_truncate:V8QI
	  (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; DImode-subreg form of the store; retarget the memory to V8QImode.
(define_insn_and_split "*avx512vl_<code><mode>v8qi2_store_2"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(subreg:DI
	  (any_truncate:V8QI
	    (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) 0))]
  "TARGET_AVX512VL && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_truncate:V8QI (match_dup 1)))]
  "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")

;; Merge-masked register form: eight result bytes merge with the low
;; eight bytes of operand 2 under mask operand 3; upper half is zero.
(define_insn "avx512vl_<code><mode>v8qi2_mask"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
    (vec_concat:V16QI
      (vec_merge:V8QI
	(any_truncate:V8QI
	  (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
	(vec_select:V8QI
	  (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)
		     (const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)]))
	(match_operand:QI 3 "register_operand" "Yk"))
      (const_vector:V8QI [(const_int 0) (const_int 0)
			  (const_int 0) (const_int 0)
			  (const_int 0) (const_int 0)
			  (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Zero-masking form ({z} modifier).
(define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
    (vec_concat:V16QI
      (vec_merge:V8QI
	(any_truncate:V8QI
	  (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
	(const_vector:V8QI [(const_int 0) (const_int 0)
			    (const_int 0) (const_int 0)
			    (const_int 0) (const_int 0)
			    (const_int 0) (const_int 0)])
	(match_operand:QI 2 "register_operand" "Yk"))
      (const_vector:V8QI [(const_int 0) (const_int 0)
			  (const_int 0) (const_int 0)
			  (const_int 0) (const_int 0)
			  (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Masked store: only bytes selected by the mask are written.
(define_insn "*avx512vl_<code><mode>v8qi2_mask_store_1"
  [(set (match_operand:V8QI 0 "memory_operand" "=m")
	(vec_merge:V8QI
	  (any_truncate:V8QI
	    (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
	  (match_dup 0)
	  (match_operand:QI 2 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; DImode-subreg form of the masked store; retarget memory to V8QImode.
(define_insn_and_split "avx512vl_<code><mode>v8qi2_mask_store_2"
  [(set (match_operand:DI 0 "memory_operand")
	(subreg:DI
	  (vec_merge:V8QI
	    (any_truncate:V8QI
	      (match_operand:VI2_128_BW_4_256 1 "register_operand"))
	    (vec_select:V8QI
	      (subreg:V16QI
		(vec_concat:V2DI
		  (match_dup 0)
		  (const_int 0)) 0)
	      (parallel [(const_int 0) (const_int 1)
			 (const_int 2) (const_int 3)
			 (const_int 4) (const_int 5)
			 (const_int 6) (const_int 7)]))
	    (match_operand:QI 2 "register_operand")) 0))]
  "TARGET_AVX512VL && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(vec_merge:V8QI
	  (any_truncate:V8QI (match_dup 1))
	  (match_dup 0)
	  (match_dup 2)))]
  "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
10995
;; Truncations to sub-128-bit HI vectors (V2HI/V4HI).  As with the QI
;; cases, the hardware writes a full V8HI register, modelled as the
;; truncation concatenated with zeros (pmov_dst_zeroed_4).
(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
(define_mode_attr pmov_dst_4
  [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
(define_mode_attr pmov_dst_zeroed_4
  [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
(define_mode_attr pmov_suff_4
  [(V4DI "qw") (V2DI "qw") (V4SI "dw")])

;; Standard truncation optab entry: view the destination as the V8HI
;; lowpart and emit the zero-padded insn below.
(define_expand "trunc<mode><pmov_dst_4>2"
  [(set (match_operand:<pmov_dst_4> 0 "register_operand")
	(truncate:<pmov_dst_4>
	  (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
  "TARGET_AVX512VL"
{
  operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0);
  emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0],
							    operands[1],
							    CONST0_RTX (<pmov_dst_zeroed_4>mode)));
  DONE;

})

;; Register form: truncated elements in the low part, zeros above.
(define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
    (vec_concat:V8HI
      (any_truncate:<pmov_dst_4>
	(match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
      (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
11029
;; Truncations to V4HI from V4SI/V4DI (VI4_128_8_256): plain store,
;; subreg-store split, masked register forms and masked stores.
(define_insn "*avx512vl_<code><mode>v4hi2_store_1"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
	(any_truncate:V4HI
	  (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; DImode-subreg form of the store; retarget the memory to V4HImode.
(define_insn_and_split "*avx512vl_<code><mode>v4hi2_store_2"
  [(set (match_operand:DI 0 "memory_operand")
	(subreg:DI
	  (any_truncate:V4HI
	    (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
  "TARGET_AVX512VL && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_truncate:V4HI (match_dup 1)))]
  "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")

;; Merge-masked register form: four result words merge with the low
;; four words of operand 2 under mask operand 3; upper half is zero.
(define_insn "avx512vl_<code><mode>v4hi2_mask"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
    (vec_concat:V8HI
      (vec_merge:V4HI
	(any_truncate:V4HI
	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
	(vec_select:V4HI
	  (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)]))
	(match_operand:QI 3 "register_operand" "Yk"))
      (const_vector:V4HI [(const_int 0) (const_int 0)
			  (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Zero-masking form ({z} modifier).
(define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
    (vec_concat:V8HI
      (vec_merge:V4HI
	(any_truncate:V4HI
	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
	(const_vector:V4HI [(const_int 0) (const_int 0)
			    (const_int 0) (const_int 0)])
	(match_operand:QI 2 "register_operand" "Yk"))
      (const_vector:V4HI [(const_int 0) (const_int 0)
			  (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; Masked store; the source-operand print modifier (%t1 for 32-bit,
;; %g1 for 64-bit elements) picks the register width matching <MODE>.
(define_insn "*avx512vl_<code><mode>v4hi2_mask_store_1"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
	(vec_merge:V4HI
	  (any_truncate:V4HI
	    (match_operand:VI4_128_8_256 1 "register_operand" "v"))
	  (match_dup 0)
	  (match_operand:QI 2 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
{
  if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
    return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
  return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

;; DImode-subreg form of the masked store; retarget memory to V4HImode.
(define_insn_and_split "avx512vl_<code><mode>v4hi2_mask_store_2"
  [(set (match_operand:DI 0 "memory_operand")
	(subreg:DI
	  (vec_merge:V4HI
	    (any_truncate:V4HI
	      (match_operand:VI4_128_8_256 1 "register_operand"))
	    (vec_select:V4HI
	      (subreg:V8HI
		(vec_concat:V2DI
		  (match_dup 0)
		  (const_int 0)) 0)
	      (parallel [(const_int 0) (const_int 1)
			 (const_int 2) (const_int 3)]))
	    (match_operand:QI 2 "register_operand")) 0))]
  "TARGET_AVX512VL && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(vec_merge:V4HI
	  (any_truncate:V4HI (match_dup 1))
	  (match_dup 0)
	  (match_dup 2)))]
  "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
11130
11131
11132 (define_insn "*avx512vl_<code>v2div2hi2_store_1"
11133 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11134 (any_truncate:V2HI
11135 (match_operand:V2DI 1 "register_operand" "v")))]
11136 "TARGET_AVX512VL"
11137 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
11138 [(set_attr "type" "ssemov")
11139 (set_attr "memory" "store")
11140 (set_attr "prefix" "evex")
11141 (set_attr "mode" "TI")])
11142
11143 (define_insn_and_split "*avx512vl_<code>v2div2hi2_store_2"
11144 [(set (match_operand:SI 0 "memory_operand")
11145 (subreg:SI
11146 (any_truncate:V2HI
11147 (match_operand:V2DI 1 "register_operand")) 0))]
11148 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11149 "#"
11150 "&& 1"
11151 [(set (match_dup 0)
11152 (any_truncate:V2HI (match_dup 1)))]
11153 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
11154
11155 (define_insn "avx512vl_<code>v2div2hi2_mask"
11156 [(set (match_operand:V8HI 0 "register_operand" "=v")
11157 (vec_concat:V8HI
11158 (vec_merge:V2HI
11159 (any_truncate:V2HI
11160 (match_operand:V2DI 1 "register_operand" "v"))
11161 (vec_select:V2HI
11162 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11163 (parallel [(const_int 0) (const_int 1)]))
11164 (match_operand:QI 3 "register_operand" "Yk"))
11165 (const_vector:V6HI [(const_int 0) (const_int 0)
11166 (const_int 0) (const_int 0)
11167 (const_int 0) (const_int 0)])))]
11168 "TARGET_AVX512VL"
11169 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11170 [(set_attr "type" "ssemov")
11171 (set_attr "prefix" "evex")
11172 (set_attr "mode" "TI")])
11173
11174 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
11175 [(set (match_operand:V8HI 0 "register_operand" "=v")
11176 (vec_concat:V8HI
11177 (vec_merge:V2HI
11178 (any_truncate:V2HI
11179 (match_operand:V2DI 1 "register_operand" "v"))
11180 (const_vector:V2HI [(const_int 0) (const_int 0)])
11181 (match_operand:QI 2 "register_operand" "Yk"))
11182 (const_vector:V6HI [(const_int 0) (const_int 0)
11183 (const_int 0) (const_int 0)
11184 (const_int 0) (const_int 0)])))]
11185 "TARGET_AVX512VL"
11186 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11187 [(set_attr "type" "ssemov")
11188 (set_attr "prefix" "evex")
11189 (set_attr "mode" "TI")])
11190
11191 (define_insn "*avx512vl_<code>v2div2hi2_mask_store_1"
11192 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11193 (vec_merge:V2HI
11194 (any_truncate:V2HI
11195 (match_operand:V2DI 1 "register_operand" "v"))
11196 (match_dup 0)
11197 (match_operand:QI 2 "register_operand" "Yk")))]
11198 "TARGET_AVX512VL"
11199 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
11200 [(set_attr "type" "ssemov")
11201 (set_attr "memory" "store")
11202 (set_attr "prefix" "evex")
11203 (set_attr "mode" "TI")])
11204
11205 (define_insn_and_split "avx512vl_<code>v2div2hi2_mask_store_2"
11206 [(set (match_operand:SI 0 "memory_operand")
11207 (subreg:SI
11208 (vec_merge:V2HI
11209 (any_truncate:V2HI
11210 (match_operand:V2DI 1 "register_operand"))
11211 (vec_select:V2HI
11212 (subreg:V4HI
11213 (vec_concat:V2SI
11214 (match_dup 0)
11215 (const_int 0)) 0)
11216 (parallel [(const_int 0) (const_int 1)]))
11217 (match_operand:QI 2 "register_operand")) 0))]
11218 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11219 "#"
11220 "&& 1"
11221 [(set (match_dup 0)
11222 (vec_merge:V2HI
11223 (any_truncate:V2HI (match_dup 1))
11224 (match_dup 0)
11225 (match_dup 2)))]
11226 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
11227
11228 (define_expand "truncv2div2si2"
11229 [(set (match_operand:V2SI 0 "register_operand")
11230 (truncate:V2SI
11231 (match_operand:V2DI 1 "register_operand")))]
11232 "TARGET_AVX512VL"
11233 {
11234 operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0);
11235 emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0],
11236 operands[1],
11237 CONST0_RTX (V2SImode)));
11238 DONE;
11239 })
11240
11241 (define_insn "avx512vl_<code>v2div2si2"
11242 [(set (match_operand:V4SI 0 "register_operand" "=v")
11243 (vec_concat:V4SI
11244 (any_truncate:V2SI
11245 (match_operand:V2DI 1 "register_operand" "v"))
11246 (match_operand:V2SI 2 "const0_operand")))]
11247 "TARGET_AVX512VL"
11248 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11249 [(set_attr "type" "ssemov")
11250 (set_attr "prefix" "evex")
11251 (set_attr "mode" "TI")])
11252
11253 (define_insn "*avx512vl_<code>v2div2si2_store_1"
11254 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11255 (any_truncate:V2SI
11256 (match_operand:V2DI 1 "register_operand" "v")))]
11257 "TARGET_AVX512VL"
11258 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11259 [(set_attr "type" "ssemov")
11260 (set_attr "memory" "store")
11261 (set_attr "prefix" "evex")
11262 (set_attr "mode" "TI")])
11263
11264 (define_insn_and_split "*avx512vl_<code>v2div2si2_store_2"
11265 [(set (match_operand:DI 0 "memory_operand")
11266 (subreg:DI
11267 (any_truncate:V2SI
11268 (match_operand:V2DI 1 "register_operand")) 0))]
11269 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11270 "#"
11271 "&& 1"
11272 [(set (match_dup 0)
11273 (any_truncate:V2SI (match_dup 1)))]
11274 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
11275
11276 (define_insn "avx512vl_<code>v2div2si2_mask"
11277 [(set (match_operand:V4SI 0 "register_operand" "=v")
11278 (vec_concat:V4SI
11279 (vec_merge:V2SI
11280 (any_truncate:V2SI
11281 (match_operand:V2DI 1 "register_operand" "v"))
11282 (vec_select:V2SI
11283 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
11284 (parallel [(const_int 0) (const_int 1)]))
11285 (match_operand:QI 3 "register_operand" "Yk"))
11286 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11287 "TARGET_AVX512VL"
11288 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11289 [(set_attr "type" "ssemov")
11290 (set_attr "prefix" "evex")
11291 (set_attr "mode" "TI")])
11292
11293 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
11294 [(set (match_operand:V4SI 0 "register_operand" "=v")
11295 (vec_concat:V4SI
11296 (vec_merge:V2SI
11297 (any_truncate:V2SI
11298 (match_operand:V2DI 1 "register_operand" "v"))
11299 (const_vector:V2SI [(const_int 0) (const_int 0)])
11300 (match_operand:QI 2 "register_operand" "Yk"))
11301 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11302 "TARGET_AVX512VL"
11303 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11304 [(set_attr "type" "ssemov")
11305 (set_attr "prefix" "evex")
11306 (set_attr "mode" "TI")])
11307
11308 (define_insn "*avx512vl_<code>v2div2si2_mask_store_1"
11309 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11310 (vec_merge:V2SI
11311 (any_truncate:V2SI
11312 (match_operand:V2DI 1 "register_operand" "v"))
11313 (match_dup 0)
11314 (match_operand:QI 2 "register_operand" "Yk")))]
11315 "TARGET_AVX512VL"
11316 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11317 [(set_attr "type" "ssemov")
11318 (set_attr "memory" "store")
11319 (set_attr "prefix" "evex")
11320 (set_attr "mode" "TI")])
11321
11322 (define_insn_and_split "avx512vl_<code>v2div2si2_mask_store_2"
11323 [(set (match_operand:DI 0 "memory_operand")
11324 (subreg:DI
11325 (vec_merge:V2SI
11326 (any_truncate:V2SI
11327 (match_operand:V2DI 1 "register_operand"))
11328 (vec_select:V2SI
11329 (subreg:V4SI
11330 (vec_concat:V2DI
11331 (match_dup 0)
11332 (const_int 0)) 0)
11333 (parallel [(const_int 0) (const_int 1)]))
11334 (match_operand:QI 2 "register_operand")) 0))]
11335 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11336 "#"
11337 "&& 1"
11338 [(set (match_dup 0)
11339 (vec_merge:V2SI
11340 (any_truncate:V2SI (match_dup 1))
11341 (match_dup 0)
11342 (match_dup 2)))]
11343 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
11344
11345 (define_expand "truncv8div8qi2"
11346 [(set (match_operand:V8QI 0 "register_operand")
11347 (truncate:V8QI
11348 (match_operand:V8DI 1 "register_operand")))]
11349 "TARGET_AVX512F"
11350 {
11351 operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0);
11352 emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1]));
11353 DONE;
11354 })
11355
11356 (define_insn "avx512f_<code>v8div16qi2"
11357 [(set (match_operand:V16QI 0 "register_operand" "=v")
11358 (vec_concat:V16QI
11359 (any_truncate:V8QI
11360 (match_operand:V8DI 1 "register_operand" "v"))
11361 (const_vector:V8QI [(const_int 0) (const_int 0)
11362 (const_int 0) (const_int 0)
11363 (const_int 0) (const_int 0)
11364 (const_int 0) (const_int 0)])))]
11365 "TARGET_AVX512F"
11366 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11367 [(set_attr "type" "ssemov")
11368 (set_attr "prefix" "evex")
11369 (set_attr "mode" "TI")])
11370
11371 (define_insn "*avx512f_<code>v8div16qi2_store_1"
11372 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11373 (any_truncate:V8QI
11374 (match_operand:V8DI 1 "register_operand" "v")))]
11375 "TARGET_AVX512F"
11376 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11377 [(set_attr "type" "ssemov")
11378 (set_attr "memory" "store")
11379 (set_attr "prefix" "evex")
11380 (set_attr "mode" "TI")])
11381
11382 (define_insn_and_split "*avx512f_<code>v8div16qi2_store_2"
11383 [(set (match_operand:DI 0 "memory_operand")
11384 (subreg:DI
11385 (any_truncate:V8QI
11386 (match_operand:V8DI 1 "register_operand")) 0))]
11387 "TARGET_AVX512F && ix86_pre_reload_split ()"
11388 "#"
11389 "&& 1"
11390 [(set (match_dup 0)
11391 (any_truncate:V8QI (match_dup 1)))]
11392 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11393
11394 (define_insn "avx512f_<code>v8div16qi2_mask"
11395 [(set (match_operand:V16QI 0 "register_operand" "=v")
11396 (vec_concat:V16QI
11397 (vec_merge:V8QI
11398 (any_truncate:V8QI
11399 (match_operand:V8DI 1 "register_operand" "v"))
11400 (vec_select:V8QI
11401 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11402 (parallel [(const_int 0) (const_int 1)
11403 (const_int 2) (const_int 3)
11404 (const_int 4) (const_int 5)
11405 (const_int 6) (const_int 7)]))
11406 (match_operand:QI 3 "register_operand" "Yk"))
11407 (const_vector:V8QI [(const_int 0) (const_int 0)
11408 (const_int 0) (const_int 0)
11409 (const_int 0) (const_int 0)
11410 (const_int 0) (const_int 0)])))]
11411 "TARGET_AVX512F"
11412 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11413 [(set_attr "type" "ssemov")
11414 (set_attr "prefix" "evex")
11415 (set_attr "mode" "TI")])
11416
11417 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
11418 [(set (match_operand:V16QI 0 "register_operand" "=v")
11419 (vec_concat:V16QI
11420 (vec_merge:V8QI
11421 (any_truncate:V8QI
11422 (match_operand:V8DI 1 "register_operand" "v"))
11423 (const_vector:V8QI [(const_int 0) (const_int 0)
11424 (const_int 0) (const_int 0)
11425 (const_int 0) (const_int 0)
11426 (const_int 0) (const_int 0)])
11427 (match_operand:QI 2 "register_operand" "Yk"))
11428 (const_vector:V8QI [(const_int 0) (const_int 0)
11429 (const_int 0) (const_int 0)
11430 (const_int 0) (const_int 0)
11431 (const_int 0) (const_int 0)])))]
11432 "TARGET_AVX512F"
11433 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11434 [(set_attr "type" "ssemov")
11435 (set_attr "prefix" "evex")
11436 (set_attr "mode" "TI")])
11437
11438 (define_insn "*avx512f_<code>v8div16qi2_mask_store_1"
11439 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11440 (vec_merge:V8QI
11441 (any_truncate:V8QI
11442 (match_operand:V8DI 1 "register_operand" "v"))
11443 (match_dup 0)
11444 (match_operand:QI 2 "register_operand" "Yk")))]
11445 "TARGET_AVX512F"
11446 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11447 [(set_attr "type" "ssemov")
11448 (set_attr "memory" "store")
11449 (set_attr "prefix" "evex")
11450 (set_attr "mode" "TI")])
11451
11452 (define_insn_and_split "avx512f_<code>v8div16qi2_mask_store_2"
11453 [(set (match_operand:DI 0 "memory_operand")
11454 (subreg:DI
11455 (vec_merge:V8QI
11456 (any_truncate:V8QI
11457 (match_operand:V8DI 1 "register_operand"))
11458 (vec_select:V8QI
11459 (subreg:V16QI
11460 (vec_concat:V2DI
11461 (match_dup 0)
11462 (const_int 0)) 0)
11463 (parallel [(const_int 0) (const_int 1)
11464 (const_int 2) (const_int 3)
11465 (const_int 4) (const_int 5)
11466 (const_int 6) (const_int 7)]))
11467 (match_operand:QI 2 "register_operand")) 0))]
11468 "TARGET_AVX512F && ix86_pre_reload_split ()"
11469 "#"
11470 "&& 1"
11471 [(set (match_dup 0)
11472 (vec_merge:V8QI
11473 (any_truncate:V8QI (match_dup 1))
11474 (match_dup 0)
11475 (match_dup 2)))]
11476 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11477
11478 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11479 ;;
11480 ;; Parallel integral arithmetic
11481 ;;
11482 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11483
11484 (define_expand "neg<mode>2"
11485 [(set (match_operand:VI_AVX2 0 "register_operand")
11486 (minus:VI_AVX2
11487 (match_dup 2)
11488 (match_operand:VI_AVX2 1 "vector_operand")))]
11489 "TARGET_SSE2"
11490 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
11491
11492 (define_expand "<insn><mode>3"
11493 [(set (match_operand:VI_AVX2 0 "register_operand")
11494 (plusminus:VI_AVX2
11495 (match_operand:VI_AVX2 1 "vector_operand")
11496 (match_operand:VI_AVX2 2 "vector_operand")))]
11497 "TARGET_SSE2"
11498 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11499
11500 (define_expand "<insn><mode>3_mask"
11501 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11502 (vec_merge:VI48_AVX512VL
11503 (plusminus:VI48_AVX512VL
11504 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11505 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11506 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11507 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11508 "TARGET_AVX512F"
11509 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11510
11511 (define_expand "<insn><mode>3_mask"
11512 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11513 (vec_merge:VI12_AVX512VL
11514 (plusminus:VI12_AVX512VL
11515 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11516 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11517 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
11518 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11519 "TARGET_AVX512BW"
11520 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11521
11522 (define_insn "*<insn><mode>3"
11523 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
11524 (plusminus:VI_AVX2
11525 (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
11526 (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
11527 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11528 "@
11529 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11530 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11531 [(set_attr "isa" "noavx,avx")
11532 (set_attr "type" "sseiadd")
11533 (set_attr "prefix_data16" "1,*")
11534 (set_attr "prefix" "orig,maybe_evex")
11535 (set_attr "mode" "<sseinsnmode>")])
11536
11537 (define_insn "*<insn><mode>3_mask"
11538 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11539 (vec_merge:VI48_AVX512VL
11540 (plusminus:VI48_AVX512VL
11541 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11542 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11543 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
11544 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11545 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11546 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11547 [(set_attr "type" "sseiadd")
11548 (set_attr "prefix" "evex")
11549 (set_attr "mode" "<sseinsnmode>")])
11550
11551 (define_insn "*<insn><mode>3_mask"
11552 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11553 (vec_merge:VI12_AVX512VL
11554 (plusminus:VI12_AVX512VL
11555 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11556 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
11557 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
11558 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11559 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11560 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11561 [(set_attr "type" "sseiadd")
11562 (set_attr "prefix" "evex")
11563 (set_attr "mode" "<sseinsnmode>")])
11564
11565 (define_expand "<sse2_avx2>_<insn><mode>3<mask_name>"
11566 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
11567 (sat_plusminus:VI12_AVX2_AVX512BW
11568 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
11569 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
11570 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11571 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11572
11573 (define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
11574 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
11575 (sat_plusminus:VI12_AVX2_AVX512BW
11576 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
11577 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
11578 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
11579 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11580 "@
11581 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11582 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11583 [(set_attr "isa" "noavx,avx")
11584 (set_attr "type" "sseiadd")
11585 (set_attr "prefix_data16" "1,*")
11586 (set_attr "prefix" "orig,maybe_evex")
11587 (set_attr "mode" "TI")])
11588
11589 ;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
11590 (define_split
11591 [(set (match_operand:VI12_AVX2 0 "register_operand")
11592 (eq:VI12_AVX2
11593 (us_minus:VI12_AVX2
11594 (match_operand:VI12_AVX2 1 "vector_operand")
11595 (match_operand:VI12_AVX2 2 "vector_operand"))
11596 (match_operand:VI12_AVX2 3 "const0_operand")))]
11597 "TARGET_SSE2
11598 && (<MODE>mode != V8HImode || TARGET_SSE4_1)
11599 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
11600 [(set (match_dup 4)
11601 (umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
11602 (set (match_dup 0)
11603 (eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
11604 "operands[4] = gen_reg_rtx (<MODE>mode);")
11605
11606 (define_expand "mulv8qi3"
11607 [(set (match_operand:V8QI 0 "register_operand")
11608 (mult:V8QI (match_operand:V8QI 1 "register_operand")
11609 (match_operand:V8QI 2 "register_operand")))]
11610 "TARGET_AVX512VL && TARGET_AVX512BW"
11611 {
11612 gcc_assert (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]));
11613 DONE;
11614 })
11615
11616 (define_expand "mul<mode>3"
11617 [(set (match_operand:VI1_AVX512 0 "register_operand")
11618 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
11619 (match_operand:VI1_AVX512 2 "register_operand")))]
11620 "TARGET_SSE2"
11621 {
11622 if (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]))
11623 DONE;
11624 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
11625 DONE;
11626 })
11627
11628 (define_expand "mul<mode>3<mask_name>"
11629 [(set (match_operand:VI2_AVX2 0 "register_operand")
11630 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
11631 (match_operand:VI2_AVX2 2 "vector_operand")))]
11632 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11633 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11634
11635 (define_insn "*mul<mode>3<mask_name>"
11636 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11637 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11638 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11639 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11640 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11641 "@
11642 pmullw\t{%2, %0|%0, %2}
11643 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11644 [(set_attr "isa" "noavx,avx")
11645 (set_attr "type" "sseimul")
11646 (set_attr "prefix_data16" "1,*")
11647 (set_attr "prefix" "orig,vex")
11648 (set_attr "mode" "<sseinsnmode>")])
11649
11650 (define_expand "<s>mul<mode>3_highpart<mask_name>"
11651 [(set (match_operand:VI2_AVX2 0 "register_operand")
11652 (truncate:VI2_AVX2
11653 (lshiftrt:<ssedoublemode>
11654 (mult:<ssedoublemode>
11655 (any_extend:<ssedoublemode>
11656 (match_operand:VI2_AVX2 1 "vector_operand"))
11657 (any_extend:<ssedoublemode>
11658 (match_operand:VI2_AVX2 2 "vector_operand")))
11659 (const_int 16))))]
11660 "TARGET_SSE2
11661 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11662 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11663
11664 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
11665 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11666 (truncate:VI2_AVX2
11667 (lshiftrt:<ssedoublemode>
11668 (mult:<ssedoublemode>
11669 (any_extend:<ssedoublemode>
11670 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11671 (any_extend:<ssedoublemode>
11672 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11673 (const_int 16))))]
11674 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11675 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11676 "@
11677 pmulh<u>w\t{%2, %0|%0, %2}
11678 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11679 [(set_attr "isa" "noavx,avx")
11680 (set_attr "type" "sseimul")
11681 (set_attr "prefix_data16" "1,*")
11682 (set_attr "prefix" "orig,vex")
11683 (set_attr "mode" "<sseinsnmode>")])
11684
11685 (define_expand "vec_widen_umult_even_v16si<mask_name>"
11686 [(set (match_operand:V8DI 0 "register_operand")
11687 (mult:V8DI
11688 (zero_extend:V8DI
11689 (vec_select:V8SI
11690 (match_operand:V16SI 1 "nonimmediate_operand")
11691 (parallel [(const_int 0) (const_int 2)
11692 (const_int 4) (const_int 6)
11693 (const_int 8) (const_int 10)
11694 (const_int 12) (const_int 14)])))
11695 (zero_extend:V8DI
11696 (vec_select:V8SI
11697 (match_operand:V16SI 2 "nonimmediate_operand")
11698 (parallel [(const_int 0) (const_int 2)
11699 (const_int 4) (const_int 6)
11700 (const_int 8) (const_int 10)
11701 (const_int 12) (const_int 14)])))))]
11702 "TARGET_AVX512F"
11703 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11704
11705 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
11706 [(set (match_operand:V8DI 0 "register_operand" "=v")
11707 (mult:V8DI
11708 (zero_extend:V8DI
11709 (vec_select:V8SI
11710 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11711 (parallel [(const_int 0) (const_int 2)
11712 (const_int 4) (const_int 6)
11713 (const_int 8) (const_int 10)
11714 (const_int 12) (const_int 14)])))
11715 (zero_extend:V8DI
11716 (vec_select:V8SI
11717 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11718 (parallel [(const_int 0) (const_int 2)
11719 (const_int 4) (const_int 6)
11720 (const_int 8) (const_int 10)
11721 (const_int 12) (const_int 14)])))))]
11722 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11723 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11724 [(set_attr "type" "sseimul")
11725 (set_attr "prefix_extra" "1")
11726 (set_attr "prefix" "evex")
11727 (set_attr "mode" "XI")])
11728
11729 (define_expand "vec_widen_umult_even_v8si<mask_name>"
11730 [(set (match_operand:V4DI 0 "register_operand")
11731 (mult:V4DI
11732 (zero_extend:V4DI
11733 (vec_select:V4SI
11734 (match_operand:V8SI 1 "nonimmediate_operand")
11735 (parallel [(const_int 0) (const_int 2)
11736 (const_int 4) (const_int 6)])))
11737 (zero_extend:V4DI
11738 (vec_select:V4SI
11739 (match_operand:V8SI 2 "nonimmediate_operand")
11740 (parallel [(const_int 0) (const_int 2)
11741 (const_int 4) (const_int 6)])))))]
11742 "TARGET_AVX2 && <mask_avx512vl_condition>"
11743 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11744
11745 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
11746 [(set (match_operand:V4DI 0 "register_operand" "=v")
11747 (mult:V4DI
11748 (zero_extend:V4DI
11749 (vec_select:V4SI
11750 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11751 (parallel [(const_int 0) (const_int 2)
11752 (const_int 4) (const_int 6)])))
11753 (zero_extend:V4DI
11754 (vec_select:V4SI
11755 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11756 (parallel [(const_int 0) (const_int 2)
11757 (const_int 4) (const_int 6)])))))]
11758 "TARGET_AVX2 && <mask_avx512vl_condition>
11759 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11760 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11761 [(set_attr "type" "sseimul")
11762 (set_attr "prefix" "maybe_evex")
11763 (set_attr "mode" "OI")])
11764
11765 (define_expand "vec_widen_umult_even_v4si<mask_name>"
11766 [(set (match_operand:V2DI 0 "register_operand")
11767 (mult:V2DI
11768 (zero_extend:V2DI
11769 (vec_select:V2SI
11770 (match_operand:V4SI 1 "vector_operand")
11771 (parallel [(const_int 0) (const_int 2)])))
11772 (zero_extend:V2DI
11773 (vec_select:V2SI
11774 (match_operand:V4SI 2 "vector_operand")
11775 (parallel [(const_int 0) (const_int 2)])))))]
11776 "TARGET_SSE2 && <mask_avx512vl_condition>"
11777 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11778
11779 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
11780 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11781 (mult:V2DI
11782 (zero_extend:V2DI
11783 (vec_select:V2SI
11784 (match_operand:V4SI 1 "vector_operand" "%0,v")
11785 (parallel [(const_int 0) (const_int 2)])))
11786 (zero_extend:V2DI
11787 (vec_select:V2SI
11788 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11789 (parallel [(const_int 0) (const_int 2)])))))]
11790 "TARGET_SSE2 && <mask_avx512vl_condition>
11791 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11792 "@
11793 pmuludq\t{%2, %0|%0, %2}
11794 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11795 [(set_attr "isa" "noavx,avx")
11796 (set_attr "type" "sseimul")
11797 (set_attr "prefix_data16" "1,*")
11798 (set_attr "prefix" "orig,maybe_evex")
11799 (set_attr "mode" "TI")])
11800
11801 (define_expand "vec_widen_smult_even_v16si<mask_name>"
11802 [(set (match_operand:V8DI 0 "register_operand")
11803 (mult:V8DI
11804 (sign_extend:V8DI
11805 (vec_select:V8SI
11806 (match_operand:V16SI 1 "nonimmediate_operand")
11807 (parallel [(const_int 0) (const_int 2)
11808 (const_int 4) (const_int 6)
11809 (const_int 8) (const_int 10)
11810 (const_int 12) (const_int 14)])))
11811 (sign_extend:V8DI
11812 (vec_select:V8SI
11813 (match_operand:V16SI 2 "nonimmediate_operand")
11814 (parallel [(const_int 0) (const_int 2)
11815 (const_int 4) (const_int 6)
11816 (const_int 8) (const_int 10)
11817 (const_int 12) (const_int 14)])))))]
11818 "TARGET_AVX512F"
11819 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11820
11821 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
11822 [(set (match_operand:V8DI 0 "register_operand" "=v")
11823 (mult:V8DI
11824 (sign_extend:V8DI
11825 (vec_select:V8SI
11826 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11827 (parallel [(const_int 0) (const_int 2)
11828 (const_int 4) (const_int 6)
11829 (const_int 8) (const_int 10)
11830 (const_int 12) (const_int 14)])))
11831 (sign_extend:V8DI
11832 (vec_select:V8SI
11833 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11834 (parallel [(const_int 0) (const_int 2)
11835 (const_int 4) (const_int 6)
11836 (const_int 8) (const_int 10)
11837 (const_int 12) (const_int 14)])))))]
11838 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11839 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11840 [(set_attr "type" "sseimul")
11841 (set_attr "prefix_extra" "1")
11842 (set_attr "prefix" "evex")
11843 (set_attr "mode" "XI")])
11844
11845 (define_expand "vec_widen_smult_even_v8si<mask_name>"
11846 [(set (match_operand:V4DI 0 "register_operand")
11847 (mult:V4DI
11848 (sign_extend:V4DI
11849 (vec_select:V4SI
11850 (match_operand:V8SI 1 "nonimmediate_operand")
11851 (parallel [(const_int 0) (const_int 2)
11852 (const_int 4) (const_int 6)])))
11853 (sign_extend:V4DI
11854 (vec_select:V4SI
11855 (match_operand:V8SI 2 "nonimmediate_operand")
11856 (parallel [(const_int 0) (const_int 2)
11857 (const_int 4) (const_int 6)])))))]
11858 "TARGET_AVX2 && <mask_avx512vl_condition>"
11859 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11860
11861 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
11862 [(set (match_operand:V4DI 0 "register_operand" "=v")
11863 (mult:V4DI
11864 (sign_extend:V4DI
11865 (vec_select:V4SI
11866 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11867 (parallel [(const_int 0) (const_int 2)
11868 (const_int 4) (const_int 6)])))
11869 (sign_extend:V4DI
11870 (vec_select:V4SI
11871 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11872 (parallel [(const_int 0) (const_int 2)
11873 (const_int 4) (const_int 6)])))))]
11874 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11875 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11876 [(set_attr "type" "sseimul")
11877 (set_attr "prefix_extra" "1")
11878 (set_attr "prefix" "vex")
11879 (set_attr "mode" "OI")])
11880
11881 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
11882 [(set (match_operand:V2DI 0 "register_operand")
11883 (mult:V2DI
11884 (sign_extend:V2DI
11885 (vec_select:V2SI
11886 (match_operand:V4SI 1 "vector_operand")
11887 (parallel [(const_int 0) (const_int 2)])))
11888 (sign_extend:V2DI
11889 (vec_select:V2SI
11890 (match_operand:V4SI 2 "vector_operand")
11891 (parallel [(const_int 0) (const_int 2)])))))]
11892 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
11893 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11894
11895 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
11896 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
11897 (mult:V2DI
11898 (sign_extend:V2DI
11899 (vec_select:V2SI
11900 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
11901 (parallel [(const_int 0) (const_int 2)])))
11902 (sign_extend:V2DI
11903 (vec_select:V2SI
11904 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
11905 (parallel [(const_int 0) (const_int 2)])))))]
11906 "TARGET_SSE4_1 && <mask_avx512vl_condition>
11907 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11908 "@
11909 pmuldq\t{%2, %0|%0, %2}
11910 pmuldq\t{%2, %0|%0, %2}
11911 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11912 [(set_attr "isa" "noavx,noavx,avx")
11913 (set_attr "type" "sseimul")
11914 (set_attr "prefix_data16" "1,1,*")
11915 (set_attr "prefix_extra" "1")
11916 (set_attr "prefix" "orig,orig,vex")
11917 (set_attr "mode" "TI")])
11918
11919 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
11920 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
11921 (unspec:<sseunpackmode>
11922 [(match_operand:VI2_AVX2 1 "register_operand" "v")
11923 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
11924 UNSPEC_PMADDWD512))]
11925 "TARGET_AVX512BW && <mask_mode512bit_condition>"
11926 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
11927 [(set_attr "type" "sseiadd")
11928 (set_attr "prefix" "evex")
11929 (set_attr "mode" "XI")])
11930
;; vpmaddwd (256-bit): for each dword lane, sign-extend the even and odd
;; word elements of both sources, multiply, and add the two products.
;; The expander canonicalizes operands; the insn pattern matches the
;; explicit plus-of-mults form so combine can recognize it.
(define_expand "avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand")
        (plus:V8SI
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")

;; Matching insn for the expander above.  Second alternative allows an
;; EVEX-encoded register set under AVX512BW.
(define_insn "*avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "=x,v")
        (plus:V8SI
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "isa" "*,avx512bw")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "OI")])
12002
;; pmaddwd (128-bit): same even/odd multiply-and-pairwise-add structure
;; as the AVX2 variant above, on V8HI -> V4SI.
(define_expand "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "vector_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "vector_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; Matching insn: legacy SSE2 (two-operand), VEX, and EVEX alternatives.
(define_insn "*sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "vector_operand" "%0,x,v")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pmaddwd\t{%2, %0|%0, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "mode" "TI")])
12063
;; vpmullq (AVX512DQ): full 64-bit element-wise multiply, with embedded
;; broadcast ("Br" constraint) allowed on the memory operand.
(define_insn "avx512dq_mul<mode>3<mask_name>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
        (mult:VI8_AVX512VL
          (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v")
          (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
  "TARGET_AVX512DQ && <mask_mode512bit_condition>
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; 32-bit element multiply.  With SSE4.1+ this is a single pmulld, so
;; just legitimize the operands; plain SSE2 has no such instruction and
;; goes through the synthesized sequence in ix86_expand_sse2_mulv4si3.
(define_expand "mul<mode>3<mask_name>"
  [(set (match_operand:VI4_AVX512F 0 "register_operand")
        (mult:VI4_AVX512F
          (match_operand:VI4_AVX512F 1 "general_vector_operand")
          (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition>"
{
  if (TARGET_SSE4_1)
    {
      ;; pmulld takes vector operands only; force anything else into a reg.
      if (!vector_operand (operands[1], <MODE>mode))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!vector_operand (operands[2], <MODE>mode))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
    }
  else
    {
      ;; SSE2 fallback: expand an equivalent multi-insn sequence.
      ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
      DONE;
    }
})

;; pmulld / vpmulld insn for the expander above.
(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
  [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
        (mult:VI4_AVX512F
          (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v")
          (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
   && <mask_mode512bit_condition>"
  "@
   pmulld\t{%2, %0|%0, %2}
   pmulld\t{%2, %0|%0, %2}
   vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "<bcst_mask_prefix4>")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])

;; 64-bit element multiply: always synthesized from narrower multiplies
;; (there is no general pmullq before AVX512DQ, handled above).
(define_expand "mul<mode>3"
  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
        (mult:VI8_AVX2_AVX512F
          (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
          (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
  "TARGET_SSE2"
{
  ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
  DONE;
})
12126
;; Widening multiply of the high half of the input vectors; all the real
;; work is done by ix86_expand_mul_widen_hilo (last arg: high = true).
(define_expand "vec_widen_<s>mult_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI124_AVX2 1 "register_operand"))
   (match_operand:VI124_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
                              <u_bool>, true);
  DONE;
})

;; Low-half counterpart of the above (high = false).
(define_expand "vec_widen_<s>mult_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI124_AVX2 1 "register_operand"))
   (match_operand:VI124_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
                              <u_bool>, false);
  DONE;
})

;; Most widen_<s>mult_even_<mode> can be handled directly from other
;; named patterns, but signed V4SI needs special help for plain SSE2
;; (pmuldq only exists from SSE4.1 on).
(define_expand "vec_widen_smult_even_v4si"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:V4SI 1 "vector_operand")
   (match_operand:V4SI 2 "vector_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
                                 false, false);
  DONE;
})

;; Widening multiply of the odd elements (last arg: odd = true).
(define_expand "vec_widen_<s>mult_odd_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI4_AVX512F 1 "general_vector_operand"))
   (match_operand:VI4_AVX512F 2 "general_vector_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
                                 <u_bool>, true);
  DONE;
})
12175
;; Suffix used to pick the right pmaddwd generator below: the 512-bit
;; mode needs the explicitly-named avx512bw pattern, narrower modes use
;; the plain <sse2_avx2>_pmaddwd names.
(define_mode_attr SDOT_PMADD_SUF
  [(V32HI "512v32hi") (V16HI "") (V8HI "")])

;; Signed dot product of word elements: pmaddwd produces the pairwise
;; sums of products, then the accumulator (operand 3) is added in.
(define_expand "sdot_prod<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI2_AVX2 1 "register_operand")
   (match_operand:VI2_AVX2 2 "register_operand")
   (match_operand:<sseunpackmode> 3 "register_operand")]
  "TARGET_SSE2"
{
  rtx t = gen_reg_rtx (<sseunpackmode>mode);
  emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
  emit_insn (gen_rtx_SET (operands[0],
			  gen_rtx_PLUS (<sseunpackmode>mode,
					operands[3], t)));
  DONE;
})

;; Normally we use widen_mul_even/odd, but combine can't quite get it all
;; back together when madd is available.  XOP's pmacsdqh/pmacsdql chain
;; the high- and low-element multiply-accumulates directly.
(define_expand "sdot_prodv4si"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:V4SI 1 "register_operand")
   (match_operand:V4SI 2 "register_operand")
   (match_operand:V2DI 3 "register_operand")]
  "TARGET_XOP"
{
  rtx t = gen_reg_rtx (V2DImode);
  emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
  emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
  DONE;
})
12208
;; Unsigned average with rounding up: (a + b + 1) >> 1 computed in the
;; double-width mode so the intermediate sum cannot overflow; this is
;; exactly the semantics of pavgb/pavgw, which the matching insn emits.
(define_expand "uavg<mode>3_ceil"
  [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
        (truncate:VI12_AVX2_AVX512BW
          (lshiftrt:<ssedoublemode>
            (plus:<ssedoublemode>
              (plus:<ssedoublemode>
                (zero_extend:<ssedoublemode>
                  (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
                (zero_extend:<ssedoublemode>
                  (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
              (match_dup 3))
            (const_int 1))))]
  "TARGET_SSE2"
{
  ;; Operand 3 is the +1 rounding term.
  operands[3] = CONST1_RTX(<ssedoublemode>mode);
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
})
12226
;; Unsigned sum of absolute differences, accumulated into dword lanes:
;; psadbw produces 64-bit partial sums, which are converted (unpacked)
;; to the dword result mode and added to the accumulator (operand 3).
(define_expand "usadv16qi"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V16QI 1 "register_operand")
   (match_operand:V16QI 2 "vector_operand")
   (match_operand:V4SI 3 "vector_operand")]
  "TARGET_SSE2"
{
  rtx t1 = gen_reg_rtx (V2DImode);
  rtx t2 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
  convert_move (t2, t1, 0);
  emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
  DONE;
})

;; 256-bit variant of usadv16qi.
(define_expand "usadv32qi"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V32QI 1 "register_operand")
   (match_operand:V32QI 2 "nonimmediate_operand")
   (match_operand:V8SI 3 "nonimmediate_operand")]
  "TARGET_AVX2"
{
  rtx t1 = gen_reg_rtx (V4DImode);
  rtx t2 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
  convert_move (t2, t1, 0);
  emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
  DONE;
})

;; 512-bit variant of usadv16qi.
(define_expand "usadv64qi"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V64QI 1 "register_operand")
   (match_operand:V64QI 2 "nonimmediate_operand")
   (match_operand:V16SI 3 "nonimmediate_operand")]
  "TARGET_AVX512BW"
{
  rtx t1 = gen_reg_rtx (V8DImode);
  rtx t2 = gen_reg_rtx (V16SImode);
  emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
  convert_move (t2, t1, 0);
  emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
  DONE;
})
12271
;; Vector shifts by a scalar count (operand 2 is DImode: either an
;; xmm-register count or an immediate).  The patterns come in three
;; flavours per shift kind: an AVX512VL maskable one, a legacy
;; SSE2/AVX one, and an AVX512F maskable one for the wider modes.

;; Arithmetic right shift, AVX512VL-maskable element modes.
(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
  [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
        (ashiftrt:VI248_AVX512BW_1
          (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
          (match_operand:DI 2 "nonmemory_operand" "v,N")))]
  "TARGET_AVX512VL"
  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   ;; Immediate counts add one byte to the encoding.
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "<sseinsnmode>")])

;; Arithmetic right shift, legacy SSE2 / VEX encodings.
(define_insn "ashr<mode>3"
  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
        (ashiftrt:VI24_AVX2
          (match_operand:VI24_AVX2 1 "register_operand" "0,x")
          (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   psra<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Arithmetic right shift, AVX512F-maskable wide modes.
(define_insn "ashr<mode>3<mask_name>"
  [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
        (ashiftrt:VI248_AVX512BW_AVX512VL
          (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
          (match_operand:DI 2 "nonmemory_operand" "v,N")))]
  "TARGET_AVX512F"
  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "<sseinsnmode>")])

;; Logical shifts (left and right), AVX512VL-maskable element modes.
(define_insn "<mask_codefor><insn><mode>3<mask_name>"
  [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
        (any_lshift:VI248_AVX512BW_2
          (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
          (match_operand:DI 2 "nonmemory_operand" "v,N")))]
  "TARGET_AVX512VL"
  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "<sseinsnmode>")])

;; Logical shifts, legacy SSE2 / VEX encodings.
(define_insn "<insn><mode>3"
  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
        (any_lshift:VI248_AVX2
          (match_operand:VI248_AVX2 1 "register_operand" "0,x")
          (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Logical shifts, AVX512F-maskable wide modes.
(define_insn "<insn><mode>3<mask_name>"
  [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
        (any_lshift:VI248_AVX512BW
          (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
          (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
  "TARGET_AVX512F"
  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "<sseinsnmode>")])
12365
12366
;; Whole-vector shift left by a byte count: performed in V1TImode
;; (the pslldq instruction), with lowpart casts in and out of the
;; element mode.
(define_expand "vec_shl_<mode>"
  [(set (match_dup 3)
        (ashift:V1TI
          (match_operand:V_128 1 "register_operand")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
   (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
  "TARGET_SSE2"
{
  operands[1] = gen_lowpart (V1TImode, operands[1]);
  operands[3] = gen_reg_rtx (V1TImode);
  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
})

;; Whole-vector shift right (psrldq); same lowpart technique as above.
(define_expand "vec_shr_<mode>"
  [(set (match_dup 3)
        (lshiftrt:V1TI
          (match_operand:V_128 1 "register_operand")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
   (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
  "TARGET_SSE2"
{
  operands[1] = gen_lowpart (V1TImode, operands[1]);
  operands[3] = gen_reg_rtx (V1TImode);
  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
})
12392
;; Whole-register byte shifts (pslldq/psrldq).  The RTL count is in
;; bits (constrained to a multiple of 8); the instruction takes bytes,
;; hence the INTVAL/8 adjustment before printing.

;; AVX512BW/AVX512VL EVEX-encoded form.
(define_insn "avx512bw_<insn><mode>3"
  [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
        (any_lshift:VIMAX_AVX512VL
          (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_AVX512BW"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Legacy SSE2 / VEX form.
(define_insn "<sse2_avx2>_<insn><mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
        (any_lshift:VIMAX_AVX2
          (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  switch (which_alternative)
    {
    case 0:
      return "p<vshift>dq\t{%2, %0|%0, %2}";
    case 1:
      return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
12434
;; AVX512F element rotates: variable per-element counts (vprolv/vprorv).
(define_insn "<avx512>_<rotate>v<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (any_rotate:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "register_operand" "v")
          (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; AVX512F element rotates by an immediate count (vprol/vpror).
(define_insn "<avx512>_<rotate><mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (any_rotate:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
          (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX512F"
  "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12454
;; Integer max/min, 256-bit and wider, where a single pmax/pmin
;; instruction exists (AVX2 and the AVX512 extensions).

;; Named expander: just canonicalize the operands.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
        (maxmin:VI124_256_AVX512F_AVX512BW
          (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
          (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; 256-bit vpmax/vpmin insn.
(define_insn "*avx2_<code><mode>3"
  [(set (match_operand:VI124_256 0 "register_operand" "=v")
        (maxmin:VI124_256
          (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
          (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Explicitly-masked expander (vec_merge with mask operand 4).
(define_expand "<code><mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
        (vec_merge:VI48_AVX512VL
          (maxmin:VI48_AVX512VL
            (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
          (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
          (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; AVX512F dword/qword max/min insn (maskable via <mask_name>).
(define_insn "*avx512f_<code><mode>3<mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (maxmin:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
          (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])

;; AVX512BW byte/word max/min insn.
(define_insn "<mask_codefor><code><mode>3<mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (maxmin:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BW"
  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12508
;; 64-bit element max/min.  With AVX512F (and VL for the narrow modes)
;; a real instruction exists; otherwise synthesize via a vector compare
;; and blend (ix86_expand_int_vcond).  For min, the compare operands are
;; swapped so the same GT/GTU condition selects the smaller element.
(define_expand "<code><mode>3"
  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
        (maxmin:VI8_AVX2_AVX512F
          (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
          (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
  "TARGET_SSE4_2"
{
  if (TARGET_AVX512F
      && (<MODE>mode == V8DImode || TARGET_AVX512VL))
    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  else
    {
      enum rtx_code code;
      rtx xops[6];
      bool ok;

      xops[0] = operands[0];

      ;; For max, select operand 1 when the compare is true; for min,
      ;; swap so the smaller value is selected.
      if (<CODE> == SMAX || <CODE> == UMAX)
	{
	  xops[1] = operands[1];
	  xops[2] = operands[2];
	}
      else
	{
	  xops[1] = operands[2];
	  xops[2] = operands[1];
	}

      code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;

      xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
      xops[4] = operands[1];
      xops[5] = operands[2];

      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      DONE;
    }
})

;; 128-bit signed max/min.  SSE4.1 (or V8HImode, where SSE2 already has
;; pmaxsw/pminsw) uses a direct instruction; otherwise fall back to the
;; compare-and-blend synthesis as above.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand")
        (smaxmin:VI124_128
          (match_operand:VI124_128 1 "vector_operand")
          (match_operand:VI124_128 2 "vector_operand")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  else
    {
      rtx xops[6];
      bool ok;

      xops[0] = operands[0];
      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      ;; Same max/min operand-swap trick as the VI8 expander above.
      if (<CODE> == SMAX)
	{
	  xops[1] = operands[1];
	  xops[2] = operands[2];
	}
      else
	{
	  xops[1] = operands[2];
	  xops[2] = operands[1];
	}

      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      xops[4] = operands[1];
      xops[5] = operands[2];

      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      DONE;
    }
})
12589
;; SSE4.1 signed max/min on bytes and dwords (pmaxsb/pminsb,
;; pmaxsd/pminsd); maskable under AVX512VL via <mask_name>.
(define_insn "*sse4_1_<code><mode>3<mask_name>"
  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
        (smaxmin:VI14_128
          (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
          (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
  "TARGET_SSE4_1
   && <mask_mode512bit_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1,1,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])

;; Signed word max/min (pmaxsw/pminsw): available since plain SSE2.
(define_insn "*<code>v8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
        (smaxmin:V8HI
          (match_operand:V8HI 1 "vector_operand" "%0,x,v")
          (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   p<maxmin_int>w\t{%2, %0|%0, %2}
   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "*,1,1")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "mode" "TI")])
12624
;; 128-bit unsigned max/min.  Direct instruction with SSE4.1 (or
;; V16QImode, where SSE2 has pmaxub/pminub).  UMAX of words has a
;; cheaper SSE2 synthesis: max(a,b) = (a -us b) + b using saturating
;; subtraction.  Everything else goes through compare-and-blend.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand")
        (umaxmin:VI124_128
          (match_operand:VI124_128 1 "vector_operand")
          (match_operand:VI124_128 2 "vector_operand")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
    {
      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
      operands[1] = force_reg (<MODE>mode, operands[1]);
      ;; Need a fresh temp if the destination aliases operand 2,
      ;; since op2 is read again by the final add.
      if (rtx_equal_p (op3, op2))
	op3 = gen_reg_rtx (V8HImode);
      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
      emit_insn (gen_addv8hi3 (op0, op3, op2));
      DONE;
    }
  else
    {
      rtx xops[6];
      bool ok;

      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      xops[0] = operands[0];

      ;; Swap compare operands for min so GTU selects the smaller value.
      if (<CODE> == UMAX)
	{
	  xops[1] = operands[1];
	  xops[2] = operands[2];
	}
      else
	{
	  xops[1] = operands[2];
	  xops[2] = operands[1];
	}

      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      xops[4] = operands[1];
      xops[5] = operands[2];

      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      DONE;
    }
})

;; SSE4.1 unsigned max/min on words and dwords (pmaxuw/pminuw,
;; pmaxud/pminud); maskable under AVX512VL.
(define_insn "*sse4_1_<code><mode>3<mask_name>"
  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
        (umaxmin:VI24_128
          (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
          (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
  "TARGET_SSE4_1
   && <mask_mode512bit_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1,1,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])

;; Unsigned byte max/min (pmaxub/pminub): available since plain SSE2.
(define_insn "*<code>v16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
        (umaxmin:V16QI
          (match_operand:V16QI 1 "vector_operand" "%0,x,v")
          (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   p<maxmin_int>b\t{%2, %0|%0, %2}
   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "*,1,1")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "mode" "TI")])
12709
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; 256-bit equality compare producing an all-ones/all-zeros vector.
(define_expand "avx2_eq<mode>3"
  [(set (match_operand:VI_256 0 "register_operand")
        (eq:VI_256
          (match_operand:VI_256 1 "nonimmediate_operand")
          (match_operand:VI_256 2 "nonimmediate_operand")))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

(define_insn "*avx2_eq<mode>3"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (eq:VI_256
          (match_operand:VI_256 1 "nonimmediate_operand" "%x")
          (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; AVX512 equality compares producing a mask register result (unspec
;; UNSPEC_MASKED_EQ).  Byte/word elements need AVX512BW, dword/qword
;; need only AVX512F — hence the two parallel expander/insn pairs.
(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
           (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
          UNSPEC_MASKED_EQ))]
  "TARGET_AVX512BW"
  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
           (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
          UNSPEC_MASKED_EQ))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

;; Insn forms: the second alternative matches a compare against zero
;; and emits vptestnm (mask bit set iff the element is zero) instead.
(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
           (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
          UNSPEC_MASKED_EQ))]
  "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
   vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
           (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
          UNSPEC_MASKED_EQ))]
  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
   vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12783
;; V2DI compare-equal, SSE4.1 pcmpeqq.  "%" on operand 1 marks EQ as
;; commutative; alternatives 0/1 are two-operand non-AVX forms (dest tied
;; to operand 1 via "0"), alternative 2 the three-operand VEX form.
12784 (define_insn "*sse4_1_eqv2di3"
12785 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12786 (eq:V2DI
12787 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
12788 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12789 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12790 "@
12791 pcmpeqq\t{%2, %0|%0, %2}
12792 pcmpeqq\t{%2, %0|%0, %2}
12793 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
12794 [(set_attr "isa" "noavx,noavx,avx")
12795 (set_attr "type" "ssecmp")
12796 (set_attr "prefix_extra" "1")
12797 (set_attr "prefix" "orig,orig,vex")
12798 (set_attr "mode" "TI")])
12799
;; 8/16/32-bit element compare-equal for 128-bit vectors, SSE2 pcmpeqb/w/d.
;; Disabled under TARGET_XOP — presumably XOP comparison patterns elsewhere
;; in the file take over (not visible in this chunk).
12800 (define_insn "*sse2_eq<mode>3"
12801 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12802 (eq:VI124_128
12803 (match_operand:VI124_128 1 "vector_operand" "%0,x")
12804 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12805 "TARGET_SSE2 && !TARGET_XOP
12806 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12807 "@
12808 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
12809 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12810 [(set_attr "isa" "noavx,avx")
12811 (set_attr "type" "ssecmp")
12812 (set_attr "prefix_data16" "1,*")
12813 (set_attr "prefix" "orig,vex")
12814 (set_attr "mode" "TI")])
12815
;; Expander for 128-bit integer equality; ix86_fixup_binary_operands_no_copy
;; legitimizes/commutes the operands so the *sse2_eq<mode>3 insn can match.
;; Fix: dropped the stray trailing space inside the condition string — it
;; made this condition textually differ from the identical condition on the
;; corresponding insn, preventing genconditions from sharing them.
12816 (define_expand "sse2_eq<mode>3"
12817 [(set (match_operand:VI124_128 0 "register_operand")
12818 (eq:VI124_128
12819 (match_operand:VI124_128 1 "vector_operand")
12820 (match_operand:VI124_128 2 "vector_operand")))]
12821 "TARGET_SSE2 && !TARGET_XOP"
12822 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12823
;; Expander for V2DI equality (SSE4.1); legitimizes/commutes operands before
;; the *sse4_1_eqv2di3 insn matches.
12824 (define_expand "sse4_1_eqv2di3"
12825 [(set (match_operand:V2DI 0 "register_operand")
12826 (eq:V2DI
12827 (match_operand:V2DI 1 "vector_operand")
12828 (match_operand:V2DI 2 "vector_operand")))]
12829 "TARGET_SSE4_1"
12830 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
12831
;; Signed greater-than on V2DI, SSE4.2 pcmpgtq.  GT is not commutative, so
;; operand 1 must be a register (no "%" and no memory alternative for it).
12832 (define_insn "sse4_2_gtv2di3"
12833 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12834 (gt:V2DI
12835 (match_operand:V2DI 1 "register_operand" "0,0,x")
12836 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12837 "TARGET_SSE4_2"
12838 "@
12839 pcmpgtq\t{%2, %0|%0, %2}
12840 pcmpgtq\t{%2, %0|%0, %2}
12841 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
12842 [(set_attr "isa" "noavx,noavx,avx")
12843 (set_attr "type" "ssecmp")
12844 (set_attr "prefix_extra" "1")
12845 (set_attr "prefix" "orig,orig,vex")
12846 (set_attr "mode" "TI")])
12847
;; Signed greater-than on 256-bit integer vectors, AVX2 vpcmpgtb/w/d/q.
12848 (define_insn "avx2_gt<mode>3"
12849 [(set (match_operand:VI_256 0 "register_operand" "=x")
12850 (gt:VI_256
12851 (match_operand:VI_256 1 "register_operand" "x")
12852 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12853 "TARGET_AVX2"
12854 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12855 [(set_attr "type" "ssecmp")
12856 (set_attr "prefix_extra" "1")
12857 (set_attr "prefix" "vex")
12858 (set_attr "mode" "OI")])
12859
;; Masked signed greater-than for 32/64-bit element vectors, result in a
;; mask register; merge-masking via the <mask_scalar_merge_*> substs.
12860 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12861 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12862 (unspec:<avx512fmaskmode>
12863 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12864 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12865 "TARGET_AVX512F"
12866 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12867 [(set_attr "type" "ssecmp")
12868 (set_attr "prefix_extra" "1")
12869 (set_attr "prefix" "evex")
12870 (set_attr "mode" "<sseinsnmode>")])
12871
;; As above for 8/16-bit element vectors; byte/word compares need AVX512BW.
12872 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12873 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12874 (unspec:<avx512fmaskmode>
12875 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12876 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12877 "TARGET_AVX512BW"
12878 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12879 [(set_attr "type" "ssecmp")
12880 (set_attr "prefix_extra" "1")
12881 (set_attr "prefix" "evex")
12882 (set_attr "mode" "<sseinsnmode>")])
12883
;; Signed greater-than for 128-bit 8/16/32-bit element vectors,
;; SSE2 pcmpgtb/w/d; disabled when XOP is available.
12884 (define_insn "sse2_gt<mode>3"
12885 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12886 (gt:VI124_128
12887 (match_operand:VI124_128 1 "register_operand" "0,x")
12888 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12889 "TARGET_SSE2 && !TARGET_XOP"
12890 "@
12891 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
12892 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12893 [(set_attr "isa" "noavx,avx")
12894 (set_attr "type" "ssecmp")
12895 (set_attr "prefix_data16" "1,*")
12896 (set_attr "prefix" "orig,vex")
12897 (set_attr "mode" "TI")])
12898
;; Signed vector-condition expanders (vcond): op0 = op3(op4,op5) ? op1 : op2.
;; All of them hand the whole operand vector to ix86_expand_int_vcond and
;; assert it succeeded.  The NUNITS equality check pairs a data mode with a
;; comparison mode of the same element count.
12899 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
12900 [(set (match_operand:V_512 0 "register_operand")
12901 (if_then_else:V_512
12902 (match_operator 3 ""
12903 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12904 (match_operand:VI_AVX512BW 5 "general_operand")])
12905 (match_operand:V_512 1)
12906 (match_operand:V_512 2)))]
12907 "TARGET_AVX512F
12908 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12909 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12910 {
12911 bool ok = ix86_expand_int_vcond (operands);
12912 gcc_assert (ok);
12913 DONE;
12914 })
12915
;; 256-bit variant, AVX2.
12916 (define_expand "vcond<V_256:mode><VI_256:mode>"
12917 [(set (match_operand:V_256 0 "register_operand")
12918 (if_then_else:V_256
12919 (match_operator 3 ""
12920 [(match_operand:VI_256 4 "nonimmediate_operand")
12921 (match_operand:VI_256 5 "general_operand")])
12922 (match_operand:V_256 1)
12923 (match_operand:V_256 2)))]
12924 "TARGET_AVX2
12925 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12926 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12927 {
12928 bool ok = ix86_expand_int_vcond (operands);
12929 gcc_assert (ok);
12930 DONE;
12931 })
12932
;; 128-bit variant for 8/16/32-bit element comparisons, SSE2.
12933 (define_expand "vcond<V_128:mode><VI124_128:mode>"
12934 [(set (match_operand:V_128 0 "register_operand")
12935 (if_then_else:V_128
12936 (match_operator 3 ""
12937 [(match_operand:VI124_128 4 "vector_operand")
12938 (match_operand:VI124_128 5 "general_operand")])
12939 (match_operand:V_128 1)
12940 (match_operand:V_128 2)))]
12941 "TARGET_SSE2
12942 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12943 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12944 {
12945 bool ok = ix86_expand_int_vcond (operands);
12946 gcc_assert (ok);
12947 DONE;
12948 })
12949
;; V2DI comparison needs SSE4.2 pcmpgtq for the general signed case.
12950 (define_expand "vcond<VI8F_128:mode>v2di"
12951 [(set (match_operand:VI8F_128 0 "register_operand")
12952 (if_then_else:VI8F_128
12953 (match_operator 3 ""
12954 [(match_operand:V2DI 4 "vector_operand")
12955 (match_operand:V2DI 5 "general_operand")])
12956 (match_operand:VI8F_128 1)
12957 (match_operand:VI8F_128 2)))]
12958 "TARGET_SSE4_2"
12959 {
12960 bool ok = ix86_expand_int_vcond (operands);
12961 gcc_assert (ok);
12962 DONE;
12963 })
12964
;; Unsigned vector-condition expanders (vcondu) plus the equality-only
;; vcondeq variant; all dispatch to ix86_expand_int_vcond.  Note the
;; vcondu forms restrict the compared operands 4/5 to
;; nonimmediate/vector operands while allowing general operands 1/2.
12965 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
12966 [(set (match_operand:V_512 0 "register_operand")
12967 (if_then_else:V_512
12968 (match_operator 3 ""
12969 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12970 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
12971 (match_operand:V_512 1 "general_operand")
12972 (match_operand:V_512 2 "general_operand")))]
12973 "TARGET_AVX512F
12974 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12975 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12976 {
12977 bool ok = ix86_expand_int_vcond (operands);
12978 gcc_assert (ok);
12979 DONE;
12980 })
12981
;; 256-bit variant, AVX2.
12982 (define_expand "vcondu<V_256:mode><VI_256:mode>"
12983 [(set (match_operand:V_256 0 "register_operand")
12984 (if_then_else:V_256
12985 (match_operator 3 ""
12986 [(match_operand:VI_256 4 "nonimmediate_operand")
12987 (match_operand:VI_256 5 "nonimmediate_operand")])
12988 (match_operand:V_256 1 "general_operand")
12989 (match_operand:V_256 2 "general_operand")))]
12990 "TARGET_AVX2
12991 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12992 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12993 {
12994 bool ok = ix86_expand_int_vcond (operands);
12995 gcc_assert (ok);
12996 DONE;
12997 })
12998
;; 128-bit variant for 8/16/32-bit elements, SSE2.
12999 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
13000 [(set (match_operand:V_128 0 "register_operand")
13001 (if_then_else:V_128
13002 (match_operator 3 ""
13003 [(match_operand:VI124_128 4 "vector_operand")
13004 (match_operand:VI124_128 5 "vector_operand")])
13005 (match_operand:V_128 1 "general_operand")
13006 (match_operand:V_128 2 "general_operand")))]
13007 "TARGET_SSE2
13008 && (GET_MODE_NUNITS (<V_128:MODE>mode)
13009 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
13010 {
13011 bool ok = ix86_expand_int_vcond (operands);
13012 gcc_assert (ok);
13013 DONE;
13014 })
13015
;; Unsigned V2DI comparison, SSE4.2.
13016 (define_expand "vcondu<VI8F_128:mode>v2di"
13017 [(set (match_operand:VI8F_128 0 "register_operand")
13018 (if_then_else:VI8F_128
13019 (match_operator 3 ""
13020 [(match_operand:V2DI 4 "vector_operand")
13021 (match_operand:V2DI 5 "vector_operand")])
13022 (match_operand:VI8F_128 1 "general_operand")
13023 (match_operand:VI8F_128 2 "general_operand")))]
13024 "TARGET_SSE4_2"
13025 {
13026 bool ok = ix86_expand_int_vcond (operands);
13027 gcc_assert (ok);
13028 DONE;
13029 })
13030
;; Equality-only V2DI condition: only needs SSE4.1 (pcmpeqq), not SSE4.2.
13031 (define_expand "vcondeq<VI8F_128:mode>v2di"
13032 [(set (match_operand:VI8F_128 0 "register_operand")
13033 (if_then_else:VI8F_128
13034 (match_operator 3 ""
13035 [(match_operand:V2DI 4 "vector_operand")
13036 (match_operand:V2DI 5 "general_operand")])
13037 (match_operand:VI8F_128 1)
13038 (match_operand:VI8F_128 2)))]
13039 "TARGET_SSE4_1"
13040 {
13041 bool ok = ix86_expand_int_vcond (operands);
13042 gcc_assert (ok);
13043 DONE;
13044 })
13045
;; Modes supporting the generic two-input vec_perm expander; wider modes are
;; gated on the ISA that can permute them (AVX2 for 256-bit, AVX512F for
;; 512-bit SF/DF/SI/DI, AVX512BW for V32HI, AVX512VBMI for V64QI).
13046 (define_mode_iterator VEC_PERM_AVX2
13047 [V16QI V8HI V4SI V2DI V4SF V2DF
13048 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
13049 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
13050 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
13051 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
13052 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
13053 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
13054
;; Variable permute of two input vectors by an integer selector vector;
;; entirely synthesized by ix86_expand_vec_perm.
13055 (define_expand "vec_perm<mode>"
13056 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
13057 (match_operand:VEC_PERM_AVX2 1 "register_operand")
13058 (match_operand:VEC_PERM_AVX2 2 "register_operand")
13059 (match_operand:<sseintvecmode> 3 "register_operand")]
13060 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
13061 {
13062 ix86_expand_vec_perm (operands);
13063 DONE;
13064 })
13065
13066 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13067 ;;
13068 ;; Parallel bitwise logical operations
13069 ;;
13070 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13071
;; Vector bitwise NOT expressed as XOR with all-ones.  With AVX512F the
;; constant is left bare so the one_cmpl insn below (vpternlog, which
;; accepts the all-ones operand directly) can match; otherwise the constant
;; must first be materialized in a register.
13072 (define_expand "one_cmpl<mode>2"
13073 [(set (match_operand:VI 0 "register_operand")
13074 (xor:VI (match_operand:VI 1 "vector_operand")
13075 (match_dup 2)))]
13076 "TARGET_SSE"
13077 {
13078 if (!TARGET_AVX512F)
13079 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
13080 else
13081 operands[2] = CONSTM1_RTX (<MODE>mode);
13082 })
13083
;; Vector NOT via vpternlog with immediate 0x55 (truth table for NOT of the
;; first source).  Without AVX512VL the operands are printed with %g (the
;; 512-bit views of the registers) so the EVEX-512 form is used; the
;; "enabled" attr then also forbids the memory alternative for narrow modes.
;; Masking is restricted to 32/64-bit elements (vpternlogd/q granularity).
13084 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
13085 [(set (match_operand:VI 0 "register_operand" "=v,v")
13086 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
13087 (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
13088 "TARGET_AVX512F
13089 && (!<mask_applied>
13090 || <ssescalarmode>mode == SImode
13091 || <ssescalarmode>mode == DImode)"
13092 {
13093 if (TARGET_AVX512VL)
13094 return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
13095 else
13096 return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
13097 }
13098 [(set_attr "type" "sselog")
13099 (set_attr "prefix" "evex")
13100 (set (attr "mode")
13101 (if_then_else (match_test "TARGET_AVX512VL")
13102 (const_string "<sseinsnmode>")
13103 (const_string "XI")))
13104 (set (attr "enabled")
13105 (if_then_else (eq_attr "alternative" "1")
13106 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
13107 (const_int 1)))])
13108
;; Expanders for and-not: op0 = (~op1) & op2.  The plain form matches the
;; *andnot<mode>3 insn below; the _mask forms wrap it in a vec_merge for
;; AVX512 merge/zero masking (32/64-bit elements with AVX512F, 8/16-bit
;; elements with AVX512BW).
13109 (define_expand "<sse2_avx2>_andnot<mode>3"
13110 [(set (match_operand:VI_AVX2 0 "register_operand")
13111 (and:VI_AVX2
13112 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
13113 (match_operand:VI_AVX2 2 "vector_operand")))]
13114 "TARGET_SSE2")
13115
13116 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13117 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
13118 (vec_merge:VI48_AVX512VL
13119 (and:VI48_AVX512VL
13120 (not:VI48_AVX512VL
13121 (match_operand:VI48_AVX512VL 1 "register_operand"))
13122 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
13123 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
13124 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13125 "TARGET_AVX512F")
13126
13127 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13128 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
13129 (vec_merge:VI12_AVX512VL
13130 (and:VI12_AVX512VL
13131 (not:VI12_AVX512VL
13132 (match_operand:VI12_AVX512VL 1 "register_operand"))
13133 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
13134 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
13135 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13136 "TARGET_AVX512BW")
13137
;; op0 = (~op1) & op2 for all integer vector modes.  The C body picks the
;; mnemonic from the attr-selected insn mode (integer pandn vs. float andnps
;; fallback when SSE2/AVX2 are unavailable or for size) and the element
;; suffix: there is no vpandnb/vpandnw and no suffix-less 512-bit vpandn, so
;; byte/word 512-bit modes borrow "q", while 128/256-bit EVEX alternatives
;; need an explicit d/q suffix only under AVX512VL.  The ISA asserts encode
;; the ladder XI=>AVX512F, OI=>AVX2, TI=>SSE2 implied by the mode attr cond.
13138 (define_insn "*andnot<mode>3"
13139 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
13140 (and:VI
13141 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
13142 (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13143 "TARGET_SSE"
13144 {
13145 char buf[64];
13146 const char *ops;
13147 const char *tmp;
13148 const char *ssesuffix;
13149
13150 switch (get_attr_mode (insn))
13151 {
13152 case MODE_XI:
13153 gcc_assert (TARGET_AVX512F);
13154 /* FALLTHRU */
13155 case MODE_OI:
13156 gcc_assert (TARGET_AVX2);
13157 /* FALLTHRU */
13158 case MODE_TI:
13159 gcc_assert (TARGET_SSE2);
13160 tmp = "pandn";
13161 switch (<MODE>mode)
13162 {
13163 case E_V64QImode:
13164 case E_V32HImode:
13165 /* There is no vpandnb or vpandnw instruction, nor vpandn for
13166 512-bit vectors. Use vpandnq instead. */
13167 ssesuffix = "q";
13168 break;
13169 case E_V16SImode:
13170 case E_V8DImode:
13171 ssesuffix = "<ssemodesuffix>";
13172 break;
13173 case E_V8SImode:
13174 case E_V4DImode:
13175 case E_V4SImode:
13176 case E_V2DImode:
13177 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
13178 ? "<ssemodesuffix>" : "");
13179 break;
13180 default:
13181 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13182 }
13183 break;
13184
13185 case MODE_V16SF:
13186 gcc_assert (TARGET_AVX512F);
13187 /* FALLTHRU */
13188 case MODE_V8SF:
13189 gcc_assert (TARGET_AVX);
13190 /* FALLTHRU */
13191 case MODE_V4SF:
13192 gcc_assert (TARGET_SSE);
13193 tmp = "andn";
13194 ssesuffix = "ps";
13195 break;
13196
13197 default:
13198 gcc_unreachable ();
13199 }
13200
13201 switch (which_alternative)
13202 {
13203 case 0:
13204 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13205 break;
13206 case 1:
13207 case 2:
13208 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13209 break;
13210 default:
13211 gcc_unreachable ();
13212 }
13213
13214 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13215 output_asm_insn (buf, operands);
13216 return "";
13217 }
13218 [(set_attr "isa" "noavx,avx,avx")
13219 (set_attr "type" "sselog")
13220 (set (attr "prefix_data16")
13221 (if_then_else
13222 (and (eq_attr "alternative" "0")
13223 (eq_attr "mode" "TI"))
13224 (const_string "1")
13225 (const_string "*")))
13226 (set_attr "prefix" "orig,vex,evex")
13227 (set (attr "mode")
13228 (cond [(match_test "TARGET_AVX2")
13229 (const_string "<sseinsnmode>")
13230 (match_test "TARGET_AVX")
13231 (if_then_else
13232 (match_test "<MODE_SIZE> > 16")
13233 (const_string "V8SF")
13234 (const_string "<sseinsnmode>"))
13235 (ior (not (match_test "TARGET_SSE2"))
13236 (match_test "optimize_function_for_size_p (cfun)"))
13237 (const_string "V4SF")
13238 ]
13239 (const_string "<sseinsnmode>")))])
13240
;; Masked and-not: op0 = (~op1) & op2, merged with op3 (dest when tied via
;; "0", or zeroed when op3 is const0 per the "0C" constraint) under mask %4.
;; Fix: removed the stray ";" that followed the output-template string —
;; ";" starts a machine-description comment, so it was dead text.
13241 (define_insn "*andnot<mode>3_mask"
13242 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13243 (vec_merge:VI48_AVX512VL
13244 (and:VI48_AVX512VL
13245 (not:VI48_AVX512VL
13246 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
13247 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
13248 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
13249 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
13250 "TARGET_AVX512F"
13251 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
13252 [(set_attr "type" "sselog")
13253 (set_attr "prefix" "evex")
13254 (set_attr "mode" "<sseinsnmode>")])
13255
;; Expander for and/ior/xor (any_logic) on all integer vector modes; the
;; heavy lifting (legitimizing constant operands etc.) is done by
;; ix86_expand_vector_logical_operator.
13256 (define_expand "<code><mode>3"
13257 [(set (match_operand:VI 0 "register_operand")
13258 (any_logic:VI
13259 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
13260 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
13261 "TARGET_SSE"
13262 {
13263 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
13264 DONE;
13265 })
13266
;; and/ior/xor on 32/64-bit element vectors, optionally masked.  The C body
;; picks integer p<logic> vs. float <logic>ps from the attr-selected mode and
;; adds an explicit d/q suffix only where EVEX encoding requires it.
;; Fix: the "prefix" attr listed only two values ("<mask_prefix3>,evex") for
;; a three-alternative pattern ("=x,x,v"; cf. isa "noavx,avx,avx"); give
;; alternative 1 its own <mask_prefix3> entry so every alternative is covered.
13267 (define_insn "<mask_codefor><code><mode>3<mask_name>"
13268 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
13269 (any_logic:VI48_AVX_AVX512F
13270 (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v")
13271 (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13272 "TARGET_SSE && <mask_mode512bit_condition>
13273 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
13274 {
13275 char buf[64];
13276 const char *ops;
13277 const char *tmp;
13278 const char *ssesuffix;
13279
13280 switch (get_attr_mode (insn))
13281 {
13282 case MODE_XI:
13283 gcc_assert (TARGET_AVX512F);
13284 /* FALLTHRU */
13285 case MODE_OI:
13286 gcc_assert (TARGET_AVX2);
13287 /* FALLTHRU */
13288 case MODE_TI:
13289 gcc_assert (TARGET_SSE2);
13290 tmp = "p<logic>";
13291 switch (<MODE>mode)
13292 {
13293 case E_V16SImode:
13294 case E_V8DImode:
13295 ssesuffix = "<ssemodesuffix>";
13296 break;
13297 case E_V8SImode:
13298 case E_V4DImode:
13299 case E_V4SImode:
13300 case E_V2DImode:
13301 ssesuffix = (TARGET_AVX512VL
13302 && (<mask_applied> || which_alternative == 2)
13303 ? "<ssemodesuffix>" : "");
13304 break;
13305 default:
13306 gcc_unreachable ();
13307 }
13308 break;
13309
13310 case MODE_V8SF:
13311 gcc_assert (TARGET_AVX);
13312 /* FALLTHRU */
13313 case MODE_V4SF:
13314 gcc_assert (TARGET_SSE);
13315 tmp = "<logic>";
13316 ssesuffix = "ps";
13317 break;
13318
13319 default:
13320 gcc_unreachable ();
13321 }
13322
13323 switch (which_alternative)
13324 {
13325 case 0:
13326 if (<mask_applied>)
13327 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
13328 else
13329 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13330 break;
13331 case 1:
13332 case 2:
13333 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
13334 break;
13335 default:
13336 gcc_unreachable ();
13337 }
13338
13339 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13340 output_asm_insn (buf, operands);
13341 return "";
13342 }
13343 [(set_attr "isa" "noavx,avx,avx")
13344 (set_attr "type" "sselog")
13345 (set (attr "prefix_data16")
13346 (if_then_else
13347 (and (eq_attr "alternative" "0")
13348 (eq_attr "mode" "TI"))
13349 (const_string "1")
13350 (const_string "*")))
13351 (set_attr "prefix" "<mask_prefix3>,<mask_prefix3>,evex")
13352 (set (attr "mode")
13353 (cond [(match_test "TARGET_AVX2")
13354 (const_string "<sseinsnmode>")
13355 (match_test "TARGET_AVX")
13356 (if_then_else
13357 (match_test "<MODE_SIZE> > 16")
13358 (const_string "V8SF")
13359 (const_string "<sseinsnmode>"))
13360 (ior (not (match_test "TARGET_SSE2"))
13361 (match_test "optimize_function_for_size_p (cfun)"))
13362 (const_string "V4SF")
13363 ]
13364 (const_string "<sseinsnmode>")))])
13365
;; and/ior/xor on 8/16-bit element vectors.  No vpandb/vpandw etc. exist,
;; so 512-bit byte/word modes always use the "q" suffix and the 128/256-bit
;; EVEX alternative (2) uses "q" only under AVX512VL; otherwise the
;; suffix-less legacy/VEX mnemonic is emitted.  Float <logic>ps is the
;; fallback when the attr-selected mode is V4SF/V8SF (pre-SSE2 or -Os).
13366 (define_insn "*<code><mode>3"
13367 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
13368 (any_logic:VI12_AVX_AVX512F
13369 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
13370 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
13371 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13372 {
13373 char buf[64];
13374 const char *ops;
13375 const char *tmp;
13376 const char *ssesuffix;
13377
13378 switch (get_attr_mode (insn))
13379 {
13380 case MODE_XI:
13381 gcc_assert (TARGET_AVX512F);
13382 /* FALLTHRU */
13383 case MODE_OI:
13384 gcc_assert (TARGET_AVX2);
13385 /* FALLTHRU */
13386 case MODE_TI:
13387 gcc_assert (TARGET_SSE2);
13388 tmp = "p<logic>";
13389 switch (<MODE>mode)
13390 {
13391 case E_V64QImode:
13392 case E_V32HImode:
13393 ssesuffix = "q";
13394 break;
13395 case E_V32QImode:
13396 case E_V16HImode:
13397 case E_V16QImode:
13398 case E_V8HImode:
13399 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13400 break;
13401 default:
13402 gcc_unreachable ();
13403 }
13404 break;
13405
13406 case MODE_V8SF:
13407 gcc_assert (TARGET_AVX);
13408 /* FALLTHRU */
13409 case MODE_V4SF:
13410 gcc_assert (TARGET_SSE);
13411 tmp = "<logic>";
13412 ssesuffix = "ps";
13413 break;
13414
13415 default:
13416 gcc_unreachable ();
13417 }
13418
13419 switch (which_alternative)
13420 {
13421 case 0:
13422 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13423 break;
13424 case 1:
13425 case 2:
13426 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13427 break;
13428 default:
13429 gcc_unreachable ();
13430 }
13431
13432 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13433 output_asm_insn (buf, operands);
13434 return "";
13435 }
13436 [(set_attr "isa" "noavx,avx,avx")
13437 (set_attr "type" "sselog")
13438 (set (attr "prefix_data16")
13439 (if_then_else
13440 (and (eq_attr "alternative" "0")
13441 (eq_attr "mode" "TI"))
13442 (const_string "1")
13443 (const_string "*")))
13444 (set_attr "prefix" "orig,vex,evex")
13445 (set (attr "mode")
13446 (cond [(match_test "TARGET_AVX2")
13447 (const_string "<sseinsnmode>")
13448 (match_test "TARGET_AVX")
13449 (if_then_else
13450 (match_test "<MODE_SIZE> > 16")
13451 (const_string "V8SF")
13452 (const_string "<sseinsnmode>"))
13453 (ior (not (match_test "TARGET_SSE2"))
13454 (match_test "optimize_function_for_size_p (cfun)"))
13455 (const_string "V4SF")
13456 ]
13457 (const_string "<sseinsnmode>")))])
13458
;; All element widths for the vptestm/vptestnm patterns: byte/word modes
;; require AVX512BW, sub-512-bit modes additionally require AVX512VL.
13459 (define_mode_iterator VI1248_AVX512VLBW
13460 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
13461 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
13462 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
13463 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
13464 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
13465 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
13466
;; Wider scalar mask modes a narrower test result may be zero-extended
;; into; SI/DI masks only exist with AVX512BW.
13467 (define_mode_iterator AVX512ZEXTMASK
13468 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
13469
;; vptestm: per-element mask bit is set when (op1 & op2) is non-zero;
;; merge-masking via <mask_scalar_merge_*>.
13470 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
13471 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13472 (unspec:<avx512fmaskmode>
13473 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13474 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13475 UNSPEC_TESTM))]
13476 "TARGET_AVX512F"
13477 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13478 [(set_attr "prefix" "evex")
13479 (set_attr "mode" "<sseinsnmode>")])
13480
;; vptestnm: the complement — mask bit set when (op1 & op2) is zero.
13481 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
13482 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13483 (unspec:<avx512fmaskmode>
13484 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13485 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13486 UNSPEC_TESTNM))]
13487 "TARGET_AVX512F"
13488 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13489 [(set_attr "prefix" "evex")
13490 (set_attr "mode" "<sseinsnmode>")])
13491
;; Combiner patterns matching a vptestm/vptestnm result zero-extended into a
;; wider scalar mask mode (the instruction naturally zeroes the high mask
;; bits).  The size guard ensures the destination mode is strictly wider
;; than the natural mask mode.  The _mask variants fold an AND with another
;; mask register into the instruction's zero-masking operand.
13492 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
13493 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13494 (zero_extend:AVX512ZEXTMASK
13495 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13496 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13497 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13498 UNSPEC_TESTM)))]
13499 "TARGET_AVX512BW
13500 && (<AVX512ZEXTMASK:MODE_SIZE>
13501 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13502 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13503 [(set_attr "prefix" "evex")
13504 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13505
13506 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
13507 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13508 (zero_extend:AVX512ZEXTMASK
13509 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13510 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13511 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13512 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13513 UNSPEC_TESTM)
13514 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13515 "TARGET_AVX512BW
13516 && (<AVX512ZEXTMASK:MODE_SIZE>
13517 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13518 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13519 [(set_attr "prefix" "evex")
13520 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13521
13522 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
13523 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13524 (zero_extend:AVX512ZEXTMASK
13525 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13526 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13527 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13528 UNSPEC_TESTNM)))]
13529 "TARGET_AVX512BW
13530 && (<AVX512ZEXTMASK:MODE_SIZE>
13531 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13532 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13533 [(set_attr "prefix" "evex")
13534 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13535
13536 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
13537 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13538 (zero_extend:AVX512ZEXTMASK
13539 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13540 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13541 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13542 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13543 UNSPEC_TESTNM)
13544 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13545 "TARGET_AVX512BW
13546 && (<AVX512ZEXTMASK:MODE_SIZE>
13547 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13548 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13549 [(set_attr "prefix" "evex")
13550 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13551
13552 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13553 ;;
13554 ;; Parallel integral element swizzling
13555 ;;
13556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13557
;; Pack two vectors of wide elements into one vector of half-width elements
;; by truncation: view both inputs as the packed mode and take the even
;; (low) half-elements of each via ix86_expand_vec_extract_even_odd.
13558 (define_expand "vec_pack_trunc_<mode>"
13559 [(match_operand:<ssepackmode> 0 "register_operand")
13560 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
13561 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
13562 "TARGET_SSE2"
13563 {
13564 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
13565 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
13566 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
13567 DONE;
13568 })
13569
;; Concatenate two QImode mask values into one HImode mask:
;; op0 = (op2 << 8) | op1.
13570 (define_expand "vec_pack_trunc_qi"
13571 [(set (match_operand:HI 0 "register_operand")
13572 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
13573 (const_int 8))
13574 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
13575 "TARGET_AVX512F")
13576
;; Same for HI/SI mask values into the double-width mask mode; the shift
;; amount (operand 3) is the source mode's bit size.
13577 (define_expand "vec_pack_trunc_<mode>"
13578 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
13579 (ior:<DOUBLEMASKMODE>
13580 (ashift:<DOUBLEMASKMODE>
13581 (zero_extend:<DOUBLEMASKMODE>
13582 (match_operand:SWI24 2 "register_operand"))
13583 (match_dup 3))
13584 (zero_extend:<DOUBLEMASKMODE>
13585 (match_operand:SWI24 1 "register_operand"))))]
13586 "TARGET_AVX512BW"
13587 {
13588 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
13589 })
13590
;; Pack two QImode masks that each hold only operand-3/2 significant bits
;; (4 or 8 total elements).  Keeps the low nunits/2 bits of op1 (kand with
;; a literal mask), shifts op2 up by nunits/2 (kshiftlb needs AVX512DQ;
;; otherwise go through HImode kshiftlw and subreg back), then kor's the
;; two halves together.
13591 (define_expand "vec_pack_sbool_trunc_qi"
13592 [(match_operand:QI 0 "register_operand")
13593 (match_operand:QI 1 "register_operand")
13594 (match_operand:QI 2 "register_operand")
13595 (match_operand:QI 3 "const_int_operand")]
13596 "TARGET_AVX512F"
13597 {
13598 HOST_WIDE_INT nunits = INTVAL (operands[3]);
13599 rtx mask, tem1, tem2;
13600 if (nunits != 8 && nunits != 4)
13601 FAIL;
13602 mask = gen_reg_rtx (QImode);
13603 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
13604 tem1 = gen_reg_rtx (QImode);
13605 emit_insn (gen_kandqi (tem1, operands[1], mask));
13606 if (TARGET_AVX512DQ)
13607 {
13608 tem2 = gen_reg_rtx (QImode);
13609 emit_insn (gen_kashiftqi (tem2, operands[2],
13610 GEN_INT (nunits / 2)));
13611 }
13612 else
13613 {
13614 tem2 = gen_reg_rtx (HImode);
13615 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
13616 QImode),
13617 GEN_INT (nunits / 2)));
13618 tem2 = lowpart_subreg (QImode, tem2, HImode);
13619 }
13620 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
13621 DONE;
13622 })
13623
;; Saturating pack instructions: concatenate the signed-saturated
;; (ss_truncate) or unsigned-saturated (us_truncate) halves of two wider
;; vectors.  Alternative 2 (EVEX "v" registers) needs AVX512BW; masking is
;; available through <mask_operand3>.
13624 (define_insn "<sse2_avx2>_packsswb<mask_name>"
13625 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13626 (vec_concat:VI1_AVX512
13627 (ss_truncate:<ssehalfvecmode>
13628 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13629 (ss_truncate:<ssehalfvecmode>
13630 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13631 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13632 "@
13633 packsswb\t{%2, %0|%0, %2}
13634 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13635 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13636 [(set_attr "isa" "noavx,avx,avx512bw")
13637 (set_attr "type" "sselog")
13638 (set_attr "prefix_data16" "1,*,*")
13639 (set_attr "prefix" "orig,<mask_prefix>,evex")
13640 (set_attr "mode" "<sseinsnmode>")])
13641
;; Signed-saturating dword -> word pack.
13642 (define_insn "<sse2_avx2>_packssdw<mask_name>"
13643 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13644 (vec_concat:VI2_AVX2
13645 (ss_truncate:<ssehalfvecmode>
13646 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13647 (ss_truncate:<ssehalfvecmode>
13648 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13649 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13650 "@
13651 packssdw\t{%2, %0|%0, %2}
13652 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13653 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13654 [(set_attr "isa" "noavx,avx,avx512bw")
13655 (set_attr "type" "sselog")
13656 (set_attr "prefix_data16" "1,*,*")
13657 (set_attr "prefix" "orig,<mask_prefix>,evex")
13658 (set_attr "mode" "<sseinsnmode>")])
13659
;; Unsigned-saturating word -> byte pack.
13660 (define_insn "<sse2_avx2>_packuswb<mask_name>"
13661 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13662 (vec_concat:VI1_AVX512
13663 (us_truncate:<ssehalfvecmode>
13664 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13665 (us_truncate:<ssehalfvecmode>
13666 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13667 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13668 "@
13669 packuswb\t{%2, %0|%0, %2}
13670 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13671 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13672 [(set_attr "isa" "noavx,avx,avx512bw")
13673 (set_attr "type" "sselog")
13674 (set_attr "prefix_data16" "1,*,*")
13675 (set_attr "prefix" "orig,<mask_prefix>,evex")
13676 (set_attr "mode" "<sseinsnmode>")])
13677
;; vpunpckhbw on 512-bit vectors: interleaves the high 8 bytes of op1 and
;; op2 within each of the four 128-bit lanes — the selector indices run
;; 8..15, 24..31, 40..47, 56..63 for op1, paired with 72.. etc. from op2.
13678 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
13679 [(set (match_operand:V64QI 0 "register_operand" "=v")
13680 (vec_select:V64QI
13681 (vec_concat:V128QI
13682 (match_operand:V64QI 1 "register_operand" "v")
13683 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13684 (parallel [(const_int 8) (const_int 72)
13685 (const_int 9) (const_int 73)
13686 (const_int 10) (const_int 74)
13687 (const_int 11) (const_int 75)
13688 (const_int 12) (const_int 76)
13689 (const_int 13) (const_int 77)
13690 (const_int 14) (const_int 78)
13691 (const_int 15) (const_int 79)
13692 (const_int 24) (const_int 88)
13693 (const_int 25) (const_int 89)
13694 (const_int 26) (const_int 90)
13695 (const_int 27) (const_int 91)
13696 (const_int 28) (const_int 92)
13697 (const_int 29) (const_int 93)
13698 (const_int 30) (const_int 94)
13699 (const_int 31) (const_int 95)
13700 (const_int 40) (const_int 104)
13701 (const_int 41) (const_int 105)
13702 (const_int 42) (const_int 106)
13703 (const_int 43) (const_int 107)
13704 (const_int 44) (const_int 108)
13705 (const_int 45) (const_int 109)
13706 (const_int 46) (const_int 110)
13707 (const_int 47) (const_int 111)
13708 (const_int 56) (const_int 120)
13709 (const_int 57) (const_int 121)
13710 (const_int 58) (const_int 122)
13711 (const_int 59) (const_int 123)
13712 (const_int 60) (const_int 124)
13713 (const_int 61) (const_int 125)
13714 (const_int 62) (const_int 126)
13715 (const_int 63) (const_int 127)])))]
13716 "TARGET_AVX512BW"
13717 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13718 [(set_attr "type" "sselog")
13719 (set_attr "prefix" "evex")
13720 (set_attr "mode" "XI")])
13721
;; vpunpckhbw ymm: interleave the high bytes of operands 1 and 2 within
;; each 128-bit lane (indices 8-15 and 24-31 paired with the matching
;; bytes of operand 2 at +32).
;; Masked byte-granular operation requires AVX512BW in addition to
;; AVX512VL, so the masked variant must also test
;; <mask_avx512bw_condition> — matching avx2_interleave_lowv32qi below.
(define_insn "avx2_interleave_highv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (match_operand:V32QI 1 "register_operand" "v")
	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 8) (const_int 40)
		     (const_int 9) (const_int 41)
		     (const_int 10) (const_int 42)
		     (const_int 11) (const_int 43)
		     (const_int 12) (const_int 44)
		     (const_int 13) (const_int 45)
		     (const_int 14) (const_int 46)
		     (const_int 15) (const_int 47)
		     (const_int 24) (const_int 56)
		     (const_int 25) (const_int 57)
		     (const_int 26) (const_int 58)
		     (const_int 27) (const_int 59)
		     (const_int 28) (const_int 60)
		     (const_int 29) (const_int 61)
		     (const_int 30) (const_int 62)
		     (const_int 31) (const_int 63)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
13749
;; punpckhbw/vpunpckhbw xmm: interleave the high 8 bytes of operands 1
;; and 2.  Masked byte-granular operation requires AVX512BW in addition
;; to AVX512VL, so the masked variant must also test
;; <mask_avx512bw_condition> — matching vec_interleave_lowv16qi below.
(define_insn "vec_interleave_highv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (match_operand:V16QI 1 "register_operand" "0,v")
	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
	  (parallel [(const_int 8) (const_int 24)
		     (const_int 9) (const_int 25)
		     (const_int 10) (const_int 26)
		     (const_int 11) (const_int 27)
		     (const_int 12) (const_int 28)
		     (const_int 13) (const_int 29)
		     (const_int 14) (const_int 30)
		     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpckhbw\t{%2, %0|%0, %2}
   vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
13773
;; vpunpcklbw zmm: interleave the low bytes of operands 1 and 2 within
;; each of the four 128-bit lanes (indices 0-7, 16-23, 32-39, 48-55
;; paired with the corresponding bytes of operand 2 at +64).
(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
  [(set (match_operand:V64QI 0 "register_operand" "=v")
	(vec_select:V64QI
	  (vec_concat:V128QI
	    (match_operand:V64QI 1 "register_operand" "v")
	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 64)
		     (const_int 1) (const_int 65)
		     (const_int 2) (const_int 66)
		     (const_int 3) (const_int 67)
		     (const_int 4) (const_int 68)
		     (const_int 5) (const_int 69)
		     (const_int 6) (const_int 70)
		     (const_int 7) (const_int 71)
		     (const_int 16) (const_int 80)
		     (const_int 17) (const_int 81)
		     (const_int 18) (const_int 82)
		     (const_int 19) (const_int 83)
		     (const_int 20) (const_int 84)
		     (const_int 21) (const_int 85)
		     (const_int 22) (const_int 86)
		     (const_int 23) (const_int 87)
		     (const_int 32) (const_int 96)
		     (const_int 33) (const_int 97)
		     (const_int 34) (const_int 98)
		     (const_int 35) (const_int 99)
		     (const_int 36) (const_int 100)
		     (const_int 37) (const_int 101)
		     (const_int 38) (const_int 102)
		     (const_int 39) (const_int 103)
		     (const_int 48) (const_int 112)
		     (const_int 49) (const_int 113)
		     (const_int 50) (const_int 114)
		     (const_int 51) (const_int 115)
		     (const_int 52) (const_int 116)
		     (const_int 53) (const_int 117)
		     (const_int 54) (const_int 118)
		     (const_int 55) (const_int 119)])))]
  "TARGET_AVX512BW"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
13817
;; vpunpcklbw ymm: interleave the low bytes of operands 1 and 2 within
;; each 128-bit lane (indices 0-7 and 16-23 paired with the matching
;; bytes of operand 2 at +32).
;; NOTE(review): the prefix attr is "maybe_vex" here while the high
;; variant above uses "<mask_prefix>" — verify the masked (EVEX) form's
;; length is computed correctly with "maybe_vex".
(define_insn "avx2_interleave_lowv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (match_operand:V32QI 1 "register_operand" "v")
	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 32)
		     (const_int 1) (const_int 33)
		     (const_int 2) (const_int 34)
		     (const_int 3) (const_int 35)
		     (const_int 4) (const_int 36)
		     (const_int 5) (const_int 37)
		     (const_int 6) (const_int 38)
		     (const_int 7) (const_int 39)
		     (const_int 16) (const_int 48)
		     (const_int 17) (const_int 49)
		     (const_int 18) (const_int 50)
		     (const_int 19) (const_int 51)
		     (const_int 20) (const_int 52)
		     (const_int 21) (const_int 53)
		     (const_int 22) (const_int 54)
		     (const_int 23) (const_int 55)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "OI")])
13845
;; punpcklbw/vpunpcklbw xmm: interleave the low 8 bytes of operands 1
;; and 2.
;; The masked variant (<mask_operand3> present) is EVEX-only, so the
;; second alternative's prefix must be <mask_prefix> (vex unmasked,
;; evex masked) rather than plain "vex" — matching
;; vec_interleave_highv16qi above.
(define_insn "vec_interleave_lowv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (match_operand:V16QI 1 "register_operand" "0,v")
	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
	  (parallel [(const_int 0) (const_int 16)
		     (const_int 1) (const_int 17)
		     (const_int 2) (const_int 18)
		     (const_int 3) (const_int 19)
		     (const_int 4) (const_int 20)
		     (const_int 5) (const_int 21)
		     (const_int 6) (const_int 22)
		     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpcklbw\t{%2, %0|%0, %2}
   vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
13869
;; vpunpckhwd zmm: interleave the high words of operands 1 and 2 within
;; each of the four 128-bit lanes (indices 4-7, 12-15, 20-23, 28-31
;; paired with the corresponding words of operand 2 at +32).
(define_insn "avx512bw_interleave_highv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (match_operand:V32HI 1 "register_operand" "v")
	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 4) (const_int 36)
		     (const_int 5) (const_int 37)
		     (const_int 6) (const_int 38)
		     (const_int 7) (const_int 39)
		     (const_int 12) (const_int 44)
		     (const_int 13) (const_int 45)
		     (const_int 14) (const_int 46)
		     (const_int 15) (const_int 47)
		     (const_int 20) (const_int 52)
		     (const_int 21) (const_int 53)
		     (const_int 22) (const_int 54)
		     (const_int 23) (const_int 55)
		     (const_int 28) (const_int 60)
		     (const_int 29) (const_int 61)
		     (const_int 30) (const_int 62)
		     (const_int 31) (const_int 63)])))]
  "TARGET_AVX512BW"
  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
13897
;; vpunpckhwd ymm: interleave the high words of operands 1 and 2 within
;; each 128-bit lane (indices 4-7 and 12-15 paired with the matching
;; words of operand 2 at +16).
(define_insn "avx2_interleave_highv16hi<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (match_operand:V16HI 1 "register_operand" "v")
	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 4) (const_int 20)
		     (const_int 5) (const_int 21)
		     (const_int 6) (const_int 22)
		     (const_int 7) (const_int 23)
		     (const_int 12) (const_int 28)
		     (const_int 13) (const_int 29)
		     (const_int 14) (const_int 30)
		     (const_int 15) (const_int 31)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
13917
;; punpckhwd/vpunpckhwd xmm: interleave the high 4 words of operands 1
;; and 2.  Masked variant requires AVX512VL+AVX512BW.
(define_insn "vec_interleave_highv8hi<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (match_operand:V8HI 1 "register_operand" "0,v")
	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
	  (parallel [(const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpckhwd\t{%2, %0|%0, %2}
   vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "TI")])
13937
;; vpunpcklwd zmm: interleave the low words of operands 1 and 2 within
;; each of the four 128-bit lanes (indices 0-3, 8-11, 16-19, 24-27
;; paired with the corresponding words of operand 2 at +32).
(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (match_operand:V32HI 1 "register_operand" "v")
	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 32)
		     (const_int 1) (const_int 33)
		     (const_int 2) (const_int 34)
		     (const_int 3) (const_int 35)
		     (const_int 8) (const_int 40)
		     (const_int 9) (const_int 41)
		     (const_int 10) (const_int 42)
		     (const_int 11) (const_int 43)
		     (const_int 16) (const_int 48)
		     (const_int 17) (const_int 49)
		     (const_int 18) (const_int 50)
		     (const_int 19) (const_int 51)
		     (const_int 24) (const_int 56)
		     (const_int 25) (const_int 57)
		     (const_int 26) (const_int 58)
		     (const_int 27) (const_int 59)])))]
  "TARGET_AVX512BW"
  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
13965
;; vpunpcklwd ymm: interleave the low words of operands 1 and 2 within
;; each 128-bit lane (indices 0-3 and 8-11 paired with the matching
;; words of operand 2 at +16).
(define_insn "avx2_interleave_lowv16hi<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (match_operand:V16HI 1 "register_operand" "v")
	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 16)
		     (const_int 1) (const_int 17)
		     (const_int 2) (const_int 18)
		     (const_int 3) (const_int 19)
		     (const_int 8) (const_int 24)
		     (const_int 9) (const_int 25)
		     (const_int 10) (const_int 26)
		     (const_int 11) (const_int 27)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
13985
;; punpcklwd/vpunpcklwd xmm: interleave the low 4 words of operands 1
;; and 2.  Masked variant requires AVX512VL+AVX512BW.
(define_insn "vec_interleave_lowv8hi<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (match_operand:V8HI 1 "register_operand" "0,v")
	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpcklwd\t{%2, %0|%0, %2}
   vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
14005
;; vpunpckhdq ymm: interleave the high doublewords of operands 1 and 2
;; within each 128-bit lane (indices 2-3 and 6-7 paired with the
;; matching dwords of operand 2 at +8).
(define_insn "avx2_interleave_highv8si<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_select:V8SI
	  (vec_concat:V16SI
	    (match_operand:V8SI 1 "register_operand" "v")
	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
14021
;; vpunpckhdq zmm: interleave the high doublewords of operands 1 and 2
;; within each of the four 128-bit lanes (indices 2-3, 6-7, 10-11,
;; 14-15 paired with the corresponding dwords of operand 2 at +16).
(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_select:V16SI
	  (vec_concat:V32SI
	    (match_operand:V16SI 1 "register_operand" "v")
	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 2) (const_int 18)
		     (const_int 3) (const_int 19)
		     (const_int 6) (const_int 22)
		     (const_int 7) (const_int 23)
		     (const_int 10) (const_int 26)
		     (const_int 11) (const_int 27)
		     (const_int 14) (const_int 30)
		     (const_int 15) (const_int 31)])))]
  "TARGET_AVX512F"
  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14041
14042
;; punpckhdq/vpunpckhdq xmm: interleave the high 2 doublewords of
;; operands 1 and 2.  Masked variant requires AVX512VL.
(define_insn "vec_interleave_highv4si<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
	(vec_select:V4SI
	  (vec_concat:V8SI
	    (match_operand:V4SI 1 "register_operand" "0,v")
	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
	  (parallel [(const_int 2) (const_int 6)
		     (const_int 3) (const_int 7)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpckhdq\t{%2, %0|%0, %2}
   vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "TI")])
14060
;; vpunpckldq ymm: interleave the low doublewords of operands 1 and 2
;; within each 128-bit lane (indices 0-1 and 4-5 paired with the
;; matching dwords of operand 2 at +8).
(define_insn "avx2_interleave_lowv8si<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_select:V8SI
	  (vec_concat:V16SI
	    (match_operand:V8SI 1 "register_operand" "v")
	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
14076
;; vpunpckldq zmm: interleave the low doublewords of operands 1 and 2
;; within each of the four 128-bit lanes (indices 0-1, 4-5, 8-9, 12-13
;; paired with the corresponding dwords of operand 2 at +16).
(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_select:V16SI
	  (vec_concat:V32SI
	    (match_operand:V16SI 1 "register_operand" "v")
	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 16)
		     (const_int 1) (const_int 17)
		     (const_int 4) (const_int 20)
		     (const_int 5) (const_int 21)
		     (const_int 8) (const_int 24)
		     (const_int 9) (const_int 25)
		     (const_int 12) (const_int 28)
		     (const_int 13) (const_int 29)])))]
  "TARGET_AVX512F"
  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14096
;; punpckldq/vpunpckldq xmm: interleave the low 2 doublewords of
;; operands 1 and 2.  Masked variant requires AVX512VL.
;; The masked form is EVEX-encoded, so the second alternative uses
;; "maybe_vex" for the prefix attribute (length computation) —
;; matching vec_interleave_highv4si above, which already does.
(define_insn "vec_interleave_lowv4si<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
	(vec_select:V4SI
	  (vec_concat:V8SI
	    (match_operand:V4SI 1 "register_operand" "0,v")
	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
	  (parallel [(const_int 0) (const_int 4)
		     (const_int 1) (const_int 5)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "TI")])
14114
;; Whole-vector (not lane-wise) high interleave for 256-bit integer
;; modes.  AVX2 punpck* operate within 128-bit lanes, so we compute
;; both lane-wise interleaves and then use vperm2ti to gather the two
;; high 128-bit lanes: selector 1 + (3 << 4) picks lane 1 of t1 for the
;; low half of the result and lane 1 of t2 (encoded as 3) for the high
;; half.
(define_expand "vec_interleave_high<mode>"
  [(match_operand:VI_256 0 "register_operand")
   (match_operand:VI_256 1 "register_operand")
   (match_operand:VI_256 2 "nonimmediate_operand")]
 "TARGET_AVX2"
{
  rtx t1 = gen_reg_rtx (<MODE>mode);
  rtx t2 = gen_reg_rtx (<MODE>mode);
  rtx t3 = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
				gen_lowpart (V4DImode, t2),
				GEN_INT (1 + (3 << 4))));
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
  DONE;
})
14132
;; Whole-vector low interleave for 256-bit integer modes; counterpart
;; of vec_interleave_high<mode>.  Selector 0 + (2 << 4) gathers the two
;; low 128-bit lanes: lane 0 of t1 for the low half and lane 0 of t2
;; (encoded as 2) for the high half.
(define_expand "vec_interleave_low<mode>"
  [(match_operand:VI_256 0 "register_operand")
   (match_operand:VI_256 1 "register_operand")
   (match_operand:VI_256 2 "nonimmediate_operand")]
 "TARGET_AVX2"
{
  rtx t1 = gen_reg_rtx (<MODE>mode);
  rtx t2 = gen_reg_rtx (<MODE>mode);
  rtx t3 = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
				gen_lowpart (V4DImode, t2),
				GEN_INT (0 + (2 << 4))));
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
  DONE;
})
14150
;; Modes handled by pinsr patterns.  V8HI (pinsrw) is plain SSE2; byte,
;; dword and qword inserts need SSE4.1, and pinsrq additionally needs
;; 64-bit mode.
(define_mode_iterator PINSR_MODE
  [(V16QI "TARGET_SSE4_1") V8HI
   (V4SI "TARGET_SSE4_1")
   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
14156
;; Pattern-name prefix for the pinsr insn below: "sse2" for the
;; SSE2-era pinsrw, "sse4_1" for the rest.
(define_mode_attr sse2p4_1
  [(V16QI "sse4_1") (V8HI "sse2")
   (V4SI "sse4_1") (V2DI "sse4_1")])
14160
;; ISA required for the EVEX-encoded pinsr alternatives: byte/word
;; inserts come from AVX512BW, dword/qword inserts from AVX512DQ.
(define_mode_attr pinsr_evex_isa
  [(V16QI "avx512bw") (V8HI "avx512bw")
   (V4SI "avx512dq") (V2DI "avx512dq")])
14164
;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into vector operand 1 at the element
;; position given by operand 3 (which arrives as a one-bit vec_merge
;; mask and is converted to an element index via exact_log2 below).
;; Alternatives: 0-1 legacy SSE (register/memory source), 2-3 AVX,
;; 4-5 EVEX (AVX512BW or AVX512DQ depending on element size).
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
	(vec_merge:PINSR_MODE
	  (vec_duplicate:PINSR_MODE
	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
	  (match_operand:SI 3 "const_int_operand")))]
  "TARGET_SSE2
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (<MODE>mode))"
{
  /* Turn the single-bit merge mask into the element index the
     instruction's immediate expects.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));

  switch (which_alternative)
    {
    case 0:
      /* Sub-SImode register sources are spelled with the %k (32-bit)
	 register form.  */
      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
      /* FALLTHRU */
    case 1:
      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
    case 2:
    case 4:
      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
      /* FALLTHRU */
    case 3:
    case 5:
      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
   (set_attr "type" "sselog")
   ;; Legacy-encoded pinsrq needs a REX.W prefix.
   (set (attr "prefix_rex")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
       (const_string "1")
       (const_string "*")))
   ;; Legacy-encoded pinsrw carries the 0x66 data16 prefix.
   (set (attr "prefix_data16")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "1")
       (const_string "*")))
   ;; All except legacy pinsrw use the 0x0f3a opcode map.
   (set (attr "prefix_extra")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "*")
       (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
   (set_attr "mode" "TI")])
14222
;; Masked vinsert{f,i}{32x4,64x2} of a quarter-width vector into a
;; 512-bit vector.  Operand 3 selects which quarter; it is converted to
;; the vec_merge selector expected by the _1_mask insn: all element
;; bits set except the 4 (dword) or 2 (qword) positions being replaced.
(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
  [(match_operand:AVX512_VEC 0 "register_operand")
   (match_operand:AVX512_VEC 1 "register_operand")
   (match_operand:<ssequartermode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:AVX512_VEC 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  int mask, selector;
  mask = INTVAL (operands[3]);
  /* 16-element modes use a 16-bit selector, 8-element modes 8 bits.  */
  selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
	      ? 0xFFFF ^ (0x000F << mask * 4)
	      : 0xFF ^ (0x03 << mask * 2));
  emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
    (operands[0], operands[1], operands[2], GEN_INT (selector),
     operands[4], operands[5]));
  DONE;
})
14242
;; Insert into quarter 0 of a 512-bit vector.  Alternative 0 keeps the
;; upper elements of operand 1 and emits vinsert with immediate 0;
;; alternatives 1-2 have a zero operand 1, so the whole operation
;; degenerates to a plain (v)mov of the quarter vector, which
;; implicitly zeroes the upper bits.  Alternative 2 allows the extended
;; (xmm16+) registers and therefore needs the EVEX vmovdq{a,u}{32,64}
;; spellings.
(define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
	(vec_merge:AVX512_VEC
	  (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
	  (vec_duplicate:AVX512_VEC
		(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
	  (match_operand:SI 3 "const_int_operand" "n,n,n")))]
  "TARGET_AVX512F
   && (INTVAL (operands[3])
       == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
{
  if (which_alternative == 0)
    return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
  switch (<MODE>mode)
    {
    case E_V8DFmode:
      if (misaligned_operand (operands[2], <ssequartermode>mode))
	return "vmovupd\t{%2, %x0|%x0, %2}";
      else
	return "vmovapd\t{%2, %x0|%x0, %2}";
    case E_V16SFmode:
      if (misaligned_operand (operands[2], <ssequartermode>mode))
	return "vmovups\t{%2, %x0|%x0, %2}";
      else
	return "vmovaps\t{%2, %x0|%x0, %2}";
    case E_V8DImode:
      if (misaligned_operand (operands[2], <ssequartermode>mode))
	return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
				      : "vmovdqu\t{%2, %x0|%x0, %2}";
      else
	return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
				      : "vmovdqa\t{%2, %x0|%x0, %2}";
    case E_V16SImode:
      if (misaligned_operand (operands[2], <ssequartermode>mode))
	return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
				      : "vmovdqu\t{%2, %x0|%x0, %2}";
      else
	return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
				      : "vmovdqa\t{%2, %x0|%x0, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "length_immediate" "1,0,0")
   (set_attr "prefix" "evex,vex,evex")
   (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
14290
;; vinsert{f,i}{32x4,64x2}: replace one quarter of operand 1 with
;; operand 2.  Operand 3 is the full vec_merge selector (as produced by
;; the _mask expander above); it is decoded back to the 0-3 quarter
;; index that the instruction immediate takes.
(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
	(vec_merge:AVX512_VEC
	  (match_operand:AVX512_VEC 1 "register_operand" "v")
	  (vec_duplicate:AVX512_VEC
		(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
	  (match_operand:SI 3 "const_int_operand" "n")))]
  "TARGET_AVX512F"
{
  int mask;
  int selector = INTVAL (operands[3]);

  if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
    mask = 0;
  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
    mask = 1;
  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
    mask = 2;
  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
    mask = 3;
  else
    gcc_unreachable ();

  operands[3] = GEN_INT (mask);

  return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
14322
;; Masked vinsert{f,i}{32x8,64x4} of a half-width vector into a 512-bit
;; vector.  Operand 3 selects the low (0) or high (1) half and
;; dispatches to the corresponding vec_set_lo/vec_set_hi insn.
(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
  [(match_operand:AVX512_VEC_2 0 "register_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")
   (match_operand:AVX512_VEC_2 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  int mask = INTVAL (operands[3]);
  if (mask == 0)
    emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
					   operands[2], operands[4],
					   operands[5]));
  else
    emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
					   operands[2], operands[4],
					   operands[5]));
  DONE;
})
14343
;; Replace the low 256-bit half of a 16-element 512-bit vector with
;; operand 2, keeping elements 8-15 of operand 1
;; (vinsert{f,i}32x8 imm 0; AVX512DQ).
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
	(vec_concat:V16FI
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
	  (vec_select:<ssehalfvecmode>
		(match_operand:V16FI 1 "register_operand" "v")
		(parallel [(const_int 8) (const_int 9)
			   (const_int 10) (const_int 11)
			   (const_int 12) (const_int 13)
			   (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
14360
;; Replace the high 256-bit half of a 16-element 512-bit vector with
;; operand 2, keeping elements 0-7 of operand 1
;; (vinsert{f,i}32x8 imm 1; AVX512DQ).
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
	(vec_concat:V16FI
	  (vec_select:<ssehalfvecmode>
		(match_operand:V16FI 1 "register_operand" "v")
		(parallel [(const_int 0) (const_int 1)
			   (const_int 2) (const_int 3)
			   (const_int 4) (const_int 5)
			   (const_int 6) (const_int 7)]))
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
14377
;; Replace the low 256-bit half of an 8-element 512-bit vector with
;; operand 2, keeping elements 4-7 of operand 1
;; (vinsert{f,i}64x4 imm 0; AVX512F).
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
	(vec_concat:V8FI
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
	  (vec_select:<ssehalfvecmode>
		(match_operand:V8FI 1 "register_operand" "v")
		(parallel [(const_int 4) (const_int 5)
			   (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14392
;; Replace the high 256-bit half of an 8-element 512-bit vector with
;; operand 2, keeping elements 0-3 of operand 1
;; (vinsert{f,i}64x4 imm 1; AVX512F).
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
	(vec_concat:V8FI
	  (vec_select:<ssehalfvecmode>
		(match_operand:V8FI 1 "register_operand" "v")
		(parallel [(const_int 0) (const_int 1)
			   (const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14407
;; Masked vshuf{f,i}64x2 for 256-bit modes.  The 2-bit immediate in
;; operand 3 selects one 128-bit lane of operand 1 (bit 0) and one of
;; operand 2 (bit 1); it is unpacked into the four explicit element
;; indices the _1_mask insn matches (operand 2's elements start at 4 in
;; the concatenated vector).
(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
  [(match_operand:VI8F_256 0 "register_operand")
   (match_operand:VI8F_256 1 "register_operand")
   (match_operand:VI8F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI8F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512DQ"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
      (operands[0], operands[1], operands[2],
       GEN_INT (((mask >> 0) & 1) * 2 + 0),
       GEN_INT (((mask >> 0) & 1) * 2 + 1),
       GEN_INT (((mask >> 1) & 1) * 2 + 4),
       GEN_INT (((mask >> 1) & 1) * 2 + 5),
       operands[4], operands[5]));
  DONE;
})
14427
;; vshuf{f,i}64x2 ymm: pick one aligned 128-bit lane from operand 1
;; (indices 0-3, even start) and one from operand 2 (indices 4-7, even
;; start) and re-encode them as the instruction's 2-bit immediate.
;; NOTE(review): guarded by TARGET_AVX512VL despite the avx512dq_ name
;; — presumably because the 256-bit encoded form only needs VL; verify
;; against the expander's TARGET_AVX512DQ guard.
(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
	(vec_select:VI8F_256
	  (vec_concat:<ssedoublemode>
	    (match_operand:VI8F_256 1 "register_operand" "v")
	    (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
	  (parallel [(match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_4_to_7_operand")
		     (match_operand 6 "const_4_to_7_operand")])))]
  "TARGET_AVX512VL
   && (INTVAL (operands[3]) & 1) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && (INTVAL (operands[5]) & 1) == 0
   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
{
  int mask;
  mask = INTVAL (operands[3]) / 2;
  mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
  operands[3] = GEN_INT (mask);
  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14454
;; Masked vshuf{f,i}64x2 for 512-bit modes.  Each 2-bit field of the
;; 8-bit immediate in operand 3 selects one of four 128-bit lanes; the
;; low two fields index operand 1, the high two index operand 2 (whose
;; elements start at 8 in the concatenated vector).  The immediate is
;; unpacked into the eight explicit element indices the _1_mask insn
;; matches.
(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
  [(match_operand:V8FI 0 "register_operand")
   (match_operand:V8FI 1 "register_operand")
   (match_operand:V8FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V8FI 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512F"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
      (operands[0], operands[1], operands[2],
       GEN_INT (((mask >> 0) & 3) * 2),
       GEN_INT (((mask >> 0) & 3) * 2 + 1),
       GEN_INT (((mask >> 2) & 3) * 2),
       GEN_INT (((mask >> 2) & 3) * 2 + 1),
       GEN_INT (((mask >> 4) & 3) * 2 + 8),
       GEN_INT (((mask >> 4) & 3) * 2 + 9),
       GEN_INT (((mask >> 6) & 3) * 2 + 8),
       GEN_INT (((mask >> 6) & 3) * 2 + 9),
       operands[4], operands[5]));
  DONE;
})
14478
;; vshuf{f,i}64x2 zmm: select four aligned 128-bit lanes — two from
;; operand 1 (element indices 0-7, even start) and two from operand 2
;; (indices 8-15, even start) — and re-encode them as the 8-bit
;; immediate (2 bits per lane).
(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
	(vec_select:V8FI
	  (vec_concat:<ssedoublemode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
	  (parallel [(match_operand 3 "const_0_to_7_operand")
		     (match_operand 4 "const_0_to_7_operand")
		     (match_operand 5 "const_0_to_7_operand")
		     (match_operand 6 "const_0_to_7_operand")
		     (match_operand 7 "const_8_to_15_operand")
		     (match_operand 8 "const_8_to_15_operand")
		     (match_operand 9 "const_8_to_15_operand")
		     (match_operand 10 "const_8_to_15_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) & 1) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && (INTVAL (operands[5]) & 1) == 0
   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
   && (INTVAL (operands[7]) & 1) == 0
   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
   && (INTVAL (operands[9]) & 1) == 0
   && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
{
  int mask;
  mask = INTVAL (operands[3]) / 2;
  mask |= INTVAL (operands[5]) / 2 << 2;
  mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
  mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
  operands[3] = GEN_INT (mask);

  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
14516
;; Expand the masked 256-bit vshuf{f,i}32x4 builtin: each of the two
;; immediate bits selects one 128-bit chunk (four 32-bit elements).  Bit 0
;; indexes operand 1 (elements 0-7 of the concatenation), bit 1 indexes
;; operand 2 (elements 8-15).
(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
  [(match_operand:VI4F_256 0 "register_operand")
   (match_operand:VI4F_256 1 "register_operand")
   (match_operand:VI4F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI4F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512VL"
{
  int mask = INTVAL (operands[3]);
  /* Operands 4 and 5 are the masking merge source and the writemask.  */
  emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
	      (operands[0], operands[1], operands[2],
	       GEN_INT (((mask >> 0) & 1) * 4 + 0),
	       GEN_INT (((mask >> 0) & 1) * 4 + 1),
	       GEN_INT (((mask >> 0) & 1) * 4 + 2),
	       GEN_INT (((mask >> 0) & 1) * 4 + 3),
	       GEN_INT (((mask >> 1) & 1) * 4 + 8),
	       GEN_INT (((mask >> 1) & 1) * 4 + 9),
	       GEN_INT (((mask >> 1) & 1) * 4 + 10),
	       GEN_INT (((mask >> 1) & 1) * 4 + 11),
	       operands[4], operands[5]));
  DONE;
})
14540
;; Matcher for 256-bit vshuf{f,i}32x4.  The condition requires each group
;; of four indices to be an aligned run of four consecutive elements (a
;; whole 128-bit chunk); the output code re-encodes the two chunk choices
;; into a 2-bit immediate.
(define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
	(vec_select:VI4F_256
	  (vec_concat:<ssedoublemode>
	    (match_operand:VI4F_256 1 "register_operand" "v")
	    (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
	  (parallel [(match_operand 3 "const_0_to_7_operand")
		     (match_operand 4 "const_0_to_7_operand")
		     (match_operand 5 "const_0_to_7_operand")
		     (match_operand 6 "const_0_to_7_operand")
		     (match_operand 7 "const_8_to_15_operand")
		     (match_operand 8 "const_8_to_15_operand")
		     (match_operand 9 "const_8_to_15_operand")
		     (match_operand 10 "const_8_to_15_operand")])))]
  "TARGET_AVX512VL
   && (INTVAL (operands[3]) & 3) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
   && (INTVAL (operands[7]) & 3) == 0
   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
{
  int mask;
  /* Indices from operand 2 are rebased by subtracting 8.  */
  mask = INTVAL (operands[3]) / 4;
  mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
  operands[3] = GEN_INT (mask);

  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
14576
;; Expand the masked 512-bit vshuf{f,i}32x4 builtin: decode the 8-bit
;; immediate (four 2-bit selectors of 128-bit chunks, each four 32-bit
;; elements) into explicit indices.  The low two fields index operand 1
;; (elements 0-15 of the concatenation), the high two index operand 2
;; (elements 16-31).
(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
  [(match_operand:V16FI 0 "register_operand")
   (match_operand:V16FI 1 "register_operand")
   (match_operand:V16FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V16FI 4 "register_operand")
   (match_operand:HI 5 "register_operand")]
  "TARGET_AVX512F"
{
  int mask = INTVAL (operands[3]);
  /* Operands 4 and 5 are the masking merge source and the writemask.  */
  emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
	      (operands[0], operands[1], operands[2],
	       GEN_INT (((mask >> 0) & 3) * 4),
	       GEN_INT (((mask >> 0) & 3) * 4 + 1),
	       GEN_INT (((mask >> 0) & 3) * 4 + 2),
	       GEN_INT (((mask >> 0) & 3) * 4 + 3),
	       GEN_INT (((mask >> 2) & 3) * 4),
	       GEN_INT (((mask >> 2) & 3) * 4 + 1),
	       GEN_INT (((mask >> 2) & 3) * 4 + 2),
	       GEN_INT (((mask >> 2) & 3) * 4 + 3),
	       GEN_INT (((mask >> 4) & 3) * 4 + 16),
	       GEN_INT (((mask >> 4) & 3) * 4 + 17),
	       GEN_INT (((mask >> 4) & 3) * 4 + 18),
	       GEN_INT (((mask >> 4) & 3) * 4 + 19),
	       GEN_INT (((mask >> 6) & 3) * 4 + 16),
	       GEN_INT (((mask >> 6) & 3) * 4 + 17),
	       GEN_INT (((mask >> 6) & 3) * 4 + 18),
	       GEN_INT (((mask >> 6) & 3) * 4 + 19),
	       operands[4], operands[5]));
  DONE;
})
14608
;; Matcher for 512-bit vshuf{f,i}32x4.  Each group of four indices must be
;; an aligned run of four consecutive elements (one 128-bit chunk); the
;; output code packs the four chunk selectors back into the 8-bit
;; immediate.
(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
	(vec_select:V16FI
	  (vec_concat:<ssedoublemode>
	    (match_operand:V16FI 1 "register_operand" "v")
	    (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
	  (parallel [(match_operand 3 "const_0_to_15_operand")
		     (match_operand 4 "const_0_to_15_operand")
		     (match_operand 5 "const_0_to_15_operand")
		     (match_operand 6 "const_0_to_15_operand")
		     (match_operand 7 "const_0_to_15_operand")
		     (match_operand 8 "const_0_to_15_operand")
		     (match_operand 9 "const_0_to_15_operand")
		     (match_operand 10 "const_0_to_15_operand")
		     (match_operand 11 "const_16_to_31_operand")
		     (match_operand 12 "const_16_to_31_operand")
		     (match_operand 13 "const_16_to_31_operand")
		     (match_operand 14 "const_16_to_31_operand")
		     (match_operand 15 "const_16_to_31_operand")
		     (match_operand 16 "const_16_to_31_operand")
		     (match_operand 17 "const_16_to_31_operand")
		     (match_operand 18 "const_16_to_31_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) & 3) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
   && (INTVAL (operands[7]) & 3) == 0
   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
   && (INTVAL (operands[11]) & 3) == 0
   && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
   && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
   && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
   && (INTVAL (operands[15]) & 3) == 0
   && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
   && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
   && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
{
  int mask;
  /* Indices from operand 2 are rebased by subtracting 16.  */
  mask = INTVAL (operands[3]) / 4;
  mask |= INTVAL (operands[7]) / 4 << 2;
  mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
  mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
  operands[3] = GEN_INT (mask);

  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
14662
;; Expand the masked 512-bit vpshufd builtin: the same 8-bit shuffle
;; immediate is applied within each of the four 128-bit lanes, so the four
;; 2-bit selectors are replicated at offsets 0, 4, 8 and 12.
(define_expand "avx512f_pshufdv3_mask"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V16SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16SI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512F"
{
  int mask = INTVAL (operands[2]);
  /* Operands 3 and 4 are the masking merge source and the writemask.  */
  emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
				       GEN_INT ((mask >> 0) & 3),
				       GEN_INT ((mask >> 2) & 3),
				       GEN_INT ((mask >> 4) & 3),
				       GEN_INT ((mask >> 6) & 3),
				       GEN_INT (((mask >> 0) & 3) + 4),
				       GEN_INT (((mask >> 2) & 3) + 4),
				       GEN_INT (((mask >> 4) & 3) + 4),
				       GEN_INT (((mask >> 6) & 3) + 4),
				       GEN_INT (((mask >> 0) & 3) + 8),
				       GEN_INT (((mask >> 2) & 3) + 8),
				       GEN_INT (((mask >> 4) & 3) + 8),
				       GEN_INT (((mask >> 6) & 3) + 8),
				       GEN_INT (((mask >> 0) & 3) + 12),
				       GEN_INT (((mask >> 2) & 3) + 12),
				       GEN_INT (((mask >> 4) & 3) + 12),
				       GEN_INT (((mask >> 6) & 3) + 12),
				       operands[3], operands[4]));
  DONE;
})
14692
;; Matcher for 512-bit vpshufd.  The condition checks that the permutation
;; in lanes 1-3 is the lane-0 permutation shifted by 4, 8 and 12, i.e. the
;; same in-lane shuffle repeated; the output code rebuilds the 8-bit
;; immediate from the lane-0 indices.
(define_insn "avx512f_pshufd_1<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_select:V16SI
	  (match_operand:V16SI 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (match_operand 6 "const_4_to_7_operand")
		     (match_operand 7 "const_4_to_7_operand")
		     (match_operand 8 "const_4_to_7_operand")
		     (match_operand 9 "const_4_to_7_operand")
		     (match_operand 10 "const_8_to_11_operand")
		     (match_operand 11 "const_8_to_11_operand")
		     (match_operand 12 "const_8_to_11_operand")
		     (match_operand 13 "const_8_to_11_operand")
		     (match_operand 14 "const_12_to_15_operand")
		     (match_operand 15 "const_12_to_15_operand")
		     (match_operand 16 "const_12_to_15_operand")
		     (match_operand 17 "const_12_to_15_operand")])))]
  "TARGET_AVX512F
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
   && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
   && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
   && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
   && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
   && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "XI")])
14740
;; Expand the masked 256-bit vpshufd builtin via the AVX2 pattern; the
;; in-lane shuffle is replicated into the second 128-bit lane (offset 4).
(define_expand "avx512vl_pshufdv3_mask"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  int mask = INTVAL (operands[2]);
  /* Operands 3 and 4 are the masking merge source and the writemask.  */
  emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
				    GEN_INT ((mask >> 0) & 3),
				    GEN_INT ((mask >> 2) & 3),
				    GEN_INT ((mask >> 4) & 3),
				    GEN_INT ((mask >> 6) & 3),
				    GEN_INT (((mask >> 0) & 3) + 4),
				    GEN_INT (((mask >> 2) & 3) + 4),
				    GEN_INT (((mask >> 4) & 3) + 4),
				    GEN_INT (((mask >> 6) & 3) + 4),
				    operands[3], operands[4]));
  DONE;
})
14762
;; Expand the unmasked 256-bit vpshufd builtin: decode the immediate into
;; explicit indices, replicating the in-lane shuffle at offset 4 for the
;; high 128-bit lane.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
			       GEN_INT ((mask >> 0) & 3),
			       GEN_INT ((mask >> 2) & 3),
			       GEN_INT ((mask >> 4) & 3),
			       GEN_INT ((mask >> 6) & 3),
			       GEN_INT (((mask >> 0) & 3) + 4),
			       GEN_INT (((mask >> 2) & 3) + 4),
			       GEN_INT (((mask >> 4) & 3) + 4),
			       GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})
14781
;; Matcher for 256-bit vpshufd.  The condition requires the high-lane
;; indices to equal the low-lane indices plus 4 (same shuffle in both
;; lanes); the output code rebuilds the 8-bit immediate from the low lane.
(define_insn "avx2_pshufd_1<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_select:V8SI
	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (match_operand 6 "const_4_to_7_operand")
		     (match_operand 7 "const_4_to_7_operand")
		     (match_operand 8 "const_4_to_7_operand")
		     (match_operand 9 "const_4_to_7_operand")])))]
  "TARGET_AVX2
   && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
14814
;; Expand the masked 128-bit vpshufd builtin via the SSE2 pattern.
(define_expand "avx512vl_pshufd_mask"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V4SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  int mask = INTVAL (operands[2]);
  /* Operands 3 and 4 are the masking merge source and the writemask.  */
  emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
				    GEN_INT ((mask >> 0) & 3),
				    GEN_INT ((mask >> 2) & 3),
				    GEN_INT ((mask >> 4) & 3),
				    GEN_INT ((mask >> 6) & 3),
				    operands[3], operands[4]));
  DONE;
})
14832
;; Expand the 128-bit pshufd builtin: split the 8-bit immediate into four
;; 2-bit element selectors for the _1 insn.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "vector_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
			       GEN_INT ((mask >> 0) & 3),
			       GEN_INT ((mask >> 2) & 3),
			       GEN_INT ((mask >> 4) & 3),
			       GEN_INT ((mask >> 6) & 3)));
  DONE;
})
14847
;; Matcher for 128-bit pshufd as a vec_select with four constant indices;
;; the output code packs them into the instruction's 8-bit immediate.
(define_insn "sse2_pshufd_1<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(vec_select:V4SI
	  (match_operand:V4SI 1 "vector_operand" "vBm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
14872
;; 512-bit vpshuflw, kept as an unspec (the full 32-element vec_select
;; form is not modeled here); the immediate is passed straight through.
(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(unspec:V32HI
	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
	  UNSPEC_PSHUFLW))]
  "TARGET_AVX512BW"
  "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14884
;; Expand the masked 256-bit vpshuflw builtin; the low-quadword shuffle is
;; replicated into the second 128-bit lane (element offset 8).
(define_expand "avx512vl_pshuflwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  int mask = INTVAL (operands[2]);
  /* Operands 3 and 4 are the masking merge source and the writemask.  */
  emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
				     GEN_INT ((mask >> 0) & 3),
				     GEN_INT ((mask >> 2) & 3),
				     GEN_INT ((mask >> 4) & 3),
				     GEN_INT ((mask >> 6) & 3),
				     GEN_INT (((mask >> 0) & 3) + 8),
				     GEN_INT (((mask >> 2) & 3) + 8),
				     GEN_INT (((mask >> 4) & 3) + 8),
				     GEN_INT (((mask >> 6) & 3) + 8),
				     operands[3], operands[4]));
  DONE;
})
14906
;; Expand the unmasked 256-bit vpshuflw builtin: decode the immediate,
;; replicating the low-quadword shuffle at offset 8 for the high lane.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
				GEN_INT ((mask >> 0) & 3),
				GEN_INT ((mask >> 2) & 3),
				GEN_INT ((mask >> 4) & 3),
				GEN_INT ((mask >> 6) & 3),
				GEN_INT (((mask >> 0) & 3) + 8),
				GEN_INT (((mask >> 2) & 3) + 8),
				GEN_INT (((mask >> 4) & 3) + 8),
				GEN_INT (((mask >> 6) & 3) + 8)));
  DONE;
})
14925
;; Matcher for 256-bit vpshuflw: only the low quadword of each 128-bit
;; lane is permuted (elements 0-3 and 8-11); the high quadwords stay in
;; place, hence the const_int 4-7 and 12-15 entries.  The condition
;; requires the same shuffle in both lanes (high-lane indices = low-lane
;; indices + 8).
(define_insn "avx2_pshuflw_1<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)
		     (match_operand 6 "const_8_to_11_operand")
		     (match_operand 7 "const_8_to_11_operand")
		     (match_operand 8 "const_8_to_11_operand")
		     (match_operand 9 "const_8_to_11_operand")
		     (const_int 12)
		     (const_int 13)
		     (const_int 14)
		     (const_int 15)])))]
  "TARGET_AVX2
   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
14966
;; Expand the masked 128-bit vpshuflw builtin via the SSE2 pattern.
(define_expand "avx512vl_pshuflw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  int mask = INTVAL (operands[2]);
  /* Operands 3 and 4 are the masking merge source and the writemask.  */
  emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
				     GEN_INT ((mask >> 0) & 3),
				     GEN_INT ((mask >> 2) & 3),
				     GEN_INT ((mask >> 4) & 3),
				     GEN_INT ((mask >> 6) & 3),
				     operands[3], operands[4]));
  DONE;
})
14984
;; Expand the 128-bit pshuflw builtin: split the 8-bit immediate into four
;; 2-bit selectors of the low-quadword elements.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "vector_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
				GEN_INT ((mask >> 0) & 3),
				GEN_INT ((mask >> 2) & 3),
				GEN_INT ((mask >> 4) & 3),
				GEN_INT ((mask >> 6) & 3)));
  DONE;
})
14999
;; Matcher for 128-bit pshuflw: permutes elements 0-3, keeps elements 4-7
;; in place (the const_int entries); the output code packs the four
;; selectors into the instruction's immediate.
(define_insn "sse2_pshuflw_1<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "vector_operand" "vBm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)])))]
  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
15029
;; Expand the unmasked 256-bit vpshufhw builtin: the selectors address the
;; high quadword of each lane, hence the +4 and +12 offsets.
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
				GEN_INT (((mask >> 0) & 3) + 4),
				GEN_INT (((mask >> 2) & 3) + 4),
				GEN_INT (((mask >> 4) & 3) + 4),
				GEN_INT (((mask >> 6) & 3) + 4),
				GEN_INT (((mask >> 0) & 3) + 12),
				GEN_INT (((mask >> 2) & 3) + 12),
				GEN_INT (((mask >> 4) & 3) + 12),
				GEN_INT (((mask >> 6) & 3) + 12)));
  DONE;
})
15048
;; 512-bit vpshufhw, kept as an unspec (the full 32-element vec_select
;; form is not modeled here); the immediate is passed straight through.
(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(unspec:V32HI
	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
	  UNSPEC_PSHUFHW))]
  "TARGET_AVX512BW"
  "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
15060
;; Expand the masked 256-bit vpshufhw builtin; the high-quadword shuffle
;; is replicated into both 128-bit lanes (offsets +4 and +12).
(define_expand "avx512vl_pshufhwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  int mask = INTVAL (operands[2]);
  /* Operands 3 and 4 are the masking merge source and the writemask.  */
  emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
				     GEN_INT (((mask >> 0) & 3) + 4),
				     GEN_INT (((mask >> 2) & 3) + 4),
				     GEN_INT (((mask >> 4) & 3) + 4),
				     GEN_INT (((mask >> 6) & 3) + 4),
				     GEN_INT (((mask >> 0) & 3) + 12),
				     GEN_INT (((mask >> 2) & 3) + 12),
				     GEN_INT (((mask >> 4) & 3) + 12),
				     GEN_INT (((mask >> 6) & 3) + 12),
				     operands[3], operands[4]));
  DONE;
})
15082
;; Matcher for 256-bit vpshufhw: only the high quadword of each 128-bit
;; lane is permuted (elements 4-7 and 12-15); the low quadwords stay in
;; place (const_int 0-3 and 8-11).  The condition requires the same
;; shuffle in both lanes; the immediate is rebuilt from the low-lane
;; indices rebased by -4.
(define_insn "avx2_pshufhw_1<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 2)
		     (const_int 3)
		     (match_operand 2 "const_4_to_7_operand")
		     (match_operand 3 "const_4_to_7_operand")
		     (match_operand 4 "const_4_to_7_operand")
		     (match_operand 5 "const_4_to_7_operand")
		     (const_int 8)
		     (const_int 9)
		     (const_int 10)
		     (const_int 11)
		     (match_operand 6 "const_12_to_15_operand")
		     (match_operand 7 "const_12_to_15_operand")
		     (match_operand 8 "const_12_to_15_operand")
		     (match_operand 9 "const_12_to_15_operand")])))]
  "TARGET_AVX2
   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int mask = 0;
  mask |= (INTVAL (operands[2]) - 4) << 0;
  mask |= (INTVAL (operands[3]) - 4) << 2;
  mask |= (INTVAL (operands[4]) - 4) << 4;
  mask |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
15123
;; Expand the masked 128-bit vpshufhw builtin via the SSE2 pattern; the
;; selectors address the high quadword, hence the +4 offsets.
(define_expand "avx512vl_pshufhw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  int mask = INTVAL (operands[2]);
  /* Operands 3 and 4 are the masking merge source and the writemask.  */
  emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
				     GEN_INT (((mask >> 0) & 3) + 4),
				     GEN_INT (((mask >> 2) & 3) + 4),
				     GEN_INT (((mask >> 4) & 3) + 4),
				     GEN_INT (((mask >> 6) & 3) + 4),
				     operands[3], operands[4]));
  DONE;
})
15141
;; Expand the 128-bit pshufhw builtin: split the 8-bit immediate into four
;; 2-bit selectors of the high-quadword elements (offset +4).
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "vector_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
				GEN_INT (((mask >> 0) & 3) + 4),
				GEN_INT (((mask >> 2) & 3) + 4),
				GEN_INT (((mask >> 4) & 3) + 4),
				GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})
15156
;; Matcher for 128-bit pshufhw: permutes elements 4-7, keeps elements 0-3
;; in place; the immediate is rebuilt from the indices rebased by -4.
(define_insn "sse2_pshufhw_1<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "vector_operand" "vBm")
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 2)
		     (const_int 3)
		     (match_operand 2 "const_4_to_7_operand")
		     (match_operand 3 "const_4_to_7_operand")
		     (match_operand 4 "const_4_to_7_operand")
		     (match_operand 5 "const_4_to_7_operand")])))]
  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
{
  int mask = 0;
  mask |= (INTVAL (operands[2]) - 4) << 0;
  mask |= (INTVAL (operands[3]) - 4) << 2;
  mask |= (INTVAL (operands[4]) - 4) << 4;
  mask |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
15186
;; Load a 32-bit scalar into element 0 of a V4SI, zeroing the upper
;; elements (the vec_merge with a zero vector and mask 1).
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand")
	(vec_merge:V4SI
	  (vec_duplicate:V4SI
	    (match_operand:SI 1 "nonimmediate_operand"))
	  (match_dup 2)
	  (const_int 1)))]
  "TARGET_SSE"
  "operands[2] = CONST0_RTX (V4SImode);")
15196
;; Insert a 32-bit scalar into element 0 of a V4SI, merging with operand 1
;; for the remaining elements; with a zero operand 1 this is a movd load.
;; The GPR->XMM alternative (1) is gated on TARGET_INTER_UNIT_MOVES_TO_VEC
;; via the preferred_for_speed attribute.
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand"       "=v,v,x,x,v")
	(vec_merge:V4SI
	  (vec_duplicate:V4SI
	    (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
	  (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,v")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   %vmovd\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
   (set_attr "mode" "TI,TI,V4SF,SF,SF")
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
	   (symbol_ref "true")))])
15220
;; QI and HI modes handled by pextr patterns.
;; V16QI requires SSE4.1 (pextrb); V8HI only needs SSE2 (pextrw).
(define_mode_iterator PEXTR_MODE12
  [(V16QI "TARGET_SSE4_1") V8HI])
15224
;; Extract a byte/word element to a GPR (%k0 = 32-bit low part) or to
;; memory via pextrb/pextrw.  Alternatives 2/3 use the EVEX-encoded form
;; for the extended register file (AVX512BW).  pextrw-to-register needs no
;; 0F 3A escape, hence the conditional prefix_extra attribute.
(define_insn "*vec_extract<mode>"
  [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
	(vec_select:<ssescalarmode>
	  (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
	  (parallel
	    [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
  "TARGET_SSE2"
  "@
   %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
   %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
   vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set (attr "prefix_extra")
     (if_then_else
       (and (eq_attr "alternative" "0,2")
	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "*")
       (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
   (set_attr "mode" "TI")])
15249
;; Byte/word element extract zero-extended to SI/DI; pextr to a 32-bit
;; register already zero-extends, so the zero_extend is free.
(define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(zero_extend:SWI48
	  (vec_select:<PEXTR_MODE12:ssescalarmode>
	    (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
	    (parallel
	      [(match_operand:SI 2
		"const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
  "TARGET_SSE2"
  "@
   %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
   vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "isa" "*,avx512bw")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set (attr "prefix_extra")
     (if_then_else
       (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
       (const_string "*")
       (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
15273
;; Byte element extract zero-extended to HI via pextrb (writes the full
;; 32-bit register, so the HImode zero_extend is satisfied for free).
(define_insn "*vec_extractv16qi_zext"
  [(set (match_operand:HI 0 "register_operand" "=r,r")
	(zero_extend:HI
	  (vec_select:QI
	    (match_operand:V16QI 1 "register_operand" "x,v")
	    (parallel
	      [(match_operand:SI 2 "const_0_to_15_operand")]))))]
  "TARGET_SSE4_1"
  "@
   %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
   vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "isa" "*,avx512bw")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
15292
;; Byte/word element extract from an in-memory vector: emitted as "#" so
;; it is always split into a plain scalar load later.
(define_insn "*vec_extract<mode>_mem"
  [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
	(vec_select:<ssescalarmode>
	  (match_operand:VI12_128 1 "memory_operand" "o")
	  (parallel
	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
  "TARGET_SSE"
  "#")
15301
;; Extract element 0 of a SI/DI vector; always split (output "#") into a
;; lowpart move by the define_split further below.  The XMM->GPR
;; alternative (1) is gated on TARGET_INTER_UNIT_MOVES_FROM_VEC.
(define_insn "*vec_extract<ssevecmodelower>_0"
  [(set (match_operand:SWI48 0 "nonimmediate_operand"	      "=r,r,v ,m")
	(vec_select:SWI48
	  (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  [(set_attr "isa" "*,sse2,*,*")
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	   ]
	   (symbol_ref "true")))])
15315
;; Extract element 0 of a V2DI on 32-bit targets, where DImode lives in a
;; register pair; always split.  The to-GPR alternative (0) needs SSE4.1
;; (pextrd for the high half, see the following split).
(define_insn "*vec_extractv2di_0_sse"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=r,x ,m")
	(vec_select:DI
	  (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && !TARGET_64BIT
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  [(set_attr "isa" "sse4,*,*")
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "0")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	   ]
	   (symbol_ref "true")))])
15330
;; Split the 32-bit DImode element-0 extraction into its two SImode
;; halves: the low half is a plain lowpart move of the vector register,
;; the high half is a vec_select of SImode element 1 (a pextrd,
;; hence the TARGET_SSE4_1 condition).
(define_split
  [(set (match_operand:DI 0 "general_reg_operand")
	(vec_select:DI
	  (match_operand:V2DI 1 "register_operand")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE4_1 && !TARGET_64BIT
   && reload_completed"
  [(set (match_dup 2) (match_dup 4))
   (set (match_dup 3)
	(vec_select:SI
	  (match_dup 5)
	  (parallel [(const_int 1)])))]
{
  ;; operands[2]/[3] are the lo/hi SImode words of the DImode dest.
  operands[4] = gen_lowpart (SImode, operands[1]);
  operands[5] = gen_lowpart (V4SImode, operands[1]);
  split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
})
15348
;; Element 0 of a vector is just the scalar lowpart of the vector
;; register, so reduce the extraction to an ordinary move.
(define_split
  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
	(vec_select:SWI48x
	  (match_operand:<ssevecmode> 1 "register_operand")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
15357
;; Zero-extend element 0 of V4SI to DImode.  Split after reload (see
;; the zero_extend split below).  Alternative 0 goes through a GPR
;; (x64 only, speed-gated on inter-unit moves); alternatives 1/2 stay
;; in vector registers.
(define_insn "*vec_extractv4si_0_zext_sse4"
  [(set (match_operand:DI 0 "register_operand" "=r,x,v")
	(zero_extend:DI
	  (vec_select:SI
	    (match_operand:V4SI 1 "register_operand" "v,x,v")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE4_1"
  "#"
  [(set_attr "isa" "x64,*,avx512f")
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "0")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
	   ]
	   (symbol_ref "true")))])
15372
;; Pre-SSE4.1 fallback for the same zero-extended element-0 extract;
;; only useful when moving from vector to GPR is acceptable for speed.
(define_insn "*vec_extractv4si_0_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (vec_select:SI
	    (match_operand:V4SI 1 "register_operand" "x")
	    (parallel [(const_int 0)]))))]
  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
  "#")
15381
;; Split the zero-extended element-0 extract into a scalar
;; zero_extend of the vector register's SImode lowpart.
(define_split
  [(set (match_operand:DI 0 "register_operand")
	(zero_extend:DI
	  (vec_select:SI
	    (match_operand:V4SI 1 "register_operand")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
  "operands[1] = gen_lowpart (SImode, operands[1]);")
15391
;; Extract an arbitrary SImode element of V4SI.  Alternatives 0/1 use
;; pextrd directly; alternatives 2-5 instead shift the wanted element
;; down to position 0 with psrldq (byte shift, hence the *4 scaling of
;; the element index) and leave the result in an SSE register.
(define_insn "*vec_extractv4si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
	(vec_select:SI
	  (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
	  (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
  "TARGET_SSE4_1"
{
  switch (which_alternative)
    {
    case 0:
    case 1:
      return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";

    case 2:
    case 3:
      /* psrldq shifts by bytes; convert the element index.  */
      operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
      return "psrldq\t{%2, %0|%0, %2}";

    case 4:
    case 5:
      operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
   (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
   (set (attr "prefix_extra")
     (if_then_else (eq_attr "alternative" "0,1")
		   (const_string "1")
		   (const_string "*")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
   (set_attr "mode" "TI")])
15428
;; Extract an SImode element of V4SI zero-extended to DImode; pextrd
;; into a 32-bit GPR (%k0) zero-extends implicitly.
(define_insn "*vec_extractv4si_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(zero_extend:DI
	  (vec_select:SI
	    (match_operand:V4SI 1 "register_operand" "x,v")
	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
  "TARGET_64BIT && TARGET_SSE4_1"
  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "isa" "*,avx512dq")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
15443
;; SImode element extract from a V4SI in memory; split later into a
;; scalar load from the element's address.
(define_insn "*vec_extractv4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=x,r")
	(vec_select:SI
	  (match_operand:V4SI 1 "memory_operand" "o,o")
	  (parallel [(match_operand 2 "const_0_to_3_operand")])))]
  "TARGET_SSE"
  "#")
15451
;; Zero-extended SImode element extract from memory: after reload,
;; become a zero-extending scalar load from the element's address
;; (offset = index * 4 bytes).
(define_insn_and_split "*vec_extractv4si_zext_mem"
  [(set (match_operand:DI 0 "register_operand" "=x,r")
	(zero_extend:DI
	  (vec_select:SI
	    (match_operand:V4SI 1 "memory_operand" "o,o")
	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
  "TARGET_64BIT && TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
{
  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
})
15465
;; Extract the high (index 1) DImode element of V2DI.  Many strategies:
;; pextrq (alts 0/1), storing the high half with movhps (alt 2),
;; shifting it down by 8 bytes with [v]psrldq (alts 3-5), movhlps
;; (alt 6), or — for a memory source — loading the second quadword
;; directly (alts 7/8, emitted as "#" and split by the memory-extract
;; split below).
(define_insn "*vec_extractv2di_1"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=rm,rm,m,x,x,Yv,x,v,r")
	(vec_select:DI
	  (match_operand:V2DI 1 "nonimmediate_operand"  "x ,v ,v,0,x, v,x,o,o")
	  (parallel [(const_int 1)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vpextrq\t{$1, %1, %0|%0, %1, 1}
   vpextrq\t{$1, %1, %0|%0, %1, 1}
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   movhlps\t{%1, %0|%0, %1}
   #
   #"
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0")
	      (const_string "x64_sse4")
	    (eq_attr "alternative" "1")
	      (const_string "x64_avx512dq")
	    (eq_attr "alternative" "3")
	      (const_string "sse2_noavx")
	    (eq_attr "alternative" "4")
	      (const_string "avx")
	    (eq_attr "alternative" "5")
	      (const_string "avx512bw")
	    (eq_attr "alternative" "6")
	      (const_string "noavx")
	    (eq_attr "alternative" "8")
	      (const_string "x64")
	   ]
	   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "2,6,7")
	      (const_string "ssemov")
	    (eq_attr "alternative" "3,4,5")
	      (const_string "sseishft1")
	    (eq_attr "alternative" "8")
	      (const_string "imov")
	   ]
	   (const_string "sselog1")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "0,1,3,4,5")
		   (const_string "1")
		   (const_string "*")))
   (set (attr "prefix_rex")
     (if_then_else (eq_attr "alternative" "0,1")
		   (const_string "1")
		   (const_string "*")))
   (set (attr "prefix_extra")
     (if_then_else (eq_attr "alternative" "0,1")
		   (const_string "1")
		   (const_string "*")))
   (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
   (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
15522
;; Turn an element extraction from a vector in memory into a scalar
;; load at offset index * element-size.
(define_split
  [(set (match_operand:<ssescalarmode> 0 "register_operand")
	(vec_select:<ssescalarmode>
	  (match_operand:VI_128 1 "memory_operand")
	  (parallel
	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);

  operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
})
15536
;; Extract one 128-bit lane of a 256-bit V2TI register.  The second
;; alternative handles extended EVEX registers by widening to the
;; 512-bit register (%g1) and using vextracti32x4.
(define_insn "*vec_extractv2ti"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
	(vec_select:TI
	  (match_operand:V2TI 1 "register_operand" "x,v")
	  (parallel
	    [(match_operand:SI 2 "const_0_to_1_operand")])))]
  "TARGET_AVX"
  "@
   vextract%~128\t{%2, %1, %0|%0, %1, %2}
   vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "OI")])
15552
;; Extract one 128-bit lane of a 512-bit V4TI register.
(define_insn "*vec_extractv4ti"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
	(vec_select:TI
	  (match_operand:V4TI 1 "register_operand" "v")
	  (parallel
	    [(match_operand:SI 2 "const_0_to_3_operand")])))]
  "TARGET_AVX512F"
  "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
15566
;; Vector modes whose low 128-bit lane can be extracted by the split
;; below.
(define_mode_iterator VEXTRACTI128_MODE
  [(V4TI "TARGET_AVX512F") V2TI])

;; Lane 0 of a wide vector is just its TImode lowpart; a lowpart move
;; is only valid for EVEX-only registers when AVX512VL is available.
(define_split
  [(set (match_operand:TI 0 "nonimmediate_operand")
	(vec_select:TI
	  (match_operand:VEXTRACTI128_MODE 1 "register_operand")
	  (parallel [(const_int 0)])))]
  "TARGET_AVX
   && reload_completed
   && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = gen_lowpart (TImode, operands[1]);")
15580
;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
;; vector modes into vec_extract*.  For 256/512-bit sources the low
;; 128-bit (resp. 256-bit) half is first extracted into a fresh
;; pseudo, narrowing step by step until a 16-byte vector remains whose
;; lowpart is the scalar.
(define_split
  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
	(subreg:SWI48x (match_operand 1 "register_operand") 0))]
  "can_create_pseudo_p ()
   && REG_P (operands[1])
   && VECTOR_MODE_P (GET_MODE (operands[1]))
   && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
       || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
       || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
   && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
  [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
					 (parallel [(const_int 0)])))]
{
  rtx tmp;

  switch (GET_MODE_SIZE (GET_MODE (operands[1])))
    {
    case 64:
      /* 512 -> 256 bits, then fall through for 256 -> 128.  */
      if (<MODE>mode == SImode)
	{
	  tmp = gen_reg_rtx (V8SImode);
	  emit_insn (gen_vec_extract_lo_v16si (tmp,
					       gen_lowpart (V16SImode,
							    operands[1])));
	}
      else
	{
	  tmp = gen_reg_rtx (V4DImode);
	  emit_insn (gen_vec_extract_lo_v8di (tmp,
					      gen_lowpart (V8DImode,
							   operands[1])));
	}
      operands[1] = tmp;
      /* FALLTHRU */
    case 32:
      tmp = gen_reg_rtx (<ssevecmode>mode);
      if (<MODE>mode == SImode)
	emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
							      operands[1])));
      else
	emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
							      operands[1])));
      operands[1] = tmp;
      break;
    case 16:
      operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
      break;
    }
})
15632
;; Build a V2SI from two SImode values, SSE4.1 version.  Choices:
;; insert the second element with [v]pinsrd (alts 0-3), interleave
;; with [v]punpckldq (alts 4-6), load element 0 with the upper element
;; zeroed via movd (alts 7/9), or use the native MMX punpckldq
;; (alt 8).
(define_insn "*vec_concatv2si_sse4_1"
  [(set (match_operand:V2SI 0 "register_operand"
	  "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand"
	  "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
	  (match_operand:SI 2 "nonimm_or_0_operand"
	  " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
   punpckldq\t{%2, %0|%0, %2}
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   %vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
   (set (attr "mmx_isa")
     (if_then_else (eq_attr "alternative" "8,9")
		   (const_string "native")
		   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "7")
	      (const_string "ssemov")
	    (eq_attr "alternative" "8")
	      (const_string "mmxcvt")
	    (eq_attr "alternative" "9")
	      (const_string "mmxmov")
	   ]
	   (const_string "sselog")))
   (set (attr "prefix_extra")
     (if_then_else (eq_attr "alternative" "0,1,2,3")
		   (const_string "1")
		   (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "0,1,2,3")
		   (const_string "1")
		   (const_string "*")))
   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15677
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SI concat: interleave with punpckldq/unpcklps, or a
;; zero-extending movd/movss load when the second element is zero.
(define_insn "*vec_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,x,x,*y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
	  (match_operand:SI 2 "reg_or_0_operand"     " x,C ,x,C,*y,C")))]
  "TARGET_SSE && !TARGET_SSE4_1"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "sse2,sse2,*,*,*,*")
   (set_attr "mmx_isa" "*,*,*,*,native,native")
   (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
15698
;; Build a V4SI from two V2SI halves: punpcklqdq/movlhps for register
;; sources, movhps to merge the second half from memory.
(define_insn "*vec_concatv4si"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,v,x,x,v")
	(vec_concat:V4SI
	  (match_operand:V2SI 1 "register_operand"     " 0,v,0,0,v")
	  (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %q2}
   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
15715
;; V4SI concat with a zero upper half: movq zero-extends the low
;; quadword; movq2dq moves out of an MMX register.
(define_insn "*vec_concatv4si_0"
  [(set (match_operand:V4SI 0 "register_operand"       "=v,x")
	(vec_concat:V4SI
	  (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
	  (match_operand:V2SI 2 "const0_operand"       " C,C")))]
  "TARGET_SSE2"
  "@
   %vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}"
  [(set_attr "mmx_isa" "*,native")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex,orig")
   (set_attr "mode" "TI")])
15729
;; Build a V2DI from two DImode values: insert the high element with
;; [v]pinsrq (alts 0-3, 64-bit only), interleave registers with
;; [v]punpcklqdq or movlhps (alts 4-6), or merge the high half from
;; memory with [v]movhps (alts 7/8).
(define_insn "vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand"
	  "=Yr,*x,x ,v ,x,v ,x,x,v")
	(vec_concat:V2DI
	  (match_operand:DI 1 "register_operand"
	  "  0, 0,x ,Yv,0,Yv,0,0,v")
	  (match_operand:DI 2 "nonimmediate_operand"
	  " rm,rm,rm,rm,x,Yv,x,m,m")))]
  "TARGET_SSE"
  "@
   pinsrq\t{$1, %2, %0|%0, %2, 1}
   pinsrq\t{$1, %2, %0|%0, %2, 1}
   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1")
	      (const_string "x64_sse4_noavx")
	    (eq_attr "alternative" "2")
	      (const_string "x64_avx")
	    (eq_attr "alternative" "3")
	      (const_string "x64_avx512dq")
	    (eq_attr "alternative" "4")
	      (const_string "sse2_noavx")
	    (eq_attr "alternative" "5,8")
	      (const_string "avx")
	   ]
	   (const_string "noavx")))
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,2,3,4,5")
       (const_string "sselog")
       (const_string "ssemov")))
   (set (attr "prefix_rex")
     (if_then_else (eq_attr "alternative" "0,1,2,3")
		   (const_string "1")
		   (const_string "*")))
   (set (attr "prefix_extra")
     (if_then_else (eq_attr "alternative" "0,1,2,3")
		   (const_string "1")
		   (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "0,1,2,3")
		   (const_string "1")
		   (const_string "*")))
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "2")
	      (const_string "vex")
	    (eq_attr "alternative" "3")
	      (const_string "evex")
	    (eq_attr "alternative" "5,8")
	      (const_string "maybe_evex")
	   ]
	   (const_string "orig")))
   (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
15789
;; V2DI concat with a zero high element: movq zero-extends.  The GPR
;; source alternative needs assembler support for inter-unit movq and
;; falls back to movd spelling otherwise.
(define_insn "*vec_concatv2di_0"
  [(set (match_operand:V2DI 0 "register_operand"     "=v,v ,x")
	(vec_concat:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
	  (match_operand:DI 2 "const0_operand"       " C,C ,C")))]
  "TARGET_SSE2"
  "@
   * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
   %vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}"
  [(set_attr "isa" "x64,*,*")
   (set_attr "mmx_isa" "*,*,native")
   (set_attr "type" "ssemov")
   (set_attr "prefix_rex" "1,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,orig")
   (set_attr "mode" "TI")
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "0")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
	   (symbol_ref "true")))])
15811
;; vmovq clears also the higher bits.
;; Set element 0 of a zeroed DImode-element vector: a single vmovq of
;; the 128-bit lowpart (%x0) zero-fills everything above bit 63.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
	(vec_merge:VI8_AVX_AVX512F
	  (vec_duplicate:VI8_AVX_AVX512F
	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
	  (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
	  (const_int 1)))]
  "TARGET_AVX"
  "vmovq\t{%2, %x0|%x0, %2}"
  [(set_attr "isa" "x64,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix_rex" "1,*")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "TI")
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "0")
	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
	   ]
	   (symbol_ref "true")))])
15832
;; Standard-named widening unpack expanders; all the work is done by
;; ix86_expand_sse_unpack (unsigned_p, high_p).

;; Sign-extend the low half of operand 1.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")

;; Sign-extend the high half of operand 1.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")

;; Zero-extend the low half of operand 1.
(define_expand "vec_unpacku_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
15850
;; "Unpack" the low half of a QImode mask register holding a 4- or
;; 8-bit boolean vector: the low bits are already in place, so a plain
;; mask move suffices.  Operand 2 is the element count.
(define_expand "vec_unpacks_sbool_lo_qi"
  [(match_operand:QI 0 "register_operand")
   (match_operand:QI 1 "register_operand")
   (match_operand:QI 2 "const_int_operand")]
  "TARGET_AVX512F"
{
  if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
    FAIL;
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
15862
;; Low-half unpack of full mask registers: the low half of the mask is
;; simply its narrower lowpart, expressed via subregs.

(define_expand "vec_unpacks_lo_hi"
  [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
	(match_operand:HI 1 "register_operand"))]
  "TARGET_AVX512F")

(define_expand "vec_unpacks_lo_si"
  [(set (match_operand:HI 0 "register_operand")
	(subreg:HI (match_operand:SI 1 "register_operand") 0))]
  "TARGET_AVX512F")

(define_expand "vec_unpacks_lo_di"
  [(set (match_operand:SI 0 "register_operand")
	(subreg:SI (match_operand:DI 1 "register_operand") 0))]
  "TARGET_AVX512BW")
15877
;; Zero-extend the high half of operand 1 (see ix86_expand_sse_unpack).
(define_expand "vec_unpacku_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
15883
;; High-half "unpack" of a 4- or 8-bit boolean vector in a QImode mask
;; register: shift the mask right by half the element count.  kshiftrb
;; needs AVX512DQ; otherwise shift in HImode via kshiftrw and take the
;; QImode lowpart back.
(define_expand "vec_unpacks_sbool_hi_qi"
  [(match_operand:QI 0 "register_operand")
   (match_operand:QI 1 "register_operand")
   (match_operand:QI 2 "const_int_operand")]
  "TARGET_AVX512F"
{
  HOST_WIDE_INT nunits = INTVAL (operands[2]);
  if (nunits != 8 && nunits != 4)
    FAIL;
  if (TARGET_AVX512DQ)
    emit_insn (gen_klshiftrtqi (operands[0], operands[1],
				GEN_INT (nunits / 2)));
  else
    {
      rtx tem = gen_reg_rtx (HImode);
      emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
						       QImode),
				  GEN_INT (nunits / 2)));
      emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
    }
  DONE;
})
15906
;; High-half unpack of mask registers: a mask shift right by half the
;; mask width, written with the UNSPEC_MASKOP marker so it matches the
;; kshift patterns.

(define_expand "vec_unpacks_hi_hi"
  [(parallel
     [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
	   (lshiftrt:HI (match_operand:HI 1 "register_operand")
			(const_int 8)))
      (unspec [(const_int 0)] UNSPEC_MASKOP)])]
  "TARGET_AVX512F")

(define_expand "vec_unpacks_hi_<mode>"
  [(parallel
     [(set (subreg:SWI48x
	     (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
	   (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
			    (match_dup 2)))
      (unspec [(const_int 0)] UNSPEC_MASKOP)])]
  "TARGET_AVX512BW"
  "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
15924
15925 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15926 ;;
15927 ;; Miscellaneous
15928 ;;
15929 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15930
;; Unsigned average with rounding: (a + b + 1) >> 1 computed in the
;; double-width mode and truncated back, which is exactly what
;; pavgb/pavgw implement.  Operand <mask_expand_op3> is filled in as
;; the constant-1 rounding addend.
(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
	(truncate:VI12_AVX2_AVX512BW
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (plus:<ssedoublemode>
		(zero_extend:<ssedoublemode>
		  (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
		(zero_extend:<ssedoublemode>
		  (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
	      (match_dup <mask_expand_op3>))
	    (const_int 1))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
{
  operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
})
15948
;; The matching insn for the uavg expander above: pavgb/pavgw (legacy)
;; or vpavgb/vpavgw (VEX/EVEX, optionally masked).
(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
	(truncate:VI12_AVX2_AVX512BW
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (plus:<ssedoublemode>
		(zero_extend:<ssedoublemode>
		  (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
		(zero_extend:<ssedoublemode>
		  (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
	      (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
	    (const_int 1))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pavg<ssemodesuffix>\t{%2, %0|%0, %2}
   vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "<sseinsnmode>")])
15971
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; Sum of absolute byte differences (psadbw), kept as an unspec for
;; that reason.
(define_insn "<sse2_avx2>_psadbw"
  [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
	(unspec:VI8_AVX2_AVX512BW
	  [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
	   (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
	  UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "@
   psadbw\t{%2, %0|%0, %2}
   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
15990
;; movmskps/movmskpd: collect the sign bits of a float vector into a
;; GPR.
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(match_operand:VF_128_256 1 "register_operand" "x")]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Same, combined with a sign or zero extension to DImode; writing the
;; 32-bit destination (%k0) gives the zero-extension for free, and the
;; mask fits in the low bits either way.
(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(any_extend:DI
	  (unspec:SI
	    [(match_operand:VF_128_256 1 "register_operand" "x")]
	    UNSPEC_MOVMSK)))]
  "TARGET_64BIT && TARGET_SSE"
  "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
16013
;; movmsk of a (x < 0) integer comparison: the comparison only
;; produces the sign bits that movmsk reads anyway, so drop it and
;; apply movmsk to operand 1 reinterpreted in the float vector mode.
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(lt:VF_128_256
	     (match_operand:<sseintvecmode> 1 "register_operand" "x")
	     (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; As above, with a sign/zero extension of the mask to DImode.
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(any_extend:DI
	  (unspec:SI
	    [(lt:VF_128_256
	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
	       (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
	    UNSPEC_MOVMSK)))]
  "TARGET_64BIT && TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
16048
;; movmsk of an arithmetic-right-shifted vector: the shift replicates
;; the sign bit, so the sign bits movmsk reads are unchanged — drop
;; the shift and movmsk operand 1 directly.
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(subreg:VF_128_256
	     (ashiftrt:<sseintvecmode>
	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
	       (match_operand:QI 2 "const_int_operand" "n")) 0)]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; As above, with a sign/zero extension of the mask to DImode.
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(any_extend:DI
	  (unspec:SI
	    [(subreg:VF_128_256
	       (ashiftrt:<sseintvecmode>
		 (match_operand:<sseintvecmode> 1 "register_operand" "x")
		 (match_operand:QI 2 "const_int_operand" "n")) 0)]
	    UNSPEC_MOVMSK)))]
  "TARGET_64BIT && TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
16085
;; pmovmskb: collect the sign bits of a byte vector into a GPR.
(define_insn "<sse2_avx2>_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(match_operand:VI1_AVX2 1 "register_operand" "x")]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; pmovmskb zero-extended to DImode: writing the 32-bit destination
;; (%k0) zero-extends for free.
(define_insn "*<sse2_avx2>_pmovmskb_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI
	    [(match_operand:VI1_AVX2 1 "register_operand" "x")]
	    UNSPEC_MOVMSK)))]
  "TARGET_64BIT && TARGET_SSE2"
  "%vpmovmskb\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "ssemov")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; pmovmskb sign-extended to DImode; a 16-byte mask is at most 16 bits
;; so the sign-extension is equivalent to the zero-extension here.
(define_insn "*sse2_pmovmskb_ext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(sign_extend:DI
	  (unspec:SI
	    [(match_operand:V16QI 1 "register_operand" "x")]
	    UNSPEC_MOVMSK)))]
  "TARGET_64BIT && TARGET_SSE2"
  "%vpmovmskb\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "ssemov")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
16135
;; zero_extend of the HImode lowpart of a V16QI pmovmskb result: the
;; 16-bit mask already has zeros above bit 15, so the extension is a
;; no-op; split to the plain pmovmskb pattern before reload.
(define_insn_and_split "*sse2_pmovskb_zexthisi"
  [(set (match_operand:SI 0 "register_operand")
	(zero_extend:SI
	  (subreg:HI
	    (unspec:SI
	      [(match_operand:V16QI 1 "register_operand")]
	      UNSPEC_MOVMSK) 0)))]
  "TARGET_SSE2 && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))])
16148
;; zero_extend of the complemented HImode lowpart of a pmovmskb
;; result: equivalent to XORing the 16-bit mask with 0xffff.
(define_split
  [(set (match_operand:SI 0 "register_operand")
	(zero_extend:SI
	  (not:HI
	    (subreg:HI
	      (unspec:SI
		[(match_operand:V16QI 1 "register_operand")]
		UNSPEC_MOVMSK) 0))))]
  "TARGET_SSE2"
  [(set (match_dup 2)
	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
   (set (match_dup 0)
	(xor:SI (match_dup 2) (const_int 65535)))]
  "operands[2] = gen_reg_rtx (SImode);")
16163
;; pmovmskb of a complemented vector == complement of the mask bits:
;; do the pmovmskb first, then invert the low GET_MODE_NUNITS bits of
;; the scalar result (full NOT for a 32-element mask, masked XOR
;; otherwise).
(define_split
  [(set (match_operand:SI 0 "register_operand")
	(unspec:SI
	  [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  [(set (match_dup 2)
	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
   (set (match_dup 0) (match_dup 3))]
{
  operands[2] = gen_reg_rtx (SImode);
  if (GET_MODE_NUNITS (<MODE>mode) == 32)
    operands[3] = gen_rtx_NOT (SImode, operands[2]);
  else
    {
      operands[3]
	= gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
			SImode);
      operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
    }
})

;; Same transformation when the complement is hidden under a
;; same-size subreg of a vector in a different integer vector mode.
(define_split
  [(set (match_operand:SI 0 "register_operand")
	(unspec:SI
	  [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE2
   && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
   && GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
  [(set (match_dup 2)
	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
   (set (match_dup 0) (match_dup 3))]
{
  operands[2] = gen_reg_rtx (SImode);
  operands[1] = gen_lowpart (<MODE>mode, operands[1]);
  if (GET_MODE_NUNITS (<MODE>mode) == 32)
    operands[3] = gen_rtx_NOT (SImode, operands[2]);
  else
    {
      operands[3]
	= gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
			SImode);
      operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
    }
})
16210
;; pmovmskb already extracts the per-byte sign bits, so a preceding
;; "x < 0" comparison is redundant; split it away and emit the plain
;; pmovmskb pattern.
(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
			(match_operand:VI1_AVX2 2 "const0_operand" "C"))]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "#"
  ""
  [(set (match_dup 0)
	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
  ""
  [(set_attr "type" "ssemov")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
16231
;; Zero-extended DImode variant of the pattern above: drop the redundant
;; "x < 0" comparison feeding pmovmskb, keeping the zero_extend.
(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI
	    [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
			  (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
	    UNSPEC_MOVMSK)))]
  "TARGET_64BIT && TARGET_SSE2"
  "#"
  ""
  [(set (match_dup 0)
	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
  ""
  [(set_attr "type" "ssemov")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
16253
;; Sign-extended DImode variant for V16QI only: drop the redundant
;; "x < 0" comparison feeding pmovmskb, keeping the sign_extend.
(define_insn_and_split "*sse2_pmovmskb_ext_lt"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(sign_extend:DI
	  (unspec:SI
	    [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "const0_operand" "C"))]
	    UNSPEC_MOVMSK)))]
  "TARGET_64BIT && TARGET_SSE2"
  "#"
  ""
  [(set (match_dup 0)
	(sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
  ""
  [(set_attr "type" "ssemov")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
16275
;; Expander for maskmovdqu: byte-masked store of operand 1 under mask
;; operand 2; the destination memory is both read and written (match_dup 0
;; appears as an input), matching the insn pattern below.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
		       (match_operand:V16QI 2 "register_operand")
		       (match_dup 0)]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2")
16283
;; maskmovdqu stores through an implicit %rdi/%edi pointer (constraint "D");
;; when Pmode is narrower than word_mode an explicit addr32 prefix must be
;; emitted by hand.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (mem:V16QI (match_dup 0))]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
{
  /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
     that requires %v to be at the beginning of the opcode name.  */
  if (Pmode != word_mode)
    fputs ("\taddr32", asm_out_file);
  return "%vmaskmovdqu\t{%2, %1|%1, %2}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set (attr "length_address")
     (symbol_ref ("Pmode != word_mode")))
   ;; The implicit %rdi operand confuses default length_vex computation.
   (set (attr "length_vex")
     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "TI")])
16308
;; Load the MXCSR control/status register from memory (volatile: affects
;; global FP state).
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
		    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "%vldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "load")])
16318
;; Store the MXCSR control/status register to memory (volatile: reads
;; global FP state).
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "%vstmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "store")])
16328
;; Flush the cache line containing the given address (%a0 prints the
;; operand as an address).
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
		    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
16337
;; As per AMD and Intel ISA manuals, the first operand is extensions
;; and it goes to %ecx.  The second operand received is hints and it goes
;; to %eax.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
		     (match_operand:SI 1 "register_operand" "a")]
		    UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
;; Since 32bit register operands are implicitly zero extended to 64bit,
;; we only need to set up 32bit registers.
  "mwait"
  [(set_attr "length" "3")])
16351
;; MONITOR: address in %rax/%eax (mode P), extensions in %ecx, hints in
;; %edx.  %^ emits addr32 when Pmode != word_mode, hence the length
;; computation below.
(define_insn "@sse3_monitor_<mode>"
  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")
		     (match_operand:SI 2 "register_operand" "d")]
		    UNSPECV_MONITOR)]
  "TARGET_SSE3"
;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
;; RCX and RDX are used.  Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
  "%^monitor"
  [(set (attr "length")
     (symbol_ref ("(Pmode != word_mode) + 3")))])
16364
16365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16366 ;;
16367 ;; SSSE3 instructions
16368 ;;
16369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16370
;; Codes handled by the SSSE3 horizontal add/subtract patterns below.
(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
16372
;; AVX2 vphaddw/vphsubw (and saturating forms): horizontal pairwise
;; add/sub of words.  The selection parallels encode the cross-lane
;; element order the hardware produces.
(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(ssse3_plusminus:V16HI
	  (vec_select:V16HI
	    (vec_concat:V32HI
	      (match_operand:V16HI 1 "register_operand" "x")
	      (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
	    (parallel
	      [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
	       (const_int 16) (const_int 18) (const_int 20) (const_int 22)
	       (const_int 8) (const_int 10) (const_int 12) (const_int 14)
	       (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
	  (vec_select:V16HI
	    (vec_concat:V32HI (match_dup 1) (match_dup 2))
	    (parallel
	      [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
	       (const_int 17) (const_int 19) (const_int 21) (const_int 23)
	       (const_int 9) (const_int 11) (const_int 13) (const_int 15)
	       (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
  "TARGET_AVX2"
  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
16398
;; SSSE3 phaddw/phsubw (and saturating forms) on V8HI: even-index
;; elements paired with odd-index elements of the concatenated operands.
(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
	(ssse3_plusminus:V8HI
	  (vec_select:V8HI
	    (vec_concat:V16HI
	      (match_operand:V8HI 1 "register_operand" "0,x")
	      (match_operand:V8HI 2 "vector_operand" "xBm,xm"))
	    (parallel
	      [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
	       (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))
	  (vec_select:V8HI
	    (vec_concat:V16HI (match_dup 1) (match_dup 2))
	    (parallel
	      [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
	       (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))]
  "TARGET_SSSE3"
  "@
   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
   vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
16425
;; MMX phaddw/phsubw (and saturating forms).  When allocated to SSE
;; registers the MMX form is split into the V8HI SSE pattern above, then
;; the high result half is moved back down.
(define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
	(ssse3_plusminus:V4HI
	  (vec_select:V4HI
	    (vec_concat:V8HI
	      (match_operand:V4HI 1 "register_operand" "0,0,Yv")
	      (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
	    (parallel
	      [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
	  (vec_select:V4HI
	    (vec_concat:V8HI (match_dup 1) (match_dup 2))
	    (parallel
	      [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
  "@
   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
   #
   #"
  "TARGET_SSSE3 && reload_completed
   && SSE_REGNO_P (REGNO (operands[0]))"
  [(const_int 0)]
{
  /* Generate SSE version of the operation.  */
  rtx op0 = lowpart_subreg (V8HImode, operands[0],
			    GET_MODE (operands[0]));
  rtx op1 = lowpart_subreg (V8HImode, operands[1],
			    GET_MODE (operands[1]));
  rtx op2 = lowpart_subreg (V8HImode, operands[2],
			    GET_MODE (operands[2]));
  emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
  ix86_move_vector_high_sse_to_mmx (op0);
  DONE;
}
  [(set_attr "mmx_isa" "native,sse_noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI,TI,TI")])
16465
;; AVX2 vphaddd/vphsubd: horizontal pairwise add/sub of doublewords,
;; with the cross-lane element order encoded by the parallels.
(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(plusminus:V8SI
	  (vec_select:V8SI
	    (vec_concat:V16SI
	      (match_operand:V8SI 1 "register_operand" "x")
	      (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
	    (parallel
	      [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
	       (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
	  (vec_select:V8SI
	    (vec_concat:V16SI (match_dup 1) (match_dup 2))
	    (parallel
	      [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
	       (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
  "TARGET_AVX2"
  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
16487
;; SSSE3 phaddd/phsubd on V4SI: even-index elements paired with
;; odd-index elements of the concatenated operands.
(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
	(plusminus:V4SI
	  (vec_select:V4SI
	    (vec_concat:V8SI
	      (match_operand:V4SI 1 "register_operand" "0,x")
	      (match_operand:V4SI 2 "vector_operand" "xBm,xm"))
	    (parallel
	      [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
	  (vec_select:V4SI
	    (vec_concat:V8SI (match_dup 1) (match_dup 2))
	    (parallel
	      [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
  "TARGET_SSSE3"
  "@
   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
   vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
16512
;; MMX phaddd/phsubd.  When allocated to SSE registers the MMX form is
;; split into the V4SI SSE pattern above, then the high result half is
;; moved back down.
(define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
	(plusminus:V2SI
	  (vec_select:V2SI
	    (vec_concat:V4SI
	      (match_operand:V2SI 1 "register_operand" "0,0,Yv")
	      (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
	    (parallel [(const_int 0) (const_int 2)]))
	  (vec_select:V2SI
	    (vec_concat:V4SI (match_dup 1) (match_dup 2))
	    (parallel [(const_int 1) (const_int 3)]))))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
  "@
   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
   #
   #"
  "TARGET_SSSE3 && reload_completed
   && SSE_REGNO_P (REGNO (operands[0]))"
  [(const_int 0)]
{
  /* Generate SSE version of the operation.  */
  rtx op0 = lowpart_subreg (V4SImode, operands[0],
			    GET_MODE (operands[0]));
  rtx op1 = lowpart_subreg (V4SImode, operands[1],
			    GET_MODE (operands[1]));
  rtx op2 = lowpart_subreg (V4SImode, operands[2],
			    GET_MODE (operands[2]));
  emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
  ix86_move_vector_high_sse_to_mmx (op0);
  DONE;
}
  [(set_attr "mmx_isa" "native,sse_noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI,TI,TI")])
16550
;; AVX2 vpmaddubsw: multiply unsigned bytes of operand 1 with signed
;; bytes of operand 2, then saturating-add each even/odd product pair
;; into a word result.
(define_insn "avx2_pmaddubsw256"
  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
	(ss_plus:V16HI
	  (mult:V16HI
	    (zero_extend:V16HI
	      (vec_select:V16QI
		(match_operand:V32QI 1 "register_operand" "x,v")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)
			   (const_int 16) (const_int 18)
			   (const_int 20) (const_int 22)
			   (const_int 24) (const_int 26)
			   (const_int 28) (const_int 30)])))
	    (sign_extend:V16HI
	      (vec_select:V16QI
		(match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)
			   (const_int 16) (const_int 18)
			   (const_int 20) (const_int 22)
			   (const_int 24) (const_int 26)
			   (const_int 28) (const_int 30)]))))
	  (mult:V16HI
	    (zero_extend:V16HI
	      (vec_select:V16QI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)
			   (const_int 17) (const_int 19)
			   (const_int 21) (const_int 23)
			   (const_int 25) (const_int 27)
			   (const_int 29) (const_int 31)])))
	    (sign_extend:V16HI
	      (vec_select:V16QI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)
			   (const_int 17) (const_int 19)
			   (const_int 21) (const_int 23)
			   (const_int 25) (const_int 27)
			   (const_int 29) (const_int 31)]))))))]
  "TARGET_AVX2"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,avx512bw")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "OI")])
16605
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; AVX512BW vpmaddubsw (optionally masked): modeled as an unspec for the
;; reason above; operands are in the double-width byte vector mode.
(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
          (unspec:VI2_AVX512VL
            [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
             (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
             UNSPEC_PMADDUBSW512))]
  "TARGET_AVX512BW"
  ;; Stray ';' removed after the template string: in md syntax it only
  ;; opened an empty comment, a leftover from C-style editing.
  "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
16619
;; AVX512BW vpmulhrsw: ((a * b) >> 14 + 1) >> 1 per word element,
;; i.e. a rounded high-half multiply, optionally masked.
(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(truncate:V32HI
	  (lshiftrt:V32SI
	    (plus:V32SI
	      (lshiftrt:V32SI
		(mult:V32SI
		  (sign_extend:V32SI
		    (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
		  (sign_extend:V32SI
		    (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
		(const_int 14))
	      (const_vector:V32HI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_AVX512BW"
  "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
16654
;; SSE/AVX pmaddubsw on V16QI inputs: unsigned bytes of operand 1 times
;; signed bytes of operand 2, even/odd word products saturating-added.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
	(ss_plus:V8HI
	  (mult:V8HI
	    (zero_extend:V8HI
	      (vec_select:V8QI
		(match_operand:V16QI 1 "register_operand" "0,x,v")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)])))
	    (sign_extend:V8HI
	      (vec_select:V8QI
		(match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)]))))
	  (mult:V8HI
	    (zero_extend:V8HI
	      (vec_select:V8QI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)])))
	    (sign_extend:V8HI
	      (vec_select:V8QI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "mode" "TI")])
16698
;; MMX pmaddubsw on V8QI inputs (also usable in SSE registers when
;; TARGET_MMX_WITH_SSE).
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
	(ss_plus:V4HI
	  (mult:V4HI
	    (zero_extend:V4HI
	      (vec_select:V4QI
		(match_operand:V8QI 1 "register_operand" "0,0,Yv")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)])))
	    (sign_extend:V4HI
	      (vec_select:V4QI
		(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)]))))
	  (mult:V4HI
	    (zero_extend:V4HI
	      (vec_select:V4QI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)])))
	    (sign_extend:V4HI
	      (vec_select:V4QI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)]))))))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "mmx_isa" "native,*,*")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI,TI,TI")])
16734
;; Modes for the masked pmulhrsw expander (V16HI only with AVX2).
(define_mode_iterator PMULHRSW
  [V8HI (V16HI "TARGET_AVX2")])
16737
;; Masked pmulhrsw expander: rounded high-half multiply merged under a
;; write mask.  Operand 5 is filled in with the all-ones rounding vector.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
  [(set (match_operand:PMULHRSW 0 "register_operand")
	(vec_merge:PMULHRSW
	  (truncate:PMULHRSW
	    (lshiftrt:<ssedoublemode>
	      (plus:<ssedoublemode>
		(lshiftrt:<ssedoublemode>
		  (mult:<ssedoublemode>
		    (sign_extend:<ssedoublemode>
		      (match_operand:PMULHRSW 1 "nonimmediate_operand"))
		    (sign_extend:<ssedoublemode>
		      (match_operand:PMULHRSW 2 "nonimmediate_operand")))
		  (const_int 14))
		(match_dup 5))
	      (const_int 1)))
	  (match_operand:PMULHRSW 3 "register_operand")
	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW && TARGET_AVX512VL"
{
  operands[5] = CONST1_RTX(<MODE>mode);
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
16760
;; pmulhrsw expander: ((a * b) >> 14 + 1) >> 1 per element.  Operand 3
;; is filled in with the all-ones rounding vector.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
  [(set (match_operand:PMULHRSW 0 "register_operand")
	(truncate:PMULHRSW
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (lshiftrt:<ssedoublemode>
		(mult:<ssedoublemode>
		  (sign_extend:<ssedoublemode>
		    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
		  (sign_extend:<ssedoublemode>
		    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
		(const_int 14))
	      (match_dup 3))
	    (const_int 1))))]
  "TARGET_SSSE3"
{
  operands[3] = CONST1_RTX(<MODE>mode);
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
16780
;; Standard-named smulhrs<mode>3 pattern (signed rounded high-half
;; multiply) mapping onto pmulhrsw.
(define_expand "smulhrs<mode>3"
  [(set (match_operand:VI2_AVX2 0 "register_operand")
	(truncate:VI2_AVX2
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (lshiftrt:<ssedoublemode>
		(mult:<ssedoublemode>
		  (sign_extend:<ssedoublemode>
		    (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
		  (sign_extend:<ssedoublemode>
		    (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
		(const_int 14))
	      (match_dup 3))
	    (const_int 1))))]
  "TARGET_SSSE3"
{
  operands[3] = CONST1_RTX(<MODE>mode);
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
16800
;; pmulhrsw/vpmulhrsw insn (optionally masked); the multiply is
;; commutative, hence "%0" on operand 1.
(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
	(truncate:VI2_AVX2
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (lshiftrt:<ssedoublemode>
		(mult:<ssedoublemode>
		  (sign_extend:<ssedoublemode>
		    (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
		  (sign_extend:<ssedoublemode>
		    (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
		(const_int 14))
	      (match_operand:VI2_AVX2 3 "const1_operand"))
	    (const_int 1))))]
  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
16827
;; Standard-named V4HI smulhrs pattern, available only when MMX values
;; live in SSE registers.
(define_expand "smulhrsv4hi3"
  [(set (match_operand:V4HI 0 "register_operand")
	(truncate:V4HI
	  (lshiftrt:V4SI
	    (plus:V4SI
	      (lshiftrt:V4SI
		(mult:V4SI
		  (sign_extend:V4SI
		    (match_operand:V4HI 1 "register_operand"))
		  (sign_extend:V4SI
		    (match_operand:V4HI 2 "register_operand")))
		(const_int 14))
	      (match_dup 3))
	    (const_int 1))))]
  "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
{
  operands[3] = CONST1_RTX(V4HImode);
  ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
})
16847
;; MMX pmulhrsw expander (builtin entry point); memory operands allowed,
;; fixed up by ix86_fixup_binary_operands_no_copy.
(define_expand "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand")
	(truncate:V4HI
	  (lshiftrt:V4SI
	    (plus:V4SI
	      (lshiftrt:V4SI
		(mult:V4SI
		  (sign_extend:V4SI
		    (match_operand:V4HI 1 "register_mmxmem_operand"))
		  (sign_extend:V4SI
		    (match_operand:V4HI 2 "register_mmxmem_operand")))
		(const_int 14))
	      (match_dup 3))
	    (const_int 1))))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
{
  operands[3] = CONST1_RTX(V4HImode);
  ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
})
16867
;; MMX pmulhrsw insn, also usable in SSE registers; commutative inputs
;; ("%0" on operand 1), at most one memory operand.
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
	(truncate:V4HI
	  (lshiftrt:V4SI
	    (plus:V4SI
	      (lshiftrt:V4SI
		(mult:V4SI
		  (sign_extend:V4SI
		    (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
		  (sign_extend:V4SI
		    (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
		(const_int 14))
	      (match_operand:V4HI 3 "const1_operand"))
	    (const_int 1))))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
   && TARGET_SSSE3
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "mmx_isa" "native,*,*")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI,TI,TI")])
16895
;; pshufb/vpshufb (optionally masked): byte shuffle of operand 1
;; controlled by operand 2, modeled as an unspec.
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
	(unspec:VI1_AVX512
	  [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
	   (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
	  UNSPEC_PSHUFB))]
  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "<sseinsnmode>")])
16914
;; MMX pshufb.  In SSE registers it is emulated with the V16QI pshufb:
;; the control bytes are first ANDed with 0xf7 so that bit 3 (which the
;; SSE form uses to index bytes 8-15) is cleared, keeping all indices
;; within the low 8 bytes.  The scratch holds the masked control vector.
(define_insn_and_split "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
		      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
		     UNSPEC_PSHUFB))
   (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
  "@
   pshufb\t{%2, %0|%0, %2}
   #
   #"
  "TARGET_SSSE3 && reload_completed
   && SSE_REGNO_P (REGNO (operands[0]))"
  [(set (match_dup 3) (match_dup 5))
   (set (match_dup 3)
	(and:V4SI (match_dup 3) (match_dup 2)))
   (set (match_dup 0)
	(unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
{
  /* Emulate MMX version of pshufb with SSE version by masking out the
     bit 3 of the shuffle control byte.  */
  operands[0] = lowpart_subreg (V16QImode, operands[0],
				GET_MODE (operands[0]));
  operands[1] = lowpart_subreg (V16QImode, operands[1],
				GET_MODE (operands[1]));
  operands[2] = lowpart_subreg (V4SImode, operands[2],
				GET_MODE (operands[2]));
  operands[4] = lowpart_subreg (V16QImode, operands[3],
				GET_MODE (operands[3]));
  rtx vec_const = ix86_build_const_vector (V4SImode, true,
					   gen_int_mode (0xf7f7f7f7, SImode));
  operands[5] = force_const_mem (V4SImode, vec_const);
}
  [(set_attr "mmx_isa" "native,sse_noavx,avx")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI,TI,TI")])
16952
;; psignb/w/d: apply the sign of operand 2's elements to operand 1's,
;; modeled as an unspec.
(define_insn "<ssse3_avx2>_psign<mode>3"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
	(unspec:VI124_AVX2
	  [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
	   (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
	  UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
16969
;; MMX psignb/w/d, also usable in SSE registers.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
	(unspec:MMXMODEI
	  [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
	   (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
	  UNSPEC_PSIGN))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
  "@
   psign<mmxvecsize>\t{%2, %0|%0, %2}
   psign<mmxvecsize>\t{%2, %0|%0, %2}
   vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "mmx_isa" "native,*,*")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI,TI,TI")])
16987
;; Masked vpalignr.  Operand 3 is a bit count (multiple of 8); it is
;; converted to the byte count the instruction encodes before output.
(define_insn "<ssse3_avx2>_palignr<mode>_mask"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
	(vec_merge:VI1_AVX512
	  (unspec:VI1_AVX512
	    [(match_operand:VI1_AVX512 1 "register_operand" "v")
	     (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
	     (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
	    UNSPEC_PALIGNR)
	  (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
{
  /* Bits -> bytes for the immediate.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17009
;; palignr/vpalignr: byte-wise right shift of the operand pair by the
;; immediate.  Operand 3 arrives as a bit count (multiple of 8) and is
;; converted to bytes before output.
(define_insn "<ssse3_avx2>_palignr<mode>"
  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
	(unspec:SSESCALARMODE
	  [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
	   (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
	   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
	  UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);

  switch (which_alternative)
    {
    case 0:
      return "palignr\t{%3, %2, %0|%0, %2, %3}";
    case 1:
    case 2:
      return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "mode" "<sseinsnmode>")])
17040
;; MMX palignr.  In SSE registers it is emulated by concatenating the
;; two DImode halves into a V2DI register and shifting the V1TI view
;; right by the (bit-count) immediate.
(define_insn_and_split "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
	(unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
		    (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
		   UNSPEC_PALIGNR))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
{
  switch (which_alternative)
    {
    case 0:
      operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
      return "palignr\t{%3, %2, %0|%0, %2, %3}";
    case 1:
    case 2:
      return "#";
    default:
      gcc_unreachable ();
    }
}
  "TARGET_SSSE3 && reload_completed
   && SSE_REGNO_P (REGNO (operands[0]))"
  [(set (match_dup 0)
	(lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
{
  /* Emulate MMX palignrdi with SSE psrldq.  */
  rtx op0 = lowpart_subreg (V2DImode, operands[0],
			    GET_MODE (operands[0]));
  if (TARGET_AVX)
    emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
  else
    {
      /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
      emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
      /* Swap bits 0:63 with bits 64:127.  */
      rtx mask = gen_rtx_PARALLEL (VOIDmode,
				   gen_rtvec (4, GEN_INT (2),
					      GEN_INT (3),
					      GEN_INT (0),
					      GEN_INT (1)));
      rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
      rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
      emit_insn (gen_rtx_SET (op1, op2));
    }
  operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
}
  [(set_attr "mmx_isa" "native,sse_noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI,TI,TI")])
17094
17095 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
17096 ;; modes for abs instruction on pre AVX-512 targets.
17097 (define_mode_iterator VI1248_AVX512VL_AVX512BW
17098 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
17099 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
17100 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
17101 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
17102
;; Vector absolute value: pabsb/pabsw/pabsd/pabsq (VEX-prefixed when AVX
;; is enabled, per the %v in the template).
17103 (define_insn "*abs<mode>2"
17104 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
17105 (abs:VI1248_AVX512VL_AVX512BW
17106 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
17107 "TARGET_SSSE3"
17108 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
17109 [(set_attr "type" "sselog1")
17110 (set_attr "prefix_data16" "1")
17111 (set_attr "prefix_extra" "1")
17112 (set_attr "prefix" "maybe_vex")
17113 (set_attr "mode" "<sseinsnmode>")])
17114
;; Masked vector absolute value.  Two patterns with the same name:
;; dword/qword elements (VI48) need only AVX512F, while byte/word
;; elements (VI12) additionally require AVX512BW.  Operand 2 is the
;; merge source (tied register or zero), operand 3 the k-mask.
17115 (define_insn "abs<mode>2_mask"
17116 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17117 (vec_merge:VI48_AVX512VL
17118 (abs:VI48_AVX512VL
17119 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
17120 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
17121 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
17122 "TARGET_AVX512F"
17123 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17124 [(set_attr "type" "sselog1")
17125 (set_attr "prefix" "evex")
17126 (set_attr "mode" "<sseinsnmode>")])
17127
;; Byte/word variant of the masked abs above (requires AVX512BW).
17128 (define_insn "abs<mode>2_mask"
17129 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17130 (vec_merge:VI12_AVX512VL
17131 (abs:VI12_AVX512VL
17132 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
17133 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
17134 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
17135 "TARGET_AVX512BW"
17136 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17137 [(set_attr "type" "sselog1")
17138 (set_attr "prefix" "evex")
17139 (set_attr "mode" "<sseinsnmode>")])
17140
;; abs expander for all SSE2 vector modes.  When no single pabs
;; instruction exists (no SSSE3, or 64-bit elements without AVX512VL)
;; fall back to the multi-insn SSE2 sequence in ix86_expand_sse2_abs;
;; otherwise let the *abs<mode>2 insn above match.
17141 (define_expand "abs<mode>2"
17142 [(set (match_operand:VI_AVX2 0 "register_operand")
17143 (abs:VI_AVX2
17144 (match_operand:VI_AVX2 1 "vector_operand")))]
17145 "TARGET_SSE2"
17146 {
17147 if (!TARGET_SSSE3
17148 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
17149 && !TARGET_AVX512VL))
17150 {
17151 ix86_expand_sse2_abs (operands[0], operands[1]);
17152 DONE;
17153 }
17154 })
17155
;; MMX-mode abs: native MMX pabs, or the SSE form when the value lives
;; in an SSE register (TARGET_MMX_WITH_SSE).
17156 (define_insn "ssse3_abs<mode>2"
17157 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
17158 (abs:MMXMODEI
17159 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
17160 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17161 "@
17162 pabs<mmxvecsize>\t{%1, %0|%0, %1}
17163 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
17164 [(set_attr "mmx_isa" "native,*")
17165 (set_attr "type" "sselog1")
17166 (set_attr "prefix_rep" "0")
17167 (set_attr "prefix_extra" "1")
17168 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17169 (set_attr "mode" "DI,TI")])
17170
;; Expander exposing MMX-mode abs to the middle end when MMX values are
;; kept in SSE registers; matched by ssse3_abs<mode>2 above.
17171 (define_insn "abs<mode>2"
17172 [(set (match_operand:MMXMODEI 0 "register_operand")
17173 (abs:MMXMODEI
17174 (match_operand:MMXMODEI 1 "register_operand")))]
17175 "TARGET_MMX_WITH_SSE && TARGET_SSSE3")
17176
17177 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17178 ;;
17179 ;; AMD SSE4A instructions
17180 ;;
17181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17182
;; Non-temporal scalar FP store (movntss/movntsd via <ssemodesuffix>).
17183 (define_insn "sse4a_movnt<mode>"
17184 [(set (match_operand:MODEF 0 "memory_operand" "=m")
17185 (unspec:MODEF
17186 [(match_operand:MODEF 1 "register_operand" "x")]
17187 UNSPEC_MOVNT))]
17188 "TARGET_SSE4A"
17189 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
17190 [(set_attr "type" "ssemov")
17191 (set_attr "mode" "<MODE>")])
17192
;; Non-temporal store of element 0 of a 128-bit FP vector.
17193 (define_insn "sse4a_vmmovnt<mode>"
17194 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
17195 (unspec:<ssescalarmode>
17196 [(vec_select:<ssescalarmode>
17197 (match_operand:VF_128 1 "register_operand" "x")
17198 (parallel [(const_int 0)]))]
17199 UNSPEC_MOVNT))]
17200 "TARGET_SSE4A"
17201 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
17202 [(set_attr "type" "ssemov")
17203 (set_attr "mode" "<ssescalarmode>")])
17204
;; EXTRQ with immediate length (operand 2) and bit index (operand 3).
17205 (define_insn "sse4a_extrqi"
17206 [(set (match_operand:V2DI 0 "register_operand" "=x")
17207 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17208 (match_operand 2 "const_0_to_255_operand")
17209 (match_operand 3 "const_0_to_255_operand")]
17210 UNSPEC_EXTRQI))]
17211 "TARGET_SSE4A"
17212 "extrq\t{%3, %2, %0|%0, %2, %3}"
17213 [(set_attr "type" "sse")
17214 (set_attr "prefix_data16" "1")
17215 (set_attr "length_immediate" "2")
17216 (set_attr "mode" "TI")])
17217
;; EXTRQ register form: length/index taken from operand 2's low bytes.
17218 (define_insn "sse4a_extrq"
17219 [(set (match_operand:V2DI 0 "register_operand" "=x")
17220 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17221 (match_operand:V16QI 2 "register_operand" "x")]
17222 UNSPEC_EXTRQ))]
17223 "TARGET_SSE4A"
17224 "extrq\t{%2, %0|%0, %2}"
17225 [(set_attr "type" "sse")
17226 (set_attr "prefix_data16" "1")
17227 (set_attr "mode" "TI")])
17228
;; INSERTQ with immediate length (operand 3) and bit index (operand 4).
17229 (define_insn "sse4a_insertqi"
17230 [(set (match_operand:V2DI 0 "register_operand" "=x")
17231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17232 (match_operand:V2DI 2 "register_operand" "x")
17233 (match_operand 3 "const_0_to_255_operand")
17234 (match_operand 4 "const_0_to_255_operand")]
17235 UNSPEC_INSERTQI))]
17236 "TARGET_SSE4A"
17237 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
17238 [(set_attr "type" "sseins")
17239 (set_attr "prefix_data16" "0")
17240 (set_attr "prefix_rep" "1")
17241 (set_attr "length_immediate" "2")
17242 (set_attr "mode" "TI")])
17243
;; INSERTQ register form: field descriptor carried in operand 2.
17244 (define_insn "sse4a_insertq"
17245 [(set (match_operand:V2DI 0 "register_operand" "=x")
17246 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17247 (match_operand:V2DI 2 "register_operand" "x")]
17248 UNSPEC_INSERTQ))]
17249 "TARGET_SSE4A"
17250 "insertq\t{%2, %0|%0, %2}"
17251 [(set_attr "type" "sseins")
17252 (set_attr "prefix_data16" "0")
17253 (set_attr "prefix_rep" "1")
17254 (set_attr "mode" "TI")])
17255
17256 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17257 ;;
17258 ;; Intel SSE4.1 instructions
17259 ;;
17260 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17261
17262 ;; Mapping of immediate bits for blend instructions
;; (one immediate bit per element, so 2^nelts - 1 is the maximum).
17263 (define_mode_attr blendbits
17264 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
17265
;; blendps/blendpd with immediate selector.  Note operands 1 and 2 are
;; swapped relative to vec_merge order so a set immediate bit selects
;; from operand 2.
17266 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
17267 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17268 (vec_merge:VF_128_256
17269 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17270 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
17271 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
17272 "TARGET_SSE4_1"
17273 "@
17274 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17275 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17276 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17277 [(set_attr "isa" "noavx,noavx,avx")
17278 (set_attr "type" "ssemov")
17279 (set_attr "length_immediate" "1")
17280 (set_attr "prefix_data16" "1,1,*")
17281 (set_attr "prefix_extra" "1")
17282 (set_attr "prefix" "orig,orig,vex")
17283 (set_attr "mode" "<MODE>")])
17284
;; Variable blend: selector is a vector register (xmm0-tied "Yz" for the
;; non-AVX alternatives, as the legacy encoding requires).
17285 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
17286 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17287 (unspec:VF_128_256
17288 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17289 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17290 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
17291 UNSPEC_BLENDV))]
17292 "TARGET_SSE4_1"
17293 "@
17294 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17295 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17296 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17297 [(set_attr "isa" "noavx,noavx,avx")
17298 (set_attr "type" "ssemov")
17299 (set_attr "length_immediate" "1")
17300 (set_attr "prefix_data16" "1,1,*")
17301 (set_attr "prefix_extra" "1")
17302 (set_attr "prefix" "orig,orig,vex")
17303 (set_attr "btver2_decode" "vector,vector,vector")
17304 (set_attr "mode" "<MODE>")])
17305
17306 ;; Also define scalar versions. These are used for conditional move.
17307 ;; Using subregs into vector modes causes register allocation lossage.
17308 ;; These patterns do not allow memory operands because the native
17309 ;; instructions read the full 128-bits.
17310
17311 (define_insn "sse4_1_blendv<ssemodesuffix>"
17312 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
17313 (unspec:MODEF
17314 [(match_operand:MODEF 1 "register_operand" "0,0,x")
17315 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
17316 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
17317 UNSPEC_BLENDV))]
17318 "TARGET_SSE4_1"
17319 {
17320 if (get_attr_mode (insn) == MODE_V4SF)
17321 return (which_alternative == 2
17322 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17323 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
17324 else
17325 return (which_alternative == 2
17326 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17327 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
17328 }
17329 [(set_attr "isa" "noavx,noavx,avx")
17330 (set_attr "type" "ssemov")
17331 (set_attr "length_immediate" "1")
17332 (set_attr "prefix_data16" "1,1,*")
17333 (set_attr "prefix_extra" "1")
17334 (set_attr "prefix" "orig,orig,vex")
17335 (set_attr "btver2_decode" "vector,vector,vector")
17336 (set (attr "mode")
17337 (cond [(match_test "TARGET_AVX")
17338 (const_string "<ssevecmode>")
17339 (match_test "optimize_function_for_size_p (cfun)")
17340 (const_string "V4SF")
17341 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
17342 (const_string "V4SF")
17343 ]
17344 (const_string "<ssevecmode>")))])
17345
;; Combine-generated form where the blend selector is an x < 0 compare:
;; since blendv only reads the sign bits, the compare is redundant and
;; is dropped by the post-reload split, leaving a plain blendv.
17346 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
17347 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17348 (unspec:VF_128_256
17349 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17350 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17351 (lt:VF_128_256
17352 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
17353 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
17354 UNSPEC_BLENDV))]
17355 "TARGET_SSE4_1"
17356 "#"
17357 "&& reload_completed"
17358 [(set (match_dup 0)
17359 (unspec:VF_128_256
17360 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17361 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
17362 [(set_attr "isa" "noavx,noavx,avx")
17363 (set_attr "type" "ssemov")
17364 (set_attr "length_immediate" "1")
17365 (set_attr "prefix_data16" "1,1,*")
17366 (set_attr "prefix_extra" "1")
17367 (set_attr "prefix" "orig,orig,vex")
17368 (set_attr "btver2_decode" "vector,vector,vector")
17369 (set_attr "mode" "<MODE>")])
17370
;; FP suffix / FP vector mode corresponding to an integer vector mode,
;; used to emit blendps/blendpd for integer-element blendv below.
17371 (define_mode_attr ssefltmodesuffix
17372 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
17373
17374 (define_mode_attr ssefltvecmode
17375 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
17376
;; Same sign-bit trick for integer elements: an x < 0 selector wrapped
;; in a byte-mode subreg; split after reload to an FP blendv with all
;; operands lowered to the matching FP vector mode.
17377 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
17378 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
17379 (unspec:<ssebytemode>
17380 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
17381 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
17382 (subreg:<ssebytemode>
17383 (lt:VI48_AVX
17384 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
17385 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
17386 UNSPEC_BLENDV))]
17387 "TARGET_SSE4_1"
17388 "#"
17389 "&& reload_completed"
17390 [(set (match_dup 0)
17391 (unspec:<ssefltvecmode>
17392 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17393 {
17394 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
17395 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
17396 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
17397 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
17398 }
17399 [(set_attr "isa" "noavx,noavx,avx")
17400 (set_attr "type" "ssemov")
17401 (set_attr "length_immediate" "1")
17402 (set_attr "prefix_data16" "1,1,*")
17403 (set_attr "prefix_extra" "1")
17404 (set_attr "prefix" "orig,orig,vex")
17405 (set_attr "btver2_decode" "vector,vector,vector")
17406 (set_attr "mode" "<ssefltvecmode>")])
17407
;; dpps/dppd dot product; the "%" on operand 1 marks it commutative
;; with operand 2.  Immediate operand 3 selects/broadcasts lanes.
17408 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
17409 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17410 (unspec:VF_128_256
17411 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
17412 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17413 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17414 UNSPEC_DP))]
17415 "TARGET_SSE4_1"
17416 "@
17417 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17418 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17419 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17420 [(set_attr "isa" "noavx,noavx,avx")
17421 (set_attr "type" "ssemul")
17422 (set_attr "length_immediate" "1")
17423 (set_attr "prefix_data16" "1,1,*")
17424 (set_attr "prefix_extra" "1")
17425 (set_attr "prefix" "orig,orig,vex")
17426 (set_attr "btver2_decode" "vector,vector,vector")
17427 (set_attr "znver1_decode" "vector,vector,vector")
17428 (set_attr "mode" "<MODE>")])
17429
17430 ;; Mode attribute used by `vmovntdqa' pattern
17431 (define_mode_attr vi8_sse4_1_avx2_avx512
17432 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
17433
;; Non-temporal aligned load (movntdqa) for 128/256/512-bit vectors.
17434 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
17435 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
17436 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
17437 UNSPEC_MOVNTDQA))]
17438 "TARGET_SSE4_1"
17439 "%vmovntdqa\t{%1, %0|%0, %1}"
17440 [(set_attr "isa" "noavx,noavx,avx")
17441 (set_attr "type" "ssemov")
17442 (set_attr "prefix_extra" "1,1,*")
17443 (set_attr "prefix" "orig,orig,maybe_evex")
17444 (set_attr "mode" "<sseinsnmode>")])
17445
;; mpsadbw multiple sum of absolute differences; immediate operand 3
;; selects the source sub-blocks.
17446 (define_insn "<sse4_1_avx2>_mpsadbw"
17447 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17448 (unspec:VI1_AVX2
17449 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17450 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17451 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17452 UNSPEC_MPSADBW))]
17453 "TARGET_SSE4_1"
17454 "@
17455 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17456 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17457 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17458 [(set_attr "isa" "noavx,noavx,avx")
17459 (set_attr "type" "sselog1")
17460 (set_attr "length_immediate" "1")
17461 (set_attr "prefix_extra" "1")
17462 (set_attr "prefix" "orig,orig,vex")
17463 (set_attr "btver2_decode" "vector,vector,vector")
17464 (set_attr "znver1_decode" "vector,vector,vector")
17465 (set_attr "mode" "<sseinsnmode>")])
17466
;; packusdw: pack dwords to words with unsigned saturation, with
;; optional AVX512 masking (alternative 3).
17467 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
17468 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
17469 (vec_concat:VI2_AVX2
17470 (us_truncate:<ssehalfvecmode>
17471 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
17472 (us_truncate:<ssehalfvecmode>
17473 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
17474 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17475 "@
17476 packusdw\t{%2, %0|%0, %2}
17477 packusdw\t{%2, %0|%0, %2}
17478 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
17479 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17480 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
17481 (set_attr "type" "sselog")
17482 (set_attr "prefix_extra" "1")
17483 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
17484 (set_attr "mode" "<sseinsnmode>")])
17485
;; Byte-granular variable blend; legacy encoding requires the selector
;; in xmm0 ("Yz") for the non-AVX alternatives.
17486 (define_insn "<sse4_1_avx2>_pblendvb"
17487 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17488 (unspec:VI1_AVX2
17489 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17490 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17491 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
17492 UNSPEC_BLENDV))]
17493 "TARGET_SSE4_1"
17494 "@
17495 pblendvb\t{%3, %2, %0|%0, %2, %3}
17496 pblendvb\t{%3, %2, %0|%0, %2, %3}
17497 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17498 [(set_attr "isa" "noavx,noavx,avx")
17499 (set_attr "type" "ssemov")
17500 (set_attr "prefix_extra" "1")
17501 (set_attr "length_immediate" "*,*,1")
17502 (set_attr "prefix" "orig,orig,vex")
17503 (set_attr "btver2_decode" "vector,vector,vector")
17504 (set_attr "mode" "<sseinsnmode>")])
17505
;; pblendvb with an x < 0 selector: the compare is redundant because
;; pblendvb reads only the sign bit of each byte, so split (at any
;; time — no split condition) to the plain pblendvb above.
17506 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
17507 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17508 (unspec:VI1_AVX2
17509 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17510 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17511 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
17512 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
17513 UNSPEC_BLENDV))]
17514 "TARGET_SSE4_1"
17515 "#"
17516 ""
17517 [(set (match_dup 0)
17518 (unspec:VI1_AVX2
17519 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17520 ""
17521 [(set_attr "isa" "noavx,noavx,avx")
17522 (set_attr "type" "ssemov")
17523 (set_attr "prefix_extra" "1")
17524 (set_attr "length_immediate" "*,*,1")
17525 (set_attr "prefix" "orig,orig,vex")
17526 (set_attr "btver2_decode" "vector,vector,vector")
17527 (set_attr "mode" "<sseinsnmode>")])
17528
;; pblendw: word blend with 8-bit immediate; operands 1/2 swapped in
;; vec_merge order so a set bit selects from operand 2.
17529 (define_insn "sse4_1_pblendw"
17530 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17531 (vec_merge:V8HI
17532 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
17533 (match_operand:V8HI 1 "register_operand" "0,0,x")
17534 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
17535 "TARGET_SSE4_1"
17536 "@
17537 pblendw\t{%3, %2, %0|%0, %2, %3}
17538 pblendw\t{%3, %2, %0|%0, %2, %3}
17539 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17540 [(set_attr "isa" "noavx,noavx,avx")
17541 (set_attr "type" "ssemov")
17542 (set_attr "prefix_extra" "1")
17543 (set_attr "length_immediate" "1")
17544 (set_attr "prefix" "orig,orig,vex")
17545 (set_attr "mode" "TI")])
17546
17547 ;; The builtin uses an 8-bit immediate. Expand that.
;; V16HI has 16 lanes but the hardware immediate is 8 bits (applied per
;; 128-bit lane), so duplicate the low byte into the high byte here.
17548 (define_expand "avx2_pblendw"
17549 [(set (match_operand:V16HI 0 "register_operand")
17550 (vec_merge:V16HI
17551 (match_operand:V16HI 2 "nonimmediate_operand")
17552 (match_operand:V16HI 1 "register_operand")
17553 (match_operand:SI 3 "const_0_to_255_operand")))]
17554 "TARGET_AVX2"
17555 {
17556 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
17557 operands[3] = GEN_INT (val << 8 | val);
17558 })
17559
;; Matching insn: the 16-bit mask (equal halves, checked by
;; avx2_pblendw_operand) is reduced back to the 8-bit immediate.
17560 (define_insn "*avx2_pblendw"
17561 [(set (match_operand:V16HI 0 "register_operand" "=x")
17562 (vec_merge:V16HI
17563 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
17564 (match_operand:V16HI 1 "register_operand" "x")
17565 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
17566 "TARGET_AVX2"
17567 {
17568 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
17569 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17570 }
17571 [(set_attr "type" "ssemov")
17572 (set_attr "prefix_extra" "1")
17573 (set_attr "length_immediate" "1")
17574 (set_attr "prefix" "vex")
17575 (set_attr "mode" "OI")])
17576
;; vpblendd: dword blend with immediate selector (AVX2).
17577 (define_insn "avx2_pblendd<mode>"
17578 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
17579 (vec_merge:VI4_AVX2
17580 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
17581 (match_operand:VI4_AVX2 1 "register_operand" "x")
17582 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
17583 "TARGET_AVX2"
17584 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17585 [(set_attr "type" "ssemov")
17586 (set_attr "prefix_extra" "1")
17587 (set_attr "length_immediate" "1")
17588 (set_attr "prefix" "vex")
17589 (set_attr "mode" "<sseinsnmode>")])
17590
;; phminposuw: horizontal minimum of unsigned words and its index.
17591 (define_insn "sse4_1_phminposuw"
17592 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17593 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
17594 UNSPEC_PHMINPOSUW))]
17595 "TARGET_SSE4_1"
17596 "%vphminposuw\t{%1, %0|%0, %1}"
17597 [(set_attr "isa" "noavx,noavx,avx")
17598 (set_attr "type" "sselog1")
17599 (set_attr "prefix_extra" "1")
17600 (set_attr "prefix" "orig,orig,vex")
17601 (set_attr "mode" "TI")])
17602
;; vpmovsxbw/vpmovzxbw: sign/zero extend 16 bytes to 16 words (AVX2),
;; with optional AVX512 masking.
17603 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
17604 [(set (match_operand:V16HI 0 "register_operand" "=v")
17605 (any_extend:V16HI
17606 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17607 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17608 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17609 [(set_attr "type" "ssemov")
17610 (set_attr "prefix_extra" "1")
17611 (set_attr "prefix" "maybe_evex")
17612 (set_attr "mode" "OI")])
17613
;; Recognize a vec_select of (x, 0) with a pmovzx-shaped permutation as
;; zero extension; split after reload to the vpmovzxbw insn above.
17614 (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1"
17615 [(set (match_operand:V32QI 0 "register_operand" "=v")
17616 (vec_select:V32QI
17617 (vec_concat:V64QI
17618 (match_operand:V32QI 1 "nonimmediate_operand" "vm")
17619 (match_operand:V32QI 2 "const0_operand" "C"))
17620 (match_parallel 3 "pmovzx_parallel"
17621 [(match_operand 4 "const_int_operand" "n")])))]
17622 "TARGET_AVX2"
17623 "#"
17624 "&& reload_completed"
17625 [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
17626 {
17627 operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
17628 operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode);
17629 })
17630
;; Standard-named expander; matched by avx2_<code>v16qiv16hi2 above.
17631 (define_expand "<insn>v16qiv16hi2"
17632 [(set (match_operand:V16HI 0 "register_operand")
17633 (any_extend:V16HI
17634 (match_operand:V16QI 1 "nonimmediate_operand")))]
17635 "TARGET_AVX2")
17636
;; 512-bit variant: extend 32 bytes to 32 words (AVX512BW).
17637 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
17638 [(set (match_operand:V32HI 0 "register_operand" "=v")
17639 (any_extend:V32HI
17640 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
17641 "TARGET_AVX512BW"
17642 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17643 [(set_attr "type" "ssemov")
17644 (set_attr "prefix_extra" "1")
17645 (set_attr "prefix" "evex")
17646 (set_attr "mode" "XI")])
17647
;; pmovzx-shaped vec_select recognizer for the 512-bit case.
17648 (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_1"
17649 [(set (match_operand:V64QI 0 "register_operand" "=v")
17650 (vec_select:V64QI
17651 (vec_concat:V128QI
17652 (match_operand:V64QI 1 "nonimmediate_operand" "vm")
17653 (match_operand:V64QI 2 "const0_operand" "C"))
17654 (match_parallel 3 "pmovzx_parallel"
17655 [(match_operand 4 "const_int_operand" "n")])))]
17656 "TARGET_AVX512BW"
17657 "#"
17658 "&& reload_completed"
17659 [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
17660 {
17661 operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
17662 operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode);
17663 })
17664
;; Standard-named expander for the 512-bit extension.
17665 (define_expand "<insn>v32qiv32hi2"
17666 [(set (match_operand:V32HI 0 "register_operand")
17667 (any_extend:V32HI
17668 (match_operand:V32QI 1 "nonimmediate_operand")))]
17669 "TARGET_AVX512BW")
17670
;; pmovsxbw/pmovzxbw: extend the low 8 bytes of a 128-bit register to
;; 8 words.
17671 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
17672 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17673 (any_extend:V8HI
17674 (vec_select:V8QI
17675 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17676 (parallel [(const_int 0) (const_int 1)
17677 (const_int 2) (const_int 3)
17678 (const_int 4) (const_int 5)
17679 (const_int 6) (const_int 7)]))))]
17680 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17681 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17682 [(set_attr "isa" "noavx,noavx,avx")
17683 (set_attr "type" "ssemov")
17684 (set_attr "prefix_extra" "1")
17685 (set_attr "prefix" "orig,orig,maybe_evex")
17686 (set_attr "mode" "TI")])
17687
;; Memory-source form: extend a 64-bit memory operand directly.
17688 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
17689 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17690 (any_extend:V8HI
17691 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
17692 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17693 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17694 [(set_attr "isa" "noavx,noavx,avx")
17695 (set_attr "type" "ssemov")
17696 (set_attr "prefix_extra" "1")
17697 (set_attr "prefix" "orig,orig,maybe_evex")
17698 (set_attr "mode" "TI")])
17699
;; Pre-reload split: a DI memory load widened via vec_concat and then
;; extended is rewritten as a direct V8QI memory extension (form _1).
17700 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
17701 [(set (match_operand:V8HI 0 "register_operand")
17702 (any_extend:V8HI
17703 (vec_select:V8QI
17704 (subreg:V16QI
17705 (vec_concat:V2DI
17706 (match_operand:DI 1 "memory_operand")
17707 (const_int 0)) 0)
17708 (parallel [(const_int 0) (const_int 1)
17709 (const_int 2) (const_int 3)
17710 (const_int 4) (const_int 5)
17711 (const_int 6) (const_int 7)]))))]
17712 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
17713 && ix86_pre_reload_split ()"
17714 "#"
17715 "&& 1"
17716 [(set (match_dup 0)
17717 (any_extend:V8HI (match_dup 1)))]
17718 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17719
;; pmovzx-shaped vec_select recognizer; post-reload split to the
;; register or memory zero-extension form.
17720 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
17721 [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,v")
17722 (vec_select:V16QI
17723 (vec_concat:V32QI
17724 (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,vm")
17725 (match_operand:V16QI 2 "const0_operand" "C,C,C"))
17726 (match_parallel 3 "pmovzx_parallel"
17727 [(match_operand 4 "const_int_operand" "n,n,n")])))]
17728 "TARGET_SSE4_1"
17729 "#"
17730 "&& reload_completed"
17731 [(set (match_dup 0)
17732 (zero_extend:V8HI
17733 (vec_select:V8QI
17734 (match_dup 1)
17735 (parallel [(const_int 0) (const_int 1)
17736 (const_int 2) (const_int 3)
17737 (const_int 4) (const_int 5)
17738 (const_int 6) (const_int 7)]))))]
17739 {
17740 operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
17741 if (MEM_P (operands[1]))
17742 {
;; Memory source: emit the direct V8QI->V8HI zero extension instead.
17743 operands[1] = lowpart_subreg (V8QImode, operands[1], V16QImode);
17744 operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
17745 emit_insn (gen_rtx_SET (operands[0], operands[1]));
17746 DONE;
17747 }
17748 }
17749 [(set_attr "isa" "noavx,noavx,avx")])
17750
;; Standard-named expander: register sources are widened to V16QI so
;; the sse4_1_<code>v8qiv8hi2 insn matches; memory sources match _1.
17751 (define_expand "<insn>v8qiv8hi2"
17752 [(set (match_operand:V8HI 0 "register_operand")
17753 (any_extend:V8HI
17754 (match_operand:V8QI 1 "nonimmediate_operand")))]
17755 "TARGET_SSE4_1"
17756 {
17757 if (!MEM_P (operands[1]))
17758 {
17759 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
17760 emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], operands[1]));
17761 DONE;
17762 }
17763 })
17764
;; vpmovsxbd/vpmovzxbd: extend 16 bytes to 16 dwords (AVX512F).
17765 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
17766 [(set (match_operand:V16SI 0 "register_operand" "=v")
17767 (any_extend:V16SI
17768 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17769 "TARGET_AVX512F"
17770 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17771 [(set_attr "type" "ssemov")
17772 (set_attr "prefix" "evex")
17773 (set_attr "mode" "XI")])
17774
;; Standard-named expander for the 512-bit byte->dword extension.
17775 (define_expand "<insn>v16qiv16si2"
17776 [(set (match_operand:V16SI 0 "register_operand")
17777 (any_extend:V16SI
17778 (match_operand:V16QI 1 "nonimmediate_operand")))]
17779 "TARGET_AVX512F")
17780
;; Extend the low 8 bytes of a 128-bit register to 8 dwords (AVX2).
17781 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
17782 [(set (match_operand:V8SI 0 "register_operand" "=v")
17783 (any_extend:V8SI
17784 (vec_select:V8QI
17785 (match_operand:V16QI 1 "register_operand" "v")
17786 (parallel [(const_int 0) (const_int 1)
17787 (const_int 2) (const_int 3)
17788 (const_int 4) (const_int 5)
17789 (const_int 6) (const_int 7)]))))]
17790 "TARGET_AVX2 && <mask_avx512vl_condition>"
17791 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17792 [(set_attr "type" "ssemov")
17793 (set_attr "prefix_extra" "1")
17794 (set_attr "prefix" "maybe_evex")
17795 (set_attr "mode" "OI")])
17796
;; Memory-source form: extend a 64-bit memory operand directly.
17797 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
17798 [(set (match_operand:V8SI 0 "register_operand" "=v")
17799 (any_extend:V8SI
17800 (match_operand:V8QI 1 "memory_operand" "m")))]
17801 "TARGET_AVX2 && <mask_avx512vl_condition>"
17802 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17803 [(set_attr "type" "ssemov")
17804 (set_attr "prefix_extra" "1")
17805 (set_attr "prefix" "maybe_evex")
17806 (set_attr "mode" "OI")])
17807
;; Pre-reload split: DI memory load widened via vec_concat then
;; extended is rewritten as a direct V8QI memory extension (form _1).
17808 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
17809 [(set (match_operand:V8SI 0 "register_operand")
17810 (any_extend:V8SI
17811 (vec_select:V8QI
17812 (subreg:V16QI
17813 (vec_concat:V2DI
17814 (match_operand:DI 1 "memory_operand")
17815 (const_int 0)) 0)
17816 (parallel [(const_int 0) (const_int 1)
17817 (const_int 2) (const_int 3)
17818 (const_int 4) (const_int 5)
17819 (const_int 6) (const_int 7)]))))]
17820 "TARGET_AVX2 && <mask_avx512vl_condition>
17821 && ix86_pre_reload_split ()"
17822 "#"
17823 "&& 1"
17824 [(set (match_dup 0)
17825 (any_extend:V8SI (match_dup 1)))]
17826 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17827
;; Standard-named expander: register sources are widened to V16QI so
;; avx2_<code>v8qiv8si2 matches; memory sources match _1/_2.
17828 (define_expand "<insn>v8qiv8si2"
17829 [(set (match_operand:V8SI 0 "register_operand")
17830 (any_extend:V8SI
17831 (match_operand:V8QI 1 "nonimmediate_operand")))]
17832 "TARGET_AVX2"
17833 {
17834 if (!MEM_P (operands[1]))
17835 {
17836 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
17837 emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], operands[1]));
17838 DONE;
17839 }
17840 })
17841
;; pmovsxbd/pmovzxbd: extend the low 4 bytes of a 128-bit register to
;; 4 dwords.
17842 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
17843 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17844 (any_extend:V4SI
17845 (vec_select:V4QI
17846 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17847 (parallel [(const_int 0) (const_int 1)
17848 (const_int 2) (const_int 3)]))))]
17849 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17850 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17851 [(set_attr "isa" "noavx,noavx,avx")
17852 (set_attr "type" "ssemov")
17853 (set_attr "prefix_extra" "1")
17854 (set_attr "prefix" "orig,orig,maybe_evex")
17855 (set_attr "mode" "TI")])
17856
;; Memory-source form: extend a 32-bit memory operand directly.
17857 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
17858 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17859 (any_extend:V4SI
17860 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
17861 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17862 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17863 [(set_attr "isa" "noavx,noavx,avx")
17864 (set_attr "type" "ssemov")
17865 (set_attr "prefix_extra" "1")
17866 (set_attr "prefix" "orig,orig,maybe_evex")
17867 (set_attr "mode" "TI")])
17868
;; Pre-reload split: an SI memory load inserted into a zeroed V4SI
;; (movd-style vec_merge) then extended is rewritten as a direct V4QI
;; memory extension (form _1).
17869 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
17870 [(set (match_operand:V4SI 0 "register_operand")
17871 (any_extend:V4SI
17872 (vec_select:V4QI
17873 (subreg:V16QI
17874 (vec_merge:V4SI
17875 (vec_duplicate:V4SI
17876 (match_operand:SI 1 "memory_operand"))
17877 (const_vector:V4SI
17878 [(const_int 0) (const_int 0)
17879 (const_int 0) (const_int 0)])
17880 (const_int 1)) 0)
17881 (parallel [(const_int 0) (const_int 1)
17882 (const_int 2) (const_int 3)]))))]
17883 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17884 && ix86_pre_reload_split ()"
17885 "#"
17886 "&& 1"
17887 [(set (match_dup 0)
17888 (any_extend:V4SI (match_dup 1)))]
17889 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
17890
;; Standard-named expander: register sources are widened to V16QI so
;; sse4_1_<code>v4qiv4si2 matches; memory sources match _1/_2.
17891 (define_expand "<insn>v4qiv4si2"
17892 [(set (match_operand:V4SI 0 "register_operand")
17893 (any_extend:V4SI
17894 (match_operand:V4QI 1 "nonimmediate_operand")))]
17895 "TARGET_SSE4_1"
17896 {
17897 if (!MEM_P (operands[1]))
17898 {
17899 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
17900 emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], operands[1]));
17901 DONE;
17902 }
17903 })
17904
;; AVX512F vpmovsxwd/vpmovzxwd: extend 16 HImode elements to V16SI.
(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_extend:V16SI
	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
17914
;; Standard-named expander; matches the AVX512F insn above as-is.
(define_expand "<insn>v16hiv16si2"
  [(set (match_operand:V16SI 0 "register_operand")
	(any_extend:V16SI
	  (match_operand:V16HI 1 "nonimmediate_operand")))]
  "TARGET_AVX512F")
17920
;; Recognize an interleave-with-zero permutation (pmovzx_parallel) of a
;; V32HI source and rewrite it after reload as vpmovzxwd on the low half.
(define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (match_operand:V32HI 1 "nonimmediate_operand" "vm")
	    (match_operand:V32HI 2 "const0_operand" "C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n")])))]
  "TARGET_AVX512F"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
{
  ;; Reinterpret both operands in the modes the zero_extend insn expects.
  operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
  operands[1] = lowpart_subreg (V16HImode, operands[1], V32HImode);
})
17937
;; AVX2 vpmovsxwd/vpmovzxwd: extend 8 HImode elements to V8SI.
(define_insn "avx2_<code>v8hiv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
17948
;; Standard-named expander; matches the AVX2 insn above as-is.
(define_expand "<insn>v8hiv8si2"
  [(set (match_operand:V8SI 0 "register_operand")
	(any_extend:V8SI
	  (match_operand:V8HI 1 "nonimmediate_operand")))]
  "TARGET_AVX2")
17954
;; Interleave-with-zero permutation of V16HI == vpmovzxwd of the low
;; half; split after reload into the zero_extend form.
(define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (match_operand:V16HI 1 "nonimmediate_operand" "vm")
	    (match_operand:V16HI 2 "const0_operand" "C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n")])))]
  "TARGET_AVX2"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
{
  ;; Retype operands to the modes used by the zero_extend pattern.
  operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
  operands[1] = lowpart_subreg (V8HImode, operands[1], V16HImode);
})
17971
;; SSE4.1 pmovsxwd/pmovzxwd: extend the low 4 HImode elements of a
;; V8HI register to V4SI.
(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V4SI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])
17986
;; As above, with a V4HI memory source.
(define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V4SI
	  (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])
17998
;; Combine pattern: a DI load concatenated with zero and word-extended
;; is an extend of the V4HI memory double-word; split pre-reload into
;; the direct memory form.
(define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
  [(set (match_operand:V4SI 0 "register_operand")
	(any_extend:V4SI
	  (vec_select:V4HI
	    (subreg:V8HI
	      (vec_concat:V2DI
		(match_operand:DI 1 "memory_operand")
		(const_int 0)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V4SI (match_dup 1)))]
  ;; Re-read the same address as a V4HI memory operand.
  "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
18016
;; Expander: widen a register source to V8HI for the vec_select insn;
;; memory sources match the *_1 pattern directly.
(define_expand "<insn>v4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand")
	(any_extend:V4SI
	  (match_operand:V4HI 1 "nonimmediate_operand")))]
  "TARGET_SSE4_1"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
      emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], operands[1]));
      DONE;
    }
})
18030
;; Interleave-with-zero permutation of V8HI == pmovzxwd of the low four
;; words; split after reload.  A memory source is narrowed to V4HI and
;; emitted as a plain zero_extend instead of the vec_select form.
(define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_3"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
	    (match_operand:V8HI 2 "const0_operand" "C,C,C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(zero_extend:V4SI
	  (vec_select:V4HI
	    (match_dup 1)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
{
  operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
  if (MEM_P (operands[1]))
    {
      ;; Memory: emit zero_extend of the V4HI low part directly.
      operands[1] = lowpart_subreg (V4HImode, operands[1], V8HImode);
      operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
      emit_insn (gen_rtx_SET (operands[0], operands[1]));
      DONE;
    }
}
  [(set_attr "isa" "noavx,noavx,avx")])
18059
;; AVX512F vpmovsxbq/vpmovzxbq: extend the low 8 QImode elements of a
;; V16QI register to V8DI.
(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
18074
;; As above, with a V8QI memory source.
(define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (match_operand:V8QI 1 "memory_operand" "m")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
18084
;; Combine pattern: DI load concatenated with zero, byte-extended to
;; V8DI; split pre-reload into the direct V8QI memory form.
(define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
  [(set (match_operand:V8DI 0 "register_operand")
	(any_extend:V8DI
	  (vec_select:V8QI
	    (subreg:V16QI
	      (vec_concat:V2DI
		(match_operand:DI 1 "memory_operand")
		(const_int 0)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V8DI (match_dup 1)))]
  ;; Re-read the same address as a V8QI memory operand.
  "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
18103
;; Expander: widen a register source to V16QI for the vec_select insn;
;; memory sources match the *_1 pattern directly.
(define_expand "<insn>v8qiv8di2"
  [(set (match_operand:V8DI 0 "register_operand")
	(any_extend:V8DI
	  (match_operand:V8QI 1 "nonimmediate_operand")))]
  "TARGET_AVX512F"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
      emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], operands[1]));
      DONE;
    }
})
18117
;; AVX2 vpmovsxbq/vpmovzxbq: extend the low 4 QImode elements of a
;; V16QI register to V4DI.
(define_insn "avx2_<code>v4qiv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (vec_select:V4QI
	    (match_operand:V16QI 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
18131
;; As above, with a V4QI memory source.
(define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (match_operand:V4QI 1 "memory_operand" "m")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
18142
;; Combine pattern: scalar SI load zero-merged into V4SI, byte-extended
;; to V4DI; split pre-reload into the direct V4QI memory form.
(define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
  [(set (match_operand:V4DI 0 "register_operand")
	(any_extend:V4DI
	  (vec_select:V4QI
	    (subreg:V16QI
	      (vec_merge:V4SI
		(vec_duplicate:V4SI
		  (match_operand:SI 1 "memory_operand"))
		(const_vector:V4SI
		   [(const_int 0) (const_int 0)
		    (const_int 0) (const_int 0)])
		(const_int 1)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V4DI (match_dup 1)))]
  ;; Re-read the same address as a V4QI memory operand.
  "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
18164
;; Expander: widen a register source to V16QI so it can feed the
;; vec_select in avx2_<code>v4qiv4di2; memory sources match that
;; insn's *_1 memory variant directly.
(define_expand "<insn>v4qiv4di2"
  [(set (match_operand:V4DI 0 "register_operand")
	(any_extend:V4DI
	  (match_operand:V4QI 1 "nonimmediate_operand")))]
  "TARGET_AVX2"
{
  if (!MEM_P (operands[1]))
    {
      /* The inner mode passed to simplify_gen_subreg must be the mode
	 of operands[1], i.e. V4QImode (it was wrongly V8QImode, which
	 violates simplify_gen_subreg's requirement that INNERMODE match
	 the operand's mode; cf. the v4qiv4si2 expander above).  */
      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
      emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], operands[1]));
      DONE;
    }
})
18178
;; SSE4.1 pmovsxbq/pmovzxbq: extend the low 2 QImode elements of a
;; V16QI register to V2DI.
(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V2DI
	  (vec_select:V2QI
	    (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])
18192
;; Expander: the source is always a register here (no memory variant
;; exists for the 2-byte load); widen it to V16QI and use the insn above.
(define_expand "<insn>v2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand")
	(any_extend:V2DI
	  (match_operand:V2QI 1 "register_operand")))]
  "TARGET_SSE4_1"
{
  operands[1] = simplify_gen_subreg (V16QImode, operands[1], V2QImode, 0);
  emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], operands[1]));
  DONE;
})
18203
;; AVX512F vpmovsxwq/vpmovzxwq: extend 8 HImode elements to V8DI.
(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
18213
;; Standard-named expander; matches the AVX512F insn above as-is.
(define_expand "<insn>v8hiv8di2"
  [(set (match_operand:V8DI 0 "register_operand")
	(any_extend:V8DI
	  (match_operand:V8HI 1 "nonimmediate_operand")))]
  "TARGET_AVX512F")
18219
;; AVX2 vpmovsxwq/vpmovzxwq: extend the low 4 HImode elements of a
;; V8HI register to V4DI.
(define_insn "avx2_<code>v4hiv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
18233
;; As above, with a V4HI memory source.
(define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (match_operand:V4HI 1 "memory_operand" "m")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
18244
;; Combine pattern: DI load concatenated with zero, word-extended to
;; V4DI; split pre-reload into the direct V4HI memory form.
(define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
  [(set (match_operand:V4DI 0 "register_operand")
	(any_extend:V4DI
	  (vec_select:V4HI
	    (subreg:V8HI
	      (vec_concat:V2DI
		(match_operand:DI 1 "memory_operand")
		(const_int 0)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V4DI (match_dup 1)))]
  ;; Re-read the same address as a V4HI memory operand.
  "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
18262
;; Expander: widen a register source to V8HI for the vec_select insn;
;; memory sources match the *_1 pattern directly.
(define_expand "<insn>v4hiv4di2"
  [(set (match_operand:V4DI 0 "register_operand")
	(any_extend:V4DI
	  (match_operand:V4HI 1 "nonimmediate_operand")))]
  "TARGET_AVX2"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
      emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], operands[1]));
      DONE;
    }
})
18276
;; SSE4.1 pmovsxwq/pmovzxwq: extend the low 2 HImode elements of a
;; V8HI register to V2DI.
(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V2DI
	  (vec_select:V2HI
	    (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])
18290
;; As above, with a V2HI memory source.
(define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V2DI
	  (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])
18302
;; Combine pattern: scalar SI load zero-merged into V4SI, word-extended
;; to V2DI; split pre-reload into the direct V2HI memory form.
(define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
  [(set (match_operand:V2DI 0 "register_operand")
	(any_extend:V2DI
	  (vec_select:V2HI
	    (subreg:V8HI
	      (vec_merge:V4SI
		(vec_duplicate:V4SI
		  (match_operand:SI 1 "memory_operand"))
		(const_vector:V4SI
		   [(const_int 0) (const_int 0)
		    (const_int 0) (const_int 0)])
		(const_int 1)) 0)
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V2DI (match_dup 1)))]
  ;; Re-read the same address as a V2HI memory operand.
  "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
18323
;; Expander: widen a register source to V8HI for the vec_select insn;
;; memory sources match the *_1 pattern directly.
(define_expand "<insn>v2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand")
	(any_extend:V2DI
	  (match_operand:V2HI 1 "nonimmediate_operand")))]
  "TARGET_SSE4_1"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = simplify_gen_subreg (V8HImode, operands[1], V2HImode, 0);
      emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], operands[1]));
      DONE;
    }
})
18337
;; AVX512F vpmovsxdq/vpmovzxdq: extend 8 SImode elements to V8DI.
(define_insn "avx512f_<code>v8siv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
18347
;; Interleave-with-zero permutation of V16SI == vpmovzxdq of the low
;; half; split after reload into the zero_extend form.
(define_insn_and_split "*avx512f_zero_extendv8siv8di2_1"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_select:V16SI
	  (vec_concat:V32SI
	    (match_operand:V16SI 1 "nonimmediate_operand" "vm")
	    (match_operand:V16SI 2 "const0_operand" "C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n")])))]
  "TARGET_AVX512F"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
{
  ;; Retype operands to the modes used by the zero_extend pattern.
  operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
  operands[1] = lowpart_subreg (V8SImode, operands[1], V16SImode);
})
18364
;; Standard-named expander; matches the AVX512F insn above as-is.
(define_expand "<insn>v8siv8di2"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F")
18370
;; AVX2 vpmovsxdq/vpmovzxdq: extend 4 SImode elements to V4DI.
(define_insn "avx2_<code>v4siv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_evex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "OI")])
18381
;; Interleave-with-zero permutation of V8SI == vpmovzxdq of the low
;; half; split after reload into the zero_extend form.
(define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_select:V8SI
	  (vec_concat:V16SI
	    (match_operand:V8SI 1 "nonimmediate_operand" "vm")
	    (match_operand:V8SI 2 "const0_operand" "C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n")])))]
  "TARGET_AVX2"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
{
  ;; Retype operands to the modes used by the zero_extend pattern.
  operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
  operands[1] = lowpart_subreg (V4SImode, operands[1], V8SImode);
})
18398
;; Standard-named expander; matches the AVX2 insn above as-is.
(define_expand "<insn>v4siv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2")
18404
;; SSE4.1 pmovsxdq/pmovzxdq: extend the low 2 SImode elements of a
;; V4SI register to V2DI.
(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V2DI
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])
18418
;; As above, with a V2SI memory source.
(define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V2DI
	  (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])
18430
;; Combine pattern: DI load concatenated with zero, dword-extended to
;; V2DI; split pre-reload into the direct V2SI memory form.
(define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
  [(set (match_operand:V2DI 0 "register_operand")
	(any_extend:V2DI
	  (vec_select:V2SI
	    (subreg:V4SI
	      (vec_concat:V2DI
		(match_operand:DI 1 "memory_operand")
		(const_int 0)) 0)
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V2DI (match_dup 1)))]
  ;; Re-read the same address as a V2SI memory operand.
  "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
18447
;; Interleave-with-zero permutation of V4SI == pmovzxdq of the low two
;; dwords; split after reload.  A memory source is narrowed to V2SI and
;; emitted as a plain zero_extend instead of the vec_select form.
(define_insn_and_split "*sse4_1_zero_extendv2siv2di2_3"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
	(vec_select:V4SI
	  (vec_concat:V8SI
	    (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
	    (match_operand:V4SI 2 "const0_operand" "C,C,C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(zero_extend:V2DI
	  (vec_select:V2SI (match_dup 1)
			   (parallel [(const_int 0) (const_int 1)]))))]
{
  operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
  if (MEM_P (operands[1]))
    {
      ;; Memory: emit zero_extend of the V2SI low part directly.
      operands[1] = lowpart_subreg (V2SImode, operands[1], V4SImode);
      operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
      emit_insn (gen_rtx_SET (operands[0], operands[1]));
      DONE;
    }
}
  [(set_attr "isa" "noavx,noavx,avx")])
18474
;; Expander: widen a register source to V4SI for the vec_select insn;
;; memory sources match the *_1 pattern directly.
(define_expand "<insn>v2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand")
	(any_extend:V2DI
	  (match_operand:V2SI 1 "nonimmediate_operand")))]
  "TARGET_SSE4_1"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = simplify_gen_subreg (V4SImode, operands[1], V2SImode, 0);
      emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], operands[1]));
      DONE;
    }
})
18488
;; vtestps/vtestpd are very similar to comiss and ucomiss when
;; setting FLAGS_REG.  But it is not really a compare instruction.
(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
		    (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
		   UNSPEC_VTESTP))]
  "TARGET_AVX"
  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
18502
;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
;; But it is not really a compare instruction.
(define_insn "<sse4_1>_ptest<mode>"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
		    (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
		   UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "%vptest\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   ;; 256-bit ptest decodes as a vector (multi-uop) insn on btver2.
   (set (attr "btver2_decode")
     (if_then_else
       (match_test "<sseinsnmode>mode==OImode")
     (const_string "vector")
     (const_string "*")))
   (set_attr "mode" "<sseinsnmode>")])
18522
;; ptest on a TFmode (128-bit) value; used for __float128 bit tests.
(define_insn "ptesttf2"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
		    (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
		   UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "%vptest\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])
18535
;; nearbyint: round using the current MXCSR rounding mode without
;; raising inexact (ROUND_NO_EXC).
(define_expand "nearbyint<mode>2"
  [(set (match_operand:VF 0 "register_operand")
	(unspec:VF
	  [(match_operand:VF 1 "vector_operand")
	   (match_dup 2)]
	  UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
18544
;; rint: round using the current MXCSR rounding mode (inexact may be
;; raised, unlike nearbyint above).
(define_expand "rint<mode>2"
  [(set (match_operand:VF 0 "register_operand")
	(unspec:VF
	  [(match_operand:VF 1 "vector_operand")
	   (match_dup 2)]
	  UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "operands[2] = GEN_INT (ROUND_MXCSR);")
18553
;; SSE4.1 roundps/roundpd (and 256-bit AVX forms) with an immediate
;; rounding-control operand.
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
	   (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
	  UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "<MODE>")])
18569
;; Round to integral float, then convert to a signed integer vector.
(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1_128_256 1 "vector_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_SSE4_1"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);

  emit_insn
    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
						       operands[2]));
  emit_insn
    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
  DONE;
})
18585
;; 512-bit rounding is done via vrndscale, which subsumes vround.
(define_expand "avx512f_round<castmode>512"
  [(match_operand:VF_512 0 "register_operand")
   (match_operand:VF_512 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
  DONE;
})
18595
;; 512-bit round-then-convert-to-int: vrndscaleps followed by vcvttps2dq.
(define_expand "avx512f_roundps512_sfix"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V16SF 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_AVX512F"
{
  rtx tmp = gen_reg_rtx (V16SFmode);
  emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
  emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
  DONE;
})
18607
;; Round two DF vectors and pack the results into one SI vector.
;; With AVX (and 256-bit ops not disfavored), concatenate the V2DF
;; inputs into a V4DF and do a single 256-bit round + truncate;
;; otherwise round each input separately and pack.
(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "vector_operand")
   (match_operand:VF2 2 "vector_operand")
   (match_operand:SI 3 "const_0_to_15_operand")]
  "TARGET_SSE4_1"
{
  rtx tmp0, tmp1;

  if (<MODE>mode == V2DFmode
      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      rtx tmp2 = gen_reg_rtx (V4DFmode);

      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
    }
  else
    {
      tmp0 = gen_reg_rtx (<MODE>mode);
      tmp1 = gen_reg_rtx (<MODE>mode);

      emit_insn
       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
							  operands[3]));
      emit_insn
       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
							  operands[3]));
      emit_insn
       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
    }
  DONE;
})
18645
;; Scalar roundss/roundsd: round element 0 of operand 2, merge with the
;; upper elements of operand 1.  The AVX512F alternative uses vrndscale.
(define_insn "sse4_1_round<ssescalarmodesuffix>"
  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
	     (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
	    UNSPEC_ROUND)
	  (match_operand:VF_128 1 "register_operand" "0,0,x,v")
	  (const_int 1)))]
  "TARGET_SSE4_1"
  "@
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
   vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
  [(set_attr "isa" "noavx,noavx,avx,avx512f")
   (set_attr "type" "ssecvt")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex,evex")
   (set_attr "mode" "<MODE>")])
18668
;; Variant where the rounded value is a scalar that is duplicated before
;; the merge (combine canonicalizes some scalar sequences this way).
(define_insn "*sse4_1_round<ssescalarmodesuffix>"
  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
	(vec_merge:VF_128
	  (vec_duplicate:VF_128
	    (unspec:<ssescalarmode>
	      [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
	       (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
	      UNSPEC_ROUND))
	  (match_operand:VF_128 1 "register_operand" "0,0,x,v")
	  (const_int 1)))]
  "TARGET_SSE4_1"
  "@
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx,avx512f")
   (set_attr "type" "ssecvt")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex,evex")
   (set_attr "mode" "<MODE>")])
18692
;; round (round-half-away-from-zero): implemented as
;; trunc (x + copysign (nextafter (0.5, 0.0), x)).  Using the
;; predecessor of 0.5 avoids rounding values like 0.49999...97 up.
;; Only valid when inexact traps are not required (!flag_trapping_math).
(define_expand "round<mode>2"
  [(set (match_dup 3)
	(plus:VF
	  (match_operand:VF 1 "register_operand")
	  (match_dup 2)))
   (set (match_operand:VF 0 "register_operand")
	(unspec:VF
	  [(match_dup 3) (match_dup 4)]
	  UNSPEC_ROUND))]
  "TARGET_SSE4_1 && !flag_trapping_math"
{
  machine_mode scalar_mode;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx half, vec_half;

  scalar_mode = GET_MODE_INNER (<MODE>mode);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (scalar_mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
  half = const_double_from_real_value (pred_half, scalar_mode);

  vec_half = ix86_build_const_vector (<MODE>mode, true, half);
  vec_half = force_reg (<MODE>mode, vec_half);

  /* operands[2] = copysign (pred_half, x) so the bias has x's sign.  */
  operands[2] = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));

  operands[3] = gen_reg_rtx (<MODE>mode);
  operands[4] = GEN_INT (ROUND_TRUNC);
})
18726
;; round, then convert the result to a signed integer vector.
(define_expand "round<mode>2_sfix"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1 1 "register_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_round<mode>2 (tmp, operands[1]));

  emit_insn
    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
  DONE;
})
18740
;; round two DF vectors and pack into one SI vector; with AVX, do one
;; 256-bit round on the concatenated inputs, else round each and pack.
(define_expand "round<mode>2_vec_pack_sfix"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "register_operand")
   (match_operand:VF2 2 "register_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math"
{
  rtx tmp0, tmp1;

  if (<MODE>mode == V2DFmode
      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      rtx tmp2 = gen_reg_rtx (V4DFmode);

      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_roundv4df2 (tmp2, tmp0));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
    }
  else
    {
      tmp0 = gen_reg_rtx (<MODE>mode);
      tmp1 = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
      emit_insn (gen_round<mode>2 (tmp1, operands[2]));

      emit_insn
       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
    }
  DONE;
})
18774
18775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18776 ;;
18777 ;; Intel SSE4.2 string/text processing instructions
18778 ;;
18779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18780
;; Combined PCMPESTR pattern producing all three results (ECX index,
;; XMM0 mask, flags).  Before reload it is split into the single-result
;; patterns below; REG_UNUSED notes tell which of the three outputs are
;; actually live, so dead results emit no instruction at all.
(define_insn_and_split "sse4_2_pcmpestr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 2 "register_operand" "x,x")
	   (match_operand:SI 3 "register_operand" "a,a")
	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
	   (match_operand:SI 5 "register_operand" "d,d")
	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)
	   (match_dup 6)]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)
	   (match_dup 6)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed unless the corresponding register carries a
     REG_UNUSED note.  */
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (ecx)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  if (xmm0)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  /* pcmpestri/pcmpestrm also set the flags, so the flags-only variant
     is needed only when neither of the other results is live.  */
  if (flags && !(ecx || xmm0))
    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
					   operands[2], operands[3],
					   operands[4], operands[5],
					   operands[6]));
  if (!(flags || ecx || xmm0))
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
18840
;; PCMPESTRI: explicit-length string compare returning an index in ECX
;; and setting the flags.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:SI 2 "register_operand" "a,a")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "register_operand" "d,d")
	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "btver2_decode" "vector")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
18868
;; PCMPESTRM: explicit-length string compare returning a mask in XMM0
;; (constraint Yz) and setting the flags.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:SI 2 "register_operand" "a,a")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "register_operand" "d,d")
	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
18896
;; Flags-only PCMPESTR: only the condition codes are live, so either
;; the strm or the stri encoding may be used; the index/mask outputs
;; are mere scratches (clobbers).
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
	   (match_operand:SI 3 "register_operand" "a,a,a,a")
	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
	   (match_operand:SI 5 "register_operand" "d,d,d,d")
	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
	  UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "btver2_decode" "vector,vector,vector,vector")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
18922
;; Combined PCMPISTR pattern (implicit-length variant of
;; sse4_2_pcmpestr above); split before reload into the single-result
;; patterns according to which outputs are live.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 2 "register_operand" "x,x")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed unless the corresponding register carries a
     REG_UNUSED note.  */
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (ecx)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
				     operands[3], operands[4]));
  if (xmm0)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
				     operands[3], operands[4]));
  /* pcmpistri/pcmpistrm also set the flags.  */
  if (flags && !(ecx || xmm0))
    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
					   operands[2], operands[3],
					   operands[4]));
  if (!(flags || ecx || xmm0))
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
18973
;; PCMPISTRI: implicit-length string compare returning an index in ECX
;; and setting the flags.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "TI")])
18997
;; PCMPISTRM: implicit-length string compare returning a mask in XMM0
;; (constraint Yz) and setting the flags.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "TI")])
19021
;; Flags-only PCMPISTR: only the condition codes are live; either
;; encoding works and the other outputs are scratch clobbers.
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
	  UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "btver2_decode" "vector,vector,vector,vector")
   (set_attr "mode" "TI")])
19045
;; Packed float variants
;; Memory mode used for the SF-element gather/scatter prefetches.
(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
		      [(V8DI "V8SF") (V16SI "V16SF")])

;; AVX-512PF gather prefetch, SF elements: wrap the base/index/scale
;; triple into an UNSPEC_VSIBADDR so it matches vsib_mem_operator.
(define_expand "avx512pf_gatherpf<mode>sf"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:<GATHER_SCATTER_SF_MEM_MODE>
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI48_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const_2_to_3_operand")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
					operands[3]), UNSPEC_VSIBADDR);
})

;; Operand 4 selects the prefetch hint: 3 -> vgatherpf0ps (T0),
;; 2 -> vgatherpf1ps (T1).
(define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI48_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const_2_to_3_operand" "n")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  switch (INTVAL (operands[4]))
    {
    case 3:
      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
	 gas changed what it requires incompatibly.  */
      return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
    case 2:
      return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
19095
;; Packed double variants
;; AVX-512PF gather prefetch, DF elements; mirrors the SF expander
;; above but always uses a V8DF memory operand.
(define_expand "avx512pf_gatherpf<mode>df"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:V8DF
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI4_256_8_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const_2_to_3_operand")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
					operands[3]), UNSPEC_VSIBADDR);
})

;; Operand 4 selects the prefetch hint: 3 -> vgatherpf0pd (T0),
;; 2 -> vgatherpf1pd (T1).
(define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:V8DF 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const_2_to_3_operand" "n")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  switch (INTVAL (operands[4]))
    {
    case 3:
      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
	 gas changed what it requires incompatibly.  */
      return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
    case 2:
      return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
19142
;; Packed float variants
;; AVX-512PF scatter prefetch, SF elements.  Hint operand accepts
;; 2/3/6/7 (const2367_operand).
(define_expand "avx512pf_scatterpf<mode>sf"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:<GATHER_SCATTER_SF_MEM_MODE>
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI48_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const2367_operand")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
					operands[3]), UNSPEC_VSIBADDR);
})

;; Hints 3/7 -> vscatterpf0ps, hints 2/6 -> vscatterpf1ps.
(define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI48_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const2367_operand" "n")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  switch (INTVAL (operands[4]))
    {
    case 3:
    case 7:
      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
	 gas changed what it requires incompatibly.  */
      return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
    case 2:
    case 6:
      return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
19191
;; Packed double variants
;; AVX-512PF scatter prefetch, DF elements; mirrors the SF version.
(define_expand "avx512pf_scatterpf<mode>df"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:V8DF
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI4_256_8_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const2367_operand")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
					operands[3]), UNSPEC_VSIBADDR);
})

;; Hints 3/7 -> vscatterpf0pd, hints 2/6 -> vscatterpf1pd.
(define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:V8DF 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const2367_operand" "n")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  switch (INTVAL (operands[4]))
    {
    case 3:
    case 7:
      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
	 gas changed what it requires incompatibly.  */
      return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
    case 2:
    case 6:
      return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
19240
;; AVX-512ER packed 2^x approximation (vexp2ps/pd), with optional
;; masking and SAE via the mask_name/round_saeonly subst attributes.
(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(unspec:VF_512
	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	  UNSPEC_EXP2))]
  "TARGET_AVX512ER"
  "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "prefix" "evex")
   (set_attr "type" "sse")
   (set_attr "mode" "<MODE>")])
19251
;; AVX-512ER packed reciprocal approximation (vrcp28ps/pd).
(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(unspec:VF_512
	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	  UNSPEC_RCP28))]
  "TARGET_AVX512ER"
  "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "prefix" "evex")
   (set_attr "type" "sse")
   (set_attr "mode" "<MODE>")])
19262
;; AVX-512ER scalar reciprocal approximation (vrcp28ss/sd); the low
;; element is the approximation of operand 1, the upper elements are
;; merged from operand 2.
;; Fixed: the Intel-syntax half of the template used the misspelled
;; subst attribute <mask_opernad3>; spell it <mask_operand3> as in the
;; AT&T half and in avx512er_vmrsqrt28 below.
(define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
	    UNSPEC_RCP28)
	  (match_operand:VF_128 2 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512ER"
  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
  [(set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "type" "sse")
   (set_attr "mode" "<MODE>")])
19277
;; AVX-512ER packed reciprocal square-root approximation
;; (vrsqrt28ps/pd).
(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(unspec:VF_512
	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	  UNSPEC_RSQRT28))]
  "TARGET_AVX512ER"
  "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "prefix" "evex")
   (set_attr "type" "sse")
   (set_attr "mode" "<MODE>")])
19288
;; AVX-512ER scalar reciprocal square-root approximation
;; (vrsqrt28ss/sd); upper elements merged from operand 2.
(define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
	    UNSPEC_RSQRT28)
	  (match_operand:VF_128 2 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512ER"
  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
  [(set_attr "length_immediate" "1")
   (set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
19303
19304 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19305 ;;
19306 ;; XOP instructions
19307 ;;
19308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19309
;; Iterate over wrapping (plus) and signed-saturating (ss_plus)
;; accumulation for the XOP multiply/add patterns below; the code
;; attributes select the matching mnemonic fragment.
(define_code_iterator xop_plus [plus ss_plus])

(define_code_attr macs [(plus "macs") (ss_plus "macss")])
(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
19314
19315 ;; XOP parallel integer multiply/add instructions.
19316
;; XOP vpmacs[s]ww/vpmacs[s]dd: elementwise multiply of operands 1 and
;; 2 accumulated into operand 3 ((un)saturated per xop_plus).
(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
  [(set (match_operand:VI24_128 0 "register_operand" "=x")
	(xop_plus:VI24_128
	 (mult:VI24_128
	  (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
	 (match_operand:VI24_128 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
19328
;; XOP vpmacs[s]dql: multiply the even (low) SI elements sign-extended
;; to DI and accumulate into operand 3.
(define_insn "xop_p<macs>dql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(xop_plus:V2DI
	 (mult:V2DI
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
	    (parallel [(const_int 0) (const_int 2)])))
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 2)]))))
	 (match_operand:V2DI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
19346
;; XOP vpmacs[s]dqh: same as above for the odd (high) SI elements.
(define_insn "xop_p<macs>dqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(xop_plus:V2DI
	 (mult:V2DI
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
	    (parallel [(const_int 1) (const_int 3)])))
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	    (parallel [(const_int 1) (const_int 3)]))))
	 (match_operand:V2DI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
19364
;; XOP parallel integer multiply/add instructions for the intrinsics
;; XOP vpmacs[s]wd: multiply the odd HI elements sign-extended to SI
;; and accumulate into operand 3.
(define_insn "xop_p<macs>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(xop_plus:V4SI
	 (mult:V4SI
	  (sign_extend:V4SI
	   (vec_select:V4HI
	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
	    (parallel [(const_int 1) (const_int 3)
		       (const_int 5) (const_int 7)])))
	  (sign_extend:V4SI
	   (vec_select:V4HI
	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
	    (parallel [(const_int 1) (const_int 3)
		       (const_int 5) (const_int 7)]))))
	 (match_operand:V4SI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
19385
;; XOP vpmadcs[s]wd: sum of even-lane and odd-lane HI x HI -> SI
;; products, accumulated into operand 3.
(define_insn "xop_p<madcs>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(xop_plus:V4SI
	 (plus:V4SI
	  (mult:V4SI
	   (sign_extend:V4SI
	    (vec_select:V4HI
	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
	     (parallel [(const_int 0) (const_int 2)
			(const_int 4) (const_int 6)])))
	   (sign_extend:V4SI
	    (vec_select:V4HI
	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
	     (parallel [(const_int 0) (const_int 2)
			(const_int 4) (const_int 6)]))))
	  (mult:V4SI
	   (sign_extend:V4SI
	    (vec_select:V4HI
	     (match_dup 1)
	     (parallel [(const_int 1) (const_int 3)
			(const_int 5) (const_int 7)])))
	   (sign_extend:V4SI
	    (vec_select:V4HI
	     (match_dup 2)
	     (parallel [(const_int 1) (const_int 3)
			(const_int 5) (const_int 7)])))))
	 (match_operand:V4SI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
19417
19418 ;; XOP parallel XMM conditional moves
;; XOP vpcmov: bitwise select — operand 3 chooses between operands 1
;; and 2 per bit.
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
  [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
	(if_then_else:V_128_256
	  (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
	  (match_operand:V_128_256 1 "register_operand" "x,x")
	  (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
19428
19429 ;; XOP horizontal add/subtract instructions
;; XOP vphadd[u]bw: add adjacent byte pairs, (sign/zero per
;; any_extend) widened to HI.
(define_insn "xop_phadd<u>bw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(plus:V8HI
	 (any_extend:V8HI
	  (vec_select:V8QI
	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
	   (parallel [(const_int 0) (const_int 2)
		      (const_int 4) (const_int 6)
		      (const_int 8) (const_int 10)
		      (const_int 12) (const_int 14)])))
	 (any_extend:V8HI
	  (vec_select:V8QI
	   (match_dup 1)
	   (parallel [(const_int 1) (const_int 3)
		      (const_int 5) (const_int 7)
		      (const_int 9) (const_int 11)
		      (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphadd<u>bw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19450
;; XOP vphadd[u]bd: add groups of four bytes widened to SI.
(define_insn "xop_phadd<u>bd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(plus:V4SI
	 (plus:V4SI
	  (any_extend:V4SI
	   (vec_select:V4QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 4)
		       (const_int 8) (const_int 12)])))
	  (any_extend:V4SI
	   (vec_select:V4QI
	    (match_dup 1)
	    (parallel [(const_int 1) (const_int 5)
		       (const_int 9) (const_int 13)]))))
	 (plus:V4SI
	  (any_extend:V4SI
	   (vec_select:V4QI
	    (match_dup 1)
	    (parallel [(const_int 2) (const_int 6)
		       (const_int 10) (const_int 14)])))
	  (any_extend:V4SI
	   (vec_select:V4QI
	    (match_dup 1)
	    (parallel [(const_int 3) (const_int 7)
		       (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphadd<u>bd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19479
;; XOP vphadd[u]bq: add groups of eight bytes widened to DI.
(define_insn "xop_phadd<u>bq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(plus:V2DI
	 (plus:V2DI
	  (plus:V2DI
	   (any_extend:V2DI
	    (vec_select:V2QI
	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
	     (parallel [(const_int 0) (const_int 8)])))
	   (any_extend:V2DI
	    (vec_select:V2QI
	     (match_dup 1)
	     (parallel [(const_int 1) (const_int 9)]))))
	  (plus:V2DI
	   (any_extend:V2DI
	    (vec_select:V2QI
	     (match_dup 1)
	     (parallel [(const_int 2) (const_int 10)])))
	   (any_extend:V2DI
	    (vec_select:V2QI
	     (match_dup 1)
	     (parallel [(const_int 3) (const_int 11)])))))
	 (plus:V2DI
	  (plus:V2DI
	   (any_extend:V2DI
	    (vec_select:V2QI
	     (match_dup 1)
	     (parallel [(const_int 4) (const_int 12)])))
	   (any_extend:V2DI
	    (vec_select:V2QI
	     (match_dup 1)
	     (parallel [(const_int 5) (const_int 13)]))))
	  (plus:V2DI
	   (any_extend:V2DI
	    (vec_select:V2QI
	     (match_dup 1)
	     (parallel [(const_int 6) (const_int 14)])))
	   (any_extend:V2DI
	    (vec_select:V2QI
	     (match_dup 1)
	     (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphadd<u>bq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19524
;; XOP vphadd[u]wd: add adjacent HI pairs widened to SI.
(define_insn "xop_phadd<u>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(plus:V4SI
	 (any_extend:V4SI
	  (vec_select:V4HI
	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	   (parallel [(const_int 0) (const_int 2)
		      (const_int 4) (const_int 6)])))
	 (any_extend:V4SI
	  (vec_select:V4HI
	   (match_dup 1)
	   (parallel [(const_int 1) (const_int 3)
		      (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphadd<u>wd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19541
;; XOP vphadd[u]wq: add groups of four HI elements widened to DI.
(define_insn "xop_phadd<u>wq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(plus:V2DI
	 (plus:V2DI
	  (any_extend:V2DI
	   (vec_select:V2HI
	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 4)])))
	  (any_extend:V2DI
	   (vec_select:V2HI
	    (match_dup 1)
	    (parallel [(const_int 1) (const_int 5)]))))
	 (plus:V2DI
	  (any_extend:V2DI
	   (vec_select:V2HI
	    (match_dup 1)
	    (parallel [(const_int 2) (const_int 6)])))
	  (any_extend:V2DI
	   (vec_select:V2HI
	    (match_dup 1)
	    (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphadd<u>wq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19566
;; XOP vphadd[u]dq: add adjacent SI pairs widened to DI.
(define_insn "xop_phadd<u>dq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(plus:V2DI
	 (any_extend:V2DI
	  (vec_select:V2SI
	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
	   (parallel [(const_int 0) (const_int 2)])))
	 (any_extend:V2DI
	  (vec_select:V2SI
	   (match_dup 1)
	   (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadd<u>dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19581
;; XOP vphsubbw: subtract odd bytes from even bytes, sign-extended
;; to HI.
(define_insn "xop_phsubbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(minus:V8HI
	 (sign_extend:V8HI
	  (vec_select:V8QI
	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
	   (parallel [(const_int 0) (const_int 2)
		      (const_int 4) (const_int 6)
		      (const_int 8) (const_int 10)
		      (const_int 12) (const_int 14)])))
	 (sign_extend:V8HI
	  (vec_select:V8QI
	   (match_dup 1)
	   (parallel [(const_int 1) (const_int 3)
		      (const_int 5) (const_int 7)
		      (const_int 9) (const_int 11)
		      (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphsubbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19602
;; XOP vphsubwd: subtract odd HI elements from even ones,
;; sign-extended to SI.
(define_insn "xop_phsubwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(minus:V4SI
	 (sign_extend:V4SI
	  (vec_select:V4HI
	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	   (parallel [(const_int 0) (const_int 2)
		      (const_int 4) (const_int 6)])))
	 (sign_extend:V4SI
	  (vec_select:V4HI
	   (match_dup 1)
	   (parallel [(const_int 1) (const_int 3)
		      (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphsubwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19619
;; XOP vphsubdq: subtract odd SI elements from even ones,
;; sign-extended to DI.
(define_insn "xop_phsubdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(minus:V2DI
	 (sign_extend:V2DI
	  (vec_select:V2SI
	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
	   (parallel [(const_int 0) (const_int 2)])))
	 (sign_extend:V2DI
	  (vec_select:V2SI
	   (match_dup 1)
	   (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphsubdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
19634
19635 ;; XOP permute instructions
;; XOP vpperm: byte permute of the concatenation of operands 1 and 2
;; steered by selector operand 3.  At most one memory operand.
(define_insn "xop_pperm"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
	(unspec:V16QI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
	  UNSPEC_XOP_PERMUTE))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
19647
19648 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used as a truncating pack: selector operand 3 (a "use", the
;; caller must supply the matching control vector) picks the low parts
;; of both inputs and concatenates them into the narrower vector.
(define_insn "xop_pperm_pack_v2di_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
	(vec_concat:V4SI
	 (truncate:V2SI
	  (match_operand:V2DI 1 "register_operand" "x,x"))
	 (truncate:V2SI
	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])

;; As above, packing V4SI pairs into V8HI.
(define_insn "xop_pperm_pack_v4si_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
	(vec_concat:V8HI
	 (truncate:V4HI
	  (match_operand:V4SI 1 "register_operand" "x,x"))
	 (truncate:V4HI
	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])

;; As above, packing V8HI pairs into V16QI.
(define_insn "xop_pperm_pack_v8hi_v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
	(vec_concat:V16QI
	 (truncate:V8QI
	  (match_operand:V8HI 1 "register_operand" "x,x"))
	 (truncate:V8QI
	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
19687
;; XOP packed rotate instructions
;; Rotate-left with a scalar count.  If the count is a suitable immediate
;; the pattern matches xop_rotl<mode>3 directly; otherwise the count is
;; broadcast into every element of a vector register and the per-element
;; variable rotate xop_vrotl<mode>3 is used instead.
(define_expand "rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand")
	(rotate:VI_128
	 (match_operand:VI_128 1 "nonimmediate_operand")
	 (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec vs = rtvec_alloc (<ssescalarnum>);
      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
      rtx reg = gen_reg_rtx (<MODE>mode);
      rtx op2 = operands[2];
      int i;

      /* Convert the count to the element mode if it is not already.  */
      if (GET_MODE (op2) != <ssescalarmode>mode)
	{
	  op2 = gen_reg_rtx (<ssescalarmode>mode);
	  convert_move (op2, operands[2], false);
	}

      /* Duplicate the count into every element of the vector.  */
      for (i = 0; i < <ssescalarnum>; i++)
	RTVEC_ELT (vs, i) = op2;

      emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
      DONE;
    }
})
19719
;; Rotate-right with a scalar count.  Immediate counts match the
;; xop_rotr<mode>3 insn; for variable counts the count is broadcast into
;; a vector, negated, and fed to the variable rotate-left pattern
;; (rotate right by N == rotate left by -N).
(define_expand "rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand")
	(rotatert:VI_128
	 (match_operand:VI_128 1 "nonimmediate_operand")
	 (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec vs = rtvec_alloc (<ssescalarnum>);
      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
      rtx neg = gen_reg_rtx (<MODE>mode);
      rtx reg = gen_reg_rtx (<MODE>mode);
      rtx op2 = operands[2];
      int i;

      /* Convert the count to the element mode if it is not already.  */
      if (GET_MODE (op2) != <ssescalarmode>mode)
	{
	  op2 = gen_reg_rtx (<ssescalarmode>mode);
	  convert_move (op2, operands[2], false);
	}

      /* Duplicate the count into every element of the vector.  */
      for (i = 0; i < <ssescalarnum>; i++)
	RTVEC_ELT (vs, i) = op2;

      emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
      /* Negate the counts so a left rotate performs the right rotate.  */
      emit_insn (gen_neg<mode>2 (neg, reg));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
      DONE;
    }
})
19752
;; Immediate-count packed rotate left (vprotb/w/d/q with imm8).
(define_insn "xop_rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(rotate:VI_128
	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Immediate-count packed rotate right.  The hardware only has a rotate
;; left, so the emitted immediate is (element width - count), computed
;; into operands[3] at output time.
(define_insn "xop_rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(rotatert:VI_128
	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
{
  operands[3]
    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
19778
;; Vector-count rotate right: negate the per-element counts and use the
;; variable rotate-left instruction.
(define_expand "vrotr<mode>3"
  [(match_operand:VI_128 0 "register_operand")
   (match_operand:VI_128 1 "register_operand")
   (match_operand:VI_128 2 "register_operand")]
  "TARGET_XOP"
{
  rtx reg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (reg, operands[2]));
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
  DONE;
})

;; Vector-count rotate left maps directly onto xop_vrotl<mode>3.
(define_expand "vrotl<mode>3"
  [(match_operand:VI_128 0 "register_operand")
   (match_operand:VI_128 1 "register_operand")
   (match_operand:VI_128 2 "register_operand")]
  "TARGET_XOP"
{
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
19800
;; Per-element variable rotate (vprotb/w/d/q with a vector count).
;; The RTL models the hardware behavior: a non-negative element count
;; rotates left, a negative count rotates right by the negated amount.
;; Only one of operands 1 and 2 may be a memory reference.
(define_insn "xop_vrotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
	(if_then_else:VI_128
	 (ge:VI_128
	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
	  (const_int 0))
	 (rotate:VI_128
	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
	  (match_dup 2))
	 (rotatert:VI_128
	  (match_dup 1)
	  (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
19819
;; XOP packed shift instructions.
;; Variable logical right shift, QI/HI elements: only XOP provides this;
;; implemented as the XOP variable shift-left with negated counts.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI12_128 0 "register_operand")
	(lshiftrt:VI12_128
	 (match_operand:VI12_128 1 "register_operand")
	 (match_operand:VI12_128 2 "nonimmediate_operand")))]
  "TARGET_XOP"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
  DONE;
})

;; Variable logical right shift, SI/DI elements, 128-bit: AVX2 matches
;; the pattern directly (vpsrlv*); without AVX2 fall back to the XOP
;; negated shift-left sequence.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_128 0 "register_operand")
	(lshiftrt:VI48_128
	 (match_operand:VI48_128 1 "register_operand")
	 (match_operand:VI48_128 2 "nonimmediate_operand")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      rtx neg = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_neg<mode>2 (neg, operands[2]));
      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
      DONE;
    }
})

;; 512-bit variable logical right shift: handled directly by AVX512F.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_512 0 "register_operand")
	(lshiftrt:VI48_512
	 (match_operand:VI48_512 1 "register_operand")
	 (match_operand:VI48_512 2 "nonimmediate_operand")))]
  "TARGET_AVX512F")

;; 256-bit variable logical right shift: handled directly by AVX2.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_256 0 "register_operand")
	(lshiftrt:VI48_256
	 (match_operand:VI48_256 1 "register_operand")
	 (match_operand:VI48_256 2 "nonimmediate_operand")))]
  "TARGET_AVX2")
19863
;; Variable arithmetic right shifts.  The common XOP trick throughout:
;; XOP only has a variable arithmetic shift *left* whose negative counts
;; shift right, so the counts are negated and gen_xop_sha* is used.
;; AVX512BW+VL provides vpsravw directly for V8HI.
(define_expand "vashrv8hi3<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand")
	(ashiftrt:V8HI
	 (match_operand:V8HI 1 "register_operand")
	 (match_operand:V8HI 2 "nonimmediate_operand")))]
  "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
{
  if (TARGET_XOP)
    {
      rtx neg = gen_reg_rtx (V8HImode);
      emit_insn (gen_negv8hi2 (neg, operands[2]));
      emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
      DONE;
    }
})

;; V16QI variable arithmetic right shift: XOP only.
(define_expand "vashrv16qi3"
  [(set (match_operand:V16QI 0 "register_operand")
	(ashiftrt:V16QI
	 (match_operand:V16QI 1 "register_operand")
	 (match_operand:V16QI 2 "nonimmediate_operand")))]
  "TARGET_XOP"
{
  rtx neg = gen_reg_rtx (V16QImode);
  emit_insn (gen_negv16qi2 (neg, operands[2]));
  emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
  DONE;
})

;; V2DI variable arithmetic right shift: AVX512VL has vpsravq; XOP uses
;; the negated-count shift.
(define_expand "vashrv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand")
	(ashiftrt:V2DI
	 (match_operand:V2DI 1 "register_operand")
	 (match_operand:V2DI 2 "nonimmediate_operand")))]
  "TARGET_XOP || TARGET_AVX512VL"
{
  if (TARGET_XOP)
    {
      rtx neg = gen_reg_rtx (V2DImode);
      emit_insn (gen_negv2di2 (neg, operands[2]));
      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
      DONE;
    }
})

;; V4SI variable arithmetic right shift: AVX2 matches directly,
;; otherwise XOP with negated counts.
(define_expand "vashrv4si3"
  [(set (match_operand:V4SI 0 "register_operand")
	(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
		       (match_operand:V4SI 2 "nonimmediate_operand")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      rtx neg = gen_reg_rtx (V4SImode);
      emit_insn (gen_negv4si2 (neg, operands[2]));
      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
      DONE;
    }
})

;; 512-bit variable arithmetic right shift: AVX512F direct.
(define_expand "vashrv16si3"
  [(set (match_operand:V16SI 0 "register_operand")
	(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
			(match_operand:V16SI 2 "nonimmediate_operand")))]
  "TARGET_AVX512F")

;; 256-bit variable arithmetic right shift: AVX2 direct.
(define_expand "vashrv8si3"
  [(set (match_operand:V8SI 0 "register_operand")
	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
		       (match_operand:V8SI 2 "nonimmediate_operand")))]
  "TARGET_AVX2")
19935
;; Variable left shifts.  Positive counts make xop_sha* shift left, so
;; no negation is needed here, unlike the right-shift expanders above.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI12_128 0 "register_operand")
	(ashift:VI12_128
	 (match_operand:VI12_128 1 "register_operand")
	 (match_operand:VI12_128 2 "nonimmediate_operand")))]
  "TARGET_XOP"
{
  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})

;; SI/DI 128-bit variable left shift: AVX2 matches directly (vpsllv*);
;; without AVX2 force the count into a register and use the XOP insn.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI48_128 0 "register_operand")
	(ashift:VI48_128
	 (match_operand:VI48_128 1 "register_operand")
	 (match_operand:VI48_128 2 "nonimmediate_operand")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
      DONE;
    }
})

;; 512-bit variable left shift: AVX512F direct.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI48_512 0 "register_operand")
	(ashift:VI48_512
	 (match_operand:VI48_512 1 "register_operand")
	 (match_operand:VI48_512 2 "nonimmediate_operand")))]
  "TARGET_AVX512F")

;; 256-bit variable left shift: AVX2 direct.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI48_256 0 "register_operand")
	(ashift:VI48_256
	 (match_operand:VI48_256 1 "register_operand")
	 (match_operand:VI48_256 2 "nonimmediate_operand")))]
  "TARGET_AVX2")
19975
;; XOP per-element variable shifts.  Both model the hardware semantics:
;; a non-negative count shifts left; a negative count shifts right by
;; the negated amount — arithmetically for vpsha*, logically for vpshl*.
;; Only one of operands 1 and 2 may be memory.
(define_insn "xop_sha<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
	(if_then_else:VI_128
	 (ge:VI_128
	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
	  (const_int 0))
	 (ashift:VI_128
	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
	  (match_dup 2))
	 (ashiftrt:VI_128
	  (match_dup 1)
	  (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])

;; Logical variant: negative counts perform a logical right shift.
(define_insn "xop_shl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
	(if_then_else:VI_128
	 (ge:VI_128
	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
	  (const_int 0))
	 (ashift:VI_128
	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
	  (match_dup 2))
	 (lshiftrt:VI_128
	  (match_dup 1)
	  (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
20013
;; Shift of QImode-element vectors by a scalar count (no such native SSE
;; instruction exists).  With XOP and V16QI the count is broadcast into a
;; vector — negated for right shifts, since the XOP variable shifts treat
;; negative counts as right shifts — and vpshlb/vpshab is used.
;; Otherwise the shift is synthesized via the HImode helpers.
(define_expand "<insn><mode>3"
  [(set (match_operand:VI1_AVX512 0 "register_operand")
	(any_shift:VI1_AVX512
	 (match_operand:VI1_AVX512 1 "register_operand")
	 (match_operand:SI 2 "nonmemory_operand")))]
  "TARGET_SSE2"
{
  if (TARGET_XOP && <MODE>mode == V16QImode)
    {
      bool negate = false;
      rtx (*gen) (rtx, rtx, rtx);
      rtx tmp, par;
      int i;

      /* Right shifts need a negated count; negate constants directly,
	 defer variable counts to a negv16qi2 after the broadcast.  */
      if (<CODE> != ASHIFT)
	{
	  if (CONST_INT_P (operands[2]))
	    operands[2] = GEN_INT (-INTVAL (operands[2]));
	  else
	    negate = true;
	}
      par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
      for (i = 0; i < 16; i++)
        XVECEXP (par, 0, i) = operands[2];

      tmp = gen_reg_rtx (V16QImode);
      emit_insn (gen_vec_initv16qiqi (tmp, par));

      if (negate)
	emit_insn (gen_negv16qi2 (tmp, tmp));

      /* Logical right shift uses vpshlb, arithmetic shifts use vpshab.  */
      gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
      emit_insn (gen (operands[0], operands[1], tmp));
    }
  else if (!ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0],
						 operands[1], operands[2]))
    ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
  DONE;
})
20053
;; V2DI arithmetic right shift by a scalar count.  AVX512VL matches the
;; pattern directly (vpsraq); the XOP fallback broadcasts the negated
;; count and uses the variable arithmetic shift vpshaq.
(define_expand "ashrv2di3"
  [(set (match_operand:V2DI 0 "register_operand")
	(ashiftrt:V2DI
	 (match_operand:V2DI 1 "register_operand")
	 (match_operand:DI 2 "nonmemory_operand")))]
  "TARGET_XOP || TARGET_AVX512VL"
{
  if (!TARGET_AVX512VL)
    {
      rtx reg = gen_reg_rtx (V2DImode);
      rtx par;
      bool negate = false;
      int i;

      /* Negate constant counts up front; variable counts are negated
	 after being broadcast.  */
      if (CONST_INT_P (operands[2]))
	operands[2] = GEN_INT (-INTVAL (operands[2]));
      else
	negate = true;

      par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
      for (i = 0; i < 2; i++)
	XVECEXP (par, 0, i) = operands[2];

      emit_insn (gen_vec_initv2didi (reg, par));

      if (negate)
	emit_insn (gen_negv2di2 (reg, reg));

      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
      DONE;
    }
})
20086
;; XOP FRCZ support
;; vfrcz: extract the fractional part of each element (UNSPEC_FRCZ keeps
;; the operation opaque to the optimizers).
(define_insn "xop_frcz<mode>2"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
	(unspec:FMAMODE
	 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
	 UNSPEC_FRCZ))]
  "TARGET_XOP"
  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])

;; Scalar FRCZ: only the low element is computed; the upper elements of
;; the destination are zeroed (operand 2 is forced to the zero vector).
(define_expand "xop_vmfrcz<mode>2"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (unspec:VF_128
	   [(match_operand:VF_128 1 "nonimmediate_operand")]
	   UNSPEC_FRCZ)
	  (match_dup 2)
	  (const_int 1)))]
  "TARGET_XOP"
  "operands[2] = CONST0_RTX (<MODE>mode);")

;; Matcher for the expand above; operand 2 must be the zero vector.
(define_insn "*xop_vmfrcz<mode>2"
  [(set (match_operand:VF_128 0 "register_operand" "=x")
	(vec_merge:VF_128
	  (unspec:VF_128
	   [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
	   UNSPEC_FRCZ)
	  (match_operand:VF_128 2 "const0_operand")
	  (const_int 1)))]
  "TARGET_XOP"
  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
20121
;; XOP vpcom* packed comparisons.  %Y1 prints the comparison code of the
;; match_operator as the vpcom condition suffix.
(define_insn "xop_maskcmp<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(match_operator:VI_128 1 "ix86_comparison_int_operator"
	 [(match_operand:VI_128 2 "register_operand" "x")
	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Unsigned variant (vpcom*u*).
(define_insn "xop_maskcmp_uns<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(match_operator:VI_128 1 "ix86_comparison_uns_operator"
	 [(match_operand:VI_128 2 "register_operand" "x")
	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
;; and pcomneu* not to be converted to the signed ones in case somebody needs
;; the exact instruction generated for the intrinsic.  The extra UNSPEC
;; wrapper prevents the RTL simplifiers from canonicalizing the compare.
(define_insn "xop_maskcmp_uns2<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(unspec:VI_128
	 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
	  [(match_operand:VI_128 2 "register_operand" "x")
	   (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
	 UNSPEC_XOP_UNSIGNED_CMP))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
;; being added here to be complete.  A nonzero operand 3 selects pcomtrue
;; (all-ones result), zero selects pcomfalse (all-zeros result).
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(unspec:VI_128
	  [(match_operand:VI_128 1 "register_operand" "x")
	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
	   (match_operand:SI 3 "const_int_operand" "n")]
	  UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  return ((INTVAL (operands[3]) != 0)
	  ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
	  : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
}
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
20188
;; XOP two-source permute: vpermil2ps/pd select elements from operands 1
;; and 2 under control of the integer selector vector (operand 3) and the
;; 2-bit immediate (operand 4).  Opaque unspec; no RTL-level semantics.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "register_operand" "x,x")
	   (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
	   (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
	  UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
20202
20203 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20204
20205 (define_insn "aesenc"
20206 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20207 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20208 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20209 UNSPEC_AESENC))]
20210 "TARGET_AES"
20211 "@
20212 aesenc\t{%2, %0|%0, %2}
20213 vaesenc\t{%2, %1, %0|%0, %1, %2}"
20214 [(set_attr "isa" "noavx,avx")
20215 (set_attr "type" "sselog1")
20216 (set_attr "prefix_extra" "1")
20217 (set_attr "prefix" "orig,vex")
20218 (set_attr "btver2_decode" "double,double")
20219 (set_attr "mode" "TI")])
20220
20221 (define_insn "aesenclast"
20222 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20223 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20224 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20225 UNSPEC_AESENCLAST))]
20226 "TARGET_AES"
20227 "@
20228 aesenclast\t{%2, %0|%0, %2}
20229 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
20230 [(set_attr "isa" "noavx,avx")
20231 (set_attr "type" "sselog1")
20232 (set_attr "prefix_extra" "1")
20233 (set_attr "prefix" "orig,vex")
20234 (set_attr "btver2_decode" "double,double")
20235 (set_attr "mode" "TI")])
20236
20237 (define_insn "aesdec"
20238 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20239 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20240 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20241 UNSPEC_AESDEC))]
20242 "TARGET_AES"
20243 "@
20244 aesdec\t{%2, %0|%0, %2}
20245 vaesdec\t{%2, %1, %0|%0, %1, %2}"
20246 [(set_attr "isa" "noavx,avx")
20247 (set_attr "type" "sselog1")
20248 (set_attr "prefix_extra" "1")
20249 (set_attr "prefix" "orig,vex")
20250 (set_attr "btver2_decode" "double,double")
20251 (set_attr "mode" "TI")])
20252
20253 (define_insn "aesdeclast"
20254 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20255 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20256 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20257 UNSPEC_AESDECLAST))]
20258 "TARGET_AES"
20259 "@
20260 aesdeclast\t{%2, %0|%0, %2}
20261 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
20262 [(set_attr "isa" "noavx,avx")
20263 (set_attr "type" "sselog1")
20264 (set_attr "prefix_extra" "1")
20265 (set_attr "prefix" "orig,vex")
20266 (set_attr "btver2_decode" "double,double")
20267 (set_attr "mode" "TI")])
20268
20269 (define_insn "aesimc"
20270 [(set (match_operand:V2DI 0 "register_operand" "=x")
20271 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
20272 UNSPEC_AESIMC))]
20273 "TARGET_AES"
20274 "%vaesimc\t{%1, %0|%0, %1}"
20275 [(set_attr "type" "sselog1")
20276 (set_attr "prefix_extra" "1")
20277 (set_attr "prefix" "maybe_vex")
20278 (set_attr "mode" "TI")])
20279
20280 (define_insn "aeskeygenassist"
20281 [(set (match_operand:V2DI 0 "register_operand" "=x")
20282 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
20283 (match_operand:SI 2 "const_0_to_255_operand" "n")]
20284 UNSPEC_AESKEYGENASSIST))]
20285 "TARGET_AES"
20286 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
20287 [(set_attr "type" "sselog1")
20288 (set_attr "prefix_extra" "1")
20289 (set_attr "length_immediate" "1")
20290 (set_attr "prefix" "maybe_vex")
20291 (set_attr "mode" "TI")])
20292
20293 (define_insn "pclmulqdq"
20294 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20295 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20296 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
20297 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
20298 UNSPEC_PCLMUL))]
20299 "TARGET_PCLMUL"
20300 "@
20301 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
20302 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20303 [(set_attr "isa" "noavx,avx")
20304 (set_attr "type" "sselog1")
20305 (set_attr "prefix_extra" "1")
20306 (set_attr "length_immediate" "1")
20307 (set_attr "prefix" "orig,vex")
20308 (set_attr "mode" "TI")])
20309
;; vzeroall clears every SSE register.  The expander builds a PARALLEL
;; containing the volatile unspec plus one explicit (set reg 0) per SSE
;; register (8 on 32-bit, 16 on 64-bit) so the register allocator and
;; dataflow passes see all the clobbered registers.
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  int nregs = TARGET_64BIT ? 16 : 8;
  int regno;

  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));

  /* Element 0 is the volatile marker; elements 1..nregs zero each
     SSE register.  */
  XVECEXP (operands[0], 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
			       UNSPECV_VZEROALL);

  for (regno = 0; regno < nregs; regno++)
    XVECEXP (operands[0], 0, regno + 1)
      = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
		     CONST0_RTX (V8SImode));
})

;; Matcher for the PARALLEL built by the expander above.
(define_insn "*avx_vzeroall"
  [(match_parallel 0 "vzeroall_operation"
    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
  "TARGET_AVX"
  "vzeroall"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
20340
;; Clear the upper 128bits of AVX registers, equivalent to a NOP
;; if the upper 128bits are unused.  Initially we expand the instructions
;; as though they had no effect on the SSE registers, but later add SETs and
;; CLOBBERs to the PARALLEL to model the real effect.
(define_expand "avx_vzeroupper"
  [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
  "TARGET_AVX")

;; Fully-populated form: the PARALLEL describes all 8/16 SSE registers,
;; so it can be emitted directly.
(define_insn "*avx_vzeroupper"
  [(match_parallel 0 "vzeroupper_pattern"
    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
  "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])

;; Partially-populated form: after the epilogue is laid out, rewrite the
;; PARALLEL so every SSE register is accounted for, keeping the SETs that
;; were already present and adding CLOBBERs for the rest.
(define_insn_and_split "*avx_vzeroupper_1"
  [(match_parallel 0 "vzeroupper_pattern"
    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
  "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
  "#"
  "&& epilogue_completed"
  [(match_dup 0)]
{
  /* For IPA-RA purposes, make it clear the instruction clobbers
     even XMM registers not mentioned explicitly in the pattern.  */
  unsigned int nregs = TARGET_64BIT ? 16 : 8;
  unsigned int npats = XVECLEN (operands[0], 0);
  rtvec vec = rtvec_alloc (nregs + 1);
  RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
  /* Walk the existing vector elements (sorted by register) in lockstep
     with the full register list, reusing existing SETs and inserting
     CLOBBERs for registers the original pattern did not mention.  */
  for (unsigned int i = 0, j = 1; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (j < npats
	  && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
	{
	  RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
	  j++;
	}
      else
	{
	  rtx reg = gen_rtx_REG (V2DImode, regno);
	  RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
	}
    }
  operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
}
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
20398
;; Minimum EVEX ISA able to broadcast each element size: byte/word
;; broadcasts need AVX512BW, dword/qword only AVX512F.
(define_mode_attr pbroadcast_evex_isa
  [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
   (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
   (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
   (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])

;; vpbroadcast*: duplicate element 0 of the 128-bit source into every
;; element of the destination.  Second alternative allows EVEX registers,
;; gated on <pbroadcast_evex_isa>.
(define_insn "avx2_pbroadcast<mode>"
  [(set (match_operand:VI 0 "register_operand" "=x,v")
	(vec_duplicate:VI
	  (vec_select:<ssescalarmode>
	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
  [(set_attr "isa" "*,<pbroadcast_evex_isa>")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "<sseinsnmode>")])

;; As above but with a 256-bit source: a register source is accessed
;; through its low 128-bit part (%x1), a memory source through the
;; scalar element.
(define_insn "avx2_pbroadcast<mode>_1"
  [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
	(vec_duplicate:VI_256
	  (vec_select:<ssescalarmode>
	    (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "@
   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
  [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
20436
;; vperm* with a variable index vector (operand 2), optionally masked.
;; Note the operand order in the template: the index register comes
;; second in AT&T syntax.
(define_insn "<avx2_avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
	(unspec:VI48F_256_512
	  [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
	  UNSPEC_VPERMVAR))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "<sseinsnmode>")])

;; Byte-element variable permute: requires AVX512VBMI (vpermb).
(define_insn "<avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL
	  [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
	  UNSPEC_VPERMVAR))]
  "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "<sseinsnmode>")])

;; Word-element variable permute: requires AVX512BW (vpermw).
(define_insn "<avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
	  UNSPEC_VPERMVAR))]
  "TARGET_AVX512BW && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "<sseinsnmode>")])
20472
;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
;; If it so happens that the input is in memory, use vbroadcast.
;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
	(vec_select:V4SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
	  (match_parallel 2 "avx_vbroadcast_operand"
	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  int elt = INTVAL (operands[3]);
  switch (which_alternative)
    {
    case 0:
    case 1:
      /* Memory source: load the selected element with vbroadcastss.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
      return "vbroadcastss\t{%1, %0|%0, %k1}";
    case 2:
      /* Register source: replicate element ELT via vpermilps.
	 0x55 = 0b01010101, so elt * 0x55 puts ELT in all four
	 2-bit selector fields.  */
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "SF,SF,V4SF")])

;; 256-bit float broadcast.  Split after reload to either a memory
;; vec_duplicate (vbroadcast) or, for a register source, a vpermil +
;; lane-duplication sequence.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
	(vec_select:VF_256
	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
	  (match_parallel 2 "avx_vbroadcast_operand"
	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX
   && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
{
  rtx op0 = operands[0], op1 = operands[1];
  int elt = INTVAL (operands[3]);

  if (REG_P (op1))
    {
      int mask;

      /* AVX2 can broadcast element 0 directly from a register.  */
      if (TARGET_AVX2 && elt == 0)
	{
	  emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
							  op1)));
	  DONE;
	}

      /* Shuffle element we care about into all elements of the 128-bit lane.
	 The other lane gets shuffled too, but we don't care.  */
      if (<MODE>mode == V4DFmode)
	mask = (elt & 1 ? 15 : 0);
      else
	mask = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));

      /* Shuffle the lane we care about into both lanes of the dest.  */
      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
      if (EXT_REX_SSE_REG_P (op0))
	{
	  /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
	     or VSHUFF128.  */
	  gcc_assert (<MODE>mode == V8SFmode);
	  if ((mask & 1) == 0)
	    emit_insn (gen_avx2_vec_dupv8sf (op0,
					     gen_lowpart (V4SFmode, op0)));
	  else
	    emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
						  GEN_INT (4), GEN_INT (5),
						  GEN_INT (6), GEN_INT (7),
						  GEN_INT (12), GEN_INT (13),
						  GEN_INT (14), GEN_INT (15)));
	  DONE;
	}

      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
      DONE;
    }

  /* Memory source: narrow the address to the selected scalar element so
     the vec_duplicate in the split pattern becomes a vbroadcast load.  */
  operands[1] = adjust_address (op1, <ssescalarmode>mode,
				elt * GET_MODE_SIZE (<ssescalarmode>mode));
})
20564
20565 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20566 [(set (match_operand:VF2 0 "register_operand")
20567 (vec_select:VF2
20568 (match_operand:VF2 1 "nonimmediate_operand")
20569 (match_operand:SI 2 "const_0_to_255_operand")))]
20570 "TARGET_AVX && <mask_mode512bit_condition>"
20571 {
20572 int mask = INTVAL (operands[2]);
20573 rtx perm[<ssescalarnum>];
20574
20575 int i;
20576 for (i = 0; i < <ssescalarnum>; i = i + 2)
20577 {
20578 perm[i] = GEN_INT (((mask >> i) & 1) + i);
20579 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
20580 }
20581
20582 operands[2]
20583 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
20584 })
20585
;; VPERMILPS with an immediate selector (single-precision modes).
;; Same idea as the VF2 expander above, but here each element takes a
;; 2-bit selector and the same imm8 is reused for every group of four
;; elements (i.e. for every 128-bit lane).
20586 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20587 [(set (match_operand:VF1 0 "register_operand")
20588 (vec_select:VF1
20589 (match_operand:VF1 1 "nonimmediate_operand")
20590 (match_operand:SI 2 "const_0_to_255_operand")))]
20591 "TARGET_AVX && <mask_mode512bit_condition>"
20592 {
20593 int mask = INTVAL (operands[2]);
20594 rtx perm[<ssescalarnum>];
20595
20596 int i;
;; Two selector bits per element; indices are offset by the lane base i.
20597 for (i = 0; i < <ssescalarnum>; i = i + 4)
20598 {
20599 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
20600 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
20601 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
20602 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
20603 }
20604
20605 operands[2]
20606 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
20607 })
20608
20609 ;; This pattern needs to come before the avx2_perm*/avx512f_perm*
20610 ;; patterns, as they have the same RTL representation (vpermilp*
20611 ;; being a subset of what vpermp* can do), but vpermilp* has shorter
20612 ;; latency as it never crosses lanes.
;; Matches any vec_select whose PARALLEL is a valid in-lane permutation
;; (checked by avx_vpermilp_parallel, which returns the encoded imm8
;; plus one, or zero on failure) and re-encodes it as a VPERMILPS/PD
;; immediate at output time.
20613 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
20614 [(set (match_operand:VF 0 "register_operand" "=v")
20615 (vec_select:VF
20616 (match_operand:VF 1 "nonimmediate_operand" "vm")
20617 (match_parallel 2 ""
20618 [(match_operand 3 "const_int_operand")])))]
20619 "TARGET_AVX && <mask_mode512bit_condition>
20620 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
20621 {
;; avx_vpermilp_parallel returns imm8 + 1; undo the bias here.
20622 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
20623 operands[2] = GEN_INT (mask);
20624 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
20625 }
20626 [(set_attr "type" "sselog")
20627 (set_attr "prefix_extra" "1")
20628 (set_attr "length_immediate" "1")
20629 (set_attr "prefix" "<mask_prefix>")
20630 (set_attr "mode" "<sseinsnmode>")])
20631
;; VPERMQ/VPERMPD (256-bit, 64-bit elements): split the imm8 into four
;; 2-bit element selectors and hand them to the _1 insn pattern.
20632 (define_expand "avx2_perm<mode>"
20633 [(match_operand:VI8F_256 0 "register_operand")
20634 (match_operand:VI8F_256 1 "nonimmediate_operand")
20635 (match_operand:SI 2 "const_0_to_255_operand")]
20636 "TARGET_AVX2"
20637 {
20638 int mask = INTVAL (operands[2]);
20639 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
20640 GEN_INT ((mask >> 0) & 3),
20641 GEN_INT ((mask >> 2) & 3),
20642 GEN_INT ((mask >> 4) & 3),
20643 GEN_INT ((mask >> 6) & 3)));
20644 DONE;
20645 })
20646
;; Masked variant of the 256-bit 64-bit-element permute: same imm8
;; decoding as avx2_perm<mode>, plus merge operand 3 and mask register
;; operand 4 forwarded to the masked _1 pattern.
20647 (define_expand "avx512vl_perm<mode>_mask"
20648 [(match_operand:VI8F_256 0 "register_operand")
20649 (match_operand:VI8F_256 1 "nonimmediate_operand")
20650 (match_operand:SI 2 "const_0_to_255_operand")
20651 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
20652 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20653 "TARGET_AVX512VL"
20654 {
20655 int mask = INTVAL (operands[2]);
20656 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
20657 GEN_INT ((mask >> 0) & 3),
20658 GEN_INT ((mask >> 2) & 3),
20659 GEN_INT ((mask >> 4) & 3),
20660 GEN_INT ((mask >> 6) & 3),
20661 operands[3], operands[4]));
20662 DONE;
20663 })
20664
;; The insn form of the 256-bit 64-bit-element permute: four explicit
;; 0..3 selectors in the PARALLEL.  The output routine re-packs them
;; into the imm8 expected by vpermq/vpermpd.
20665 (define_insn "avx2_perm<mode>_1<mask_name>"
20666 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20667 (vec_select:VI8F_256
20668 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
20669 (parallel [(match_operand 2 "const_0_to_3_operand")
20670 (match_operand 3 "const_0_to_3_operand")
20671 (match_operand 4 "const_0_to_3_operand")
20672 (match_operand 5 "const_0_to_3_operand")])))]
20673 "TARGET_AVX2 && <mask_mode512bit_condition>"
20674 {
;; Re-encode the four 2-bit selectors into a single imm8.
20675 int mask = 0;
20676 mask |= INTVAL (operands[2]) << 0;
20677 mask |= INTVAL (operands[3]) << 2;
20678 mask |= INTVAL (operands[4]) << 4;
20679 mask |= INTVAL (operands[5]) << 6;
20680 operands[2] = GEN_INT (mask);
20681 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
20682 }
20683 [(set_attr "type" "sselog")
20684 (set_attr "prefix" "<mask_prefix2>")
20685 (set_attr "mode" "<sseinsnmode>")])
20686
;; 512-bit VPERMQ/VPERMPD with immediate: the imm8 is applied per
;; 256-bit half, so the low four selectors are 0..3 and the high four
;; are the same selectors biased by 4.
20687 (define_expand "avx512f_perm<mode>"
20688 [(match_operand:V8FI 0 "register_operand")
20689 (match_operand:V8FI 1 "nonimmediate_operand")
20690 (match_operand:SI 2 "const_0_to_255_operand")]
20691 "TARGET_AVX512F"
20692 {
20693 int mask = INTVAL (operands[2]);
20694 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
20695 GEN_INT ((mask >> 0) & 3),
20696 GEN_INT ((mask >> 2) & 3),
20697 GEN_INT ((mask >> 4) & 3),
20698 GEN_INT ((mask >> 6) & 3),
20699 GEN_INT (((mask >> 0) & 3) + 4),
20700 GEN_INT (((mask >> 2) & 3) + 4),
20701 GEN_INT (((mask >> 4) & 3) + 4),
20702 GEN_INT (((mask >> 6) & 3) + 4)));
20703 DONE;
20704 })
20705
;; Masked form of the 512-bit immediate permute above; identical imm8
;; decoding, plus merge operand 3 and mask operand 4.
20706 (define_expand "avx512f_perm<mode>_mask"
20707 [(match_operand:V8FI 0 "register_operand")
20708 (match_operand:V8FI 1 "nonimmediate_operand")
20709 (match_operand:SI 2 "const_0_to_255_operand")
20710 (match_operand:V8FI 3 "nonimm_or_0_operand")
20711 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20712 "TARGET_AVX512F"
20713 {
20714 int mask = INTVAL (operands[2]);
20715 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
20716 GEN_INT ((mask >> 0) & 3),
20717 GEN_INT ((mask >> 2) & 3),
20718 GEN_INT ((mask >> 4) & 3),
20719 GEN_INT ((mask >> 6) & 3),
20720 GEN_INT (((mask >> 0) & 3) + 4),
20721 GEN_INT (((mask >> 2) & 3) + 4),
20722 GEN_INT (((mask >> 4) & 3) + 4),
20723 GEN_INT (((mask >> 6) & 3) + 4),
20724 operands[3], operands[4]));
20725 DONE;
20726 })
20727
;; Insn form of the 512-bit immediate permute.  The insn condition
;; enforces that the high-half selectors (operands 6-9) mirror the
;; low-half selectors (operands 2-5) shifted by 4, which is exactly
;; the per-256-bit-half behavior of vpermq/vpermpd with imm8.
20728 (define_insn "avx512f_perm<mode>_1<mask_name>"
20729 [(set (match_operand:V8FI 0 "register_operand" "=v")
20730 (vec_select:V8FI
20731 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
20732 (parallel [(match_operand 2 "const_0_to_3_operand")
20733 (match_operand 3 "const_0_to_3_operand")
20734 (match_operand 4 "const_0_to_3_operand")
20735 (match_operand 5 "const_0_to_3_operand")
20736 (match_operand 6 "const_4_to_7_operand")
20737 (match_operand 7 "const_4_to_7_operand")
20738 (match_operand 8 "const_4_to_7_operand")
20739 (match_operand 9 "const_4_to_7_operand")])))]
20740 "TARGET_AVX512F && <mask_mode512bit_condition>
20741 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
20742 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
20743 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
20744 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
20745 {
;; Only the low-half selectors are needed to rebuild the imm8.
20746 int mask = 0;
20747 mask |= INTVAL (operands[2]) << 0;
20748 mask |= INTVAL (operands[3]) << 2;
20749 mask |= INTVAL (operands[4]) << 4;
20750 mask |= INTVAL (operands[5]) << 6;
20751 operands[2] = GEN_INT (mask);
20752 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
20753 }
20754 [(set_attr "type" "sselog")
20755 (set_attr "prefix" "<mask_prefix2>")
20756 (set_attr "mode" "<sseinsnmode>")])
20757
;; VPERM2I128: permute/combine 128-bit halves of two 256-bit integer
;; vectors under control of imm8.  Kept as an unspec because imm8 can
;; also zero a lane, which has no simple vec_select representation.
20758 (define_insn "avx2_permv2ti"
20759 [(set (match_operand:V4DI 0 "register_operand" "=x")
20760 (unspec:V4DI
20761 [(match_operand:V4DI 1 "register_operand" "x")
20762 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
20763 (match_operand:SI 3 "const_0_to_255_operand" "n")]
20764 UNSPEC_VPERMTI))]
20765 "TARGET_AVX2"
20766 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20767 [(set_attr "type" "sselog")
20768 (set_attr "prefix" "vex")
20769 (set_attr "mode" "OI")])
20770
;; Broadcast element 0 of a 128-bit DF vector register to all four
;; elements of a 256-bit destination (register-source vbroadcastsd
;; requires AVX2).
20771 (define_insn "avx2_vec_dupv4df"
20772 [(set (match_operand:V4DF 0 "register_operand" "=v")
20773 (vec_duplicate:V4DF
20774 (vec_select:DF
20775 (match_operand:V2DF 1 "register_operand" "v")
20776 (parallel [(const_int 0)]))))]
20777 "TARGET_AVX2"
20778 "vbroadcastsd\t{%1, %0|%0, %1}"
20779 [(set_attr "type" "sselog1")
20780 (set_attr "prefix" "maybe_evex")
20781 (set_attr "mode" "V4DF")])
20782
;; Broadcast element 0 of a same-mode integer vector (register or
;; memory) to every element.  %x1 prints the low 128 bits of the
;; source register; the memory alternative uses the scalar width.
20783 (define_insn "<avx512>_vec_dup<mode>_1"
20784 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
20785 (vec_duplicate:VI_AVX512BW
20786 (vec_select:<ssescalarmode>
20787 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
20788 (parallel [(const_int 0)]))))]
20789 "TARGET_AVX512F"
20790 "@
20791 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
20792 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
20793 [(set_attr "type" "ssemov")
20794 (set_attr "prefix" "evex")
20795 (set_attr "mode" "<sseinsnmode>")])
20796
;; Maskable broadcast of element 0 of an XMM-sized source into a
;; 32/64-bit-element vector.
20797 (define_insn "<avx512>_vec_dup<mode><mask_name>"
20798 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
20799 (vec_duplicate:V48_AVX512VL
20800 (vec_select:<ssescalarmode>
20801 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20802 (parallel [(const_int 0)]))))]
20803 "TARGET_AVX512F"
20804 {
20805 /* There is no DF broadcast (in AVX-512*) to 128b register.
20806 Mimic it with integer variant. */
20807 if (<MODE>mode == V2DFmode)
20808 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
20809
20810 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
20811 }
20812 [(set_attr "type" "ssemov")
20813 (set_attr "prefix" "evex")
20814 (set_attr "mode" "<sseinsnmode>")])
20815
;; Maskable byte/word broadcast from an XMM-sized source; QI/HI element
;; broadcasts require AVX512BW.
20816 (define_insn "<avx512>_vec_dup<mode><mask_name>"
20817 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
20818 (vec_duplicate:VI12_AVX512VL
20819 (vec_select:<ssescalarmode>
20820 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20821 (parallel [(const_int 0)]))))]
20822 "TARGET_AVX512BW"
20823 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
20824 [(set_attr "type" "ssemov")
20825 (set_attr "prefix" "evex")
20826 (set_attr "mode" "<sseinsnmode>")])
20827
;; Broadcast a 128-bit chunk to all four 128-bit positions of a 512-bit
;; destination.  The memory form uses vbroadcast*32x4; the register
;; form has no direct instruction, so vshuf*32x4 with imm 0 on the
;; zero-extended source (%g1) is used instead.
20828 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
20829 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
20830 (vec_duplicate:V16FI
20831 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
20832 "TARGET_AVX512F"
20833 "@
20834 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
20835 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20836 [(set_attr "type" "ssemov")
20837 (set_attr "prefix" "evex")
20838 (set_attr "mode" "<sseinsnmode>")])
20839
;; Broadcast a 256-bit chunk to both halves of a 512-bit destination.
;; Register form is emulated with vshuf*64x2 imm 0x44 (lanes 0,1,0,1).
20840 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
20841 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
20842 (vec_duplicate:V8FI
20843 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
20844 "TARGET_AVX512F"
20845 "@
20846 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
20847 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20848 [(set_attr "type" "ssemov")
20849 (set_attr "prefix" "evex")
20850 (set_attr "mode" "<sseinsnmode>")])
20851
;; Byte/word broadcast from a scalar: SSE register/memory source, or a
;; general register directly (%k1 prints the 32-bit GPR name).
20852 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
20853 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
20854 (vec_duplicate:VI12_AVX512VL
20855 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
20856 "TARGET_AVX512BW"
20857 "@
20858 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
20859 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
20860 [(set_attr "type" "ssemov")
20861 (set_attr "prefix" "evex")
20862 (set_attr "mode" "<sseinsnmode>")])
20863
;; 32/64-bit scalar broadcast.  The GPR-source alternative is only
;; enabled for integer element modes, and DImode from a GPR requires a
;; 64-bit target.
20864 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
20865 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
20866 (vec_duplicate:V48_AVX512VL
20867 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
20868 "TARGET_AVX512F"
20869 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20870 [(set_attr "type" "ssemov")
20871 (set_attr "prefix" "evex")
20872 (set_attr "mode" "<sseinsnmode>")
20873 (set (attr "enabled")
20874 (if_then_else (eq_attr "alternative" "1")
20875 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
20876 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
20877 (const_int 1)))])
20878
;; SF scalar broadcast into V4SF: AVX register source uses vshufps imm
;; 0, AVX memory source uses vbroadcastss, pre-AVX uses in-place shufps
;; (hence the "0" matching constraint).
20879 (define_insn "vec_dupv4sf"
20880 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
20881 (vec_duplicate:V4SF
20882 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
20883 "TARGET_SSE"
20884 "@
20885 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
20886 vbroadcastss\t{%1, %0|%0, %1}
20887 shufps\t{$0, %0, %0|%0, %0, 0}"
20888 [(set_attr "isa" "avx,avx,noavx")
20889 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
20890 (set_attr "length_immediate" "1,0,1")
20891 (set_attr "prefix_extra" "0,1,*")
20892 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
20893 (set_attr "mode" "V4SF")])
20894
;; SI scalar broadcast into V4SI: pshufd imm 0 (SSE2), vbroadcastss
;; from memory (AVX — same bit pattern as the integer broadcast), or
;; pre-SSE2 in-place shufps.
20895 (define_insn "*vec_dupv4si"
20896 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
20897 (vec_duplicate:V4SI
20898 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
20899 "TARGET_SSE"
20900 "@
20901 %vpshufd\t{$0, %1, %0|%0, %1, 0}
20902 vbroadcastss\t{%1, %0|%0, %1}
20903 shufps\t{$0, %0, %0|%0, %0, 0}"
20904 [(set_attr "isa" "sse2,avx,noavx")
20905 (set_attr "type" "sselog1,ssemov,sselog1")
20906 (set_attr "length_immediate" "1,0,1")
20907 (set_attr "prefix_extra" "0,1,*")
20908 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
20909 (set_attr "mode" "TI,V4SF,V4SF")])
20910
;; DI scalar broadcast into V2DI: punpcklqdq (SSE2), vpunpcklqdq (AVX,
;; %d1 duplicates the operand), movddup (SSE3, also takes memory), or
;; movlhps as the pre-SSE2 fallback.
20911 (define_insn "*vec_dupv2di"
20912 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
20913 (vec_duplicate:V2DI
20914 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
20915 "TARGET_SSE"
20916 "@
20917 punpcklqdq\t%0, %0
20918 vpunpcklqdq\t{%d1, %0|%0, %d1}
20919 %vmovddup\t{%1, %0|%0, %1}
20920 movlhps\t%0, %0"
20921 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
20922 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
20923 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
20924 (set_attr "mode" "TI,TI,DF,V4SF")])
20925
;; Broadcast a 128-bit memory chunk to both halves of a 256-bit
;; integer vector, modeled as vec_concat of the load with itself.
;; EVEX-encodable variants are provided for AVX512DQ/AVX512VL.
20926 (define_insn "avx2_vbroadcasti128_<mode>"
20927 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
20928 (vec_concat:VI_256
20929 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
20930 (match_dup 1)))]
20931 "TARGET_AVX2"
20932 "@
20933 vbroadcasti128\t{%1, %0|%0, %1}
20934 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
20935 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
20936 [(set_attr "isa" "*,avx512dq,avx512vl")
20937 (set_attr "type" "ssemov")
20938 (set_attr "prefix_extra" "1")
20939 (set_attr "prefix" "vex,evex,evex")
20940 (set_attr "mode" "OI")])
20941
20942 ;; Modes handled by AVX vec_dup patterns.
20943 (define_mode_iterator AVX_VEC_DUP_MODE
20944 [V8SI V8SF V4DI V4DF])
;; Scalar suffix used by vbroadcastss/vbroadcastsd for each 256-bit
;; mode (integer modes reuse the FP suffix of the same element width).
20945 (define_mode_attr vecdupssescalarmodesuffix
20946 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
20947 ;; Modes handled by AVX2 vec_dup patterns.
20948 (define_mode_iterator AVX2_VEC_DUP_MODE
20949 [V32QI V16QI V16HI V8HI V8SI V4SI])
20950
;; AVX2 integer scalar broadcast.  Alternative 2 (GPR source, no
;; AVX512VL direct broadcast) is left as "#" and handled by the
;; define_split below; it is discouraged ("$r") and only preferred
;; when GPR->vector moves are cheap on the target.
20951 (define_insn "*vec_dup<mode>"
20952 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
20953 (vec_duplicate:AVX2_VEC_DUP_MODE
20954 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
20955 "TARGET_AVX2"
20956 "@
20957 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
20958 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
20959 #"
20960 [(set_attr "isa" "*,*,noavx512vl")
20961 (set_attr "type" "ssemov")
20962 (set_attr "prefix_extra" "1")
20963 (set_attr "prefix" "maybe_evex")
20964 (set_attr "mode" "<sseinsnmode>")
20965 (set (attr "preferred_for_speed")
20966 (cond [(eq_attr "alternative" "2")
20967 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
20968 ]
20969 (symbol_ref "true")))])
20970
;; 256-bit scalar broadcast for 32/64-bit elements.  Memory sources
;; work on plain AVX (integer modes use the same-width FP broadcast);
;; register sources need AVX2/AVX512F; the last alternative is split
;; after reload for AVX-without-AVX2 (see the AVX_VEC_DUP_MODE split).
20971 (define_insn "vec_dup<mode>"
20972 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
20973 (vec_duplicate:AVX_VEC_DUP_MODE
20974 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
20975 "TARGET_AVX"
20976 "@
20977 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
20978 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
20979 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
20980 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
20981 #"
20982 [(set_attr "type" "ssemov")
20983 (set_attr "prefix_extra" "1")
20984 (set_attr "prefix" "maybe_evex")
20985 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
20986 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
20987
;; Split a GPR-source integer broadcast (the "#" alternative above)
;; into: insert the GPR into element 0 of an XMM register, then
;; vpbroadcast from that register.
20988 (define_split
20989 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
20990 (vec_duplicate:AVX2_VEC_DUP_MODE
20991 (match_operand:<ssescalarmode> 1 "register_operand")))]
20992 "TARGET_AVX2
20993 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
20994 available, because then we can broadcast from GPRs directly.
20995 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
20996 for V*SI mode it requires just -mavx512vl. */
20997 && !(TARGET_AVX512VL
20998 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
20999 && reload_completed && GENERAL_REG_P (operands[1])"
21000 [(const_int 0)]
21001 {
21002 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
21003 CONST0_RTX (V4SImode),
21004 gen_lowpart (SImode, operands[1])));
21005 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
21006 gen_lowpart (<ssexmmmode>mode,
21007 operands[0])));
21008 DONE;
21009 })
21010
;; AVX without AVX2 has no register-source 256-bit broadcast: split
;; into a 128-bit duplicate (into the low half of the destination)
;; followed by concatenating that half with itself.
21011 (define_split
21012 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
21013 (vec_duplicate:AVX_VEC_DUP_MODE
21014 (match_operand:<ssescalarmode> 1 "register_operand")))]
21015 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
21016 [(set (match_dup 2)
21017 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
21018 (set (match_dup 0)
21019 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
21020 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
21021
;; Broadcast a 128-bit source to both halves of any 256-bit vector.
;; Memory sources use vbroadcast; a source matching the destination's
;; low half ("0") uses vinsert into the high half; other register
;; sources use vperm2f128 imm 0.  EVEX variants follow for
;; AVX512DQ/AVX512VL.
21022 (define_insn "avx_vbroadcastf128_<mode>"
21023 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
21024 (vec_concat:V_256
21025 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
21026 (match_dup 1)))]
21027 "TARGET_AVX"
21028 "@
21029 vbroadcast<i128>\t{%1, %0|%0, %1}
21030 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
21031 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
21032 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
21033 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
21034 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
21035 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
21036 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
21037 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
21038 (set_attr "prefix_extra" "1")
21039 (set_attr "length_immediate" "0,1,1,0,1,0,1")
21040 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
21041 (set_attr "mode" "<sseinsnmode>")])
21042
21043 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
21044 (define_mode_iterator VI4F_BRCST32x2
21045 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
21046 V16SF (V8SF "TARGET_AVX512VL")])
21047
;; 2-element source mode for the 64x2 broadcast patterns.
21048 (define_mode_attr 64x2mode
21049 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
21050
;; 2-element source mode for the 32x2 broadcast patterns.
21051 (define_mode_attr 32x2mode
21052 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
21053 (V8SF "V2SF") (V4SI "V2SI")])
21054
;; vbroadcast{i,f}32x2: duplicate the low two 32-bit elements of an
;; XMM source across the destination (%q1 prints a 64-bit memory ref).
21055 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
21056 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
21057 (vec_duplicate:VI4F_BRCST32x2
21058 (vec_select:<32x2mode>
21059 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
21060 (parallel [(const_int 0) (const_int 1)]))))]
21061 "TARGET_AVX512DQ"
21062 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
21063 [(set_attr "type" "ssemov")
21064 (set_attr "prefix_extra" "1")
21065 (set_attr "prefix" "evex")
21066 (set_attr "mode" "<sseinsnmode>")])
21067
;; 128-bit chunk broadcast into a 256-bit destination.  Register form
;; is emulated with vshuf*32x4 imm 0 on the zero-extended source (%t1).
21068 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
21069 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
21070 (vec_duplicate:VI4F_256
21071 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
21072 "TARGET_AVX512VL"
21073 "@
21074 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
21075 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21076 [(set_attr "type" "ssemov")
21077 (set_attr "prefix_extra" "1")
21078 (set_attr "prefix" "evex")
21079 (set_attr "mode" "<sseinsnmode>")])
21080
;; 256-bit chunk broadcast into a 512-bit destination
;; (vbroadcast{i,f}32x8); register form emulated with vshuf*32x4
;; imm 0x44.
21081 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
21082 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
21083 (vec_duplicate:V16FI
21084 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
21085 "TARGET_AVX512DQ"
21086 "@
21087 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
21088 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21089 [(set_attr "type" "ssemov")
21090 (set_attr "prefix_extra" "1")
21091 (set_attr "prefix" "evex")
21092 (set_attr "mode" "<sseinsnmode>")])
21093
21094 ;; For broadcast[i|f]64x2
21095 (define_mode_iterator VI8F_BRCST64x2
21096 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
21097
;; vbroadcast{i,f}64x2: duplicate a 128-bit pair of 64-bit elements
;; across the destination; register form emulated with vshuf*64x2
;; imm 0 at the destination width (%<xtg_mode>1).
21098 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
21099 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
21100 (vec_duplicate:VI8F_BRCST64x2
21101 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
21102 "TARGET_AVX512DQ"
21103 "@
21104 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
21105 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21106 [(set_attr "type" "ssemov")
21107 (set_attr "prefix_extra" "1")
21108 (set_attr "prefix" "evex")
21109 (set_attr "mode" "<sseinsnmode>")])
21110
;; VPBROADCASTMB2Q: zero-extend a mask register's byte and broadcast it
;; to every 64-bit element.
21111 (define_insn "avx512cd_maskb_vec_dup<mode>"
21112 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21113 (vec_duplicate:VI8_AVX512VL
21114 (zero_extend:DI
21115 (match_operand:QI 1 "register_operand" "k"))))]
21116 "TARGET_AVX512CD"
21117 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
21118 [(set_attr "type" "mskmov")
21119 (set_attr "prefix" "evex")
21120 (set_attr "mode" "XI")])
21121
;; VPBROADCASTMW2D: zero-extend a mask register's word and broadcast it
;; to every 32-bit element.
21122 (define_insn "avx512cd_maskw_vec_dup<mode>"
21123 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
21124 (vec_duplicate:VI4_AVX512VL
21125 (zero_extend:SI
21126 (match_operand:HI 1 "register_operand" "k"))))]
21127 "TARGET_AVX512CD"
21128 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
21129 [(set_attr "type" "mskmov")
21130 (set_attr "prefix" "evex")
21131 (set_attr "mode" "XI")])
21132
;; Variable-control VPERMILPS/PD: per-element selectors come from an
;; integer vector operand instead of an immediate.
21133 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
21134 [(set (match_operand:VF 0 "register_operand" "=v")
21135 (unspec:VF
21136 [(match_operand:VF 1 "register_operand" "v")
21137 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
21138 UNSPEC_VPERMIL))]
21139 "TARGET_AVX && <mask_mode512bit_condition>"
21140 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21141 [(set_attr "type" "sselog")
21142 (set_attr "prefix_extra" "1")
21143 (set_attr "btver2_decode" "vector")
21144 (set_attr "prefix" "<mask_prefix>")
21145 (set_attr "mode" "<sseinsnmode>")])
21146
;; Modes supported by the two-source permute (vpermi2*/vpermt2*)
;; patterns; sub-512-bit modes require AVX512VL, HI modes AVX512BW,
;; and QI modes AVX512VBMI.
21147 (define_mode_iterator VPERMI2
21148 [V16SI V16SF V8DI V8DF
21149 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
21150 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
21151 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
21152 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
21153 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
21154 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
21155 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
21156 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
21157
;; Integer-only subset of VPERMI2, used where the merge operand is the
;; index register itself (no subreg needed).
21158 (define_mode_iterator VPERMI2I
21159 [V16SI V8DI
21160 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
21161 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
21162 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
21163 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
21164 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
21165 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
21166
;; Masked VPERMI2: masked-off destination elements keep the index
;; operand's value, so the merge operand (match_dup 5) is the index
;; register viewed in the data mode.
21167 (define_expand "<avx512>_vpermi2var<mode>3_mask"
21168 [(set (match_operand:VPERMI2 0 "register_operand")
21169 (vec_merge:VPERMI2
21170 (unspec:VPERMI2
21171 [(match_operand:<sseintvecmode> 2 "register_operand")
21172 (match_operand:VPERMI2 1 "register_operand")
21173 (match_operand:VPERMI2 3 "nonimmediate_operand")]
21174 UNSPEC_VPERMT2)
21175 (match_dup 5)
21176 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
21177 "TARGET_AVX512F"
21178 {
21179 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
21180 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
21181 })
21182
;; Integer masked VPERMI2 insn: index register ties to the destination
;; ("0") and doubles as the merge value (match_dup 2).
21183 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
21184 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
21185 (vec_merge:VPERMI2I
21186 (unspec:VPERMI2I
21187 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
21188 (match_operand:VPERMI2I 1 "register_operand" "v")
21189 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
21190 UNSPEC_VPERMT2)
21191 (match_dup 2)
21192 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21193 "TARGET_AVX512F"
21194 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21195 [(set_attr "type" "sselog")
21196 (set_attr "prefix" "evex")
21197 (set_attr "mode" "<sseinsnmode>")])
21198
;; FP masked VPERMI2 insn: same as the integer variant, but the merge
;; value is the integer index register reinterpreted in the FP data
;; mode via a subreg.
21199 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
21200 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21201 (vec_merge:VF_AVX512VL
21202 (unspec:VF_AVX512VL
21203 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
21204 (match_operand:VF_AVX512VL 1 "register_operand" "v")
21205 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
21206 UNSPEC_VPERMT2)
21207 (subreg:VF_AVX512VL (match_dup 2) 0)
21208 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21209 "TARGET_AVX512F"
21210 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21211 [(set_attr "type" "sselog")
21212 (set_attr "prefix" "evex")
21213 (set_attr "mode" "<sseinsnmode>")])
21214
;; Zero-masked VPERMT2: delegate to the maskz_1 pattern with a zero
;; vector as the merge operand.
21215 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
21216 [(match_operand:VPERMI2 0 "register_operand")
21217 (match_operand:<sseintvecmode> 1 "register_operand")
21218 (match_operand:VPERMI2 2 "register_operand")
21219 (match_operand:VPERMI2 3 "nonimmediate_operand")
21220 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21221 "TARGET_AVX512F"
21222 {
21223 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
21224 operands[0], operands[1], operands[2], operands[3],
21225 CONST0_RTX (<MODE>mode), operands[4]));
21226 DONE;
21227 })
21228
;; Unmasked two-source permute.  Depending on which input register
;; allocation ties to the destination, emit vpermt2* (data operand
;; tied) or vpermi2* (index operand tied) — the two encodings compute
;; the same result.
21229 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
21230 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
21231 (unspec:VPERMI2
21232 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
21233 (match_operand:VPERMI2 2 "register_operand" "0,v")
21234 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
21235 UNSPEC_VPERMT2))]
21236 "TARGET_AVX512F"
21237 "@
21238 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
21239 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
21240 [(set_attr "type" "sselog")
21241 (set_attr "prefix" "evex")
21242 (set_attr "mode" "<sseinsnmode>")])
21243
;; Masked VPERMT2: masked-off elements keep the first data operand's
;; value, so operand 2 ties to the destination and is the merge value.
21244 (define_insn "<avx512>_vpermt2var<mode>3_mask"
21245 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
21246 (vec_merge:VPERMI2
21247 (unspec:VPERMI2
21248 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
21249 (match_operand:VPERMI2 2 "register_operand" "0")
21250 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
21251 UNSPEC_VPERMT2)
21252 (match_dup 2)
21253 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21254 "TARGET_AVX512F"
21255 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21256 [(set_attr "type" "sselog")
21257 (set_attr "prefix" "evex")
21258 (set_attr "mode" "<sseinsnmode>")])
21259
;; VPERM2F128 expander.  When neither zeroing bit (3 or 7) of the imm8
;; is set, lower to a vec_select over the concatenation of the two
;; sources — the canonical RTL form that other patterns can optimize.
;; Otherwise fall through to the unspec-based *_full insn.
21260 (define_expand "avx_vperm2f128<mode>3"
21261 [(set (match_operand:AVX256MODE2P 0 "register_operand")
21262 (unspec:AVX256MODE2P
21263 [(match_operand:AVX256MODE2P 1 "register_operand")
21264 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
21265 (match_operand:SI 3 "const_0_to_255_operand")]
21266 UNSPEC_VPERMIL2F128))]
21267 "TARGET_AVX"
21268 {
21269 int mask = INTVAL (operands[3]);
;; Bits 3 and 7 zero a lane; only the no-zeroing case has a
;; vec_select representation.
21270 if ((mask & 0x88) == 0)
21271 {
21272 rtx perm[<ssescalarnum>], t1, t2;
21273 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
21274
;; Low half: lane selected by imm bits 0-1 of the 4-lane concat.
21275 base = (mask & 3) * nelt2;
21276 for (i = 0; i < nelt2; ++i)
21277 perm[i] = GEN_INT (base + i);
21278
;; High half: lane selected by imm bits 4-5.
21279 base = ((mask >> 4) & 3) * nelt2;
21280 for (i = 0; i < nelt2; ++i)
21281 perm[i + nelt2] = GEN_INT (base + i);
21282
21283 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
21284 operands[1], operands[2]);
21285 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
21286 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
21287 t2 = gen_rtx_SET (operands[0], t2);
21288 emit_insn (t2);
21289 DONE;
21290 }
21291 })
21292
21293 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
21294 ;; means that in order to represent this properly in rtl we'd have to
21295 ;; nest *another* vec_concat with a zero operand and do the select from
21296 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec fallback for VPERM2F128 when the imm8 may zero a lane (see
;; the comment above); emitted by the expander when it cannot use the
;; vec_select form.
21297 (define_insn "*avx_vperm2f128<mode>_full"
21298 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
21299 (unspec:AVX256MODE2P
21300 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
21301 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
21302 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21303 UNSPEC_VPERMIL2F128))]
21304 "TARGET_AVX"
21305 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21306 [(set_attr "type" "sselog")
21307 (set_attr "prefix_extra" "1")
21308 (set_attr "length_immediate" "1")
21309 (set_attr "prefix" "vex")
21310 (set_attr "mode" "<sseinsnmode>")])
21311
;; vec_select form of VPERM2F128 (no lane zeroing).  The PARALLEL is
;; validated by avx_vperm2f128_parallel, which returns the imm8 plus
;; one.  Two imm8 values are really lane insertions and are emitted as
;; the cheaper vinsertf128.
21312 (define_insn "*avx_vperm2f128<mode>_nozero"
21313 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
21314 (vec_select:AVX256MODE2P
21315 (vec_concat:<ssedoublevecmode>
21316 (match_operand:AVX256MODE2P 1 "register_operand" "x")
21317 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
21318 (match_parallel 3 ""
21319 [(match_operand 4 "const_int_operand")])))]
21320 "TARGET_AVX
21321 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
21322 {
;; Undo the +1 bias of avx_vperm2f128_parallel.
21323 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
;; 0x12 = op2 low half into low lane; 0x20 = op2 low half into high
;; lane: both are plain 128-bit inserts.
21324 if (mask == 0x12)
21325 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
21326 if (mask == 0x20)
21327 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
21328 operands[3] = GEN_INT (mask);
21329 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
21330 }
21331 [(set_attr "type" "sselog")
21332 (set_attr "prefix_extra" "1")
21333 (set_attr "length_immediate" "1")
21334 (set_attr "prefix" "vex")
21335 (set_attr "mode" "<sseinsnmode>")])
21336
;; Single-operand rotate expressed as a vec_select whose PARALLEL is a
;; palignr-style index run (palignr_operand); operand 3 is the element
;; shift count, converted to the byte count palignr expects.
21337 (define_insn "*ssse3_palignr<mode>_perm"
21338 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
21339 (vec_select:V_128
21340 (match_operand:V_128 1 "register_operand" "0,x,v")
21341 (match_parallel 2 "palignr_operand"
21342 [(match_operand 3 "const_int_operand" "n,n,n")])))]
21343 "TARGET_SSSE3"
21344 {
;; Element index -> byte offset for the palignr immediate.
21345 operands[2] = (GEN_INT (INTVAL (operands[3])
21346 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
21347
21348 switch (which_alternative)
21349 {
21350 case 0:
21351 return "palignr\t{%2, %1, %0|%0, %1, %2}";
21352 case 1:
;; AVX/AVX512BW: non-destructive form with %1 as both sources.
21353 case 2:
21354 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
21355 default:
21356 gcc_unreachable ();
21357 }
21358 }
21359 [(set_attr "isa" "noavx,avx,avx512bw")
21360 (set_attr "type" "sseishft")
21361 (set_attr "atom_unit" "sishuf")
21362 (set_attr "prefix_data16" "1,*,*")
21363 (set_attr "prefix_extra" "1")
21364 (set_attr "length_immediate" "1")
21365 (set_attr "prefix" "orig,vex,evex")])
21366
;; Masked 128-bit lane insert into a 256-bit vector (AVX512VL builtin
;; expander).  Operand 3 selects the low (0) or high (1) half and
;; dispatches to the corresponding vec_set_{lo,hi}_*_mask pattern;
;; operands 4/5 are the merge source and the mask register.
21367 (define_expand "avx512vl_vinsert<mode>"
21368 [(match_operand:VI48F_256 0 "register_operand")
21369 (match_operand:VI48F_256 1 "register_operand")
21370 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
21371 (match_operand:SI 3 "const_0_to_1_operand")
21372 (match_operand:VI48F_256 4 "register_operand")
21373 (match_operand:<avx512fmaskmode> 5 "register_operand")]
21374 "TARGET_AVX512VL"
21375 {
21376 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21377
21378 switch (INTVAL (operands[3]))
21379 {
21380 case 0:
21381 insn = gen_vec_set_lo_<mode>_mask;
21382 break;
21383 case 1:
21384 insn = gen_vec_set_hi_<mode>_mask;
21385 break;
21386 default:
21387 gcc_unreachable ();
21388 }
21389
21390 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
21391 operands[5]));
21392 DONE;
21393 })
21394
;; Unmasked 128-bit lane insert into a 256-bit vector: operand 3 (0/1)
;; selects which vec_set_{lo,hi}_<mode> pattern performs the insert.
21395 (define_expand "avx_vinsertf128<mode>"
21396 [(match_operand:V_256 0 "register_operand")
21397 (match_operand:V_256 1 "register_operand")
21398 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
21399 (match_operand:SI 3 "const_0_to_1_operand")]
21400 "TARGET_AVX"
21401 {
21402 rtx (*insn)(rtx, rtx, rtx);
21403
21404 switch (INTVAL (operands[3]))
21405 {
21406 case 0:
21407 insn = gen_vec_set_lo_<mode>;
21408 break;
21409 case 1:
21410 insn = gen_vec_set_hi_<mode>;
21411 break;
21412 default:
21413 gcc_unreachable ();
21414 }
21415
21416 emit_insn (insn (operands[0], operands[1], operands[2]));
21417 DONE;
21418 })
21419
;; Replace the low (lo) or high (hi) 128-bit half of a 256-bit vector of
;; 64-bit elements with operand 2, keeping the other half of operand 1.
;; Picks the 64x2/32x4 EVEX forms when AVX512DQ/AVX512VL are available,
;; otherwise the plain vinsertf128/vinserti128.
21420 (define_insn "vec_set_lo_<mode><mask_name>"
21421 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
21422 (vec_concat:VI8F_256
21423 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
21424 (vec_select:<ssehalfvecmode>
21425 (match_operand:VI8F_256 1 "register_operand" "v")
21426 (parallel [(const_int 2) (const_int 3)]))))]
21427 "TARGET_AVX && <mask_avx512dq_condition>"
21428 {
21429 if (TARGET_AVX512DQ)
21430 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21431 else if (TARGET_AVX512VL)
21432 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21433 else
21434 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21435 }
21436 [(set_attr "type" "sselog")
21437 (set_attr "prefix_extra" "1")
21438 (set_attr "length_immediate" "1")
21439 (set_attr "prefix" "vex")
21440 (set_attr "mode" "<sseinsnmode>")])
21441
21442 (define_insn "vec_set_hi_<mode><mask_name>"
21443 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
21444 (vec_concat:VI8F_256
21445 (vec_select:<ssehalfvecmode>
21446 (match_operand:VI8F_256 1 "register_operand" "v")
21447 (parallel [(const_int 0) (const_int 1)]))
21448 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21449 "TARGET_AVX && <mask_avx512dq_condition>"
21450 {
21451 if (TARGET_AVX512DQ)
21452 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21453 else if (TARGET_AVX512VL)
21454 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21455 else
21456 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21457 }
21458 [(set_attr "type" "sselog")
21459 (set_attr "prefix_extra" "1")
21460 (set_attr "length_immediate" "1")
21461 (set_attr "prefix" "vex")
21462 (set_attr "mode" "<sseinsnmode>")])
21463
;; Same lane replacement as above but for 256-bit vectors of 32-bit
;; elements; only the 32x4 EVEX form applies under AVX512VL.
21464 (define_insn "vec_set_lo_<mode><mask_name>"
21465 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21466 (vec_concat:VI4F_256
21467 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
21468 (vec_select:<ssehalfvecmode>
21469 (match_operand:VI4F_256 1 "register_operand" "v")
21470 (parallel [(const_int 4) (const_int 5)
21471 (const_int 6) (const_int 7)]))))]
21472 "TARGET_AVX"
21473 {
21474 if (TARGET_AVX512VL)
21475 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21476 else
21477 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21478 }
21479 [(set_attr "type" "sselog")
21480 (set_attr "prefix_extra" "1")
21481 (set_attr "length_immediate" "1")
21482 (set_attr "prefix" "vex")
21483 (set_attr "mode" "<sseinsnmode>")])
21484
21485 (define_insn "vec_set_hi_<mode><mask_name>"
21486 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21487 (vec_concat:VI4F_256
21488 (vec_select:<ssehalfvecmode>
21489 (match_operand:VI4F_256 1 "register_operand" "v")
21490 (parallel [(const_int 0) (const_int 1)
21491 (const_int 2) (const_int 3)]))
21492 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21493 "TARGET_AVX"
21494 {
21495 if (TARGET_AVX512VL)
21496 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21497 else
21498 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21499 }
21500 [(set_attr "type" "sselog")
21501 (set_attr "prefix_extra" "1")
21502 (set_attr "length_immediate" "1")
21503 (set_attr "prefix" "vex")
21504 (set_attr "mode" "<sseinsnmode>")])
21505
;; 128-bit half replacement for V16HI: alternative 0 is the VEX
;; vinsertf128/vinserti128 form, alternative 1 the EVEX vinserti32x4.
21506 (define_insn "vec_set_lo_v16hi"
21507 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21508 (vec_concat:V16HI
21509 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
21510 (vec_select:V8HI
21511 (match_operand:V16HI 1 "register_operand" "x,v")
21512 (parallel [(const_int 8) (const_int 9)
21513 (const_int 10) (const_int 11)
21514 (const_int 12) (const_int 13)
21515 (const_int 14) (const_int 15)]))))]
21516 "TARGET_AVX"
21517 "@
21518 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21519 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21520 [(set_attr "type" "sselog")
21521 (set_attr "prefix_extra" "1")
21522 (set_attr "length_immediate" "1")
21523 (set_attr "prefix" "vex,evex")
21524 (set_attr "mode" "OI")])
21525
21526 (define_insn "vec_set_hi_v16hi"
21527 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21528 (vec_concat:V16HI
21529 (vec_select:V8HI
21530 (match_operand:V16HI 1 "register_operand" "x,v")
21531 (parallel [(const_int 0) (const_int 1)
21532 (const_int 2) (const_int 3)
21533 (const_int 4) (const_int 5)
21534 (const_int 6) (const_int 7)]))
21535 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
21536 "TARGET_AVX"
21537 "@
21538 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21539 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21540 [(set_attr "type" "sselog")
21541 (set_attr "prefix_extra" "1")
21542 (set_attr "length_immediate" "1")
21543 (set_attr "prefix" "vex,evex")
21544 (set_attr "mode" "OI")])
21545
;; Replace the low 128 bits of a V32QI with operand 2, keeping the high
;; half of operand 1.  Alternative 0 uses VEX vinsert{f,i}128,
;; alternative 1 the EVEX vinserti32x4.
;; Fix: the EVEX alternative's constraint was "xm,v", which (unlike the
;; sibling vec_set_hi_v32qi and both v16hi patterns, all "xm,vm")
;; needlessly forbade a memory source even though vinserti32x4 accepts
;; an m128 operand.  Use "xm,vm" for consistency.
21546 (define_insn "vec_set_lo_v32qi"
21547 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21548 (vec_concat:V32QI
21549 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
21550 (vec_select:V16QI
21551 (match_operand:V32QI 1 "register_operand" "x,v")
21552 (parallel [(const_int 16) (const_int 17)
21553 (const_int 18) (const_int 19)
21554 (const_int 20) (const_int 21)
21555 (const_int 22) (const_int 23)
21556 (const_int 24) (const_int 25)
21557 (const_int 26) (const_int 27)
21558 (const_int 28) (const_int 29)
21559 (const_int 30) (const_int 31)]))))]
21560 "TARGET_AVX"
21561 "@
21562 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21563 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21564 [(set_attr "type" "sselog")
21565 (set_attr "prefix_extra" "1")
21566 (set_attr "length_immediate" "1")
21567 (set_attr "prefix" "vex,evex")
21568 (set_attr "mode" "OI")])
21569
;; Replace the high 128 bits of a V32QI with operand 2, keeping the low
;; half of operand 1 (VEX and EVEX alternatives as above).
21570 (define_insn "vec_set_hi_v32qi"
21571 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21572 (vec_concat:V32QI
21573 (vec_select:V16QI
21574 (match_operand:V32QI 1 "register_operand" "x,v")
21575 (parallel [(const_int 0) (const_int 1)
21576 (const_int 2) (const_int 3)
21577 (const_int 4) (const_int 5)
21578 (const_int 6) (const_int 7)
21579 (const_int 8) (const_int 9)
21580 (const_int 10) (const_int 11)
21581 (const_int 12) (const_int 13)
21582 (const_int 14) (const_int 15)]))
21583 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
21584 "TARGET_AVX"
21585 "@
21586 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21587 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21588 [(set_attr "type" "sselog")
21589 (set_attr "prefix_extra" "1")
21590 (set_attr "length_immediate" "1")
21591 (set_attr "prefix" "vex,evex")
21592 (set_attr "mode" "OI")])
21593
;; vmaskmovps/pd and vpmaskmovd/q: masked vector load from memory
;; (operand 1) and masked store to memory (operand 0).  The store
;; pattern reads the destination via match_dup 0 since unmasked
;; elements are left unchanged in memory.
21594 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
21595 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
21596 (unspec:V48_AVX2
21597 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
21598 (match_operand:V48_AVX2 1 "memory_operand" "m")]
21599 UNSPEC_MASKMOV))]
21600 "TARGET_AVX"
21601 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
21602 [(set_attr "type" "sselog1")
21603 (set_attr "prefix_extra" "1")
21604 (set_attr "prefix" "vex")
21605 (set_attr "btver2_decode" "vector")
21606 (set_attr "mode" "<sseinsnmode>")])
21607
21608 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
21609 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
21610 (unspec:V48_AVX2
21611 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
21612 (match_operand:V48_AVX2 2 "register_operand" "x")
21613 (match_dup 0)]
21614 UNSPEC_MASKMOV))]
21615 "TARGET_AVX"
21616 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21617 [(set_attr "type" "sselog1")
21618 (set_attr "prefix_extra" "1")
21619 (set_attr "prefix" "vex")
21620 (set_attr "btver2_decode" "vector")
21621 (set_attr "mode" "<sseinsnmode>")])
21622
;; Standard maskload/maskstore optab expanders.  The AVX/AVX2 forms use
;; the UNSPEC_MASKMOV patterns above with a vector mask; the AVX512
;; forms express the operation directly as a vec_merge with a mask
;; register (AVX512F for 32/64-bit elements, AVX512BW for 8/16-bit).
21623 (define_expand "maskload<mode><sseintvecmodelower>"
21624 [(set (match_operand:V48_AVX2 0 "register_operand")
21625 (unspec:V48_AVX2
21626 [(match_operand:<sseintvecmode> 2 "register_operand")
21627 (match_operand:V48_AVX2 1 "memory_operand")]
21628 UNSPEC_MASKMOV))]
21629 "TARGET_AVX")
21630
21631 (define_expand "maskload<mode><avx512fmaskmodelower>"
21632 [(set (match_operand:V48_AVX512VL 0 "register_operand")
21633 (vec_merge:V48_AVX512VL
21634 (match_operand:V48_AVX512VL 1 "memory_operand")
21635 (match_dup 0)
21636 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21637 "TARGET_AVX512F")
21638
21639 (define_expand "maskload<mode><avx512fmaskmodelower>"
21640 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
21641 (vec_merge:VI12_AVX512VL
21642 (match_operand:VI12_AVX512VL 1 "memory_operand")
21643 (match_dup 0)
21644 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21645 "TARGET_AVX512BW")
21646
21647 (define_expand "maskstore<mode><sseintvecmodelower>"
21648 [(set (match_operand:V48_AVX2 0 "memory_operand")
21649 (unspec:V48_AVX2
21650 [(match_operand:<sseintvecmode> 2 "register_operand")
21651 (match_operand:V48_AVX2 1 "register_operand")
21652 (match_dup 0)]
21653 UNSPEC_MASKMOV))]
21654 "TARGET_AVX")
21655
21656 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21657 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
21658 (vec_merge:V48_AVX512VL
21659 (match_operand:V48_AVX512VL 1 "register_operand")
21660 (match_dup 0)
21661 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21662 "TARGET_AVX512F")
21663
21664 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21665 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
21666 (vec_merge:VI12_AVX512VL
21667 (match_operand:VI12_AVX512VL 1 "register_operand")
21668 (match_dup 0)
21669 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21670 "TARGET_AVX512BW")
21671
;; Conditional branch on vector equality (ptest-style comparison of two
;; whole vectors); defers to ix86_expand_branch for the actual sequence.
21672 (define_expand "cbranch<mode>4"
21673 [(set (reg:CC FLAGS_REG)
21674 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
21675 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
21676 (set (pc) (if_then_else
21677 (match_operator 0 "bt_comparison_operator"
21678 [(reg:CC FLAGS_REG) (const_int 0)])
21679 (label_ref (match_operand 3))
21680 (pc)))]
21681 "TARGET_SSE4_1"
21682 {
21683 ix86_expand_branch (GET_CODE (operands[0]),
21684 operands[1], operands[2], operands[3]);
21685 DONE;
21686 })
21687
21688
;; 128->256-bit cast with undefined upper half (UNSPEC_CAST): split
;; after reload into a plain half-width move, taking the low part of
;; either the destination register or the memory source as appropriate.
21689 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
21690 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
21691 (vec_concat:AVX256MODE2P
21692 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
21693 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
21694 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21695 "#"
21696 "&& reload_completed"
21697 [(set (match_dup 0) (match_dup 1))]
21698 {
21699 if (REG_P (operands[0]))
21700 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
21701 else
21702 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21703 <ssehalfvecmode>mode);
21704 })
21705
21706 ;; Modes handled by vec_init expanders.
;; Modes handled by vec_init expanders.
21707 (define_mode_iterator VEC_INIT_MODE
21708 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21709 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21710 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21711 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
21712 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21713 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
21714 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
21715
21716 ;; Likewise, but for initialization from half sized vectors.
21717 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
21718 (define_mode_iterator VEC_INIT_HALF_MODE
21719 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21720 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21721 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21722 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
21723 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21724 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
21725 (V4TI "TARGET_AVX512F")])
21726
;; vec_init from scalar elements: all the work is done by
;; ix86_expand_vector_init.
21727 (define_expand "vec_init<mode><ssescalarmodelower>"
21728 [(match_operand:VEC_INIT_MODE 0 "register_operand")
21729 (match_operand 1)]
21730 "TARGET_SSE"
21731 {
21732 ix86_expand_vector_init (false, operands[0], operands[1]);
21733 DONE;
21734 })
21735
;; vec_init from two half-width vectors; same helper handles both forms.
21736 (define_expand "vec_init<mode><ssehalfvecmodelower>"
21737 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
21738 (match_operand 1)]
21739 "TARGET_SSE"
21740 {
21741 ix86_expand_vector_init (false, operands[0], operands[1]);
21742 DONE;
21743 })
21744
;; Per-element variable shifts.  vpsravd/q (arithmetic right) for 32/64
;; bit elements under AVX2 (with AVX512 masking where available),
;; vpsravw for 16-bit elements under AVX512BW, and the corresponding
;; vpsllv*/vpsrlv* left/logical-right forms via the any_lshift iterator.
21745 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21746 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
21747 (ashiftrt:VI48_AVX512F_AVX512VL
21748 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
21749 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
21750 "TARGET_AVX2 && <mask_mode512bit_condition>"
21751 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21752 [(set_attr "type" "sseishft")
21753 (set_attr "prefix" "maybe_evex")
21754 (set_attr "mode" "<sseinsnmode>")])
21755
21756 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21757 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21758 (ashiftrt:VI2_AVX512VL
21759 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
21760 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
21761 "TARGET_AVX512BW"
21762 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21763 [(set_attr "type" "sseishft")
21764 (set_attr "prefix" "maybe_evex")
21765 (set_attr "mode" "<sseinsnmode>")])
21766
21767 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
21768 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
21769 (any_lshift:VI48_AVX512F
21770 (match_operand:VI48_AVX512F 1 "register_operand" "v")
21771 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
21772 "TARGET_AVX2 && <mask_mode512bit_condition>"
21773 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21774 [(set_attr "type" "sseishft")
21775 (set_attr "prefix" "maybe_evex")
21776 (set_attr "mode" "<sseinsnmode>")])
21777
21778 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
21779 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21780 (any_lshift:VI2_AVX512VL
21781 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
21782 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
21783 "TARGET_AVX512BW"
21784 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21785 [(set_attr "type" "sseishft")
21786 (set_attr "prefix" "maybe_evex")
21787 (set_attr "mode" "<sseinsnmode>")])
21788
;; Concatenate two half-width vectors into a 256/512-bit vector.
;; Alternatives 0/1: real insert of operand 2 into the upper half
;; (vinsertf128/vinserti128 or the EVEX 32x4/32x8/64x2/64x4 variants,
;; chosen by vector size and element width).  Alternatives 2/3: operand
;; 2 is zero, so the concat degenerates to a half-width move whose
;; mnemonic (aps/apd/dqa, aligned vs unaligned, 32- vs 64-bit masked
;; form) is picked from the insn's mode attribute and the alignment of
;; operand 1.  The insn condition forbids a memory operand 1 unless
;; operand 2 is the zero alternative.
21789 (define_insn "avx_vec_concat<mode>"
21790 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
21791 (vec_concat:V_256_512
21792 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
21793 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
21794 "TARGET_AVX
21795 && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
21796 || !MEM_P (operands[1]))"
21797 {
21798 switch (which_alternative)
21799 {
21800 case 0:
21801 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21802 case 1:
21803 if (<MODE_SIZE> == 64)
21804 {
21805 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
21806 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21807 else
21808 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21809 }
21810 else
21811 {
21812 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21813 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21814 else
21815 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21816 }
21817 case 2:
21818 case 3:
21819 switch (get_attr_mode (insn))
21820 {
21821 case MODE_V16SF:
21822 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21823 return "vmovups\t{%1, %t0|%t0, %1}";
21824 else
21825 return "vmovaps\t{%1, %t0|%t0, %1}";
21826 case MODE_V8DF:
21827 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21828 return "vmovupd\t{%1, %t0|%t0, %1}";
21829 else
21830 return "vmovapd\t{%1, %t0|%t0, %1}";
21831 case MODE_V8SF:
21832 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21833 return "vmovups\t{%1, %x0|%x0, %1}";
21834 else
21835 return "vmovaps\t{%1, %x0|%x0, %1}";
21836 case MODE_V4DF:
21837 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21838 return "vmovupd\t{%1, %x0|%x0, %1}";
21839 else
21840 return "vmovapd\t{%1, %x0|%x0, %1}";
21841 case MODE_XI:
21842 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21843 {
21844 if (which_alternative == 2)
21845 return "vmovdqu\t{%1, %t0|%t0, %1}";
21846 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21847 return "vmovdqu64\t{%1, %t0|%t0, %1}";
21848 else
21849 return "vmovdqu32\t{%1, %t0|%t0, %1}";
21850 }
21851 else
21852 {
21853 if (which_alternative == 2)
21854 return "vmovdqa\t{%1, %t0|%t0, %1}";
21855 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21856 return "vmovdqa64\t{%1, %t0|%t0, %1}";
21857 else
21858 return "vmovdqa32\t{%1, %t0|%t0, %1}";
21859 }
21860 case MODE_OI:
21861 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21862 {
21863 if (which_alternative == 2)
21864 return "vmovdqu\t{%1, %x0|%x0, %1}";
21865 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21866 return "vmovdqu64\t{%1, %x0|%x0, %1}";
21867 else
21868 return "vmovdqu32\t{%1, %x0|%x0, %1}";
21869 }
21870 else
21871 {
21872 if (which_alternative == 2)
21873 return "vmovdqa\t{%1, %x0|%x0, %1}";
21874 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21875 return "vmovdqa64\t{%1, %x0|%x0, %1}";
21876 else
21877 return "vmovdqa32\t{%1, %x0|%x0, %1}";
21878 }
21879 default:
21880 gcc_unreachable ();
21881 }
21882 default:
21883 gcc_unreachable ();
21884 }
21885 }
21886 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
21887 (set_attr "prefix_extra" "1,1,*,*")
21888 (set_attr "length_immediate" "1,1,*,*")
21889 (set_attr "prefix" "maybe_evex")
21890 (set_attr "mode" "<sseinsnmode>")])
21891
;; Half-precision to single-precision conversions (F16C/AVX512):
;; 128-bit register form (low 4 halves of a V8HI), 64-bit memory form,
;; 256-bit form, and the 512-bit AVX512F form with SAE rounding control.
21892 (define_insn "vcvtph2ps<mask_name>"
21893 [(set (match_operand:V4SF 0 "register_operand" "=v")
21894 (vec_select:V4SF
21895 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
21896 UNSPEC_VCVTPH2PS)
21897 (parallel [(const_int 0) (const_int 1)
21898 (const_int 2) (const_int 3)])))]
21899 "TARGET_F16C || TARGET_AVX512VL"
21900 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21901 [(set_attr "type" "ssecvt")
21902 (set_attr "prefix" "maybe_evex")
21903 (set_attr "mode" "V4SF")])
21904
21905 (define_insn "*vcvtph2ps_load<mask_name>"
21906 [(set (match_operand:V4SF 0 "register_operand" "=v")
21907 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
21908 UNSPEC_VCVTPH2PS))]
21909 "TARGET_F16C || TARGET_AVX512VL"
21910 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21911 [(set_attr "type" "ssecvt")
21912 (set_attr "prefix" "vex")
21913 (set_attr "mode" "V8SF")])
21914
21915 (define_insn "vcvtph2ps256<mask_name>"
21916 [(set (match_operand:V8SF 0 "register_operand" "=v")
21917 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
21918 UNSPEC_VCVTPH2PS))]
21919 "TARGET_F16C || TARGET_AVX512VL"
21920 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21921 [(set_attr "type" "ssecvt")
21922 (set_attr "prefix" "vex")
21923 (set_attr "btver2_decode" "double")
21924 (set_attr "mode" "V8SF")])
21925
21926 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
21927 [(set (match_operand:V16SF 0 "register_operand" "=v")
21928 (unspec:V16SF
21929 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
21930 UNSPEC_VCVTPH2PS))]
21931 "TARGET_AVX512F"
21932 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
21933 [(set_attr "type" "ssecvt")
21934 (set_attr "prefix" "evex")
21935 (set_attr "mode" "V16SF")])
21936
;; Single-precision to half-precision conversions (F16C/AVX512).
;; Operand 2 is the rounding-control immediate throughout.  The 128-bit
;; register results are a V4HI conversion concatenated with zeros to
;; fill a V8HI; the *_store / *256<merge_mask_name> patterns are the
;; direct-to-memory forms, and the 512-bit AVX512F pair covers register
;; and memory destinations respectively.
21937 (define_expand "vcvtps2ph_mask"
21938 [(set (match_operand:V8HI 0 "register_operand")
21939 (vec_merge:V8HI
21940 (vec_concat:V8HI
21941 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
21942 (match_operand:SI 2 "const_0_to_255_operand")]
21943 UNSPEC_VCVTPS2PH)
21944 (match_dup 5))
21945 (match_operand:V8HI 3 "nonimm_or_0_operand")
21946 (match_operand:QI 4 "register_operand")))]
21947 "TARGET_AVX512VL"
21948 "operands[5] = CONST0_RTX (V4HImode);")
21949
21950 (define_expand "vcvtps2ph"
21951 [(set (match_operand:V8HI 0 "register_operand")
21952 (vec_concat:V8HI
21953 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
21954 (match_operand:SI 2 "const_0_to_255_operand")]
21955 UNSPEC_VCVTPS2PH)
21956 (match_dup 3)))]
21957 "TARGET_F16C"
21958 "operands[3] = CONST0_RTX (V4HImode);")
21959
21960 (define_insn "*vcvtps2ph<mask_name>"
21961 [(set (match_operand:V8HI 0 "register_operand" "=v")
21962 (vec_concat:V8HI
21963 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
21964 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21965 UNSPEC_VCVTPS2PH)
21966 (match_operand:V4HI 3 "const0_operand")))]
21967 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
21968 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
21969 [(set_attr "type" "ssecvt")
21970 (set_attr "prefix" "maybe_evex")
21971 (set_attr "mode" "V4SF")])
21972
21973 (define_insn "*vcvtps2ph_store<merge_mask_name>"
21974 [(set (match_operand:V4HI 0 "memory_operand" "=m")
21975 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
21976 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21977 UNSPEC_VCVTPS2PH))]
21978 "TARGET_F16C || TARGET_AVX512VL"
21979 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
21980 [(set_attr "type" "ssecvt")
21981 (set_attr "prefix" "maybe_evex")
21982 (set_attr "mode" "V4SF")])
21983
21984 (define_insn "vcvtps2ph256<mask_name>"
21985 [(set (match_operand:V8HI 0 "register_operand" "=v")
21986 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
21987 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21988 UNSPEC_VCVTPS2PH))]
21989 "TARGET_F16C || TARGET_AVX512VL"
21990 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21991 [(set_attr "type" "ssecvt")
21992 (set_attr "prefix" "maybe_evex")
21993 (set_attr "btver2_decode" "vector")
21994 (set_attr "mode" "V8SF")])
21995
21996 (define_insn "*vcvtps2ph256<merge_mask_name>"
21997 [(set (match_operand:V8HI 0 "memory_operand" "=m")
21998 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
21999 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22000 UNSPEC_VCVTPS2PH))]
22001 "TARGET_F16C || TARGET_AVX512VL"
22002 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22003 [(set_attr "type" "ssecvt")
22004 (set_attr "prefix" "maybe_evex")
22005 (set_attr "btver2_decode" "vector")
22006 (set_attr "mode" "V8SF")])
22007
22008 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
22009 [(set (match_operand:V16HI 0 "register_operand" "=v")
22010 (unspec:V16HI
22011 [(match_operand:V16SF 1 "register_operand" "v")
22012 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22013 UNSPEC_VCVTPS2PH))]
22014 "TARGET_AVX512F"
22015 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22016 [(set_attr "type" "ssecvt")
22017 (set_attr "prefix" "evex")
22018 (set_attr "mode" "V16SF")])
22019
22020 (define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
22021 [(set (match_operand:V16HI 0 "memory_operand" "=m")
22022 (unspec:V16HI
22023 [(match_operand:V16SF 1 "register_operand" "v")
22024 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22025 UNSPEC_VCVTPS2PH))]
22026 "TARGET_AVX512F"
22027 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22028 [(set_attr "type" "ssecvt")
22029 (set_attr "prefix" "evex")
22030 (set_attr "mode" "V16SF")])
22031
22032 ;; For gather* insn patterns
22033 (define_mode_iterator VEC_GATHER_MODE
22034 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode when indexing with 32-bit elements.
22035 (define_mode_attr VEC_GATHER_IDXSI
22036 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
22037 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
22038 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
22039 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
22040
;; Index-vector mode when indexing with 64-bit elements.
22041 (define_mode_attr VEC_GATHER_IDXDI
22042 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
22043 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
22044 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
22045 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
22046
;; Source/mask vector mode for the 64-bit-index (gatherq) forms, where
;; only half the data elements are produced for 32-bit element modes.
22047 (define_mode_attr VEC_GATHER_SRCDI
22048 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
22049 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
22050 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
22051 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
22052
;; AVX2 gather with 32-bit indices (builtin expander).  Operand 6 is the
;; VSIB address UNSPEC built from the base (2), index vector (3) and
;; scale (5); operand 4 is the merge mask, operand 7 a scratch the insn
;; patterns use for the clobbered mask register.
;; Fix: the predicate string contained a stray trailing space
;; ("const1248_operand ") — the insn patterns below all spell it
;; without one; normalized for consistency.
22053 (define_expand "avx2_gathersi<mode>"
22054 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
22055 (unspec:VEC_GATHER_MODE
22056 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
22057 (mem:<ssescalarmode>
22058 (match_par_dup 6
22059 [(match_operand 2 "vsib_address_operand")
22060 (match_operand:<VEC_GATHER_IDXSI>
22061 3 "register_operand")
22062 (match_operand:SI 5 "const1248_operand")]))
22063 (mem:BLK (scratch))
22064 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
22065 UNSPEC_GATHER))
22066 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
22067 "TARGET_AVX2"
22068 {
22069 operands[6]
22070 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22071 operands[5]), UNSPEC_VSIBADDR);
22072 })
22073
;; vgatherd* insn patterns.  The _2 variant matches the form where the
;; merge source slot holds (pc), i.e. no previous-destination input.
;; %M emits the segment-override prefix for the base address if any;
;; outputs are earlyclobber and the mask operand is clobbered (tied to
;; the scratch via constraint "1"), matching hardware behavior.
22074 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
22075 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22076 (unspec:VEC_GATHER_MODE
22077 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
22078 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22079 [(unspec:P
22080 [(match_operand:P 3 "vsib_address_operand" "Tv")
22081 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
22082 (match_operand:SI 6 "const1248_operand" "n")]
22083 UNSPEC_VSIBADDR)])
22084 (mem:BLK (scratch))
22085 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
22086 UNSPEC_GATHER))
22087 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22088 "TARGET_AVX2"
22089 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
22090 [(set_attr "type" "ssemov")
22091 (set_attr "prefix" "vex")
22092 (set_attr "mode" "<sseinsnmode>")])
22093
22094 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
22095 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22096 (unspec:VEC_GATHER_MODE
22097 [(pc)
22098 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22099 [(unspec:P
22100 [(match_operand:P 2 "vsib_address_operand" "Tv")
22101 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
22102 (match_operand:SI 5 "const1248_operand" "n")]
22103 UNSPEC_VSIBADDR)])
22104 (mem:BLK (scratch))
22105 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
22106 UNSPEC_GATHER))
22107 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22108 "TARGET_AVX2"
22109 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
22110 [(set_attr "type" "ssemov")
22111 (set_attr "prefix" "vex")
22112 (set_attr "mode" "<sseinsnmode>")])
22113
;; AVX2 gather with 64-bit indices (builtin expander).  Source and mask
;; use VEC_GATHER_SRCDI, which halves the data width for 32-bit element
;; modes since a 64-bit index covers only half as many elements.
;; Fix: the predicate string contained a stray trailing space
;; ("const1248_operand ") — the insn patterns below all spell it
;; without one; normalized for consistency.
22114 (define_expand "avx2_gatherdi<mode>"
22115 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
22116 (unspec:VEC_GATHER_MODE
22117 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
22118 (mem:<ssescalarmode>
22119 (match_par_dup 6
22120 [(match_operand 2 "vsib_address_operand")
22121 (match_operand:<VEC_GATHER_IDXDI>
22122 3 "register_operand")
22123 (match_operand:SI 5 "const1248_operand")]))
22124 (mem:BLK (scratch))
22125 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
22126 UNSPEC_GATHER))
22127 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
22128 "TARGET_AVX2"
22129 {
22130 operands[6]
22131 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22132 operands[5]), UNSPEC_VSIBADDR);
22133 })
22134
;; vgatherq* insn patterns.  _2: no previous-destination input ((pc) in
;; the merge slot); when the result is narrower than the full vector
;; mode the low half (%x0) is written.  _3/_4: forms where combine has
;; wrapped the gather in a vec_select taking the low four elements of a
;; 256-bit result, with and without a previous destination.
22135 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
22136 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22137 (unspec:VEC_GATHER_MODE
22138 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
22139 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22140 [(unspec:P
22141 [(match_operand:P 3 "vsib_address_operand" "Tv")
22142 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
22143 (match_operand:SI 6 "const1248_operand" "n")]
22144 UNSPEC_VSIBADDR)])
22145 (mem:BLK (scratch))
22146 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
22147 UNSPEC_GATHER))
22148 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22149 "TARGET_AVX2"
22150 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
22151 [(set_attr "type" "ssemov")
22152 (set_attr "prefix" "vex")
22153 (set_attr "mode" "<sseinsnmode>")])
22154
22155 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
22156 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22157 (unspec:VEC_GATHER_MODE
22158 [(pc)
22159 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22160 [(unspec:P
22161 [(match_operand:P 2 "vsib_address_operand" "Tv")
22162 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
22163 (match_operand:SI 5 "const1248_operand" "n")]
22164 UNSPEC_VSIBADDR)])
22165 (mem:BLK (scratch))
22166 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
22167 UNSPEC_GATHER))
22168 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22169 "TARGET_AVX2"
22170 {
22171 if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
22172 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
22173 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
22174 }
22175 [(set_attr "type" "ssemov")
22176 (set_attr "prefix" "vex")
22177 (set_attr "mode" "<sseinsnmode>")])
22178
22179 (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
22180 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
22181 (vec_select:<VEC_GATHER_SRCDI>
22182 (unspec:VI4F_256
22183 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
22184 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22185 [(unspec:P
22186 [(match_operand:P 3 "vsib_address_operand" "Tv")
22187 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
22188 (match_operand:SI 6 "const1248_operand" "n")]
22189 UNSPEC_VSIBADDR)])
22190 (mem:BLK (scratch))
22191 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
22192 UNSPEC_GATHER)
22193 (parallel [(const_int 0) (const_int 1)
22194 (const_int 2) (const_int 3)])))
22195 (clobber (match_scratch:VI4F_256 1 "=&x"))]
22196 "TARGET_AVX2"
22197 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
22198 [(set_attr "type" "ssemov")
22199 (set_attr "prefix" "vex")
22200 (set_attr "mode" "<sseinsnmode>")])
22201
22202 (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
22203 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
22204 (vec_select:<VEC_GATHER_SRCDI>
22205 (unspec:VI4F_256
22206 [(pc)
22207 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22208 [(unspec:P
22209 [(match_operand:P 2 "vsib_address_operand" "Tv")
22210 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
22211 (match_operand:SI 5 "const1248_operand" "n")]
22212 UNSPEC_VSIBADDR)])
22213 (mem:BLK (scratch))
22214 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
22215 UNSPEC_GATHER)
22216 (parallel [(const_int 0) (const_int 1)
22217 (const_int 2) (const_int 3)])))
22218 (clobber (match_scratch:VI4F_256 1 "=&x"))]
22219 "TARGET_AVX2"
22220 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
22221 [(set_attr "type" "ssemov")
22222 (set_attr "prefix" "vex")
22223 (set_attr "mode" "<sseinsnmode>")])
22224
;; AVX-512 masked gathers with doubleword (32-bit) indices.  The expander
;; builds the UNSPEC_VSIBADDR address from base/index/scale and dups it
;; into the mem via match_par_dup; the match_scratch clobber models the
;; mask register being destroyed by the instruction.
22225 (define_expand "<avx512>_gathersi<mode>"
22226 [(parallel [(set (match_operand:VI48F 0 "register_operand")
22227 (unspec:VI48F
22228 [(match_operand:VI48F 1 "register_operand")
22229 (match_operand:<avx512fmaskmode> 4 "register_operand")
22230 (mem:<ssescalarmode>
22231 (match_par_dup 6
22232 [(match_operand 2 "vsib_address_operand")
22233 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
22234 (match_operand:SI 5 "const1248_operand")]))]
22235 UNSPEC_GATHER))
22236 (clobber (match_scratch:<avx512fmaskmode> 7))])]
22237 "TARGET_AVX512F"
22238 {
  /* Operand 6 is the parallel matched by vsib_mem_operator in the
     corresponding insn patterns below.  */
22239 operands[6]
22240 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22241 operands[5]), UNSPEC_VSIBADDR);
22242 })
22243
;; Insn matching the expander above; operand 7 ties the mask input to the
;; clobbered mask scratch (operand 2), since the hardware consumes it.
22244 (define_insn "*avx512f_gathersi<VI48F:mode>"
22245 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22246 (unspec:VI48F
22247 [(match_operand:VI48F 1 "register_operand" "0")
22248 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
22249 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22250 [(unspec:P
22251 [(match_operand:P 4 "vsib_address_operand" "Tv")
22252 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
22253 (match_operand:SI 5 "const1248_operand" "n")]
22254 UNSPEC_VSIBADDR)])]
22255 UNSPEC_GATHER))
22256 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
22257 "TARGET_AVX512F"
22258 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
22259 ;; gas changed what it requires incompatibly.
22260 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
22261 [(set_attr "type" "ssemov")
22262 (set_attr "prefix" "evex")
22263 (set_attr "mode" "<sseinsnmode>")])
22264
;; Variant with the (pc) placeholder for the unused merge input.
22265 (define_insn "*avx512f_gathersi<VI48F:mode>_2"
22266 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22267 (unspec:VI48F
22268 [(pc)
22269 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
22270 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
22271 [(unspec:P
22272 [(match_operand:P 3 "vsib_address_operand" "Tv")
22273 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
22274 (match_operand:SI 4 "const1248_operand" "n")]
22275 UNSPEC_VSIBADDR)])]
22276 UNSPEC_GATHER))
22277 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
22278 "TARGET_AVX512F"
22279 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22280 ;; gas changed what it requires incompatibly.
22281 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
22282 [(set_attr "type" "ssemov")
22283 (set_attr "prefix" "evex")
22284 (set_attr "mode" "<sseinsnmode>")])
22285
22286
;; AVX-512 masked gathers with quadword (64-bit) indices; structure
;; parallels the gathersi patterns above, except the merge source uses
;; <VEC_GATHER_SRCDI> (element vector possibly narrower than the index
;; vector) and the mask is QImode.
22287 (define_expand "<avx512>_gatherdi<mode>"
22288 [(parallel [(set (match_operand:VI48F 0 "register_operand")
22289 (unspec:VI48F
22290 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
22291 (match_operand:QI 4 "register_operand")
22292 (mem:<ssescalarmode>
22293 (match_par_dup 6
22294 [(match_operand 2 "vsib_address_operand")
22295 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
22296 (match_operand:SI 5 "const1248_operand")]))]
22297 UNSPEC_GATHER))
22298 (clobber (match_scratch:QI 7))])]
22299 "TARGET_AVX512F"
22300 {
  /* Build the VSIB address parallel for vsib_mem_operator.  */
22301 operands[6]
22302 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22303 operands[5]), UNSPEC_VSIBADDR);
22304 })
22305
;; Insn matching the expander above; note the destination printed as %1
;; in the template (the merge source register tied to %0's low part).
22306 (define_insn "*avx512f_gatherdi<VI48F:mode>"
22307 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22308 (unspec:VI48F
22309 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
22310 (match_operand:QI 7 "register_operand" "2")
22311 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22312 [(unspec:P
22313 [(match_operand:P 4 "vsib_address_operand" "Tv")
22314 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
22315 (match_operand:SI 5 "const1248_operand" "n")]
22316 UNSPEC_VSIBADDR)])]
22317 UNSPEC_GATHER))
22318 (clobber (match_scratch:QI 2 "=&Yk"))]
22319 "TARGET_AVX512F"
22320 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
22321 ;; gas changed what it requires incompatibly.
22322 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
22323 [(set_attr "type" "ssemov")
22324 (set_attr "prefix" "evex")
22325 (set_attr "mode" "<sseinsnmode>")])
22326
;; (pc)-placeholder variant; the C output routine picks the %x (xmm) or
;; %t (ymm) modifier when only the low part of the 512/256-bit
;; destination is actually written.
22327 (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
22328 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22329 (unspec:VI48F
22330 [(pc)
22331 (match_operand:QI 6 "register_operand" "1")
22332 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
22333 [(unspec:P
22334 [(match_operand:P 3 "vsib_address_operand" "Tv")
22335 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
22336 (match_operand:SI 4 "const1248_operand" "n")]
22337 UNSPEC_VSIBADDR)])]
22338 UNSPEC_GATHER))
22339 (clobber (match_scratch:QI 1 "=&Yk"))]
22340 "TARGET_AVX512F"
22341 {
22342 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22343 gas changed what it requires incompatibly. */
22344 if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
22345 {
22346 if (<VI48F:MODE_SIZE> != 64)
22347 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
22348 else
22349 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
22350 }
22351 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
22352 }
22353 [(set_attr "type" "ssemov")
22354 (set_attr "prefix" "evex")
22355 (set_attr "mode" "<sseinsnmode>")])
22356
;; AVX-512 masked scatters: the VSIB-addressed memory is the SET
;; destination, and the unspec source carries the mask and data vector.
;; As with gathers, the mask register is consumed (clobbered) by the
;; instruction.  Doubleword-index expander first.
22357 (define_expand "<avx512>_scattersi<mode>"
22358 [(parallel [(set (mem:VI48F
22359 (match_par_dup 5
22360 [(match_operand 0 "vsib_address_operand")
22361 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
22362 (match_operand:SI 4 "const1248_operand")]))
22363 (unspec:VI48F
22364 [(match_operand:<avx512fmaskmode> 1 "register_operand")
22365 (match_operand:VI48F 3 "register_operand")]
22366 UNSPEC_SCATTER))
22367 (clobber (match_scratch:<avx512fmaskmode> 6))])]
22368 "TARGET_AVX512F"
22369 {
  /* Build the VSIB address parallel for vsib_mem_operator.  */
22370 operands[5]
22371 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
22372 operands[4]), UNSPEC_VSIBADDR);
22373 })
22374
;; Insn form of the scattersi expander above.
22375 (define_insn "*avx512f_scattersi<VI48F:mode>"
22376 [(set (match_operator:VI48F 5 "vsib_mem_operator"
22377 [(unspec:P
22378 [(match_operand:P 0 "vsib_address_operand" "Tv")
22379 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
22380 (match_operand:SI 4 "const1248_operand" "n")]
22381 UNSPEC_VSIBADDR)])
22382 (unspec:VI48F
22383 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
22384 (match_operand:VI48F 3 "register_operand" "v")]
22385 UNSPEC_SCATTER))
22386 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
22387 "TARGET_AVX512F"
22388 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22389 ;; gas changed what it requires incompatibly.
22390 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
22391 [(set_attr "type" "ssemov")
22392 (set_attr "prefix" "evex")
22393 (set_attr "mode" "<sseinsnmode>")])
22394
;; Quadword-index scatter expander; data is <VEC_GATHER_SRCDI>,
;; mask is QImode.
22395 (define_expand "<avx512>_scatterdi<mode>"
22396 [(parallel [(set (mem:VI48F
22397 (match_par_dup 5
22398 [(match_operand 0 "vsib_address_operand")
22399 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
22400 (match_operand:SI 4 "const1248_operand")]))
22401 (unspec:VI48F
22402 [(match_operand:QI 1 "register_operand")
22403 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
22404 UNSPEC_SCATTER))
22405 (clobber (match_scratch:QI 6))])]
22406 "TARGET_AVX512F"
22407 {
  /* Build the VSIB address parallel for vsib_mem_operator.  */
22408 operands[5]
22409 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
22410 operands[4]), UNSPEC_VSIBADDR);
22411 })
22412
;; Insn form of the scatterdi expander above.
22413 (define_insn "*avx512f_scatterdi<VI48F:mode>"
22414 [(set (match_operator:VI48F 5 "vsib_mem_operator"
22415 [(unspec:P
22416 [(match_operand:P 0 "vsib_address_operand" "Tv")
22417 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
22418 (match_operand:SI 4 "const1248_operand" "n")]
22419 UNSPEC_VSIBADDR)])
22420 (unspec:VI48F
22421 [(match_operand:QI 6 "register_operand" "1")
22422 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
22423 UNSPEC_SCATTER))
22424 (clobber (match_scratch:QI 1 "=&Yk"))]
22425 "TARGET_AVX512F"
22426 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22427 ;; gas changed what it requires incompatibly.
22428 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
22429 [(set_attr "type" "ssemov")
22430 (set_attr "prefix" "evex")
22431 (set_attr "mode" "<sseinsnmode>")])
22432
;; Masked compress: pack the mask-selected elements of operand 1 into
;; the low elements of the destination.  Operand 2 ("0C") is the
;; merge source — either the old destination or zero (%N2 prints the
;; {z} zero-masking suffix for the const-0 alternative).
22433 (define_insn "<avx512>_compress<mode>_mask"
22434 [(set (match_operand:VI48F 0 "register_operand" "=v")
22435 (unspec:VI48F
22436 [(match_operand:VI48F 1 "register_operand" "v")
22437 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
22438 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
22439 UNSPEC_COMPRESS))]
22440 "TARGET_AVX512F"
22441 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22442 [(set_attr "type" "ssemov")
22443 (set_attr "prefix" "evex")
22444 (set_attr "mode" "<sseinsnmode>")])
22445
;; Byte/word element compress, added by AVX512-VBMI2 (vpcompressb/w).
22446 (define_insn "compress<mode>_mask"
22447 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
22448 (unspec:VI12_AVX512VLBW
22449 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
22450 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
22451 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
22452 UNSPEC_COMPRESS))]
22453 "TARGET_AVX512VBMI2"
22454 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22455 [(set_attr "type" "ssemov")
22456 (set_attr "prefix" "evex")
22457 (set_attr "mode" "<sseinsnmode>")])
22458
;; Compress-to-memory form: only the selected elements are stored;
;; match_dup 0 records that unselected destination memory is preserved.
22459 (define_insn "<avx512>_compressstore<mode>_mask"
22460 [(set (match_operand:VI48F 0 "memory_operand" "=m")
22461 (unspec:VI48F
22462 [(match_operand:VI48F 1 "register_operand" "x")
22463 (match_dup 0)
22464 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
22465 UNSPEC_COMPRESS_STORE))]
22466 "TARGET_AVX512F"
22467 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
22468 [(set_attr "type" "ssemov")
22469 (set_attr "prefix" "evex")
22470 (set_attr "memory" "store")
22471 (set_attr "mode" "<sseinsnmode>")])
22472
;; Byte/word compress-to-memory (AVX512-VBMI2).
22473 (define_insn "compressstore<mode>_mask"
22474 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
22475 (unspec:VI12_AVX512VLBW
22476 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
22477 (match_dup 0)
22478 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
22479 UNSPEC_COMPRESS_STORE))]
22480 "TARGET_AVX512VBMI2"
22481 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
22482 [(set_attr "type" "ssemov")
22483 (set_attr "prefix" "evex")
22484 (set_attr "memory" "store")
22485 (set_attr "mode" "<sseinsnmode>")])
22486
;; Masked expand (inverse of compress): scatter the low elements of
;; operand 1 into the mask-selected positions of the destination.
;; The _maskz expander just supplies a zero merge source.
22487 (define_expand "<avx512>_expand<mode>_maskz"
22488 [(set (match_operand:VI48F 0 "register_operand")
22489 (unspec:VI48F
22490 [(match_operand:VI48F 1 "nonimmediate_operand")
22491 (match_operand:VI48F 2 "nonimm_or_0_operand")
22492 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22493 UNSPEC_EXPAND))]
22494 "TARGET_AVX512F"
22495 "operands[2] = CONST0_RTX (<MODE>mode);")
22496
;; Register or memory source alternatives; %N2 prints {z} for the
;; zero-merge (C constraint) alternative.
22497 (define_insn "<avx512>_expand<mode>_mask"
22498 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
22499 (unspec:VI48F
22500 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
22501 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
22502 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22503 UNSPEC_EXPAND))]
22504 "TARGET_AVX512F"
22505 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22506 [(set_attr "type" "ssemov")
22507 (set_attr "prefix" "evex")
22508 (set_attr "memory" "none,load")
22509 (set_attr "mode" "<sseinsnmode>")])
22510
;; Byte/word expand (AVX512-VBMI2: vpexpandb/w).
22511 (define_insn "expand<mode>_mask"
22512 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
22513 (unspec:VI12_AVX512VLBW
22514 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
22515 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
22516 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22517 UNSPEC_EXPAND))]
22518 "TARGET_AVX512VBMI2"
22519 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22520 [(set_attr "type" "ssemov")
22521 (set_attr "prefix" "evex")
22522 (set_attr "memory" "none,load")
22523 (set_attr "mode" "<sseinsnmode>")])
22524
;; Zero-masking byte/word expand expander (supplies the zero merge).
22525 (define_expand "expand<mode>_maskz"
22526 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
22527 (unspec:VI12_AVX512VLBW
22528 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
22529 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
22530 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22531 UNSPEC_EXPAND))]
22532 "TARGET_AVX512VBMI2"
22533 "operands[2] = CONST0_RTX (<MODE>mode);")
22534
;; AVX512DQ vrangeps/pd: select min/max/abs-min/abs-max per the
;; 4-bit immediate (operand 3).  Packed form, with masking and
;; SAE-only rounding support via the subst attributes.
22535 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
22536 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22537 (unspec:VF_AVX512VL
22538 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
22539 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
22540 (match_operand:SI 3 "const_0_to_15_operand")]
22541 UNSPEC_RANGE))]
22542 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
22543 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
22544 [(set_attr "type" "sse")
22545 (set_attr "prefix" "evex")
22546 (set_attr "mode" "<MODE>")])
22547
;; Scalar form (vrangess/sd): result merged into element 0 of
;; operand 1 via vec_merge with mask (const_int 1).
22548 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
22549 [(set (match_operand:VF_128 0 "register_operand" "=v")
22550 (vec_merge:VF_128
22551 (unspec:VF_128
22552 [(match_operand:VF_128 1 "register_operand" "v")
22553 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22554 (match_operand:SI 3 "const_0_to_15_operand")]
22555 UNSPEC_RANGE)
22556 (match_dup 1)
22557 (const_int 1)))]
22558 "TARGET_AVX512DQ"
22559 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
22560 [(set_attr "type" "sse")
22561 (set_attr "prefix" "evex")
22562 (set_attr "mode" "<MODE>")])
22563
;; AVX512DQ vfpclassps/pd: test each element for the FP classes chosen
;; by the 8-bit immediate, producing a mask register result.
22564 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
22565 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22566 (unspec:<avx512fmaskmode>
22567 [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
22568 (match_operand 2 "const_0_to_255_operand" "n")]
22569 UNSPEC_FPCLASS))]
22570 "TARGET_AVX512DQ"
22571 "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
22572 [(set_attr "type" "sse")
22573 (set_attr "length_immediate" "1")
22574 (set_attr "prefix" "evex")
22575 (set_attr "mode" "<MODE>")])
22576
;; Scalar form (vfpclassss/sd): only bit 0 of the result mask is
;; defined, hence the (and ... (const_int 1)).
22577 (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
22578 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22579 (and:<avx512fmaskmode>
22580 (unspec:<avx512fmaskmode>
22581 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
22582 (match_operand 2 "const_0_to_255_operand" "n")]
22583 UNSPEC_FPCLASS)
22584 (const_int 1)))]
22585 "TARGET_AVX512DQ"
22586 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
22587 [(set_attr "type" "sse")
22588 (set_attr "length_immediate" "1")
22589 (set_attr "prefix" "evex")
22590 (set_attr "mode" "<MODE>")])
22591
;; vgetmantps/pd: extract the normalized mantissa of each element; the
;; 4-bit immediate selects the normalization interval and sign control.
22592 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
22593 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22594 (unspec:VF_AVX512VL
22595 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
22596 (match_operand:SI 2 "const_0_to_15_operand")]
22597 UNSPEC_GETMANT))]
22598 "TARGET_AVX512F"
22599 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
22600 [(set_attr "prefix" "evex")
22601 (set_attr "mode" "<MODE>")])
22602
;; Scalar vgetmantss/sd: element 0 from the operation, remaining
;; elements from operand 1 (vec_merge with mask 1).
22603 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
22604 [(set (match_operand:VF_128 0 "register_operand" "=v")
22605 (vec_merge:VF_128
22606 (unspec:VF_128
22607 [(match_operand:VF_128 1 "register_operand" "v")
22608 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22609 (match_operand:SI 3 "const_0_to_15_operand")]
22610 UNSPEC_GETMANT)
22611 (match_dup 1)
22612 (const_int 1)))]
22613 "TARGET_AVX512F"
22614 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
22615 [(set_attr "prefix" "evex")
22616 (set_attr "mode" "<ssescalarmode>")])
22617
22618 ;; The correct representation for this is absolutely enormous, and
22619 ;; surely not generally useful.
;; vdbpsadbw (double-block packed sum of absolute differences): kept as
;; an opaque unspec for the reason above; immediate selects the shuffle
;; of the second source's dword blocks.
22620 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
22621 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22622 (unspec:VI2_AVX512VL
22623 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
22624 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
22625 (match_operand:SI 3 "const_0_to_255_operand")]
22626 UNSPEC_DBPSADBW))]
22627 "TARGET_AVX512BW"
22628 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
22629 [(set_attr "type" "sselog1")
22630 (set_attr "length_immediate" "1")
22631 (set_attr "prefix" "evex")
22632 (set_attr "mode" "<sseinsnmode>")])
22633
;; AVX512CD vplzcntd/q: per-element count-leading-zeros, expressed with
;; the generic clz RTX so the vectorizer can use it directly.
22634 (define_insn "clz<mode>2<mask_name>"
22635 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22636 (clz:VI48_AVX512VL
22637 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
22638 "TARGET_AVX512CD"
22639 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22640 [(set_attr "type" "sse")
22641 (set_attr "prefix" "evex")
22642 (set_attr "mode" "<sseinsnmode>")])
22643
;; AVX512CD vpconflictd/q: per-element conflict detection, opaque unspec.
22644 (define_insn "<mask_codefor>conflict<mode><mask_name>"
22645 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22646 (unspec:VI48_AVX512VL
22647 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
22648 UNSPEC_CONFLICT))]
22649 "TARGET_AVX512CD"
22650 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22651 [(set_attr "type" "sse")
22652 (set_attr "prefix" "evex")
22653 (set_attr "mode" "<sseinsnmode>")])
22654
;; SHA-NI instruction patterns.  All operate on V4SI state words, are
;; legacy (non-VEX) encoded, and keep their semantics opaque behind
;; dedicated unspecs — the first source doubles as the destination ("0").
22655 (define_insn "sha1msg1"
22656 [(set (match_operand:V4SI 0 "register_operand" "=x")
22657 (unspec:V4SI
22658 [(match_operand:V4SI 1 "register_operand" "0")
22659 (match_operand:V4SI 2 "vector_operand" "xBm")]
22660 UNSPEC_SHA1MSG1))]
22661 "TARGET_SHA"
22662 "sha1msg1\t{%2, %0|%0, %2}"
22663 [(set_attr "type" "sselog1")
22664 (set_attr "mode" "TI")])
22665
22666 (define_insn "sha1msg2"
22667 [(set (match_operand:V4SI 0 "register_operand" "=x")
22668 (unspec:V4SI
22669 [(match_operand:V4SI 1 "register_operand" "0")
22670 (match_operand:V4SI 2 "vector_operand" "xBm")]
22671 UNSPEC_SHA1MSG2))]
22672 "TARGET_SHA"
22673 "sha1msg2\t{%2, %0|%0, %2}"
22674 [(set_attr "type" "sselog1")
22675 (set_attr "mode" "TI")])
22676
22677 (define_insn "sha1nexte"
22678 [(set (match_operand:V4SI 0 "register_operand" "=x")
22679 (unspec:V4SI
22680 [(match_operand:V4SI 1 "register_operand" "0")
22681 (match_operand:V4SI 2 "vector_operand" "xBm")]
22682 UNSPEC_SHA1NEXTE))]
22683 "TARGET_SHA"
22684 "sha1nexte\t{%2, %0|%0, %2}"
22685 [(set_attr "type" "sselog1")
22686 (set_attr "mode" "TI")])
22687
;; sha1rnds4 takes a 2-bit immediate selecting the round-function group.
22688 (define_insn "sha1rnds4"
22689 [(set (match_operand:V4SI 0 "register_operand" "=x")
22690 (unspec:V4SI
22691 [(match_operand:V4SI 1 "register_operand" "0")
22692 (match_operand:V4SI 2 "vector_operand" "xBm")
22693 (match_operand:SI 3 "const_0_to_3_operand" "n")]
22694 UNSPEC_SHA1RNDS4))]
22695 "TARGET_SHA"
22696 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
22697 [(set_attr "type" "sselog1")
22698 (set_attr "length_immediate" "1")
22699 (set_attr "mode" "TI")])
22700
22701 (define_insn "sha256msg1"
22702 [(set (match_operand:V4SI 0 "register_operand" "=x")
22703 (unspec:V4SI
22704 [(match_operand:V4SI 1 "register_operand" "0")
22705 (match_operand:V4SI 2 "vector_operand" "xBm")]
22706 UNSPEC_SHA256MSG1))]
22707 "TARGET_SHA"
22708 "sha256msg1\t{%2, %0|%0, %2}"
22709 [(set_attr "type" "sselog1")
22710 (set_attr "mode" "TI")])
22711
22712 (define_insn "sha256msg2"
22713 [(set (match_operand:V4SI 0 "register_operand" "=x")
22714 (unspec:V4SI
22715 [(match_operand:V4SI 1 "register_operand" "0")
22716 (match_operand:V4SI 2 "vector_operand" "xBm")]
22717 UNSPEC_SHA256MSG2))]
22718 "TARGET_SHA"
22719 "sha256msg2\t{%2, %0|%0, %2}"
22720 [(set_attr "type" "sselog1")
22721 (set_attr "mode" "TI")])
22722
;; sha256rnds2 has an implicit third source fixed to xmm0 ("Yz").
;; NOTE(review): it carries length_immediate "1" although no explicit
;; immediate appears in the template — presumably reserving space for
;; the implicit-xmm0 encoding; confirm against the ISA encoding tables.
22723 (define_insn "sha256rnds2"
22724 [(set (match_operand:V4SI 0 "register_operand" "=x")
22725 (unspec:V4SI
22726 [(match_operand:V4SI 1 "register_operand" "0")
22727 (match_operand:V4SI 2 "vector_operand" "xBm")
22728 (match_operand:V4SI 3 "register_operand" "Yz")]
22729 UNSPEC_SHA256RNDS2))]
22730 "TARGET_SHA"
22731 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
22732 [(set_attr "type" "sselog1")
22733 (set_attr "length_immediate" "1")
22734 (set_attr "mode" "TI")])
22735
;; Casts from a 128-bit (quarter) vector to a 512-bit vector: the upper
;; parts are undefined (UNSPEC_CAST of zero), so after reload the whole
;; thing splits into a plain move of the low part.
22736 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
22737 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22738 (vec_concat:AVX512MODE2P
22739 (vec_concat:<ssehalfvecmode>
22740 (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
22741 (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
22742 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
22743 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22744 "#"
22745 "&& reload_completed"
22746 [(set (match_dup 0) (match_dup 1))]
22747 {
  /* Narrow whichever side is a register so both sides of the emitted
     move have the quarter mode.  */
22748 if (REG_P (operands[0]))
22749 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
22750 else
22751 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
22752 <ssequartermode>mode);
22753 })
22754
;; Same idea for 256-bit (half) vector to 512-bit casts.
22755 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
22756 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22757 (vec_concat:AVX512MODE2P
22758 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
22759 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
22760 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22761 "#"
22762 "&& reload_completed"
22763 [(set (match_dup 0) (match_dup 1))]
22764 {
  /* Narrow whichever side is a register so the move is in half mode.  */
22765 if (REG_P (operands[0]))
22766 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
22767 else
22768 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
22769 <ssehalfvecmode>mode);
22770 })
22771
;; AVX-512 IFMA: 52-bit integer fused multiply-add (vpmadd52luq /
;; vpmadd52huq), iterated over the low/high unspecs.
;; NOTE(review): the pattern names below spell "vpamdd52" — a
;; longstanding typo for "vpmadd52".  The emitted mnemonics are correct;
;; renaming the patterns would require updating the builtin tables that
;; reference these gen_* names, so the typo is kept.
22772 (define_int_iterator VPMADD52
22773 [UNSPEC_VPMADD52LUQ
22774 UNSPEC_VPMADD52HUQ])
22775
;; Maps the unspec to the "luq"/"huq" mnemonic suffix.
22776 (define_int_attr vpmadd52type
22777 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
22778
;; Zero-masking expander for the high-52 form: forwards to the _maskz_1
;; insn with a zero merge vector.
22779 (define_expand "vpamdd52huq<mode>_maskz"
22780 [(match_operand:VI8_AVX512VL 0 "register_operand")
22781 (match_operand:VI8_AVX512VL 1 "register_operand")
22782 (match_operand:VI8_AVX512VL 2 "register_operand")
22783 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
22784 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22785 "TARGET_AVX512IFMA"
22786 {
22787 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
22788 operands[0], operands[1], operands[2], operands[3],
22789 CONST0_RTX (<MODE>mode), operands[4]));
22790 DONE;
22791 })
22792
;; Zero-masking expander for the low-52 form.
22793 (define_expand "vpamdd52luq<mode>_maskz"
22794 [(match_operand:VI8_AVX512VL 0 "register_operand")
22795 (match_operand:VI8_AVX512VL 1 "register_operand")
22796 (match_operand:VI8_AVX512VL 2 "register_operand")
22797 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
22798 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22799 "TARGET_AVX512IFMA"
22800 {
22801 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
22802 operands[0], operands[1], operands[2], operands[3],
22803 CONST0_RTX (<MODE>mode), operands[4]));
22804 DONE;
22805 })
22806
;; Base insn: accumulator in operand 1 tied to the destination ("0").
22807 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
22808 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22809 (unspec:VI8_AVX512VL
22810 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
22811 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
22812 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
22813 VPMADD52))]
22814 "TARGET_AVX512IFMA"
22815 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
22816 [(set_attr "type" "ssemuladd")
22817 (set_attr "prefix" "evex")
22818 (set_attr "mode" "<sseinsnmode>")])
22819
;; Merge-masking insn: unselected lanes keep the accumulator value
;; (vec_merge with match_dup 1).
22820 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
22821 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22822 (vec_merge:VI8_AVX512VL
22823 (unspec:VI8_AVX512VL
22824 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
22825 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
22826 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
22827 VPMADD52)
22828 (match_dup 1)
22829 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22830 "TARGET_AVX512IFMA"
22831 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
22832 [(set_attr "type" "ssemuladd")
22833 (set_attr "prefix" "evex")
22834 (set_attr "mode" "<sseinsnmode>")])
22835
;; AVX512VBMI vpmultishiftqb: per-byte bitfield extraction controlled by
;; the first source; opaque unspec.
22836 (define_insn "vpmultishiftqb<mode><mask_name>"
22837 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
22838 (unspec:VI1_AVX512VL
22839 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
22840 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
22841 UNSPEC_VPMULTISHIFT))]
22842 "TARGET_AVX512VBMI"
22843 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22844 [(set_attr "type" "sselog")
22845 (set_attr "prefix" "evex")
22846 (set_attr "mode" "<sseinsnmode>")])
22847
;; 2048-bit pseudo-vector modes (4 x 512-bit) used by the AVX512
;; 4FMAPS/4VNNIW instructions, which read four consecutive registers.
22848 (define_mode_iterator IMOD4
22849 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
22850
;; The 512-bit component mode of each IMOD4 mode.
22851 (define_mode_attr imod4_narrow
22852 [(V64SF "V16SF") (V64SI "V16SI")])
22853
;; Generic move expander for the IMOD4 modes.
22854 (define_expand "mov<mode>"
22855 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
22856 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
22857 "TARGET_AVX512F"
22858 {
22859 ix86_expand_vector_move (<MODE>mode, operands);
22860 DONE;
22861 })
22862
;; IMOD4 moves are split after reload into four 512-bit component moves.
22863 (define_insn_and_split "*mov<mode>_internal"
22864 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
22865 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
22866 "TARGET_AVX512F
22867 && (register_operand (operands[0], <MODE>mode)
22868 || register_operand (operands[1], <MODE>mode))"
22869 "#"
22870 "&& reload_completed"
22871 [(const_int 0)]
22872 {
22873 rtx op0, op1;
22874 int i;
22875
  /* Emit one 512-bit move per 64-byte chunk of the 2048-bit value.  */
22876 for (i = 0; i < 4; i++)
22877 {
22878 op0 = simplify_subreg
22879 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
22880 op1 = simplify_subreg
22881 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
22882 emit_move_insn (op0, op1);
22883 }
22884 DONE;
22885 })
22886
22887 (define_insn "avx5124fmaddps_4fmaddps"
22888 [(set (match_operand:V16SF 0 "register_operand" "=v")
22889 (unspec:V16SF
22890 [(match_operand:V16SF 1 "register_operand" "0")
22891 (match_operand:V64SF 2 "register_operand" "v")
22892 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
22893 "TARGET_AVX5124FMAPS"
22894 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
22895 [(set_attr ("type") ("ssemuladd"))
22896 (set_attr ("prefix") ("evex"))
22897 (set_attr ("mode") ("V16SF"))])
22898
22899 (define_insn "avx5124fmaddps_4fmaddps_mask"
22900 [(set (match_operand:V16SF 0 "register_operand" "=v")
22901 (vec_merge:V16SF
22902 (unspec:V16SF
22903 [(match_operand:V64SF 1 "register_operand" "v")
22904 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
22905 (match_operand:V16SF 3 "register_operand" "0")
22906 (match_operand:HI 4 "register_operand" "Yk")))]
22907 "TARGET_AVX5124FMAPS"
22908 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
22909 [(set_attr ("type") ("ssemuladd"))
22910 (set_attr ("prefix") ("evex"))
22911 (set_attr ("mode") ("V16SF"))])
22912
22913 (define_insn "avx5124fmaddps_4fmaddps_maskz"
22914 [(set (match_operand:V16SF 0 "register_operand" "=v")
22915 (vec_merge:V16SF
22916 (unspec:V16SF
22917 [(match_operand:V16SF 1 "register_operand" "0")
22918 (match_operand:V64SF 2 "register_operand" "v")
22919 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
22920 (match_operand:V16SF 4 "const0_operand" "C")
22921 (match_operand:HI 5 "register_operand" "Yk")))]
22922 "TARGET_AVX5124FMAPS"
22923 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
22924 [(set_attr ("type") ("ssemuladd"))
22925 (set_attr ("prefix") ("evex"))
22926 (set_attr ("mode") ("V16SF"))])
22927
;; AVX512_4FMAPS scalar 4-iteration FMA (v4fmaddss).  Operand 2 has mode
;; V64SF because the instruction implicitly consumes a block of four
;; consecutive zmm registers; %x2 prints the xmm view of the first one.
;; Operand 3 is the required memory source of four SF multipliers.
(define_insn "avx5124fmaddps_4fmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(unspec:V4SF
	  [(match_operand:V4SF 1 "register_operand" "0")
	   (match_operand:V64SF 2 "register_operand" "v")
	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("SF"))])

;; Masked (merge) variant: elements where mask bit 4 is clear keep the
;; value of operand 3, which is also the accumulator ("0" tie).
(define_insn "avx5124fmaddps_4fmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (unspec:V4SF
	     [(match_operand:V64SF 1 "register_operand" "v")
	      (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
	  (match_operand:V4SF 3 "register_operand" "0")
	  (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("SF"))])

;; Zero-masked variant: elements where mask bit 5 is clear are zeroed
;; (operand 4 is the all-zeros constant merged in; %{z%} selects
;; zero-masking in the mnemonic).
(define_insn "avx5124fmaddps_4fmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (unspec:V4SF
	    [(match_operand:V4SF 1 "register_operand" "0")
	     (match_operand:V64SF 2 "register_operand" "v")
	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
	  (match_operand:V4SF 4 "const0_operand" "C")
	  (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("SF"))])
22968
;; AVX512_4FMAPS packed 4-iteration fused negated multiply-add
;; (v4fnmaddps).  As above, the V64SF operand stands for a group of four
;; consecutive zmm source registers; %g2 prints the zmm name.
(define_insn "avx5124fmaddps_4fnmaddps"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(unspec:V16SF
	  [(match_operand:V16SF 1 "register_operand" "0")
	   (match_operand:V64SF 2 "register_operand" "v")
	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("V16SF"))])

;; Merge-masked variant: unselected lanes keep accumulator operand 3.
(define_insn "avx5124fmaddps_4fnmaddps_mask"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_merge:V16SF
	  (unspec:V16SF
	     [(match_operand:V64SF 1 "register_operand" "v")
	      (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
	  (match_operand:V16SF 3 "register_operand" "0")
	  (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("V16SF"))])

;; Zero-masked variant: unselected lanes are cleared.
(define_insn "avx5124fmaddps_4fnmaddps_maskz"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_merge:V16SF
	  (unspec:V16SF
	    [(match_operand:V16SF 1 "register_operand" "0")
	     (match_operand:V64SF 2 "register_operand" "v")
	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
	  (match_operand:V16SF 4 "const0_operand" "C")
	  (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("V16SF"))])
23009
;; AVX512_4FMAPS scalar 4-iteration fused negated multiply-add
;; (v4fnmaddss).  Structure mirrors the 4fmaddss patterns above.
(define_insn "avx5124fmaddps_4fnmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(unspec:V4SF
	  [(match_operand:V4SF 1 "register_operand" "0")
	   (match_operand:V64SF 2 "register_operand" "v")
	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("SF"))])

;; Merge-masked variant (accumulator in operand 3, mask in operand 4).
(define_insn "avx5124fmaddps_4fnmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (unspec:V4SF
	    [(match_operand:V64SF 1 "register_operand" "v")
	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
	  (match_operand:V4SF 3 "register_operand" "0")
	  (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("SF"))])

;; Zero-masked variant.
(define_insn "avx5124fmaddps_4fnmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (unspec:V4SF
	    [(match_operand:V4SF 1 "register_operand" "0")
	     (match_operand:V64SF 2 "register_operand" "v")
	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
	  (match_operand:V4SF 4 "const0_operand" "C")
	  (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("SF"))])
23050
;; AVX512_4VNNIW 4-iteration dot product of signed words with dword
;; accumulation (vp4dpwssd).  V64SI operand 2 represents the implicit
;; block of four consecutive zmm registers.
(define_insn "avx5124vnniw_vp4dpwssd"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V64SI 2 "register_operand" "v")
	   (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("TI"))])

;; Merge-masked variant.
(define_insn "avx5124vnniw_vp4dpwssd_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_merge:V16SI
	  (unspec:V16SI
	     [(match_operand:V64SI 1 "register_operand" "v")
	      (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
	  (match_operand:V16SI 3 "register_operand" "0")
	  (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("TI"))])

;; Zero-masked variant.
(define_insn "avx5124vnniw_vp4dpwssd_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_merge:V16SI
	  (unspec:V16SI
	    [(match_operand:V16SI 1 "register_operand" "0")
	     (match_operand:V64SI 2 "register_operand" "v")
	     (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
	  (match_operand:V16SI 4 "const0_operand" "C")
	  (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("TI"))])
23091
;; Saturating form of the 4-iteration word dot product (vp4dpwssds).
;; Pattern structure mirrors vp4dpwssd above.
(define_insn "avx5124vnniw_vp4dpwssds"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V64SI 2 "register_operand" "v")
	   (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("TI"))])

;; Merge-masked variant.
(define_insn "avx5124vnniw_vp4dpwssds_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_merge:V16SI
	  (unspec:V16SI
	     [(match_operand:V64SI 1 "register_operand" "v")
	      (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
	  (match_operand:V16SI 3 "register_operand" "0")
	  (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("TI"))])

;; Zero-masked variant.
(define_insn "avx5124vnniw_vp4dpwssds_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_merge:V16SI
	  (unspec:V16SI
	    [(match_operand:V16SI 1 "register_operand" "0")
	     (match_operand:V64SI 2 "register_operand" "v")
	     (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
	  (match_operand:V16SI 4 "const0_operand" "C")
	  (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr ("type") ("ssemuladd"))
   (set_attr ("prefix") ("evex"))
   (set_attr ("mode") ("TI"))])
23132
;; Vector population count for dword/qword elements (AVX512VPOPCNTDQ).
;; The expander only exposes the standard popcount<mode>2 name; the insn
;; below does the work (and supports masking via <mask_name>).
(define_expand "popcount<mode>2"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
	(popcount:VI48_AVX512VL
	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
  "TARGET_AVX512VPOPCNTDQ")

(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(popcount:VI48_AVX512VL
	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VPOPCNTDQ"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
23145
;; Save multiple registers out-of-line.
;; Operand 1 names the out-of-line stub to call; %P1 prints the bare
;; symbol.  The surrounding parallel (matched by the "save_multiple"
;; predicate) describes the registers actually saved.
(define_insn "*save_multiple<mode>"
  [(match_parallel 0 "save_multiple"
    [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")

;; Restore multiple registers out-of-line.
(define_insn "*restore_multiple<mode>"
  [(match_parallel 0 "restore_multiple"
    [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")

;; Restore multiple registers out-of-line and return.
;; Tail-jumps to the stub; the stub performs the return.  The stack
;; pointer is restored from R10 as part of this sequence.
(define_insn "*restore_multiple_and_return<mode>"
  [(match_parallel 0 "restore_multiple"
    [(return)
     (use (match_operand:P 1 "symbol_operand"))
     (set (reg:DI SP_REG) (reg:DI R10_REG))
    ])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")

;; Restore multiple registers out-of-line when hard frame pointer is used,
;; perform the leave operation prior to returning (from the function).
;; The two SETs model the effect of LEAVE (pop frame pointer, deallocate
;; frame); the BLK clobber keeps memory accesses from moving across it.
(define_insn "*restore_multiple_leave_return<mode>"
  [(match_parallel 0 "restore_multiple"
    [(return)
     (use (match_operand:P 1 "symbol_operand"))
     (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
     (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
     (clobber (mem:BLK (scratch)))
    ])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")
23182
;; Vector population count for byte/word elements (AVX512BITALG).
;; Expander only: constraints are ignored on a define_expand, so none
;; are given (consistent with the VI48 popcount expander above).
(define_expand "popcount<mode>2"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
	(popcount:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")))]
  "TARGET_AVX512BITALG")
23188
;; Byte/word vpopcnt insn, with optional masking via <mask_name>.
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(popcount:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BITALG"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
23195
;; GFNI affine transformations in GF(2^8).  Two alternatives: legacy
;; SSE encoding (noavx) and VEX/EVEX encoding with masking support.
;; Operand 3 is the 8-bit affine constant.  The stray space after '|'
;; in the original templates has been dropped so the Intel-dialect
;; output does not carry a spurious blank.
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
	(unspec:VI1_AVX512F
	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
	   (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
	   (match_operand 3 "const_0_to_255_operand" "n,n")]
	  UNSPEC_GF2P8AFFINEINV))]
  "TARGET_GFNI"
  "@
   gf2p8affineinvqb\t{%3, %2, %0|%0, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Forward affine transformation; same structure as the inverse form.
(define_insn "vgf2p8affineqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
	(unspec:VI1_AVX512F
	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
	   (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
	   (match_operand 3 "const_0_to_255_operand" "n,n")]
	  UNSPEC_GF2P8AFFINE))]
  "TARGET_GFNI"
  "@
   gf2p8affineqb\t{%3, %2, %0|%0, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
23229
;; GFNI multiplication in GF(2^8).  Commutative in operands 1/2 ("%0").
;; The stray space after '|' in the original templates has been dropped
;; so the Intel-dialect output does not carry a spurious blank.
(define_insn "vgf2p8mulb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
	(unspec:VI1_AVX512F
	  [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
	   (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
	  UNSPEC_GF2P8MUL))]
  "TARGET_GFNI"
  "@
   gf2p8mulb\t{%2, %0|%0, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
23245
;; AVX512VBMI2 double-register funnel shifts by an immediate.
;; Operand 3 is the shift count (0..255).  The trailing space before
;; '}' in the original templates (which emitted trailing whitespace
;; into the assembly) has been removed, and the nonstandard
;; (set_attr ("x") ("y")) spelling normalized.
(define_insn "vpshrd_<mode><mask_name>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(unspec:VI248_AVX512VL
	  [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
	  UNSPEC_VPSHRD))]
  "TARGET_AVX512VBMI2"
  "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Left funnel shift by immediate; mirrors vpshrd above.
(define_insn "vpshld_<mode><mask_name>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(unspec:VI248_AVX512VL
	  [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
	  UNSPEC_VPSHLD))]
  "TARGET_AVX512VBMI2"
  "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "prefix" "evex")])
23267
;; AVX512VBMI2 variable right funnel shift (vpshrdv): per-element shift
;; counts come from operand 3; operand 1 is the accumulator ("0" tie).
;; Trailing spaces before '}' in the original templates have been
;; removed and set_attr spelling normalized.
(define_insn "vpshrdv_<mode>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(unspec:VI248_AVX512VL
	  [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPSHRDV))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Merge-masked variant: unselected lanes keep the accumulator value
;; (match_dup 1).
(define_insn "vpshrdv_<mode>_mask"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI248_AVX512VL
	  (unspec:VI248_AVX512VL
	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPSHRDV)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Zero-masking entry point: supplies the zero vector for _maskz_1.
(define_expand "vpshrdv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
					 operands[2], operands[3],
					 CONST0_RTX (<MODE>mode),
					 operands[4]));
  DONE;
})

;; Zero-masked variant: unselected lanes are cleared.
(define_insn "vpshrdv_<mode>_maskz_1"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI248_AVX512VL
	  (unspec:VI248_AVX512VL
	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPSHRDV)
	  (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
23324
;; AVX512VBMI2 variable left funnel shift (vpshldv); mirrors the
;; vpshrdv patterns above.  Trailing spaces before '}' in the original
;; templates have been removed and set_attr spelling normalized.
(define_insn "vpshldv_<mode>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(unspec:VI248_AVX512VL
	  [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPSHLDV))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Merge-masked variant (unselected lanes keep accumulator operand 1).
(define_insn "vpshldv_<mode>_mask"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI248_AVX512VL
	  (unspec:VI248_AVX512VL
	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPSHLDV)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Zero-masking entry point: supplies the zero vector for _maskz_1.
(define_expand "vpshldv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
					 operands[2], operands[3],
					 CONST0_RTX (<MODE>mode),
					 operands[4]));
  DONE;
})

;; Zero-masked variant: unselected lanes are cleared.
(define_insn "vpshldv_<mode>_maskz_1"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI248_AVX512VL
	  (unspec:VI248_AVX512VL
	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPSHLDV)
	  (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
23381
;; VNNI: multiply unsigned bytes by signed bytes, accumulate into
;; dwords (vpdpbusd).  The v16si form is AVX512VNNI-only; the iterated
;; form also supports AVX-VNNI ({vex} encoding).  Trailing spaces
;; before '}' in the masked templates have been removed and set_attr
;; spelling normalized.
(define_insn "vpdpbusd_v16si"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V16SI 2 "register_operand" "v")
	   (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

;; 128/256-bit form: first alternative forces the VEX encoding for
;; AVX-VNNI, second uses EVEX under AVX512VNNI+VL.
(define_insn "vpdpbusd_<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
	(unspec:VI4_AVX2
	  [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
	   (match_operand:VI4_AVX2 2 "register_operand" "x,v")
	   (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
  "@
   %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
   vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "vex,evex")
   (set_attr "isa" "avxvnni,avx512vnnivl")])

;; Merge-masked variant (unselected lanes keep accumulator operand 1).
(define_insn "vpdpbusd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Zero-masking entry point: supplies the zero vector for _maskz_1.
(define_expand "vpdpbusd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  CONST0_RTX (<MODE>mode),
					  operands[4]));
  DONE;
})

;; Zero-masked variant: unselected lanes are cleared.
(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
23449
;; VNNI: saturating unsigned-byte x signed-byte dot product with dword
;; accumulation (vpdpbusds); mirrors vpdpbusd above.  Trailing spaces
;; before '}' in the masked templates have been removed and set_attr
;; spelling normalized.
(define_insn "vpdpbusds_v16si"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V16SI 2 "register_operand" "v")
	   (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

;; 128/256-bit form with AVX-VNNI ({vex}) and EVEX alternatives.
(define_insn "vpdpbusds_<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
	(unspec:VI4_AVX2
	  [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
	   (match_operand:VI4_AVX2 2 "register_operand" "x,v")
	   (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
  "@
   %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
   vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "vex,evex")
   (set_attr "isa" "avxvnni,avx512vnnivl")])

;; Merge-masked variant (unselected lanes keep accumulator operand 1).
(define_insn "vpdpbusds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Zero-masking entry point: supplies the zero vector for _maskz_1.
(define_expand "vpdpbusds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   CONST0_RTX (<MODE>mode),
					   operands[4]));
  DONE;
})

;; Zero-masked variant: unselected lanes are cleared.
(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
23517
;; VNNI: signed-word dot product with dword accumulation (vpdpwssd);
;; mirrors vpdpbusd above.  Trailing spaces before '}' in the masked
;; templates have been removed and set_attr spelling normalized.
(define_insn "vpdpwssd_v16si"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V16SI 2 "register_operand" "v")
	   (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

;; 128/256-bit form with AVX-VNNI ({vex}) and EVEX alternatives.
(define_insn "vpdpwssd_<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
	(unspec:VI4_AVX2
	  [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
	   (match_operand:VI4_AVX2 2 "register_operand" "x,v")
	   (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_VPMADDWDACCD))]
  "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
  "@
   %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
   vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "vex,evex")
   (set_attr "isa" "avxvnni,avx512vnnivl")])

;; Merge-masked variant (unselected lanes keep accumulator operand 1).
(define_insn "vpdpwssd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Zero-masking entry point: supplies the zero vector for _maskz_1.
(define_expand "vpdpwssd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  CONST0_RTX (<MODE>mode),
					  operands[4]));
  DONE;
})

;; Zero-masked variant: unselected lanes are cleared.
(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
23585
;; VNNI: saturating signed-word dot product with dword accumulation
;; (vpdpwssds); mirrors vpdpwssd above.  Trailing spaces before '}' in
;; the masked templates have been removed and set_attr spelling
;; normalized.
(define_insn "vpdpwssds_v16si"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V16SI 2 "register_operand" "v")
	   (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

;; 128/256-bit form with AVX-VNNI ({vex}) and EVEX alternatives.
(define_insn "vpdpwssds_<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
	(unspec:VI4_AVX2
	  [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
	   (match_operand:VI4_AVX2 2 "register_operand" "x,v")
	   (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
  "@
   %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
   vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "vex,evex")
   (set_attr "isa" "avxvnni,avx512vnnivl")])

;; Merge-masked variant (unselected lanes keep accumulator operand 1).
(define_insn "vpdpwssds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Zero-masking entry point: supplies the zero vector for _maskz_1.
(define_expand "vpdpwssds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   CONST0_RTX (<MODE>mode),
					   operands[4]));
  DONE;
})

;; Zero-masked variant: unselected lanes are cleared.
(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
23653
;; VAES: vector AES rounds over 128-bit lanes of 128/256/512-bit
;; vectors (VI1_AVX512VL_F iterator).  No explicit attributes are set
;; on these patterns.
(define_insn "vaesdec_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDEC))]
  "TARGET_VAES"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
)

;; Final AES decryption round.
(define_insn "vaesdeclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDECLAST))]
  "TARGET_VAES"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
)

;; AES encryption round.
(define_insn "vaesenc_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESENC))]
  "TARGET_VAES"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}"
)

;; Final AES encryption round.
(define_insn "vaesenclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESENCLAST))]
  "TARGET_VAES"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
)
23693
;; VPCLMULQDQ: carry-less multiplication of quadword lane pairs.
;; Operand 3 selects which qword of each source lane participates.
(define_insn "vpclmulqdq_<mode>"
  [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
	(unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
			 (match_operand:VI8_FVL 2 "vector_operand" "vm")
			 (match_operand:SI 3 "const_0_to_255_operand" "n")]
			UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])
23703
;; AVX512BITALG vpshufbitqmb: gathers bits of operand 1 selected by
;; operand 2 into a mask register (one result bit per byte element).
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
23714
;; Modes handled by the VP2INTERSECT patterns: dword/qword vectors,
;; 128/256-bit only with AVX512VL.
(define_mode_iterator VI48_AVX512VP2VL
  [V8DI
   (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
   (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])

;; Paired mask-register modes (VP2INTERSECT writes two k registers).
(define_mode_iterator MASK_DWI [P2QI P2HI])

;; Move expander for the paired-mask modes; legitimizes mem->mem moves
;; by forcing the source into a register.
(define_expand "mov<mode>"
  [(set (match_operand:MASK_DWI 0 "nonimmediate_operand")
	(match_operand:MASK_DWI 1 "nonimmediate_operand"))]
  "TARGET_AVX512VP2INTERSECT"
{
  if (MEM_P (operands[0]) && MEM_P (operands[1]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
23730
;; Internal mask-pair move; split after reload into two moves of the
;; component mask modes.
(define_insn_and_split "*mov<mode>_internal"
  [(set (match_operand:MASK_DWI 0 "nonimmediate_operand" "=k,o")
	(match_operand:MASK_DWI 1 "nonimmediate_operand" "ko,k"))]
  "TARGET_AVX512VP2INTERSECT
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  /* Split operands 0 and 1 into their low halves (operands 0/1) and
     high halves (operands 2/3) for the two emitted moves.  */
  split_double_mode (<MODE>mode, &operands[0], 2, &operands[0], &operands[2]);
})
23743
;; vp2intersect{d,q}: compute intersection indicators of the elements
;; of operands 1 and 2 into a pair of mask registers, modeled by the
;; P2QI destination.
(define_insn "avx512vp2intersect_2intersect<mode>"
  [(set (match_operand:P2QI 0 "register_operand" "=k")
	(unspec:P2QI
	  [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
	   (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
	  UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  ;; Use the standard set_attr syntax, consistent with the rest of
  ;; this file (was the non-canonical "(set_attr (...) (...))" form).
  [(set_attr "prefix" "evex")])
23753
;; 512-bit SImode variant of vp2intersectd; writes a pair of 16-bit
;; mask registers, modeled by the P2HI destination.
(define_insn "avx512vp2intersect_2intersectv16si"
  [(set (match_operand:P2HI 0 "register_operand" "=k")
	(unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
		      (match_operand:V16SI 2 "vector_operand" "vm")]
		     UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
  ;; Use the standard set_attr syntax, consistent with the rest of
  ;; this file (was the non-canonical "(set_attr (...) (...))" form).
  [(set_attr "prefix" "evex")])
23762
;; BF16 vector modes (bfloat16 elements carried in HImode lanes);
;; the 256/128-bit variants require AVX512VL.
(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf
  [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
;; Converting from SF to BF (V4SF narrows into the low half of a V8HI)
(define_mode_attr sf_cvt_bf16
  [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
;; Mapping from an SF mode to the BF16 mode of the same total width
(define_mode_attr sf_bf16
  [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
23773
;; Zero-masked vcvtne2ps2bf16: forward to the _mask pattern with an
;; all-zero merge operand.
(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
  [(match_operand:BF16 0 "register_operand")
   (match_operand:<bf16_cvt_2sf> 1 "register_operand")
   (match_operand:<bf16_cvt_2sf> 2 "register_operand")
   (match_operand:<avx512fmaskmode> 3 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
		operands[2], CONST0_RTX(<MODE>mode), operands[3]));
  DONE;
})
23785
;; vcvtne2ps2bf16: convert two SF vectors into one BF16 result vector,
;; optionally masked via <mask_name>.
(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
  [(set (match_operand:BF16 0 "register_operand" "=v")
	(unspec:BF16
	  [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
	   (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
	  UNSPEC_VCVTNE2PS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
23794
;; Zero-masked vcvtneps2bf16: forward to the _mask pattern with an
;; all-zero merge operand.
(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
  [(match_operand:<sf_cvt_bf16> 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<avx512fmaskmode> 2 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
		CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
  DONE;
})
23805
;; vcvtneps2bf16: narrow an SF vector to BF16, optionally masked via
;; <mask_name>.
(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
	(unspec:<sf_cvt_bf16>
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
	  UNSPEC_VCVTNEPS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
23813
;; Zero-masked vdpbf16ps: forward to the _maskz_1 pattern with an
;; all-zero merge operand.
(define_expand "avx512f_dpbf16ps_<mode>_maskz"
  [(match_operand:VF1_AVX512VL 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<sf_bf16> 2 "register_operand")
   (match_operand:<sf_bf16> 3 "register_operand")
   (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
		operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
  DONE;
})
23826
;; vdpbf16ps: BF16 dot product accumulated into the SF vector in
;; operand 1 (tied to the destination via the "0" constraint).
(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(unspec:VF1_AVX512VL
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	   (match_operand:<sf_bf16> 2 "register_operand" "v")
	   (match_operand:<sf_bf16> 3 "register_operand" "v")]
	  UNSPEC_VDPBF16PS))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
23836
;; Merge-masked vdpbf16ps: masked-off elements keep the accumulator
;; value (vec_merge with match_dup 1).
(define_insn "avx512f_dpbf16ps_<mode>_mask"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF1_AVX512VL
	  (unspec:VF1_AVX512VL
	    [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	     (match_operand:<sf_bf16> 2 "register_operand" "v")
	     (match_operand:<sf_bf16> 3 "register_operand" "v")]
	    UNSPEC_VDPBF16PS)
	  (match_dup 1)
	  (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
23849
;; KEYLOCKER
;; loadiwkey: load the processor-internal wrapping key.  Operand 2 is
;; constrained to xmm0 ("Yz") and operand 3 to %eax ("a"), matching the
;; instruction's implicit register usage; modeled as a volatile unspec
;; because the wrapping key is internal CPU state, plus a flags clobber.
(define_insn "loadiwkey"
  [(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "v")
			  (match_operand:V2DI 1 "register_operand" "v")
			  (match_operand:V2DI 2 "register_operand" "Yz")
			  (match_operand:SI 3 "register_operand" "a")]
			 UNSPECV_LOADIWKEY)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_KL"
  "loadiwkey\t{%0, %1|%1, %0}"
  [(set_attr "type" "other")])
23861
;; encodekey128: wrap the AES-128 key supplied in xmm0 into a key
;; handle.  The expander builds the full PARALLEL describing what the
;; instruction writes: the SImode result, the handle in xmm0-xmm2,
;; xmm4-xmm6 zeroized, and a flags clobber.  (xmm3 is not modeled as
;; written here.)
(define_expand "encodekey128u32"
  [(match_par_dup 2
     [(set (match_operand:SI 0 "register_operand")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand")
	      (reg:V2DI XMM0_REG)]
	     UNSPECV_ENCODEKEY128U32))])]
  "TARGET_KL"
{
  rtx xmm_regs[7];
  rtx tmp_unspec;
  unsigned i;

  /* parallel rtx for encodekey128 predicate */
  operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));

  for (i = 0; i < 7; i++)
    xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

  /* Element 0: the SImode result of the unspec.  */
  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (SImode,
			       gen_rtvec (2, operands[1], xmm_regs[0]),
			       UNSPECV_ENCODEKEY128U32);

  XVECEXP (operands[2], 0, 0)
    = gen_rtx_SET (operands[0], tmp_unspec);

  /* Elements 1-3: xmm0-xmm2 receive the key handle.  */
  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (V2DImode,
			       gen_rtvec (1, const0_rtx),
			       UNSPECV_ENCODEKEY128U32);

  for (i = 0; i < 3; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], tmp_unspec);

  /* Elements 4-6: xmm4-xmm6 are zeroized.  */
  for (i = 4; i < 7; i++)
    XVECEXP (operands[2], 0, i)
      = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));

  /* Element 7: the instruction clobbers the flags.  */
  XVECEXP (operands[2], 0, 7)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
})
23905
;; Recognizer for the PARALLEL built by the encodekey128u32 expander;
;; encodekey128_operation validates the remaining vector elements.
(define_insn "*encodekey128u32"
  [(match_parallel 2 "encodekey128_operation"
     [(set (match_operand:SI 0 "register_operand" "=r")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand" "r")
	      (reg:V2DI XMM0_REG)]
	     UNSPECV_ENCODEKEY128U32))])]
  "TARGET_KL"
  "encodekey128\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])
23916
;; encodekey256: wrap the AES-256 key supplied in xmm0/xmm1 into a key
;; handle.  Like encodekey128u32 above, but the handle occupies
;; xmm0-xmm3 (four SETs), so the PARALLEL has 9 elements.
(define_expand "encodekey256u32"
  [(match_par_dup 2
     [(set (match_operand:SI 0 "register_operand")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand")
	      (reg:V2DI XMM0_REG)
	      (reg:V2DI XMM1_REG)]
	     UNSPECV_ENCODEKEY256U32))])]
  "TARGET_KL"
{
  rtx xmm_regs[7];
  rtx tmp_unspec;
  unsigned i;

  /* parallel rtx for encodekey256 predicate */
  operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));

  for (i = 0; i < 7; i++)
    xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

  /* Element 0: the SImode result of the unspec.  */
  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (SImode,
			       gen_rtvec (3, operands[1],
					  xmm_regs[0], xmm_regs[1]),
			       UNSPECV_ENCODEKEY256U32);

  XVECEXP (operands[2], 0, 0)
    = gen_rtx_SET (operands[0], tmp_unspec);

  /* Elements 1-4: xmm0-xmm3 receive the key handle.  */
  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (V2DImode,
			       gen_rtvec (1, const0_rtx),
			       UNSPECV_ENCODEKEY256U32);

  for (i = 0; i < 4; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], tmp_unspec);

  /* Elements 5-7: xmm4-xmm6 are zeroized.  */
  for (i = 4; i < 7; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));

  /* Element 8: the instruction clobbers the flags.  */
  XVECEXP (operands[2], 0, 8)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
})
23962
;; Recognizer for the PARALLEL built by the encodekey256u32 expander;
;; encodekey256_operation validates the remaining vector elements.
(define_insn "*encodekey256u32"
  [(match_parallel 2 "encodekey256_operation"
     [(set (match_operand:SI 0 "register_operand" "=r")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand" "r")
	      (reg:V2DI XMM0_REG)
	      (reg:V2DI XMM1_REG)]
	     UNSPECV_ENCODEKEY256U32))])]
  "TARGET_KL"
  "encodekey256\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])
23974
;; Unspec-volatile codes for the single-block Key Locker AES insns.
(define_int_iterator AESDECENCKL
  [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
   UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])

;; Maps each code to its mnemonic suffix.
(define_int_attr aesklvariant
  [(UNSPECV_AESDEC128KLU8 "dec128kl")
   (UNSPECV_AESDEC256KLU8 "dec256kl")
   (UNSPECV_AESENC128KLU8 "enc128kl")
   (UNSPECV_AESENC256KLU8 "enc256kl")])
23984
;; aes{enc,dec}{128,256}kl: AES encrypt/decrypt one 128-bit block in
;; place using the key handle at the memory operand.  The instruction
;; also reports success/failure of the handle check in ZF, modeled by
;; the second SET of FLAGS_REG.
(define_insn "aes<aesklvariant>u8"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
			       (match_operand:BLK 2 "memory_operand" "m")]
			      AESDECENCKL))
   (set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
  "TARGET_KL"
  "aes<aesklvariant>\t{%2, %0|%0, %2}"
  [(set_attr "type" "other")])
23995
;; Unspec-volatile codes for the wide (8-block) Key Locker AES insns.
(define_int_iterator AESDECENCWIDEKL
  [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
   UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])

;; Maps each code to its mnemonic suffix ...
(define_int_attr aeswideklvariant
  [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
   (UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
   (UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
   (UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])

;; ... and to the unspec-name fragment used in C code below.
(define_int_attr AESWIDEKLVARIANT
  [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
   (UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
   (UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
   (UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])
24011
;; aes{enc,dec}wide{128,256}kl: AES on the eight 128-bit blocks in
;; xmm0-xmm7 using the key handle at operand 0.  The expander builds a
;; 9-element PARALLEL: the ZF result in FLAGS_REG plus one SET per xmm
;; register written by the instruction.
(define_expand "aes<aeswideklvariant>u8"
  [(match_par_dup 1
     [(set (reg:CCZ FLAGS_REG)
	   (unspec_volatile:CCZ
	     [(match_operand:BLK 0 "memory_operand")]
	     AESDECENCWIDEKL))])]
  "TARGET_WIDEKL"
{
  rtx tmp_unspec;
  unsigned i;

  /* parallel rtx for widekl predicate */
  operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));

  /* Element 0: the handle check's ZF result.  */
  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (CCZmode,
			       gen_rtvec (1, operands[0]),
			       UNSPECV_<AESWIDEKLVARIANT>);

  XVECEXP (operands[1], 0, 0)
    = gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
		   tmp_unspec);

  /* Elements 1-8: xmm0-xmm7 are transformed in place.  */
  for (i = 0; i < 8; i++)
    {
      rtx xmm_reg = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

      tmp_unspec
	= gen_rtx_UNSPEC_VOLATILE (V2DImode,
				   gen_rtvec (1, xmm_reg),
				   UNSPECV_<AESWIDEKLVARIANT>);
      XVECEXP (operands[1], 0, i + 1)
	= gen_rtx_SET (xmm_reg, tmp_unspec);
    }
})
24047
;; Recognizer for the PARALLEL built by the expander above;
;; aeswidekl_operation validates the per-register SETs.
(define_insn "*aes<aeswideklvariant>u8"
  [(match_parallel 1 "aeswidekl_operation"
     [(set (reg:CCZ FLAGS_REG)
	   (unspec_volatile:CCZ
	     [(match_operand:BLK 0 "memory_operand" "m")]
	     AESDECENCWIDEKL))])]
  "TARGET_WIDEKL"
  "aes<aeswideklvariant>\t{%0}"
  [(set_attr "type" "other")])