1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23
24 ;; SSE2
25 UNSPEC_MOVDI_TO_SSE
26
27 ;; SSE3
28 UNSPEC_LDDQU
29
30 ;; SSSE3
31 UNSPEC_PSHUFB
32 UNSPEC_PSIGN
33 UNSPEC_PALIGNR
34
35 ;; For SSE4A support
36 UNSPEC_EXTRQI
37 UNSPEC_EXTRQ
38 UNSPEC_INSERTQI
39 UNSPEC_INSERTQ
40
41 ;; For SSE4.1 support
42 UNSPEC_BLENDV
43 UNSPEC_INSERTPS
44 UNSPEC_DP
45 UNSPEC_MOVNTDQA
46 UNSPEC_MPSADBW
47 UNSPEC_PHMINPOSUW
48 UNSPEC_PTEST
49
50 ;; For SSE4.2 support
51 UNSPEC_PCMPESTR
52 UNSPEC_PCMPISTR
53
54 ;; For FMA4 support
55 UNSPEC_FMADDSUB
56 UNSPEC_XOP_UNSIGNED_CMP
57 UNSPEC_XOP_TRUEFALSE
58 UNSPEC_XOP_PERMUTE
59 UNSPEC_FRCZ
60
61 ;; For AES support
62 UNSPEC_AESENC
63 UNSPEC_AESENCLAST
64 UNSPEC_AESDEC
65 UNSPEC_AESDECLAST
66 UNSPEC_AESIMC
67 UNSPEC_AESKEYGENASSIST
68
69 ;; For PCLMUL support
70 UNSPEC_PCLMUL
71
72 ;; For AVX support
73 UNSPEC_PCMP
74 UNSPEC_VPERMIL
75 UNSPEC_VPERMIL2
76 UNSPEC_VPERMIL2F128
77 UNSPEC_CAST
78 UNSPEC_VTESTP
79 UNSPEC_VCVTPH2PS
80 UNSPEC_VCVTPS2PH
81
82 ;; For AVX2 support
83 UNSPEC_VPERMVAR
84 UNSPEC_VPERMTI
85 UNSPEC_GATHER
86 UNSPEC_VSIBADDR
87
88 ;; For AVX512F support
89 UNSPEC_VPERMT2
90 UNSPEC_UNSIGNED_FIX_NOTRUNC
91 UNSPEC_UNSIGNED_PCMP
92 UNSPEC_TESTM
93 UNSPEC_TESTNM
94 UNSPEC_SCATTER
95 UNSPEC_RCP14
96 UNSPEC_RSQRT14
97 UNSPEC_FIXUPIMM
98 UNSPEC_SCALEF
99 UNSPEC_VTERNLOG
100 UNSPEC_GETEXP
101 UNSPEC_GETMANT
102 UNSPEC_ALIGN
103 UNSPEC_CONFLICT
104 UNSPEC_COMPRESS
105 UNSPEC_COMPRESS_STORE
106 UNSPEC_EXPAND
107 UNSPEC_MASKED_EQ
108 UNSPEC_MASKED_GT
109
110 ;; Mask operations
111 UNSPEC_MASKOP
112 UNSPEC_KORTEST
113 UNSPEC_KTEST
114 ;; Mask load
115 UNSPEC_MASKLOAD
116
117 ;; For embed. rounding feature
118 UNSPEC_EMBEDDED_ROUNDING
119
120 ;; For AVX512PF support
121 UNSPEC_GATHER_PREFETCH
122 UNSPEC_SCATTER_PREFETCH
123
124 ;; For AVX512ER support
125 UNSPEC_EXP2
126 UNSPEC_RCP28
127 UNSPEC_RSQRT28
128
129 ;; For SHA support
130 UNSPEC_SHA1MSG1
131 UNSPEC_SHA1MSG2
132 UNSPEC_SHA1NEXTE
133 UNSPEC_SHA1RNDS4
134 UNSPEC_SHA256MSG1
135 UNSPEC_SHA256MSG2
136 UNSPEC_SHA256RNDS2
137
138 ;; For AVX512BW support
139 UNSPEC_DBPSADBW
140 UNSPEC_PMADDUBSW512
141 UNSPEC_PMADDWD512
142 UNSPEC_PSHUFHW
143 UNSPEC_PSHUFLW
144 UNSPEC_CVTINT2MASK
145
146 ;; For AVX512DQ support
147 UNSPEC_REDUCE
148 UNSPEC_FPCLASS
149 UNSPEC_RANGE
150
151 ;; For AVX512IFMA support
152 UNSPEC_VPMADD52LUQ
153 UNSPEC_VPMADD52HUQ
154
155 ;; For AVX512VBMI support
156 UNSPEC_VPMULTISHIFT
157
158 ;; For AVX5124FMAPS/AVX5124VNNIW support
159 UNSPEC_VP4FMADD
160 UNSPEC_VP4FNMADD
161 UNSPEC_VP4DPWSSD
162 UNSPEC_VP4DPWSSDS
163
164 ;; For GFNI support
165 UNSPEC_GF2P8AFFINEINV
166 UNSPEC_GF2P8AFFINE
167 UNSPEC_GF2P8MUL
168
169 ;; For AVX512VBMI2 support
170 UNSPEC_VPSHLD
171 UNSPEC_VPSHRD
172 UNSPEC_VPSHRDV
173 UNSPEC_VPSHLDV
174
175 ;; For AVX512VNNI support
176 UNSPEC_VPMADDUBSWACCD
177 UNSPEC_VPMADDUBSWACCSSD
178 UNSPEC_VPMADDWDACCD
179 UNSPEC_VPMADDWDACCSSD
180
181 ;; For VAES support
182 UNSPEC_VAESDEC
183 UNSPEC_VAESDECLAST
184 UNSPEC_VAESENC
185 UNSPEC_VAESENCLAST
186
187 ;; For VPCLMULQDQ support
188 UNSPEC_VPCLMULQDQ
189
190 ;; For AVX512BITALG support
191 UNSPEC_VPSHUFBIT
192
193 ;; For VP2INTERSECT support
194 UNSPEC_VP2INTERSECT
195
196 ;; For AVX512BF16 support
197 UNSPEC_VCVTNE2PS2BF16
198 UNSPEC_VCVTNEPS2BF16
199 UNSPEC_VDPBF16PS
200 ])
201
202 (define_c_enum "unspecv" [
203 UNSPECV_LDMXCSR
204 UNSPECV_STMXCSR
205 UNSPECV_CLFLUSH
206 UNSPECV_MONITOR
207 UNSPECV_MWAIT
208 UNSPECV_VZEROALL
209 UNSPECV_VZEROUPPER
210
211 ;; For KEYLOCKER
212 UNSPECV_LOADIWKEY
213 UNSPECV_AESDEC128KLU8
214 UNSPECV_AESENC128KLU8
215 UNSPECV_AESDEC256KLU8
216 UNSPECV_AESENC256KLU8
217 UNSPECV_AESDECWIDE128KLU8
218 UNSPECV_AESENCWIDE128KLU8
219 UNSPECV_AESDECWIDE256KLU8
220 UNSPECV_AESENCWIDE256KLU8
221 UNSPECV_ENCODEKEY128U32
222 UNSPECV_ENCODEKEY256U32
223 ])
224
225 ;; All vector modes including V?TImode, used in move patterns.
226 (define_mode_iterator VMOVE
227 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
228 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
229 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
230 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
231 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
232 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
233 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
234
235 ;; All AVX-512{F,VL} vector modes.  TARGET_AVX512F is assumed as the baseline.
236 (define_mode_iterator V48_AVX512VL
237 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
238 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
239 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
240 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
241
242 ;; 1- and 2-byte AVX-512{BW,VL} vector modes.  TARGET_AVX512BW is assumed as the baseline.
243 (define_mode_iterator VI12_AVX512VL
244 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
245 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
246
247 ;; The same modes, but without assuming TARGET_AVX512BW as the baseline.
248 (define_mode_iterator VI12_AVX512VLBW
249 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
250 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
251 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
252
253 (define_mode_iterator VI1_AVX512VL
254 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
255
256 ;; All vector modes
257 (define_mode_iterator V
258 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
259 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
260 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
261 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
262 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
263 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
264
265 ;; All 128bit vector modes
266 (define_mode_iterator V_128
267 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
268
269 ;; All 256bit vector modes
270 (define_mode_iterator V_256
271 [V32QI V16HI V8SI V4DI V8SF V4DF])
272
273 ;; All 128bit and 256bit vector modes
274 (define_mode_iterator V_128_256
275 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
276
277 ;; All 512bit vector modes
278 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
279
280 ;; All 256bit and 512bit vector modes
281 (define_mode_iterator V_256_512
282 [V32QI V16HI V8SI V4DI V8SF V4DF
283 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
284 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
285
286 ;; All vector float modes
287 (define_mode_iterator VF
288 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
289 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
290
291 ;; 128- and 256-bit float vector modes
292 (define_mode_iterator VF_128_256
293 [(V8SF "TARGET_AVX") V4SF
294 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
295
296 ;; All SFmode vector float modes
297 (define_mode_iterator VF1
298 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
299
300 (define_mode_iterator VF1_AVX2
301 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
302
303 ;; 128- and 256-bit SF vector modes
304 (define_mode_iterator VF1_128_256
305 [(V8SF "TARGET_AVX") V4SF])
306
307 (define_mode_iterator VF1_128_256VL
308 [V8SF (V4SF "TARGET_AVX512VL")])
309
310 ;; All DFmode vector float modes
311 (define_mode_iterator VF2
312 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
313
314 ;; 128- and 256-bit DF vector modes
315 (define_mode_iterator VF2_128_256
316 [(V4DF "TARGET_AVX") V2DF])
317
318 (define_mode_iterator VF2_512_256
319 [(V8DF "TARGET_AVX512F") V4DF])
320
321 (define_mode_iterator VF2_512_256VL
322 [V8DF (V4DF "TARGET_AVX512VL")])
323
324 ;; All 128bit vector float modes
325 (define_mode_iterator VF_128
326 [V4SF (V2DF "TARGET_SSE2")])
327
328 ;; All 256bit vector float modes
329 (define_mode_iterator VF_256
330 [V8SF V4DF])
331
332 ;; All 512bit vector float modes
333 (define_mode_iterator VF_512
334 [V16SF V8DF])
335
336 (define_mode_iterator VI48_AVX512VL
337 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
338 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
339
340 (define_mode_iterator VF_AVX512VL
341 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
342 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
343
344 ;; AVX512ER SF plus 128- and 256-bit SF vector modes
345 (define_mode_iterator VF1_AVX512ER_128_256
346 [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
347
348 (define_mode_iterator VF2_AVX512VL
349 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
350
351 (define_mode_iterator VF1_AVX512VL
352 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
353
354 ;; All vector integer modes
355 (define_mode_iterator VI
356 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
357 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
358 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
359 (V8SI "TARGET_AVX") V4SI
360 (V4DI "TARGET_AVX") V2DI])
361
362 (define_mode_iterator VI_AVX2
363 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
364 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
365 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
366 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
367
368 ;; All QImode vector integer modes
369 (define_mode_iterator VI1
370 [(V32QI "TARGET_AVX") V16QI])
371
372 ;; All 128-bit vector modes, plus their 256-bit counterparts with AVX
373 (define_mode_iterator V_AVX
374 [V16QI V8HI V4SI V2DI V4SF V2DF
375 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
376 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
377 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
378
379 (define_mode_iterator VI48_AVX
380 [V4SI V2DI
381 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
382
383 (define_mode_iterator VI8
384 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
385
386 (define_mode_iterator VI8_FVL
387 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
388
389 (define_mode_iterator VI8_AVX512VL
390 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
391
392 (define_mode_iterator VI8_256_512
393 [V8DI (V4DI "TARGET_AVX512VL")])
394
395 (define_mode_iterator VI1_AVX2
396 [(V32QI "TARGET_AVX2") V16QI])
397
398 (define_mode_iterator VI1_AVX512
399 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
400
401 (define_mode_iterator VI1_AVX512F
402 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
403
404 (define_mode_iterator VI2_AVX2
405 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
406
407 (define_mode_iterator VI2_AVX512F
408 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
409
410 (define_mode_iterator VI4_AVX
411 [(V8SI "TARGET_AVX") V4SI])
412
413 (define_mode_iterator VI4_AVX2
414 [(V8SI "TARGET_AVX2") V4SI])
415
416 (define_mode_iterator VI4_AVX512F
417 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
418
419 (define_mode_iterator VI4_AVX512VL
420 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
421
422 (define_mode_iterator VI48_AVX512F_AVX512VL
423 [V4SI V8SI (V16SI "TARGET_AVX512F")
424 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
425
426 (define_mode_iterator VI2_AVX512VL
427 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
428
429 (define_mode_iterator VI1_AVX512VL_F
430 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
431
432 (define_mode_iterator VI8_AVX2_AVX512BW
433 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
434
435 (define_mode_iterator VI8_AVX2
436 [(V4DI "TARGET_AVX2") V2DI])
437
438 (define_mode_iterator VI8_AVX2_AVX512F
439 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
440
441 (define_mode_iterator VI8_AVX_AVX512F
442 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
443
444 (define_mode_iterator VI4_128_8_256
445 [V4SI V4DI])
446
447 ;; All V8D* modes
448 (define_mode_iterator V8FI
449 [V8DF V8DI])
450
451 ;; All V16S* modes
452 (define_mode_iterator V16FI
453 [V16SF V16SI])
454
455 ;; ??? We should probably use TImode instead.
456 (define_mode_iterator VIMAX_AVX2_AVX512BW
457 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
458
459 ;; TARGET_AVX512BW is assumed as the baseline
460 (define_mode_iterator VIMAX_AVX512VL
461 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
462
463 (define_mode_iterator VIMAX_AVX2
464 [(V2TI "TARGET_AVX2") V1TI])
465
466 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
467 (define_mode_iterator SSESCALARMODE
468 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
469
470 (define_mode_iterator VI12_AVX2
471 [(V32QI "TARGET_AVX2") V16QI
472 (V16HI "TARGET_AVX2") V8HI])
473
474 (define_mode_iterator VI12_AVX2_AVX512BW
475 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
476 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
477
478 (define_mode_iterator VI24_AVX2
479 [(V16HI "TARGET_AVX2") V8HI
480 (V8SI "TARGET_AVX2") V4SI])
481
482 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
483 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
484 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
485 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
486
487 (define_mode_iterator VI124_AVX2
488 [(V32QI "TARGET_AVX2") V16QI
489 (V16HI "TARGET_AVX2") V8HI
490 (V8SI "TARGET_AVX2") V4SI])
491
492 (define_mode_iterator VI2_AVX2_AVX512BW
493 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
494
495 (define_mode_iterator VI248_AVX512VL
496 [V32HI V16SI V8DI
497 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
498 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
499 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
500
501 (define_mode_iterator VI48_AVX2
502 [(V8SI "TARGET_AVX2") V4SI
503 (V4DI "TARGET_AVX2") V2DI])
504
505 (define_mode_iterator VI248_AVX2
506 [(V16HI "TARGET_AVX2") V8HI
507 (V8SI "TARGET_AVX2") V4SI
508 (V4DI "TARGET_AVX2") V2DI])
509
510 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
511 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
512 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
513 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
514
515 (define_mode_iterator VI248_AVX512BW
516 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
517
518 (define_mode_iterator VI248_AVX512BW_AVX512VL
519 [(V32HI "TARGET_AVX512BW")
520 (V4DI "TARGET_AVX512VL") V16SI V8DI])
521
522 ;; TARGET_AVX512VL is assumed as the baseline
523 (define_mode_iterator VI248_AVX512BW_1
524 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
525 V8SI V4SI
526 V2DI])
527
528 (define_mode_iterator VI248_AVX512BW_2
529 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
530 V8SI V4SI
531 V4DI V2DI])
532
533 (define_mode_iterator VI48_AVX512F
534 [(V16SI "TARGET_AVX512F") V8SI V4SI
535 (V8DI "TARGET_AVX512F") V4DI V2DI])
536
537 (define_mode_iterator VI48_AVX_AVX512F
538 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
539 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
540
541 (define_mode_iterator VI12_AVX_AVX512F
542 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
543 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
544
545 (define_mode_iterator V48_AVX2
546 [V4SF V2DF
547 V8SF V4DF
548 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
549 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
550
551 (define_mode_iterator VI1_AVX512VLBW
552 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
553 (V16QI "TARGET_AVX512VL")])
554
555 (define_mode_attr avx512
556 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
557 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
558 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
559 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
560 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
561 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
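;; I.e. masked variants of the 128- and 256-bit modes require AVX512VL,
;; the 512-bit byte/word modes require AVX512BW, and the remaining
;; 512-bit modes only require AVX512F.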
562
563 (define_mode_attr sse2_avx_avx512f
564 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
565 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
566 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
567 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
568 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
569 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
570
571 (define_mode_attr sse2_avx2
572 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
573 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
574 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
575 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
576 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
577
578 (define_mode_attr ssse3_avx2
579 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
580 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
581 (V4SI "ssse3") (V8SI "avx2")
582 (V2DI "ssse3") (V4DI "avx2")
583 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
584
585 (define_mode_attr sse4_1_avx2
586 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
587 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
588 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
589 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
590
591 (define_mode_attr avx_avx2
592 [(V4SF "avx") (V2DF "avx")
593 (V8SF "avx") (V4DF "avx")
594 (V4SI "avx2") (V2DI "avx2")
595 (V8SI "avx2") (V4DI "avx2")])
596
597 (define_mode_attr vec_avx2
598 [(V16QI "vec") (V32QI "avx2")
599 (V8HI "vec") (V16HI "avx2")
600 (V4SI "vec") (V8SI "avx2")
601 (V2DI "vec") (V4DI "avx2")])
602
603 (define_mode_attr avx2_avx512
604 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
605 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
606 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
607 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
608 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
609
610 (define_mode_attr shuffletype
611 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
612 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
613 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
614 (V32HI "i") (V16HI "i") (V8HI "i")
615 (V64QI "i") (V32QI "i") (V16QI "i")
616 (V4TI "i") (V2TI "i") (V1TI "i")])
617
618 (define_mode_attr ssequartermode
619 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
620
621 (define_mode_attr ssequarterinsnmode
622 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
623
624 (define_mode_attr vecmemsuffix
625 [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
626 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
627
628 (define_mode_attr ssedoublemodelower
629 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
630 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
631 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
632
633 (define_mode_attr ssedoublemode
634 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
635 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
636 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
637 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
638 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
639 (V4DI "V8DI") (V8DI "V16DI")])
640
641 (define_mode_attr ssebytemode
642 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
643 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
644
645 ;; All 128bit vector integer modes
646 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
647
648 ;; All 256bit vector integer modes
649 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
650
651 ;; All 128 and 256bit vector integer modes
652 (define_mode_iterator VI_128_256 [V16QI V8HI V4SI V2DI V32QI V16HI V8SI V4DI])
653
654 ;; Various 128bit vector integer mode combinations
655 (define_mode_iterator VI12_128 [V16QI V8HI])
656 (define_mode_iterator VI14_128 [V16QI V4SI])
657 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
658 (define_mode_iterator VI24_128 [V8HI V4SI])
659 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
660 (define_mode_iterator VI48_128 [V4SI V2DI])
661
662 ;; Various 256-bit and 512-bit vector integer mode combinations
663 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
664 (define_mode_iterator VI124_256_AVX512F_AVX512BW
665 [V32QI V16HI V8SI
666 (V64QI "TARGET_AVX512BW")
667 (V32HI "TARGET_AVX512BW")
668 (V16SI "TARGET_AVX512F")])
669 (define_mode_iterator VI48_256 [V8SI V4DI])
670 (define_mode_iterator VI48_512 [V16SI V8DI])
671 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
672 (define_mode_iterator VI_AVX512BW
673 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
674
675 ;; Int-float size matches
676 (define_mode_iterator VI4F_128 [V4SI V4SF])
677 (define_mode_iterator VI8F_128 [V2DI V2DF])
678 (define_mode_iterator VI4F_256 [V8SI V8SF])
679 (define_mode_iterator VI8F_256 [V4DI V4DF])
680 (define_mode_iterator VI4F_256_512
681 [V8SI V8SF
682 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
683 (define_mode_iterator VI48F_256_512
684 [V8SI V8SF
685 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
686 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
687 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
688 (define_mode_iterator VF48_I1248
689 [V16SI V16SF V8DI V8DF V32HI V64QI])
690 (define_mode_iterator VI48F
691 [V16SI V16SF V8DI V8DF
692 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
693 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
694 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
695 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
696 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
697
698 (define_mode_iterator VF_AVX512
699 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
700 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
701 V16SF V8DF])
702
703 (define_mode_attr avx512bcst
704 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
705 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
706 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
707 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
708 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
709 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
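;; These strings are the EVEX embedded-broadcast modifiers; an instruction
;; template can print "<avx512bcst>" after a memory operand to broadcast a
;; single element, e.g. "{1to16}" for V16SF.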
710
711 ;; Mapping from float mode to required SSE level
712 (define_mode_attr sse
713 [(SF "sse") (DF "sse2")
714 (V4SF "sse") (V2DF "sse2")
715 (V16SF "avx512f") (V8SF "avx")
716 (V8DF "avx512f") (V4DF "avx")])
717
718 (define_mode_attr sse2
719 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
720 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
721
722 (define_mode_attr sse3
723 [(V16QI "sse3") (V32QI "avx")])
724
725 (define_mode_attr sse4_1
726 [(V4SF "sse4_1") (V2DF "sse4_1")
727 (V8SF "avx") (V4DF "avx")
728 (V8DF "avx512f")
729 (V4DI "avx") (V2DI "sse4_1")
730 (V8SI "avx") (V4SI "sse4_1")
731 (V16QI "sse4_1") (V32QI "avx")
732 (V8HI "sse4_1") (V16HI "avx")])
733
734 (define_mode_attr avxsizesuffix
735 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
736 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
737 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
738 (V16SF "512") (V8DF "512")
739 (V8SF "256") (V4DF "256")
740 (V4SF "") (V2DF "")])
741
742 ;; SSE instruction mode
743 (define_mode_attr sseinsnmode
744 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
745 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
746 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
747 (V16SF "V16SF") (V8DF "V8DF")
748 (V8SF "V8SF") (V4DF "V4DF")
749 (V4SF "V4SF") (V2DF "V2DF")
750 (TI "TI")])
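;; For example, all 512-bit modes map to XI and all 256-bit modes to OI,
;; while 128-bit integer modes use TI and float vector modes keep their
;; own mode.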
751
752 ;; Mapping of vector modes to corresponding mask size
753 (define_mode_attr avx512fmaskmode
754 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
755 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
756 (V16SI "HI") (V8SI "QI") (V4SI "QI")
757 (V8DI "QI") (V4DI "QI") (V2DI "QI")
758 (V16SF "HI") (V8SF "QI") (V4SF "QI")
759 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
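;; One mask bit per element: V16SF and V16SI (16 elements) use an HImode
;; mask register, while V8DF and V8DI (8 elements) only need QImode.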
760
761 ;; Mapping of vector modes to corresponding mask size
762 (define_mode_attr avx512fmaskmodelower
763 [(V64QI "di") (V32QI "si") (V16QI "hi")
764 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
765 (V16SI "hi") (V8SI "qi") (V4SI "qi")
766 (V8DI "qi") (V4DI "qi") (V2DI "qi")
767 (V16SF "hi") (V8SF "qi") (V4SF "qi")
768 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
769
770 ;; Mapping of vector modes to corresponding mask half size
771 (define_mode_attr avx512fmaskhalfmode
772 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
773 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
774 (V16SI "QI") (V8SI "QI") (V4SI "QI")
775 (V8DI "QI") (V4DI "QI") (V2DI "QI")
776 (V16SF "QI") (V8SF "QI") (V4SF "QI")
777 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
778
779 ;; Mapping of vector float modes to an integer mode of the same size
780 (define_mode_attr sseintvecmode
781 [(V16SF "V16SI") (V8DF "V8DI")
782 (V8SF "V8SI") (V4DF "V4DI")
783 (V4SF "V4SI") (V2DF "V2DI")
784 (V16SI "V16SI") (V8DI "V8DI")
785 (V8SI "V8SI") (V4DI "V4DI")
786 (V4SI "V4SI") (V2DI "V2DI")
787 (V16HI "V16HI") (V8HI "V8HI")
788 (V32HI "V32HI") (V64QI "V64QI")
789 (V32QI "V32QI") (V16QI "V16QI")])
790
791 (define_mode_attr sseintvecmode2
792 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
793 (V8SF "OI") (V4SF "TI")])
794
795 (define_mode_attr sseintvecmodelower
796 [(V16SF "v16si") (V8DF "v8di")
797 (V8SF "v8si") (V4DF "v4di")
798 (V4SF "v4si") (V2DF "v2di")
799 (V8SI "v8si") (V4DI "v4di")
800 (V4SI "v4si") (V2DI "v2di")
801 (V16HI "v16hi") (V8HI "v8hi")
802 (V32QI "v32qi") (V16QI "v16qi")])
803
804 ;; Mapping of vector modes to a vector mode of double size
805 (define_mode_attr ssedoublevecmode
806 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
807 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
808 (V8SF "V16SF") (V4DF "V8DF")
809 (V4SF "V8SF") (V2DF "V4DF")])
810
811 ;; Mapping of vector modes to a vector mode of half size
812 (define_mode_attr ssehalfvecmode
813 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
814 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
815 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
816 (V16SF "V8SF") (V8DF "V4DF")
817 (V8SF "V4SF") (V4DF "V2DF")
818 (V4SF "V2SF")])
819
820 (define_mode_attr ssehalfvecmodelower
821 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
822 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
823 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
824 (V16SF "v8sf") (V8DF "v4df")
825 (V8SF "v4sf") (V4DF "v2df")
826 (V4SF "v2sf")])
827
828 ;; Mapping of vector modes to the packed-single mode of the same size
829 (define_mode_attr ssePSmode
830 [(V16SI "V16SF") (V8DF "V16SF")
831 (V16SF "V16SF") (V8DI "V16SF")
832 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
833 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
834 (V8SI "V8SF") (V4SI "V4SF")
835 (V4DI "V8SF") (V2DI "V4SF")
836 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
837 (V8SF "V8SF") (V4SF "V4SF")
838 (V4DF "V8SF") (V2DF "V4SF")])
839
840 (define_mode_attr ssePSmode2
841 [(V8DI "V8SF") (V4DI "V4SF")])
842
843 ;; Mapping of vector modes back to the scalar modes
844 (define_mode_attr ssescalarmode
845 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
846 (V32HI "HI") (V16HI "HI") (V8HI "HI")
847 (V16SI "SI") (V8SI "SI") (V4SI "SI")
848 (V8DI "DI") (V4DI "DI") (V2DI "DI")
849 (V16SF "SF") (V8SF "SF") (V4SF "SF")
850 (V8DF "DF") (V4DF "DF") (V2DF "DF")
851 (V4TI "TI") (V2TI "TI")])
852
853 ;; Mapping of vector modes back to the scalar modes
854 (define_mode_attr ssescalarmodelower
855 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
856 (V32HI "hi") (V16HI "hi") (V8HI "hi")
857 (V16SI "si") (V8SI "si") (V4SI "si")
858 (V8DI "di") (V4DI "di") (V2DI "di")
859 (V16SF "sf") (V8SF "sf") (V4SF "sf")
860 (V8DF "df") (V4DF "df") (V2DF "df")
861 (V4TI "ti") (V2TI "ti")])
862
863 ;; Mapping of vector modes to the 128bit modes
864 (define_mode_attr ssexmmmode
865 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
866 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
867 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
868 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
869 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
870 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
871
872 ;; Pointer size override for scalar modes (Intel asm dialect)
873 (define_mode_attr iptr
874 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
875 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
876 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
877 (V16SF "k") (V8DF "q")
878 (V8SF "k") (V4DF "q")
879 (V4SF "k") (V2DF "q")
880 (SF "k") (DF "q")])
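;; These letters correspond to the b/w/k/q operand-size modifiers, which in
;; Intel syntax print the element-sized memory override (e.g. "k" yields
;; DWORD PTR for 32-bit elements).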
881
882 ;; Mapping of vector modes to VPTERNLOG suffix
883 (define_mode_attr ternlogsuffix
884 [(V8DI "q") (V4DI "q") (V2DI "q")
885 (V16SI "d") (V8SI "d") (V4SI "d")
886 (V32HI "d") (V16HI "d") (V8HI "d")
887 (V64QI "d") (V32QI "d") (V16QI "d")])
888
889 ;; Number of scalar elements in each vector type
890 (define_mode_attr ssescalarnum
891 [(V64QI "64") (V16SI "16") (V8DI "8")
892 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
893 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
894 (V16SF "16") (V8DF "8")
895 (V8SF "8") (V4DF "4")
896 (V4SF "4") (V2DF "2")])
897
898 ;; Mask for the maximum element index in each vector type (element count minus one)
899 (define_mode_attr ssescalarnummask
900 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
901 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
902 (V8SF "7") (V4DF "3")
903 (V4SF "3") (V2DF "1")])
904
905 (define_mode_attr ssescalarsize
906 [(V4TI "64") (V2TI "64") (V1TI "64")
907 (V8DI "64") (V4DI "64") (V2DI "64")
908 (V64QI "8") (V32QI "8") (V16QI "8")
909 (V32HI "16") (V16HI "16") (V8HI "16")
910 (V16SI "32") (V8SI "32") (V4SI "32")
911 (V16SF "32") (V8SF "32") (V4SF "32")
912 (V8DF "64") (V4DF "64") (V2DF "64")])
913
914 ;; SSE prefix for integer vector modes
915 (define_mode_attr sseintprefix
916 [(V2DI "p") (V2DF "")
917 (V4DI "p") (V4DF "")
918 (V8DI "p") (V8DF "")
919 (V4SI "p") (V4SF "")
920 (V8SI "p") (V8SF "")
921 (V16SI "p") (V16SF "")
922 (V16QI "p") (V8HI "p")
923 (V32QI "p") (V16HI "p")
924 (V64QI "p") (V32HI "p")])
925
926 ;; SSE scalar suffix for vector modes
927 (define_mode_attr ssescalarmodesuffix
928 [(SF "ss") (DF "sd")
929 (V16SF "ss") (V8DF "sd")
930 (V8SF "ss") (V4DF "sd")
931 (V4SF "ss") (V2DF "sd")
932 (V16SI "d") (V8DI "q")
933 (V8SI "d") (V4DI "q")
934 (V4SI "d") (V2DI "q")])
935
936 ;; Pack/unpack vector modes
937 (define_mode_attr sseunpackmode
938 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
939 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
940 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
941
942 (define_mode_attr ssepackmode
943 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
944 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
945 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
946
947 ;; Mapping of the max integer size for xop rotate immediate constraint
948 (define_mode_attr sserotatemax
949 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
950
951 ;; Mapping of mode to cast intrinsic name
952 (define_mode_attr castmode
953 [(V8SI "si") (V8SF "ps") (V4DF "pd")
954 (V16SI "si") (V16SF "ps") (V8DF "pd")])
955
956 ;; Instruction suffix for sign and zero extensions.
957 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
958
959 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
960 ;; i64x4 or f64x4 for 512bit modes.
961 (define_mode_attr i128
962 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
963 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
964 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
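;; "%~" prints "i" when TARGET_AVX2 is enabled and "f" otherwise, selecting
;; e.g. vinserti128 vs. vinsertf128 for the 256-bit integer modes.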
965
966 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
967 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
968 (define_mode_attr i128vldq
969 [(V8SF "f32x4") (V4DF "f64x2")
970 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
971
972 ;; Mix-n-match
973 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
974 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
975
976 ;; Mapping for dbpsadbw modes
977 (define_mode_attr dbpsadbwmode
978 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
979
980 ;; Mapping suffixes for broadcast
981 (define_mode_attr bcstscalarsuff
982 [(V64QI "b") (V32QI "b") (V16QI "b")
983 (V32HI "w") (V16HI "w") (V8HI "w")
984 (V16SI "d") (V8SI "d") (V4SI "d")
985 (V8DI "q") (V4DI "q") (V2DI "q")
986 (V16SF "ss") (V8SF "ss") (V4SF "ss")
987 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
988
989 ;; Tie mode of assembler operand to mode iterator
990 (define_mode_attr xtg_mode
991 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
992 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
993 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
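;; The letters feed the %x/%t/%g operand modifiers, which print the xmm,
;; ymm or zmm form of the register operand in the assembler template.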
994
995 ;; Half mask mode for unpacks
996 (define_mode_attr HALFMASKMODE
997 [(DI "SI") (SI "HI")])
998
999 ;; Double mask mode for packs
1000 (define_mode_attr DOUBLEMASKMODE
1001 [(HI "SI") (SI "DI")])
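;; E.g. unpacking an SImode mask (32 elements) yields two HImode half-masks,
;; and packing two HImode masks produces a single SImode mask.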
1002
1003
1004 ;; Include define_subst patterns for instructions with mask
1005 (include "subst.md")
1006
1007 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
1008
1009 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1010 ;;
1011 ;; Move patterns
1012 ;;
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1014
1015 ;; All of these patterns are enabled for SSE1 as well as SSE2.
1016 ;; This is essential for maintaining stable calling conventions.
1017
1018 (define_expand "mov<mode>"
1019 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1020 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1021 "TARGET_SSE"
1022 {
1023 ix86_expand_vector_move (<MODE>mode, operands);
1024 DONE;
1025 })
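;; ix86_expand_vector_move legalizes the operands first, e.g. forcing one
;; side into a register when both source and destination are in memory.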
1026
1027 (define_insn "mov<mode>_internal"
1028 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
1029 "=v,v ,v ,m")
1030 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
1031 " C,BC,vm,v"))]
1032 "TARGET_SSE
1033 && (register_operand (operands[0], <MODE>mode)
1034 || register_operand (operands[1], <MODE>mode))"
1035 {
1036 switch (get_attr_type (insn))
1037 {
1038 case TYPE_SSELOG1:
1039 return standard_sse_constant_opcode (insn, operands);
1040
1041 case TYPE_SSEMOV:
1042 return ix86_output_ssemov (insn, operands);
1043
1044 default:
1045 gcc_unreachable ();
1046 }
1047 }
1048 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1049 (set_attr "prefix" "maybe_vex")
1050 (set (attr "mode")
1051 (cond [(match_test "TARGET_AVX")
1052 (const_string "<sseinsnmode>")
1053 (ior (not (match_test "TARGET_SSE2"))
1054 (match_test "optimize_function_for_size_p (cfun)"))
1055 (const_string "V4SF")
1056 (and (match_test "<MODE>mode == V2DFmode")
1057 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1058 (const_string "V4SF")
1059 (and (eq_attr "alternative" "3")
1060 (match_test "TARGET_SSE_TYPELESS_STORES"))
1061 (const_string "V4SF")
1062 (and (eq_attr "alternative" "0")
1063 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1064 (const_string "TI")
1065 ]
1066 (const_string "<sseinsnmode>")))
1067 (set (attr "enabled")
1068 (cond [(and (match_test "<MODE_SIZE> == 16")
1069 (eq_attr "alternative" "1"))
1070 (symbol_ref "TARGET_SSE2")
1071 (and (match_test "<MODE_SIZE> == 32")
1072 (eq_attr "alternative" "1"))
1073 (symbol_ref "TARGET_AVX2")
1074 ]
1075 (symbol_ref "true")))])
1076
1077 ;; If mem_addr points to a memory region with fewer accessible bytes than the
1078 ;; full vector size and k is a mask that would prevent reading the inaccessible
1079 ;; bytes from mem_addr, wrap the load in UNSPEC_MASKLOAD so it cannot be
1080 ;; transformed into a vpblendd.  See PR97642.
1081 (define_expand "<avx512>_load<mode>_mask"
1082 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1083 (vec_merge:V48_AVX512VL
1084 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
1085 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
1086 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1087 "TARGET_AVX512F"
1088 {
1089 if (CONST_INT_P (operands[3]))
1090 {
1091 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1092 DONE;
1093 }
1094 else if (MEM_P (operands[1]))
1095 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1096 gen_rtvec (1, operands[1]),
1097 UNSPEC_MASKLOAD);
1098 })
1099
1100 (define_insn "*<avx512>_load<mode>_mask"
1101 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1102 (vec_merge:V48_AVX512VL
1103 (unspec:V48_AVX512VL
1104 [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
1105 UNSPEC_MASKLOAD)
1106 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
1107 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1108 "TARGET_AVX512F"
1109 {
1110 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1111 {
1112 if (misaligned_operand (operands[1], <MODE>mode))
1113 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1114 else
1115 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1116 }
1117 else
1118 {
1119 if (misaligned_operand (operands[1], <MODE>mode))
1120 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1121 else
1122 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1123 }
1124 }
1125 [(set_attr "type" "ssemov")
1126 (set_attr "prefix" "evex")
1127 (set_attr "mode" "<sseinsnmode>")])
1128
1129 (define_insn_and_split "*<avx512>_load<mode>"
1130 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1131 (unspec:V48_AVX512VL
1132 [(match_operand:V48_AVX512VL 1 "memory_operand")]
1133 UNSPEC_MASKLOAD))]
1134 "TARGET_AVX512F"
1135 "#"
1136 "&& 1"
1137 [(set (match_dup 0) (match_dup 1))])
1138
1139 (define_expand "<avx512>_load<mode>_mask"
1140 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
1141 (vec_merge:VI12_AVX512VL
1142 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
1143 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
1144 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1145 "TARGET_AVX512BW"
1146 {
1147 if (CONST_INT_P (operands[3]))
1148 {
1149 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1150 DONE;
1151 }
1152 else if (MEM_P (operands[1]))
1153 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1154 gen_rtvec (1, operands[1]),
1155 UNSPEC_MASKLOAD);
1156
1157 })
1158
1159 (define_insn "*<avx512>_load<mode>_mask"
1160 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1161 (vec_merge:VI12_AVX512VL
1162 (unspec:VI12_AVX512VL
1163 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1164 UNSPEC_MASKLOAD)
1165 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
1166 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1167 "TARGET_AVX512BW"
1168 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1169 [(set_attr "type" "ssemov")
1170 (set_attr "prefix" "evex")
1171 (set_attr "mode" "<sseinsnmode>")])
1172
1173 (define_insn_and_split "*<avx512>_load<mode>"
1174 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1175 (unspec:VI12_AVX512VL
1176 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1177 UNSPEC_MASKLOAD))]
1178 "TARGET_AVX512BW"
1179 "#"
1180 "&& 1"
1181 [(set (match_dup 0) (match_dup 1))])
1182
1183 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1184 [(set (match_operand:VF_128 0 "register_operand" "=v")
1185 (vec_merge:VF_128
1186 (vec_merge:VF_128
1187 (match_operand:VF_128 2 "register_operand" "v")
1188 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1189 (match_operand:QI 4 "register_operand" "Yk"))
1190 (match_operand:VF_128 1 "register_operand" "v")
1191 (const_int 1)))]
1192 "TARGET_AVX512F"
1193 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1194 [(set_attr "type" "ssemov")
1195 (set_attr "prefix" "evex")
1196 (set_attr "mode" "<ssescalarmode>")])
1197
1198 (define_expand "avx512f_load<mode>_mask"
1199 [(set (match_operand:<ssevecmode> 0 "register_operand")
1200 (vec_merge:<ssevecmode>
1201 (vec_merge:<ssevecmode>
1202 (vec_duplicate:<ssevecmode>
1203 (match_operand:MODEF 1 "memory_operand"))
1204 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1205 (match_operand:QI 3 "register_operand"))
1206 (match_dup 4)
1207 (const_int 1)))]
1208 "TARGET_AVX512F"
1209 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
1210
1211 (define_insn "*avx512f_load<mode>_mask"
1212 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1213 (vec_merge:<ssevecmode>
1214 (vec_merge:<ssevecmode>
1215 (vec_duplicate:<ssevecmode>
1216 (match_operand:MODEF 1 "memory_operand" "m"))
1217 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1218 (match_operand:QI 3 "register_operand" "Yk"))
1219 (match_operand:<ssevecmode> 4 "const0_operand" "C")
1220 (const_int 1)))]
1221 "TARGET_AVX512F"
1222 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1223 [(set_attr "type" "ssemov")
1224 (set_attr "prefix" "evex")
1225 (set_attr "memory" "load")
1226 (set_attr "mode" "<MODE>")])
1227
1228 (define_insn "avx512f_store<mode>_mask"
1229 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1230 (if_then_else:MODEF
1231 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1232 (const_int 1))
1233 (vec_select:MODEF
1234 (match_operand:<ssevecmode> 1 "register_operand" "v")
1235 (parallel [(const_int 0)]))
1236 (match_dup 0)))]
1237 "TARGET_AVX512F"
1238 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1239 [(set_attr "type" "ssemov")
1240 (set_attr "prefix" "evex")
1241 (set_attr "memory" "store")
1242 (set_attr "mode" "<MODE>")])
1243
1244 (define_insn "<avx512>_blendm<mode>"
1245 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1246 (vec_merge:V48_AVX512VL
1247 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1248 (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1249 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1250 "TARGET_AVX512F"
1251 {
1252 if (REG_P (operands[1])
1253 && REGNO (operands[1]) != REGNO (operands[0]))
1254 return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
1255
1256 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1257 {
1258 if (misaligned_operand (operands[2], <MODE>mode))
1259 return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1260 else
1261 return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1262 }
1263 else
1264 {
1265 if (misaligned_operand (operands[2], <MODE>mode))
1266 return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1267 else
1268 return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1269 }
1270 }
1271 [(set_attr "type" "ssemov")
1272 (set_attr "prefix" "evex")
1273 (set_attr "mode" "<sseinsnmode>")])
1274
1275 (define_insn "<avx512>_blendm<mode>"
1276 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1277 (vec_merge:VI12_AVX512VL
1278 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1279 (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1280 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1281 "TARGET_AVX512BW"
1282 "@
1283 vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
1284 vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1285 [(set_attr "type" "ssemov")
1286 (set_attr "prefix" "evex")
1287 (set_attr "mode" "<sseinsnmode>")])
1288
1289 (define_insn "<avx512>_store<mode>_mask"
1290 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1291 (vec_merge:V48_AVX512VL
1292 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1293 (match_dup 0)
1294 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1295 "TARGET_AVX512F"
1296 {
1297 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1298 {
1299 if (misaligned_operand (operands[0], <MODE>mode))
1300 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1301 else
1302 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1303 }
1304 else
1305 {
1306 if (misaligned_operand (operands[0], <MODE>mode))
1307 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1308 else
1309 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1310 }
1311 }
1312 [(set_attr "type" "ssemov")
1313 (set_attr "prefix" "evex")
1314 (set_attr "memory" "store")
1315 (set_attr "mode" "<sseinsnmode>")])
1316
1317 (define_insn "<avx512>_store<mode>_mask"
1318 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1319 (vec_merge:VI12_AVX512VL
1320 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1321 (match_dup 0)
1322 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1323 "TARGET_AVX512BW"
1324 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1325 [(set_attr "type" "ssemov")
1326 (set_attr "prefix" "evex")
1327 (set_attr "memory" "store")
1328 (set_attr "mode" "<sseinsnmode>")])
1329
1330 (define_insn "sse2_movq128"
1331 [(set (match_operand:V2DI 0 "register_operand" "=v")
1332 (vec_concat:V2DI
1333 (vec_select:DI
1334 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1335 (parallel [(const_int 0)]))
1336 (const_int 0)))]
1337 "TARGET_SSE2"
1338 "%vmovq\t{%1, %0|%0, %q1}"
1339 [(set_attr "type" "ssemov")
1340 (set_attr "prefix" "maybe_vex")
1341 (set_attr "mode" "TI")])
1342
1343 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1344 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1345 ;; from memory, we'd prefer to load the memory directly into the %xmm
1346 ;; register. To facilitate this happy circumstance, this pattern won't
1347 ;; split until after register allocation. If the 64-bit value didn't
1348 ;; come from memory, this is the best we can do. This is much better
1349 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1350 ;; from there.
1351
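;; After reload the pattern below splits into movd + pinsrd (with SSE4.1)
;; or movd + movd + punpckldq when the DImode value is in integer
;; registers, and into a single 64-bit vector load when it is in memory.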
1352 (define_insn_and_split "movdi_to_sse"
1353 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1354 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1355 UNSPEC_MOVDI_TO_SSE))
1356 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1357 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1358 "#"
1359 "&& reload_completed"
1360 [(const_int 0)]
1361 {
1362 if (register_operand (operands[1], DImode))
1363 {
1364 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1365 Assemble the 64-bit DImode value in an xmm register. */
1366 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1367 gen_lowpart (SImode, operands[1])));
1368 if (TARGET_SSE4_1)
1369 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1370 gen_highpart (SImode, operands[1]),
1371 GEN_INT (2)));
1372 else
1373 {
1374 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1375 gen_highpart (SImode, operands[1])));
1376 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1377 operands[2]));
1378 }
1379 }
1380 else if (memory_operand (operands[1], DImode))
1381 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1382 operands[1], const0_rtx));
1383 else
1384 gcc_unreachable ();
1385 DONE;
1386 }
1387 [(set_attr "isa" "sse4,*,*")])
1388
1389 (define_split
1390 [(set (match_operand:V4SF 0 "register_operand")
1391 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1392 "TARGET_SSE && reload_completed"
1393 [(set (match_dup 0)
1394 (vec_merge:V4SF
1395 (vec_duplicate:V4SF (match_dup 1))
1396 (match_dup 2)
1397 (const_int 1)))]
1398 {
1399 operands[1] = gen_lowpart (SFmode, operands[1]);
1400 operands[2] = CONST0_RTX (V4SFmode);
1401 })
1402
1403 (define_split
1404 [(set (match_operand:V2DF 0 "register_operand")
1405 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1406 "TARGET_SSE2 && reload_completed"
1407 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1408 {
1409 operands[1] = gen_lowpart (DFmode, operands[1]);
1410 operands[2] = CONST0_RTX (DFmode);
1411 })
1412
1413 (define_expand "movmisalign<mode>"
1414 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1415 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1416 "TARGET_SSE"
1417 {
1418 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1419 DONE;
1420 })
1421
1422 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1423 (define_peephole2
1424 [(set (match_operand:V2DF 0 "sse_reg_operand")
1425 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1426 (match_operand:DF 4 "const0_operand")))
1427 (set (match_operand:V2DF 2 "sse_reg_operand")
1428 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1429 (parallel [(const_int 0)]))
1430 (match_operand:DF 3 "memory_operand")))]
1431 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1432 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1433 [(set (match_dup 2) (match_dup 5))]
1434 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1435
1436 (define_peephole2
1437 [(set (match_operand:DF 0 "sse_reg_operand")
1438 (match_operand:DF 1 "memory_operand"))
1439 (set (match_operand:V2DF 2 "sse_reg_operand")
1440 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1441 (match_operand:DF 3 "memory_operand")))]
1442 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1443 && REGNO (operands[4]) == REGNO (operands[2])
1444 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1445 [(set (match_dup 2) (match_dup 5))]
1446 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1447
1448 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1449 (define_peephole2
1450 [(set (match_operand:DF 0 "memory_operand")
1451 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1452 (parallel [(const_int 0)])))
1453 (set (match_operand:DF 2 "memory_operand")
1454 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1455 (parallel [(const_int 1)])))]
1456 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1457 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1458 [(set (match_dup 4) (match_dup 1))]
1459 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1460
1461 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1462 [(set (match_operand:VI1 0 "register_operand" "=x")
1463 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1464 UNSPEC_LDDQU))]
1465 "TARGET_SSE3"
1466 "%vlddqu\t{%1, %0|%0, %1}"
1467 [(set_attr "type" "ssemov")
1468 (set_attr "movu" "1")
1469 (set (attr "prefix_data16")
1470 (if_then_else
1471 (match_test "TARGET_AVX")
1472 (const_string "*")
1473 (const_string "0")))
1474 (set (attr "prefix_rep")
1475 (if_then_else
1476 (match_test "TARGET_AVX")
1477 (const_string "*")
1478 (const_string "1")))
1479 (set_attr "prefix" "maybe_vex")
1480 (set_attr "mode" "<sseinsnmode>")])
1481
1482 (define_insn "sse2_movnti<mode>"
1483 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1484 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1485 UNSPEC_MOVNT))]
1486 "TARGET_SSE2"
1487 "movnti\t{%1, %0|%0, %1}"
1488 [(set_attr "type" "ssemov")
1489 (set_attr "prefix_data16" "0")
1490 (set_attr "mode" "<MODE>")])
1491
1492 (define_insn "<sse>_movnt<mode>"
1493 [(set (match_operand:VF 0 "memory_operand" "=m")
1494 (unspec:VF
1495 [(match_operand:VF 1 "register_operand" "v")]
1496 UNSPEC_MOVNT))]
1497 "TARGET_SSE"
1498 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "ssemov")
1500 (set_attr "prefix" "maybe_vex")
1501 (set_attr "mode" "<MODE>")])
1502
1503 (define_insn "<sse2>_movnt<mode>"
1504 [(set (match_operand:VI8 0 "memory_operand" "=m")
1505 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1506 UNSPEC_MOVNT))]
1507 "TARGET_SSE2"
1508 "%vmovntdq\t{%1, %0|%0, %1}"
1509 [(set_attr "type" "ssecvt")
1510 (set (attr "prefix_data16")
1511 (if_then_else
1512 (match_test "TARGET_AVX")
1513 (const_string "*")
1514 (const_string "1")))
1515 (set_attr "prefix" "maybe_vex")
1516 (set_attr "mode" "<sseinsnmode>")])
1517
1518 ; Expand patterns for non-temporal stores. At the moment, only those
1519 ; that directly map to insns are defined; it would be possible to
1520 ; define patterns for other modes that would expand to several insns.
1521
1522 ;; Modes handled by storent patterns.
1523 (define_mode_iterator STORENT_MODE
1524 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1525 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1526 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1527 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1528 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1529
1530 (define_expand "storent<mode>"
1531 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1532 (unspec:STORENT_MODE
1533 [(match_operand:STORENT_MODE 1 "register_operand")]
1534 UNSPEC_MOVNT))]
1535 "TARGET_SSE")
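;; E.g. V4SF goes through <sse>_movnt<mode> (movntps) and SI/DI through
;; sse2_movnti (movnti); no multi-instruction fallback is provided here.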
1536
1537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1538 ;;
1539 ;; Mask operations
1540 ;;
1541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1542
1543 ;; All integer modes with AVX512BW/DQ.
1544 (define_mode_iterator SWI1248_AVX512BWDQ
1545 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1546
1547 ;; All integer modes with AVX512BW, where HImode operation
1548 ;; can be used instead of QImode.
1549 (define_mode_iterator SWI1248_AVX512BW
1550 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1551
1552 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1553 (define_mode_iterator SWI1248_AVX512BWDQ2
1554 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1555 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1556
1557 (define_expand "kmov<mskmodesuffix>"
1558 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1559 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1560 "TARGET_AVX512F
1561 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1562
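;; kandb/korb/kxorb/knotb require AVX512DQ, so without it the QImode
;; patterns below emit the word-sized forms (kandw etc.), as reflected in
;; their "mode" attribute.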
1563 (define_insn "k<code><mode>"
1564 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1565 (any_logic:SWI1248_AVX512BW
1566 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1567 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1568 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1569 "TARGET_AVX512F"
1570 {
1571 if (get_attr_mode (insn) == MODE_HI)
1572 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1573 else
1574 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1575 }
1576 [(set_attr "type" "msklog")
1577 (set_attr "prefix" "vex")
1578 (set (attr "mode")
1579 (cond [(and (match_test "<MODE>mode == QImode")
1580 (not (match_test "TARGET_AVX512DQ")))
1581 (const_string "HI")
1582 ]
1583 (const_string "<MODE>")))])
1584
1585 (define_split
1586 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1587 (any_logic:SWI1248_AVX512BW
1588 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
1589 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1590 (clobber (reg:CC FLAGS_REG))]
1591 "TARGET_AVX512F && reload_completed"
1592 [(parallel
1593 [(set (match_dup 0)
1594 (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
1595 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1596
1597 (define_insn "kandn<mode>"
1598 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1599 (and:SWI1248_AVX512BW
1600 (not:SWI1248_AVX512BW
1601 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1602 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1603 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1604 "TARGET_AVX512F"
1605 {
1606 if (get_attr_mode (insn) == MODE_HI)
1607 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1608 else
1609 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1610 }
1611 [(set_attr "type" "msklog")
1612 (set_attr "prefix" "vex")
1613 (set (attr "mode")
1614 (cond [(and (match_test "<MODE>mode == QImode")
1615 (not (match_test "TARGET_AVX512DQ")))
1616 (const_string "HI")
1617 ]
1618 (const_string "<MODE>")))])
1619
1620 (define_split
1621 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1622 (and:SWI1248_AVX512BW
1623 (not:SWI1248_AVX512BW
1624 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand"))
1625 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1626 (clobber (reg:CC FLAGS_REG))]
1627 "TARGET_AVX512F && reload_completed"
1628 [(parallel
1629 [(set (match_dup 0)
1630 (and:SWI1248_AVX512BW
1631 (not:SWI1248_AVX512BW (match_dup 1))
1632 (match_dup 2)))
1633 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1634
1635 (define_insn "kxnor<mode>"
1636 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1637 (not:SWI1248_AVX512BW
1638 (xor:SWI1248_AVX512BW
1639 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1640 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1641 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1642 "TARGET_AVX512F"
1643 {
1644 if (get_attr_mode (insn) == MODE_HI)
1645 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1646 else
1647 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1648 }
1649 [(set_attr "type" "msklog")
1650 (set_attr "prefix" "vex")
1651 (set (attr "mode")
1652 (cond [(and (match_test "<MODE>mode == QImode")
1653 (not (match_test "TARGET_AVX512DQ")))
1654 (const_string "HI")
1655 ]
1656 (const_string "<MODE>")))])
1657
1658 (define_insn "knot<mode>"
1659 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1660 (not:SWI1248_AVX512BW
1661 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1662 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1663 "TARGET_AVX512F"
1664 {
1665 if (get_attr_mode (insn) == MODE_HI)
1666 return "knotw\t{%1, %0|%0, %1}";
1667 else
1668 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1669 }
1670 [(set_attr "type" "msklog")
1671 (set_attr "prefix" "vex")
1672 (set (attr "mode")
1673 (cond [(and (match_test "<MODE>mode == QImode")
1674 (not (match_test "TARGET_AVX512DQ")))
1675 (const_string "HI")
1676 ]
1677 (const_string "<MODE>")))])
1678
1679 (define_split
1680 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1681 (not:SWI1248_AVX512BW
1682 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")))]
1683 "TARGET_AVX512F && reload_completed"
1684 [(parallel
1685 [(set (match_dup 0)
1686 (not:SWI1248_AVX512BW (match_dup 1)))
1687 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1688
1689 (define_insn "*knotsi_1_zext"
1690 [(set (match_operand:DI 0 "register_operand" "=k")
1691 (zero_extend:DI
1692 (not:SI (match_operand:SI 1 "register_operand" "k"))))
1693 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1694 "TARGET_AVX512BW"
1695 "knotd\t{%1, %0|%0, %1}"
1696 [(set_attr "type" "msklog")
1697 (set_attr "prefix" "vex")
1698 (set_attr "mode" "SI")])
1699
1700 (define_split
1701 [(set (match_operand:DI 0 "mask_reg_operand")
1702 (zero_extend:DI
1703 (not:SI (match_operand:SI 1 "mask_reg_operand"))))]
1704 "TARGET_AVX512BW && reload_completed"
1705 [(parallel
1706 [(set (match_dup 0)
1707 (zero_extend:DI
1708 (not:SI (match_dup 1))))
1709 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1710
1711 (define_insn "kadd<mode>"
1712 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1713 (plus:SWI1248_AVX512BWDQ2
1714 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1715 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1716 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1717 "TARGET_AVX512F"
1718 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1719 [(set_attr "type" "msklog")
1720 (set_attr "prefix" "vex")
1721 (set_attr "mode" "<MODE>")])
1722
1723 ;; Mask variant shift mnemonics
1724 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1725
1726 (define_insn "k<code><mode>"
1727 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1728 (any_lshift:SWI1248_AVX512BWDQ
1729 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1730 (match_operand 2 "const_0_to_255_operand" "n")))
1731 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1732 "TARGET_AVX512F"
1733 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1734 [(set_attr "type" "msklog")
1735 (set_attr "prefix" "vex")
1736 (set_attr "mode" "<MODE>")])
1737
1738 (define_insn "ktest<mode>"
1739 [(set (reg:CC FLAGS_REG)
1740 (unspec:CC
1741 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1742 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1743 UNSPEC_KTEST))]
1744 "TARGET_AVX512F"
1745 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1746 [(set_attr "mode" "<MODE>")
1747 (set_attr "type" "msklog")
1748 (set_attr "prefix" "vex")])
1749
1750 (define_insn "kortest<mode>"
1751 [(set (reg:CC FLAGS_REG)
1752 (unspec:CC
1753 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1754 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1755 UNSPEC_KORTEST))]
1756 "TARGET_AVX512F"
1757 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1758 [(set_attr "mode" "<MODE>")
1759 (set_attr "type" "msklog")
1760 (set_attr "prefix" "vex")])
1761
1762 (define_insn "kunpckhi"
1763 [(set (match_operand:HI 0 "register_operand" "=k")
1764 (ior:HI
1765 (ashift:HI
1766 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1767 (const_int 8))
1768 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1769 "TARGET_AVX512F"
1770 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1771 [(set_attr "mode" "HI")
1772 (set_attr "type" "msklog")
1773 (set_attr "prefix" "vex")])
1774
1775 (define_insn "kunpcksi"
1776 [(set (match_operand:SI 0 "register_operand" "=k")
1777 (ior:SI
1778 (ashift:SI
1779 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1780 (const_int 16))
1781 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1782 "TARGET_AVX512BW"
1783 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1784 [(set_attr "mode" "SI")])
1785
1786 (define_insn "kunpckdi"
1787 [(set (match_operand:DI 0 "register_operand" "=k")
1788 (ior:DI
1789 (ashift:DI
1790 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1791 (const_int 32))
1792 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1793 "TARGET_AVX512BW"
1794 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1795 [(set_attr "mode" "DI")])
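;; Informally (an illustration only, not part of the port), the three
;; kunpck patterns above concatenate two narrower masks into one wider
;; mask; for kunpckbw the effect is roughly
;;
;;   unsigned short kunpckbw (unsigned char hi, unsigned char lo)
;;   { return ((unsigned short) hi << 8) | lo; }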
1796
1797
1798 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1799 ;;
1800 ;; Parallel floating point arithmetic
1801 ;;
1802 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1803
1804 (define_expand "<code><mode>2"
1805 [(set (match_operand:VF 0 "register_operand")
1806 (absneg:VF
1807 (match_operand:VF 1 "register_operand")))]
1808 "TARGET_SSE"
1809 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1810
1811 (define_insn_and_split "*<code><mode>2"
1812 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1813 (absneg:VF
1814 (match_operand:VF 1 "vector_operand" "0,xBm,v,m")))
1815 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1816 "TARGET_SSE"
1817 "#"
1818 "&& reload_completed"
1819 [(set (match_dup 0)
1820 (<absneg_op>:VF (match_dup 1) (match_dup 2)))]
1821 {
1822 if (TARGET_AVX)
1823 {
1824 if (MEM_P (operands[1]))
1825 std::swap (operands[1], operands[2]);
1826 }
1827 else
1828 {
1829 if (operands_match_p (operands[0], operands[2]))
1830 std::swap (operands[1], operands[2]);
1831 }
1832 }
1833 [(set_attr "isa" "noavx,noavx,avx,avx")])
1834
1835 (define_insn_and_split "*nabs<mode>2"
1836 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1837 (neg:VF
1838 (abs:VF
1839 (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))))
1840 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1841 "TARGET_SSE"
1842 "#"
1843 "&& reload_completed"
1844 [(set (match_dup 0)
1845 (ior:VF (match_dup 1) (match_dup 2)))]
1846 {
1847 if (TARGET_AVX)
1848 {
1849 if (MEM_P (operands[1]))
1850 std::swap (operands[1], operands[2]);
1851 }
1852 else
1853 {
1854 if (operands_match_p (operands[0], operands[2]))
1855 std::swap (operands[1], operands[2]);
1856 }
1857 }
1858 [(set_attr "isa" "noavx,noavx,avx,avx")])
1859
1860 (define_expand "<insn><mode>3<mask_name><round_name>"
1861 [(set (match_operand:VF 0 "register_operand")
1862 (plusminus:VF
1863 (match_operand:VF 1 "<round_nimm_predicate>")
1864 (match_operand:VF 2 "<round_nimm_predicate>")))]
1865 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1866 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1867
1868 (define_insn "*<insn><mode>3<mask_name><round_name>"
1869 [(set (match_operand:VF 0 "register_operand" "=x,v")
1870 (plusminus:VF
1871 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
1872 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1873 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1874 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1875 "@
1876 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1877 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1878 [(set_attr "isa" "noavx,avx")
1879 (set_attr "type" "sseadd")
1880 (set_attr "prefix" "<bcst_mask_prefix3>")
1881 (set_attr "mode" "<MODE>")])
1882
1883 ;; Standard scalar operation patterns which preserve the rest of the
1884 ;; vector for combiner.
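;; A minimal sketch (GCC vector-extension C, for illustration only) of the
;; kind of source this vec_merge form is meant to match:
;;
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;   v4sf add_elem0 (v4sf x, float y)
;;   {
;;     x[0] = x[0] + y;   /* expected to combine into addss/vaddss; the
;;                           remaining elements of x pass through.  */
;;     return x;
;;   }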
1885 (define_insn "*<sse>_vm<insn><mode>3"
1886 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1887 (vec_merge:VF_128
1888 (vec_duplicate:VF_128
1889 (plusminus:<ssescalarmode>
1890 (vec_select:<ssescalarmode>
1891 (match_operand:VF_128 1 "register_operand" "0,v")
1892 (parallel [(const_int 0)]))
1893 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1894 (match_dup 1)
1895 (const_int 1)))]
1896 "TARGET_SSE"
1897 "@
1898 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1899 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1900 [(set_attr "isa" "noavx,avx")
1901 (set_attr "type" "sseadd")
1902 (set_attr "prefix" "orig,vex")
1903 (set_attr "mode" "<ssescalarmode>")])
1904
1905 (define_insn "<sse>_vm<insn><mode>3<mask_scalar_name><round_scalar_name>"
1906 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1907 (vec_merge:VF_128
1908 (plusminus:VF_128
1909 (match_operand:VF_128 1 "register_operand" "0,v")
1910 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1911 (match_dup 1)
1912 (const_int 1)))]
1913 "TARGET_SSE"
1914 "@
1915 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1916 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1917 [(set_attr "isa" "noavx,avx")
1918 (set_attr "type" "sseadd")
1919 (set_attr "prefix" "<round_scalar_prefix>")
1920 (set_attr "mode" "<ssescalarmode>")])
1921
1922 (define_expand "mul<mode>3<mask_name><round_name>"
1923 [(set (match_operand:VF 0 "register_operand")
1924 (mult:VF
1925 (match_operand:VF 1 "<round_nimm_predicate>")
1926 (match_operand:VF 2 "<round_nimm_predicate>")))]
1927 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1928 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1929
1930 (define_insn "*mul<mode>3<mask_name><round_name>"
1931 [(set (match_operand:VF 0 "register_operand" "=x,v")
1932 (mult:VF
1933 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
1934 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1935 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
1936 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1937 "@
1938 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1939 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1940 [(set_attr "isa" "noavx,avx")
1941 (set_attr "type" "ssemul")
1942 (set_attr "prefix" "<bcst_mask_prefix3>")
1943 (set_attr "btver2_decode" "direct,double")
1944 (set_attr "mode" "<MODE>")])
1945
1946 ;; Standard scalar operation patterns which preserve the rest of the
1947 ;; vector for combiner.
1948 (define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
1949 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1950 (vec_merge:VF_128
1951 (vec_duplicate:VF_128
1952 (multdiv:<ssescalarmode>
1953 (vec_select:<ssescalarmode>
1954 (match_operand:VF_128 1 "register_operand" "0,v")
1955 (parallel [(const_int 0)]))
1956 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1957 (match_dup 1)
1958 (const_int 1)))]
1959 "TARGET_SSE"
1960 "@
1961 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1962 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1963 [(set_attr "isa" "noavx,avx")
1964 (set_attr "type" "sse<multdiv_mnemonic>")
1965 (set_attr "prefix" "orig,vex")
1966 (set_attr "btver2_decode" "direct,double")
1967 (set_attr "mode" "<ssescalarmode>")])
1968
1969 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1970 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1971 (vec_merge:VF_128
1972 (multdiv:VF_128
1973 (match_operand:VF_128 1 "register_operand" "0,v")
1974 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1975 (match_dup 1)
1976 (const_int 1)))]
1977 "TARGET_SSE"
1978 "@
1979 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1980 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1981 [(set_attr "isa" "noavx,avx")
1982 (set_attr "type" "sse<multdiv_mnemonic>")
1983 (set_attr "prefix" "<round_scalar_prefix>")
1984 (set_attr "btver2_decode" "direct,double")
1985 (set_attr "mode" "<ssescalarmode>")])
1986
1987 (define_expand "div<mode>3"
1988 [(set (match_operand:VF2 0 "register_operand")
1989 (div:VF2 (match_operand:VF2 1 "register_operand")
1990 (match_operand:VF2 2 "vector_operand")))]
1991 "TARGET_SSE2"
1992 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1993
1994 (define_expand "div<mode>3"
1995 [(set (match_operand:VF1 0 "register_operand")
1996 (div:VF1 (match_operand:VF1 1 "register_operand")
1997 (match_operand:VF1 2 "vector_operand")))]
1998 "TARGET_SSE"
1999 {
2000 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
2001
2002 if (TARGET_SSE_MATH
2003 && TARGET_RECIP_VEC_DIV
2004 && !optimize_insn_for_size_p ()
2005 && flag_finite_math_only && !flag_trapping_math
2006 && flag_unsafe_math_optimizations)
2007 {
2008 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
2009 DONE;
2010 }
2011 })
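;; A rough sketch of the TARGET_RECIP_VEC_DIV path above (the real
;; sequence is emitted by ix86_emit_swdivsf and may differ in detail):
;; the division is replaced by the hardware reciprocal estimate plus one
;; Newton-Raphson refinement, conceptually
;;
;;   x0 = rcpps (b);              /* roughly 12-bit estimate of 1/b */
;;   x1 = x0 * (2.0 - b * x0);    /* one Newton-Raphson step */
;;   a / b ~= a * x1;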
2012
2013 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
2014 [(set (match_operand:VF 0 "register_operand" "=x,v")
2015 (div:VF
2016 (match_operand:VF 1 "register_operand" "0,v")
2017 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2018 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2019 "@
2020 div<ssemodesuffix>\t{%2, %0|%0, %2}
2021 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2022 [(set_attr "isa" "noavx,avx")
2023 (set_attr "type" "ssediv")
2024 (set_attr "prefix" "<bcst_mask_prefix3>")
2025 (set_attr "mode" "<MODE>")])
2026
2027 (define_insn "<sse>_rcp<mode>2"
2028 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2029 (unspec:VF1_128_256
2030 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
2031 "TARGET_SSE"
2032 "%vrcpps\t{%1, %0|%0, %1}"
2033 [(set_attr "type" "sse")
2034 (set_attr "atom_sse_attr" "rcp")
2035 (set_attr "btver2_sse_attr" "rcp")
2036 (set_attr "prefix" "maybe_vex")
2037 (set_attr "mode" "<MODE>")])
2038
2039 (define_insn "sse_vmrcpv4sf2"
2040 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2041 (vec_merge:V4SF
2042 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2043 UNSPEC_RCP)
2044 (match_operand:V4SF 2 "register_operand" "0,x")
2045 (const_int 1)))]
2046 "TARGET_SSE"
2047 "@
2048 rcpss\t{%1, %0|%0, %k1}
2049 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
2050 [(set_attr "isa" "noavx,avx")
2051 (set_attr "type" "sse")
2052 (set_attr "atom_sse_attr" "rcp")
2053 (set_attr "btver2_sse_attr" "rcp")
2054 (set_attr "prefix" "orig,vex")
2055 (set_attr "mode" "SF")])
2056
2057 (define_insn "*sse_vmrcpv4sf2"
2058 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2059 (vec_merge:V4SF
2060 (vec_duplicate:V4SF
2061 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2062 UNSPEC_RCP))
2063 (match_operand:V4SF 2 "register_operand" "0,x")
2064 (const_int 1)))]
2065 "TARGET_SSE"
2066 "@
2067 rcpss\t{%1, %0|%0, %1}
2068 vrcpss\t{%1, %2, %0|%0, %2, %1}"
2069 [(set_attr "isa" "noavx,avx")
2070 (set_attr "type" "sse")
2071 (set_attr "atom_sse_attr" "rcp")
2072 (set_attr "btver2_sse_attr" "rcp")
2073 (set_attr "prefix" "orig,vex")
2074 (set_attr "mode" "SF")])
2075
2076 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
2077 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2078 (unspec:VF_AVX512VL
2079 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2080 UNSPEC_RCP14))]
2081 "TARGET_AVX512F"
2082 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2083 [(set_attr "type" "sse")
2084 (set_attr "prefix" "evex")
2085 (set_attr "mode" "<MODE>")])
2086
2087 (define_insn "srcp14<mode>"
2088 [(set (match_operand:VF_128 0 "register_operand" "=v")
2089 (vec_merge:VF_128
2090 (unspec:VF_128
2091 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2092 UNSPEC_RCP14)
2093 (match_operand:VF_128 2 "register_operand" "v")
2094 (const_int 1)))]
2095 "TARGET_AVX512F"
2096 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2097 [(set_attr "type" "sse")
2098 (set_attr "prefix" "evex")
2099 (set_attr "mode" "<MODE>")])
2100
2101 (define_insn "srcp14<mode>_mask"
2102 [(set (match_operand:VF_128 0 "register_operand" "=v")
2103 (vec_merge:VF_128
2104 (vec_merge:VF_128
2105 (unspec:VF_128
2106 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2107 UNSPEC_RCP14)
2108 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2109 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2110 (match_operand:VF_128 2 "register_operand" "v")
2111 (const_int 1)))]
2112 "TARGET_AVX512F"
2113 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2114 [(set_attr "type" "sse")
2115 (set_attr "prefix" "evex")
2116 (set_attr "mode" "<MODE>")])
2117
2118 (define_expand "sqrt<mode>2"
2119 [(set (match_operand:VF2 0 "register_operand")
2120 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
2121 "TARGET_SSE2")
2122
2123 (define_expand "sqrt<mode>2"
2124 [(set (match_operand:VF1 0 "register_operand")
2125 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2126 "TARGET_SSE"
2127 {
2128 if (TARGET_SSE_MATH
2129 && TARGET_RECIP_VEC_SQRT
2130 && !optimize_insn_for_size_p ()
2131 && flag_finite_math_only && !flag_trapping_math
2132 && flag_unsafe_math_optimizations)
2133 {
2134 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
2135 DONE;
2136 }
2137 })
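;; Likewise only a sketch (the real sequence comes from
;; ix86_emit_swsqrtsf): the TARGET_RECIP_VEC_SQRT path approximates the
;; square root via the rsqrtps estimate plus one Newton-Raphson step,
;; conceptually
;;
;;   x0 = rsqrtps (a);                            /* estimate of 1/sqrt(a) */
;;   sqrt (a) ~= a * x0 * (1.5 - 0.5 * a * x0 * x0);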
2138
2139 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2140 [(set (match_operand:VF 0 "register_operand" "=x,v")
2141 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2142 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2143 "@
2144 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2145 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2146 [(set_attr "isa" "noavx,avx")
2147 (set_attr "type" "sse")
2148 (set_attr "atom_sse_attr" "sqrt")
2149 (set_attr "btver2_sse_attr" "sqrt")
2150 (set_attr "prefix" "maybe_vex")
2151 (set_attr "mode" "<MODE>")])
2152
2153 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2154 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2155 (vec_merge:VF_128
2156 (sqrt:VF_128
2157 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2158 (match_operand:VF_128 2 "register_operand" "0,v")
2159 (const_int 1)))]
2160 "TARGET_SSE"
2161 "@
2162 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2163 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2164 [(set_attr "isa" "noavx,avx")
2165 (set_attr "type" "sse")
2166 (set_attr "atom_sse_attr" "sqrt")
2167 (set_attr "prefix" "<round_scalar_prefix>")
2168 (set_attr "btver2_sse_attr" "sqrt")
2169 (set_attr "mode" "<ssescalarmode>")])
2170
2171 (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2172 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2173 (vec_merge:VF_128
2174 (vec_duplicate:VF_128
2175 (sqrt:<ssescalarmode>
2176 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2177 (match_operand:VF_128 2 "register_operand" "0,v")
2178 (const_int 1)))]
2179 "TARGET_SSE"
2180 "@
2181 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2182 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2183 [(set_attr "isa" "noavx,avx")
2184 (set_attr "type" "sse")
2185 (set_attr "atom_sse_attr" "sqrt")
2186 (set_attr "prefix" "<round_scalar_prefix>")
2187 (set_attr "btver2_sse_attr" "sqrt")
2188 (set_attr "mode" "<ssescalarmode>")])
2189
2190 (define_expand "rsqrt<mode>2"
2191 [(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
2192 (unspec:VF1_AVX512ER_128_256
2193 [(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
2194 UNSPEC_RSQRT))]
2195 "TARGET_SSE && TARGET_SSE_MATH"
2196 {
2197 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
2198 DONE;
2199 })
2200
2201 (define_insn "<sse>_rsqrt<mode>2"
2202 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2203 (unspec:VF1_128_256
2204 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2205 "TARGET_SSE"
2206 "%vrsqrtps\t{%1, %0|%0, %1}"
2207 [(set_attr "type" "sse")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "mode" "<MODE>")])
2210
2211 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2212 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2213 (unspec:VF_AVX512VL
2214 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2215 UNSPEC_RSQRT14))]
2216 "TARGET_AVX512F"
2217 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2218 [(set_attr "type" "sse")
2219 (set_attr "prefix" "evex")
2220 (set_attr "mode" "<MODE>")])
2221
2222 (define_insn "rsqrt14<mode>"
2223 [(set (match_operand:VF_128 0 "register_operand" "=v")
2224 (vec_merge:VF_128
2225 (unspec:VF_128
2226 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2227 UNSPEC_RSQRT14)
2228 (match_operand:VF_128 2 "register_operand" "v")
2229 (const_int 1)))]
2230 "TARGET_AVX512F"
2231 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2232 [(set_attr "type" "sse")
2233 (set_attr "prefix" "evex")
2234 (set_attr "mode" "<MODE>")])
2235
2236 (define_insn "rsqrt14_<mode>_mask"
2237 [(set (match_operand:VF_128 0 "register_operand" "=v")
2238 (vec_merge:VF_128
2239 (vec_merge:VF_128
2240 (unspec:VF_128
2241 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2242 UNSPEC_RSQRT14)
2243 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2244 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2245 (match_operand:VF_128 2 "register_operand" "v")
2246 (const_int 1)))]
2247 "TARGET_AVX512F"
2248 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2249 [(set_attr "type" "sse")
2250 (set_attr "prefix" "evex")
2251 (set_attr "mode" "<MODE>")])
2252
2253 (define_insn "sse_vmrsqrtv4sf2"
2254 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2255 (vec_merge:V4SF
2256 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2257 UNSPEC_RSQRT)
2258 (match_operand:V4SF 2 "register_operand" "0,x")
2259 (const_int 1)))]
2260 "TARGET_SSE"
2261 "@
2262 rsqrtss\t{%1, %0|%0, %k1}
2263 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2264 [(set_attr "isa" "noavx,avx")
2265 (set_attr "type" "sse")
2266 (set_attr "prefix" "orig,vex")
2267 (set_attr "mode" "SF")])
2268
2269 (define_insn "*sse_vmrsqrtv4sf2"
2270 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2271 (vec_merge:V4SF
2272 (vec_duplicate:V4SF
2273 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2274 UNSPEC_RSQRT))
2275 (match_operand:V4SF 2 "register_operand" "0,x")
2276 (const_int 1)))]
2277 "TARGET_SSE"
2278 "@
2279 rsqrtss\t{%1, %0|%0, %1}
2280 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2281 [(set_attr "isa" "noavx,avx")
2282 (set_attr "type" "sse")
2283 (set_attr "prefix" "orig,vex")
2284 (set_attr "mode" "SF")])
2285
2286 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2287 [(set (match_operand:VF 0 "register_operand")
2288 (smaxmin:VF
2289 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2290 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2291 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2292 {
2293 if (!flag_finite_math_only || flag_signed_zeros)
2294 {
2295 operands[1] = force_reg (<MODE>mode, operands[1]);
2296 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2297 (operands[0], operands[1], operands[2]
2298 <mask_operand_arg34>
2299 <round_saeonly_mask_arg3>));
2300 DONE;
2301 }
2302 else
2303 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2304 })
2305
2306 ;; These versions of the min/max patterns are intentionally ignorant of
2307 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2308 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2309 ;; are undefined in this condition, we're certain this is correct.
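;; Concretely (informative only): maxps/maxpd return their second source
;; operand whenever either input is a NaN, so e.g. max (NaN, 1.0) = 1.0
;; while max (1.0, NaN) = NaN, and swapping operands can change the
;; result.  Since SMAX/SMIN leave the NaN and -0.0 cases undefined, the
;; swap allowed by the "%" commutativity marker below is still valid.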
2310
2311 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2312 [(set (match_operand:VF 0 "register_operand" "=x,v")
2313 (smaxmin:VF
2314 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2315 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2316 "TARGET_SSE
2317 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2318 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2319 "@
2320 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2321 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2322 [(set_attr "isa" "noavx,avx")
2323 (set_attr "type" "sseadd")
2324 (set_attr "btver2_sse_attr" "maxmin")
2325 (set_attr "prefix" "<mask_prefix3>")
2326 (set_attr "mode" "<MODE>")])
2327
2328 ;; These versions of the min/max patterns implement exactly the operations
2329 ;; min = (op1 < op2 ? op1 : op2)
2330 ;; max = (!(op1 < op2) ? op1 : op2)
2331 ;; Their operands are not commutative, and thus they may be used in the
2332 ;; presence of -0.0 and NaN.
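;; Worked example (informative only), using the min formula above: with
;; op1 = NaN the test op1 < op2 is false, so op2 is returned; with
;; op1 = -0.0 and op2 = +0.0 the test is likewise false (the two zeros
;; compare equal), so op2 is returned again.  This is exactly what
;; minps/minpd do, which is why operand order must be preserved here.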
2333
2334 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2335 [(set (match_operand:VF 0 "register_operand" "=x,v")
2336 (unspec:VF
2337 [(match_operand:VF 1 "register_operand" "0,v")
2338 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2339 IEEE_MAXMIN))]
2340 "TARGET_SSE
2341 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2342 "@
2343 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2344 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2345 [(set_attr "isa" "noavx,avx")
2346 (set_attr "type" "sseadd")
2347 (set_attr "btver2_sse_attr" "maxmin")
2348 (set_attr "prefix" "<mask_prefix3>")
2349 (set_attr "mode" "<MODE>")])
2350
2351 ;; Standard scalar operation patterns which preserve the rest of the
2352 ;; vector for combiner.
2353 (define_insn "*ieee_<ieee_maxmin><mode>3"
2354 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2355 (vec_merge:VF_128
2356 (vec_duplicate:VF_128
2357 (unspec:<ssescalarmode>
2358 [(vec_select:<ssescalarmode>
2359 (match_operand:VF_128 1 "register_operand" "0,v")
2360 (parallel [(const_int 0)]))
2361 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2362 IEEE_MAXMIN))
2363 (match_dup 1)
2364 (const_int 1)))]
2365 "TARGET_SSE"
2366 "@
2367 <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2368 v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2369 [(set_attr "isa" "noavx,avx")
2370 (set_attr "type" "sseadd")
2371 (set_attr "btver2_sse_attr" "maxmin")
2372 (set_attr "prefix" "orig,vex")
2373 (set_attr "mode" "<ssescalarmode>")])
2374
2375 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2376 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2377 (vec_merge:VF_128
2378 (smaxmin:VF_128
2379 (match_operand:VF_128 1 "register_operand" "0,v")
2380 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2381 (match_dup 1)
2382 (const_int 1)))]
2383 "TARGET_SSE"
2384 "@
2385 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2386 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2387 [(set_attr "isa" "noavx,avx")
2388 (set_attr "type" "sse")
2389 (set_attr "btver2_sse_attr" "maxmin")
2390 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2391 (set_attr "mode" "<ssescalarmode>")])
2392
2393 (define_insn "avx_addsubv4df3"
2394 [(set (match_operand:V4DF 0 "register_operand" "=x")
2395 (vec_merge:V4DF
2396 (minus:V4DF
2397 (match_operand:V4DF 1 "register_operand" "x")
2398 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2399 (plus:V4DF (match_dup 1) (match_dup 2))
2400 (const_int 5)))]
2401 "TARGET_AVX"
2402 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2403 [(set_attr "type" "sseadd")
2404 (set_attr "prefix" "vex")
2405 (set_attr "mode" "V4DF")])
2406
2407 (define_insn "sse3_addsubv2df3"
2408 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2409 (vec_merge:V2DF
2410 (minus:V2DF
2411 (match_operand:V2DF 1 "register_operand" "0,x")
2412 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2413 (plus:V2DF (match_dup 1) (match_dup 2))
2414 (const_int 1)))]
2415 "TARGET_SSE3"
2416 "@
2417 addsubpd\t{%2, %0|%0, %2}
2418 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2419 [(set_attr "isa" "noavx,avx")
2420 (set_attr "type" "sseadd")
2421 (set_attr "atom_unit" "complex")
2422 (set_attr "prefix" "orig,vex")
2423 (set_attr "mode" "V2DF")])
2424
2425 (define_insn "avx_addsubv8sf3"
2426 [(set (match_operand:V8SF 0 "register_operand" "=x")
2427 (vec_merge:V8SF
2428 (minus:V8SF
2429 (match_operand:V8SF 1 "register_operand" "x")
2430 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2431 (plus:V8SF (match_dup 1) (match_dup 2))
2432 (const_int 85)))]
2433 "TARGET_AVX"
2434 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2435 [(set_attr "type" "sseadd")
2436 (set_attr "prefix" "vex")
2437 (set_attr "mode" "V8SF")])
2438
2439 (define_insn "sse3_addsubv4sf3"
2440 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2441 (vec_merge:V4SF
2442 (minus:V4SF
2443 (match_operand:V4SF 1 "register_operand" "0,x")
2444 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2445 (plus:V4SF (match_dup 1) (match_dup 2))
2446 (const_int 5)))]
2447 "TARGET_SSE3"
2448 "@
2449 addsubps\t{%2, %0|%0, %2}
2450 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2451 [(set_attr "isa" "noavx,avx")
2452 (set_attr "type" "sseadd")
2453 (set_attr "prefix" "orig,vex")
2454 (set_attr "prefix_rep" "1,*")
2455 (set_attr "mode" "V4SF")])
2456
2457 (define_split
2458 [(set (match_operand:VF_128_256 0 "register_operand")
2459 (match_operator:VF_128_256 6 "addsub_vm_operator"
2460 [(minus:VF_128_256
2461 (match_operand:VF_128_256 1 "register_operand")
2462 (match_operand:VF_128_256 2 "vector_operand"))
2463 (plus:VF_128_256
2464 (match_operand:VF_128_256 3 "vector_operand")
2465 (match_operand:VF_128_256 4 "vector_operand"))
2466 (match_operand 5 "const_int_operand")]))]
2467 "TARGET_SSE3
2468 && can_create_pseudo_p ()
2469 && ((rtx_equal_p (operands[1], operands[3])
2470 && rtx_equal_p (operands[2], operands[4]))
2471 || (rtx_equal_p (operands[1], operands[4])
2472 && rtx_equal_p (operands[2], operands[3])))"
2473 [(set (match_dup 0)
2474 (vec_merge:VF_128_256
2475 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2476 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2477 (match_dup 5)))])
2478
2479 (define_split
2480 [(set (match_operand:VF_128_256 0 "register_operand")
2481 (match_operator:VF_128_256 6 "addsub_vm_operator"
2482 [(plus:VF_128_256
2483 (match_operand:VF_128_256 1 "vector_operand")
2484 (match_operand:VF_128_256 2 "vector_operand"))
2485 (minus:VF_128_256
2486 (match_operand:VF_128_256 3 "register_operand")
2487 (match_operand:VF_128_256 4 "vector_operand"))
2488 (match_operand 5 "const_int_operand")]))]
2489 "TARGET_SSE3
2490 && can_create_pseudo_p ()
2491 && ((rtx_equal_p (operands[1], operands[3])
2492 && rtx_equal_p (operands[2], operands[4]))
2493 || (rtx_equal_p (operands[1], operands[4])
2494 && rtx_equal_p (operands[2], operands[3])))"
2495 [(set (match_dup 0)
2496 (vec_merge:VF_128_256
2497 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2498 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2499 (match_dup 5)))]
2500 {
2501 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
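  /* E.g. in V4SFmode: if the matched mask was 10 (0b1010), selecting PLUS
     for elements 1 and 3, the rewritten pattern lists MINUS first, so the
     mask becomes ~10 & 0xf = 5 (0b0101).  */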
2502 operands[5]
2503 = GEN_INT (~INTVAL (operands[5])
2504 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2505 })
2506
2507 (define_split
2508 [(set (match_operand:VF_128_256 0 "register_operand")
2509 (match_operator:VF_128_256 7 "addsub_vs_operator"
2510 [(vec_concat:<ssedoublemode>
2511 (minus:VF_128_256
2512 (match_operand:VF_128_256 1 "register_operand")
2513 (match_operand:VF_128_256 2 "vector_operand"))
2514 (plus:VF_128_256
2515 (match_operand:VF_128_256 3 "vector_operand")
2516 (match_operand:VF_128_256 4 "vector_operand")))
2517 (match_parallel 5 "addsub_vs_parallel"
2518 [(match_operand 6 "const_int_operand")])]))]
2519 "TARGET_SSE3
2520 && can_create_pseudo_p ()
2521 && ((rtx_equal_p (operands[1], operands[3])
2522 && rtx_equal_p (operands[2], operands[4]))
2523 || (rtx_equal_p (operands[1], operands[4])
2524 && rtx_equal_p (operands[2], operands[3])))"
2525 [(set (match_dup 0)
2526 (vec_merge:VF_128_256
2527 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2528 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2529 (match_dup 5)))]
2530 {
2531 int i, nelt = XVECLEN (operands[5], 0);
2532 HOST_WIDE_INT ival = 0;
2533
2534 for (i = 0; i < nelt; i++)
2535 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2536 ival |= HOST_WIDE_INT_1 << i;
2537
2538 operands[5] = GEN_INT (ival);
2539 })
2540
2541 (define_split
2542 [(set (match_operand:VF_128_256 0 "register_operand")
2543 (match_operator:VF_128_256 7 "addsub_vs_operator"
2544 [(vec_concat:<ssedoublemode>
2545 (plus:VF_128_256
2546 (match_operand:VF_128_256 1 "vector_operand")
2547 (match_operand:VF_128_256 2 "vector_operand"))
2548 (minus:VF_128_256
2549 (match_operand:VF_128_256 3 "register_operand")
2550 (match_operand:VF_128_256 4 "vector_operand")))
2551 (match_parallel 5 "addsub_vs_parallel"
2552 [(match_operand 6 "const_int_operand")])]))]
2553 "TARGET_SSE3
2554 && can_create_pseudo_p ()
2555 && ((rtx_equal_p (operands[1], operands[3])
2556 && rtx_equal_p (operands[2], operands[4]))
2557 || (rtx_equal_p (operands[1], operands[4])
2558 && rtx_equal_p (operands[2], operands[3])))"
2559 [(set (match_dup 0)
2560 (vec_merge:VF_128_256
2561 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2562 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2563 (match_dup 5)))]
2564 {
2565 int i, nelt = XVECLEN (operands[5], 0);
2566 HOST_WIDE_INT ival = 0;
2567
2568 for (i = 0; i < nelt; i++)
2569 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2570 ival |= HOST_WIDE_INT_1 << i;
2571
2572 operands[5] = GEN_INT (ival);
2573 })
2574
2575 (define_insn "avx_h<insn>v4df3"
2576 [(set (match_operand:V4DF 0 "register_operand" "=x")
2577 (vec_concat:V4DF
2578 (vec_concat:V2DF
2579 (plusminus:DF
2580 (vec_select:DF
2581 (match_operand:V4DF 1 "register_operand" "x")
2582 (parallel [(const_int 0)]))
2583 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2584 (plusminus:DF
2585 (vec_select:DF
2586 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2587 (parallel [(const_int 0)]))
2588 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2589 (vec_concat:V2DF
2590 (plusminus:DF
2591 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2592 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2593 (plusminus:DF
2594 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2595 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2596 "TARGET_AVX"
2597 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2598 [(set_attr "type" "sseadd")
2599 (set_attr "prefix" "vex")
2600 (set_attr "mode" "V4DF")])
2601
2602 (define_expand "sse3_haddv2df3"
2603 [(set (match_operand:V2DF 0 "register_operand")
2604 (vec_concat:V2DF
2605 (plus:DF
2606 (vec_select:DF
2607 (match_operand:V2DF 1 "register_operand")
2608 (parallel [(const_int 0)]))
2609 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2610 (plus:DF
2611 (vec_select:DF
2612 (match_operand:V2DF 2 "vector_operand")
2613 (parallel [(const_int 0)]))
2614 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2615 "TARGET_SSE3")
2616
2617 (define_insn "*sse3_haddv2df3"
2618 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2619 (vec_concat:V2DF
2620 (plus:DF
2621 (vec_select:DF
2622 (match_operand:V2DF 1 "register_operand" "0,x")
2623 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2624 (vec_select:DF
2625 (match_dup 1)
2626 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2627 (plus:DF
2628 (vec_select:DF
2629 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2630 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2631 (vec_select:DF
2632 (match_dup 2)
2633 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2634 "TARGET_SSE3
2635 && INTVAL (operands[3]) != INTVAL (operands[4])
2636 && INTVAL (operands[5]) != INTVAL (operands[6])"
2637 "@
2638 haddpd\t{%2, %0|%0, %2}
2639 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2640 [(set_attr "isa" "noavx,avx")
2641 (set_attr "type" "sseadd")
2642 (set_attr "prefix" "orig,vex")
2643 (set_attr "mode" "V2DF")])
2644
2645 (define_insn "sse3_hsubv2df3"
2646 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2647 (vec_concat:V2DF
2648 (minus:DF
2649 (vec_select:DF
2650 (match_operand:V2DF 1 "register_operand" "0,x")
2651 (parallel [(const_int 0)]))
2652 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2653 (minus:DF
2654 (vec_select:DF
2655 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2656 (parallel [(const_int 0)]))
2657 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2658 "TARGET_SSE3"
2659 "@
2660 hsubpd\t{%2, %0|%0, %2}
2661 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2662 [(set_attr "isa" "noavx,avx")
2663 (set_attr "type" "sseadd")
2664 (set_attr "prefix" "orig,vex")
2665 (set_attr "mode" "V2DF")])
2666
2667 (define_insn "*sse3_haddv2df3_low"
2668 [(set (match_operand:DF 0 "register_operand" "=x,x")
2669 (plus:DF
2670 (vec_select:DF
2671 (match_operand:V2DF 1 "register_operand" "0,x")
2672 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2673 (vec_select:DF
2674 (match_dup 1)
2675 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2676 "TARGET_SSE3
2677 && INTVAL (operands[2]) != INTVAL (operands[3])"
2678 "@
2679 haddpd\t{%0, %0|%0, %0}
2680 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2681 [(set_attr "isa" "noavx,avx")
2682 (set_attr "type" "sseadd1")
2683 (set_attr "prefix" "orig,vex")
2684 (set_attr "mode" "V2DF")])
2685
2686 (define_insn "*sse3_hsubv2df3_low"
2687 [(set (match_operand:DF 0 "register_operand" "=x,x")
2688 (minus:DF
2689 (vec_select:DF
2690 (match_operand:V2DF 1 "register_operand" "0,x")
2691 (parallel [(const_int 0)]))
2692 (vec_select:DF
2693 (match_dup 1)
2694 (parallel [(const_int 1)]))))]
2695 "TARGET_SSE3"
2696 "@
2697 hsubpd\t{%0, %0|%0, %0}
2698 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2699 [(set_attr "isa" "noavx,avx")
2700 (set_attr "type" "sseadd1")
2701 (set_attr "prefix" "orig,vex")
2702 (set_attr "mode" "V2DF")])
2703
2704 (define_insn "avx_h<insn>v8sf3"
2705 [(set (match_operand:V8SF 0 "register_operand" "=x")
2706 (vec_concat:V8SF
2707 (vec_concat:V4SF
2708 (vec_concat:V2SF
2709 (plusminus:SF
2710 (vec_select:SF
2711 (match_operand:V8SF 1 "register_operand" "x")
2712 (parallel [(const_int 0)]))
2713 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2714 (plusminus:SF
2715 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2716 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2717 (vec_concat:V2SF
2718 (plusminus:SF
2719 (vec_select:SF
2720 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2721 (parallel [(const_int 0)]))
2722 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2723 (plusminus:SF
2724 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2725 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2726 (vec_concat:V4SF
2727 (vec_concat:V2SF
2728 (plusminus:SF
2729 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2730 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2731 (plusminus:SF
2732 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2733 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2734 (vec_concat:V2SF
2735 (plusminus:SF
2736 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2737 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2738 (plusminus:SF
2739 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2740 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2741 "TARGET_AVX"
2742 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2743 [(set_attr "type" "sseadd")
2744 (set_attr "prefix" "vex")
2745 (set_attr "mode" "V8SF")])
2746
2747 (define_insn "sse3_h<insn>v4sf3"
2748 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2749 (vec_concat:V4SF
2750 (vec_concat:V2SF
2751 (plusminus:SF
2752 (vec_select:SF
2753 (match_operand:V4SF 1 "register_operand" "0,x")
2754 (parallel [(const_int 0)]))
2755 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2756 (plusminus:SF
2757 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2758 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2759 (vec_concat:V2SF
2760 (plusminus:SF
2761 (vec_select:SF
2762 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2763 (parallel [(const_int 0)]))
2764 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2765 (plusminus:SF
2766 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2767 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2768 "TARGET_SSE3"
2769 "@
2770 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2771 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2772 [(set_attr "isa" "noavx,avx")
2773 (set_attr "type" "sseadd")
2774 (set_attr "atom_unit" "complex")
2775 (set_attr "prefix" "orig,vex")
2776 (set_attr "prefix_rep" "1,*")
2777 (set_attr "mode" "V4SF")])
2778
2779 (define_mode_iterator REDUC_SSE_PLUS_MODE
2780 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2781
2782 (define_expand "reduc_plus_scal_<mode>"
2783 [(plus:REDUC_SSE_PLUS_MODE
2784 (match_operand:<ssescalarmode> 0 "register_operand")
2785 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2786 ""
2787 {
2788 rtx tmp = gen_reg_rtx (<MODE>mode);
2789 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2790 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2791 const0_rtx));
2792 DONE;
2793 })
2794
2795 (define_expand "reduc_plus_scal_v16qi"
2796 [(plus:V16QI
2797 (match_operand:QI 0 "register_operand")
2798 (match_operand:V16QI 1 "register_operand"))]
2799 "TARGET_SSE2"
2800 {
2801 rtx tmp = gen_reg_rtx (V1TImode);
2802 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
2803 GEN_INT (64)));
2804 rtx tmp2 = gen_reg_rtx (V16QImode);
2805 emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
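  /* psadbw against zero adds up each group of eight unsigned bytes, so
     after folding the high 64 bits onto the low 64 bits above, a single
     psadbw leaves the whole byte sum in the low element, from which the
     QImode result is extracted below.  */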
2806 rtx tmp3 = gen_reg_rtx (V16QImode);
2807 emit_move_insn (tmp3, CONST0_RTX (V16QImode));
2808 rtx tmp4 = gen_reg_rtx (V2DImode);
2809 emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
2810 tmp4 = gen_lowpart (V16QImode, tmp4);
2811 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
2812 DONE;
2813 })
2814
2815 (define_mode_iterator REDUC_PLUS_MODE
2816 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2817 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2818 (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
2819
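;; For these wider modes the reduction proceeds by halving: extract the
;; high half of the vector, add it to the low half, then recurse on the
;; half-width reduction (sketch: V8SF -> vec_extract_hi plus addv4sf3,
;; then reduc_plus_scal_v4sf on the result).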
2820 (define_expand "reduc_plus_scal_<mode>"
2821 [(plus:REDUC_PLUS_MODE
2822 (match_operand:<ssescalarmode> 0 "register_operand")
2823 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2824 ""
2825 {
2826 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2827 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2828 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2829 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
2830 emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
2831 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2832 DONE;
2833 })
2834
2835 ;; Modes handled by reduc_sm{in,ax}* patterns.
2836 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2837 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2838 (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
2839 (V2DI "TARGET_SSE4_2")])
2840
2841 (define_expand "reduc_<code>_scal_<mode>"
2842 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2843 (match_operand:<ssescalarmode> 0 "register_operand")
2844 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2845 ""
2846 {
2847 rtx tmp = gen_reg_rtx (<MODE>mode);
2848 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2849 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2850 const0_rtx));
2851 DONE;
2852 })
2853
2854 (define_mode_iterator REDUC_SMINMAX_MODE
2855 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2856 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2857 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2858 (V64QI "TARGET_AVX512BW")
2859 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2860 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2861 (V8DF "TARGET_AVX512F")])
2862
2863 (define_expand "reduc_<code>_scal_<mode>"
2864 [(smaxmin:REDUC_SMINMAX_MODE
2865 (match_operand:<ssescalarmode> 0 "register_operand")
2866 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2867 ""
2868 {
2869 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2870 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2871 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2872 emit_insn (gen_<code><ssehalfvecmodelower>3
2873 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2874 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2875 DONE;
2876 })
2877
2878 (define_expand "reduc_<code>_scal_<mode>"
2879 [(umaxmin:VI_AVX512BW
2880 (match_operand:<ssescalarmode> 0 "register_operand")
2881 (match_operand:VI_AVX512BW 1 "register_operand"))]
2882 "TARGET_AVX512F"
2883 {
2884 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2885 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2886 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2887 emit_insn (gen_<code><ssehalfvecmodelower>3
2888 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2889 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2890 DONE;
2891 })
2892
2893 (define_expand "reduc_<code>_scal_<mode>"
2894 [(umaxmin:VI_256
2895 (match_operand:<ssescalarmode> 0 "register_operand")
2896 (match_operand:VI_256 1 "register_operand"))]
2897 "TARGET_AVX2"
2898 {
2899 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2900 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2901 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2902 emit_insn (gen_<code><ssehalfvecmodelower>3
2903 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2904 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2905 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2906 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2907 (operands[0], tmp3, const0_rtx));
2908 DONE;
2909 })
2910
2911 (define_expand "reduc_umin_scal_v8hi"
2912 [(umin:V8HI
2913 (match_operand:HI 0 "register_operand")
2914 (match_operand:V8HI 1 "register_operand"))]
2915 "TARGET_SSE4_1"
2916 {
2917 rtx tmp = gen_reg_rtx (V8HImode);
2918 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2919 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2920 DONE;
2921 })
2922
2923 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
2924 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2925 (unspec:VF_AVX512VL
2926 [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2927 (match_operand:SI 2 "const_0_to_255_operand")]
2928 UNSPEC_REDUCE))]
2929 "TARGET_AVX512DQ"
2930 "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
2931 [(set_attr "type" "sse")
2932 (set_attr "prefix" "evex")
2933 (set_attr "mode" "<MODE>")])
2934
2935 (define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
2936 [(set (match_operand:VF_128 0 "register_operand" "=v")
2937 (vec_merge:VF_128
2938 (unspec:VF_128
2939 [(match_operand:VF_128 1 "register_operand" "v")
2940 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
2941 (match_operand:SI 3 "const_0_to_255_operand")]
2942 UNSPEC_REDUCE)
2943 (match_dup 1)
2944 (const_int 1)))]
2945 "TARGET_AVX512DQ"
2946 "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
2947 [(set_attr "type" "sse")
2948 (set_attr "prefix" "evex")
2949 (set_attr "mode" "<MODE>")])
2950
2951 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2952 ;;
2953 ;; Parallel floating point comparisons
2954 ;;
2955 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2956
2957 (define_insn "avx_cmp<mode>3"
2958 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2959 (unspec:VF_128_256
2960 [(match_operand:VF_128_256 1 "register_operand" "x")
2961 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2962 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2963 UNSPEC_PCMP))]
2964 "TARGET_AVX"
2965 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2966 [(set_attr "type" "ssecmp")
2967 (set_attr "length_immediate" "1")
2968 (set_attr "prefix" "vex")
2969 (set_attr "mode" "<MODE>")])
2970
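;; The *avx_cmp<mode>3_{1,2,3,4} splits below handle a mask comparison
;; whose only use is to build an all-ones/all-zeros vector (possibly
;; through a NOT of the mask): under AVX512VL it can be rewritten as the
;; AVX vcmp pattern above with a vector destination, avoiding the round
;; trip through a mask register.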
2971 (define_insn_and_split "*avx_cmp<mode>3_1"
2972 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2973 (vec_merge:<sseintvecmode>
2974 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
2975 (match_operand:<sseintvecmode> 2 "const0_operand")
2976 (unspec:<avx512fmaskmode>
2977 [(match_operand:VF_128_256 3 "register_operand")
2978 (match_operand:VF_128_256 4 "nonimmediate_operand")
2979 (match_operand:SI 5 "const_0_to_31_operand")]
2980 UNSPEC_PCMP)))]
2981 "TARGET_AVX512VL && ix86_pre_reload_split ()"
2982 "#"
2983 "&& 1"
2984 [(set (match_dup 6)
2985 (unspec:VF_128_256
2986 [(match_dup 3)
2987 (match_dup 4)
2988 (match_dup 5)]
2989 UNSPEC_PCMP))
2990 (set (match_dup 0) (match_dup 7))]
2991 {
2992 operands[6] = gen_reg_rtx (<MODE>mode);
2993 operands[7]
2994 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
2995 })
2996
2997 (define_insn_and_split "*avx_cmp<mode>3_2"
2998 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2999 (vec_merge:<sseintvecmode>
3000 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
3001 (match_operand:<sseintvecmode> 2 "const0_operand")
3002 (not:<avx512fmaskmode>
3003 (unspec:<avx512fmaskmode>
3004 [(match_operand:VF_128_256 3 "register_operand")
3005 (match_operand:VF_128_256 4 "nonimmediate_operand")
3006 (match_operand:SI 5 "const_0_to_31_operand")]
3007 UNSPEC_PCMP))))]
3008 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3009 "#"
3010 "&& 1"
3011 [(set (match_dup 6)
3012 (unspec:VF_128_256
3013 [(match_dup 3)
3014 (match_dup 4)
3015 (match_dup 5)]
3016 UNSPEC_PCMP))
3017 (set (match_dup 0) (match_dup 7))]
3018 {
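  /* Flipping bit 2 of the VCMP predicate negates it, e.g. _CMP_EQ_OQ (0)
     becomes _CMP_NEQ_UQ (4); this absorbs the (not ...) wrapped around
     the comparison in the matched pattern.  */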
3019 operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);
3020 operands[6] = gen_reg_rtx (<MODE>mode);
3021 operands[7]
3022 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
3023 })
3024
3025 (define_insn_and_split "*avx_cmp<mode>3_3"
3026 [(set (match_operand:VF_128_256 0 "register_operand")
3027 (vec_merge:VF_128_256
3028 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3029 (match_operand:VF_128_256 2 "const0_operand")
3030 (unspec:<avx512fmaskmode>
3031 [(match_operand:VF_128_256 3 "register_operand")
3032 (match_operand:VF_128_256 4 "nonimmediate_operand")
3033 (match_operand:SI 5 "const_0_to_31_operand")]
3034 UNSPEC_PCMP)))]
3035 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3036 "#"
3037 "&& 1"
3038 [(set (match_dup 0)
3039 (unspec:VF_128_256
3040 [(match_dup 3)
3041 (match_dup 4)
3042 (match_dup 5)]
3043 UNSPEC_PCMP))])
3044
3045 (define_insn_and_split "*avx_cmp<mode>3_4"
3046 [(set (match_operand:VF_128_256 0 "register_operand")
3047 (vec_merge:VF_128_256
3048 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3049 (match_operand:VF_128_256 2 "const0_operand")
3050 (not:<avx512fmaskmode>
3051 (unspec:<avx512fmaskmode>
3052 [(match_operand:VF_128_256 3 "register_operand")
3053 (match_operand:VF_128_256 4 "nonimmediate_operand")
3054 (match_operand:SI 5 "const_0_to_31_operand")]
3055 UNSPEC_PCMP))))]
3056 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3057 "#"
3058 "&& 1"
3059 [(set (match_dup 0)
3060 (unspec:VF_128_256
3061 [(match_dup 3)
3062 (match_dup 4)
3063 (match_dup 5)]
3064 UNSPEC_PCMP))]
3065 "operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);")
3066
3067 (define_insn "avx_vmcmp<mode>3"
3068 [(set (match_operand:VF_128 0 "register_operand" "=x")
3069 (vec_merge:VF_128
3070 (unspec:VF_128
3071 [(match_operand:VF_128 1 "register_operand" "x")
3072 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
3073 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3074 UNSPEC_PCMP)
3075 (match_dup 1)
3076 (const_int 1)))]
3077 "TARGET_AVX"
3078 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
3079 [(set_attr "type" "ssecmp")
3080 (set_attr "length_immediate" "1")
3081 (set_attr "prefix" "vex")
3082 (set_attr "mode" "<ssescalarmode>")])
3083
3084 (define_insn "*<sse>_maskcmp<mode>3_comm"
3085 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3086 (match_operator:VF_128_256 3 "sse_comparison_operator"
3087 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
3088 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3089 "TARGET_SSE
3090 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
3091 "@
3092 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3093 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3094 [(set_attr "isa" "noavx,avx")
3095 (set_attr "type" "ssecmp")
3096 (set_attr "length_immediate" "1")
3097 (set_attr "prefix" "orig,vex")
3098 (set_attr "mode" "<MODE>")])
3099
3100 (define_insn "<sse>_maskcmp<mode>3"
3101 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3102 (match_operator:VF_128_256 3 "sse_comparison_operator"
3103 [(match_operand:VF_128_256 1 "register_operand" "0,x")
3104 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3105 "TARGET_SSE"
3106 "@
3107 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3108 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3109 [(set_attr "isa" "noavx,avx")
3110 (set_attr "type" "ssecmp")
3111 (set_attr "length_immediate" "1")
3112 (set_attr "prefix" "orig,vex")
3113 (set_attr "mode" "<MODE>")])
3114
3115 (define_insn "<sse>_vmmaskcmp<mode>3"
3116 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3117 (vec_merge:VF_128
3118 (match_operator:VF_128 3 "sse_comparison_operator"
3119 [(match_operand:VF_128 1 "register_operand" "0,x")
3120 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
3121 (match_dup 1)
3122 (const_int 1)))]
3123 "TARGET_SSE"
3124 "@
3125 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
3126 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
3127 [(set_attr "isa" "noavx,avx")
3128 (set_attr "type" "ssecmp")
3129 (set_attr "length_immediate" "1,*")
3130 (set_attr "prefix" "orig,vex")
3131 (set_attr "mode" "<ssescalarmode>")])
3132
3133 (define_mode_attr cmp_imm_predicate
3134 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
3135 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
3136 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
3137 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
3138 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
3139 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
3140 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
3141 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
3142 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
3143
3144 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
3145 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3146 (unspec:<avx512fmaskmode>
3147 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
3148 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
3149 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3150 UNSPEC_PCMP))]
3151 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
3152 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
3153 [(set_attr "type" "ssecmp")
3154 (set_attr "length_immediate" "1")
3155 (set_attr "prefix" "evex")
3156 (set_attr "mode" "<sseinsnmode>")])
3157
3158 (define_insn_and_split "*<avx512>_cmp<mode>3"
3159 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3160 (not:<avx512fmaskmode>
3161 (unspec:<avx512fmaskmode>
3162 [(match_operand:V48_AVX512VL 1 "register_operand")
3163 (match_operand:V48_AVX512VL 2 "nonimmediate_operand")
3164 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3165 UNSPEC_PCMP)))]
3166 "TARGET_AVX512F && ix86_pre_reload_split ()"
3167 "#"
3168 "&& 1"
3169 [(set (match_dup 0)
3170 (unspec:<avx512fmaskmode>
3171 [(match_dup 1)
3172 (match_dup 2)
3173 (match_dup 4)]
3174 UNSPEC_PCMP))]
3175 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3176
3177 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
3178 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3179 (unspec:<avx512fmaskmode>
3180 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3181 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3182 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3183 UNSPEC_PCMP))]
3184 "TARGET_AVX512BW"
3185 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3186 [(set_attr "type" "ssecmp")
3187 (set_attr "length_immediate" "1")
3188 (set_attr "prefix" "evex")
3189 (set_attr "mode" "<sseinsnmode>")])
3190
3191 (define_int_iterator UNSPEC_PCMP_ITER
3192 [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
3193
3194 (define_insn_and_split "*<avx512>_cmp<mode>3"
3195 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3196 (not:<avx512fmaskmode>
3197 (unspec:<avx512fmaskmode>
3198 [(match_operand:VI12_AVX512VL 1 "register_operand")
3199 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
3200 (match_operand:SI 3 "<cmp_imm_predicate>")]
3201 UNSPEC_PCMP_ITER)))]
3202 "TARGET_AVX512BW && ix86_pre_reload_split ()"
3203 "#"
3204 "&& 1"
3205 [(set (match_dup 0)
3206 (unspec:<avx512fmaskmode>
3207 [(match_dup 1)
3208 (match_dup 2)
3209 (match_dup 4)]
3210 UNSPEC_PCMP_ITER))]
3211 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3212
3213 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3214 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3215 (unspec:<avx512fmaskmode>
3216 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3217 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3218 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3219 UNSPEC_UNSIGNED_PCMP))]
3220 "TARGET_AVX512BW"
3221 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3222 [(set_attr "type" "ssecmp")
3223 (set_attr "length_immediate" "1")
3224 (set_attr "prefix" "evex")
3225 (set_attr "mode" "<sseinsnmode>")])
3226
3227 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3228 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3229 (unspec:<avx512fmaskmode>
3230 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
3231 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
3232 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3233 UNSPEC_UNSIGNED_PCMP))]
3234 "TARGET_AVX512F"
3235 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3236 [(set_attr "type" "ssecmp")
3237 (set_attr "length_immediate" "1")
3238 (set_attr "prefix" "evex")
3239 (set_attr "mode" "<sseinsnmode>")])
3240
3241 (define_insn_and_split "*<avx512>_ucmp<mode>3"
3242 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3243 (not:<avx512fmaskmode>
3244 (unspec:<avx512fmaskmode>
3245 [(match_operand:VI48_AVX512VL 1 "register_operand")
3246 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
3247 (match_operand:SI 3 "const_0_to_7_operand")]
3248 UNSPEC_UNSIGNED_PCMP)))]
3249 "TARGET_AVX512F && ix86_pre_reload_split ()"
3250 "#"
3251 "&& 1"
3252 [(set (match_dup 0)
3253 (unspec:<avx512fmaskmode>
3254 [(match_dup 1)
3255 (match_dup 2)
3256 (match_dup 4)]
3257 UNSPEC_UNSIGNED_PCMP))]
3258 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3259
3260 (define_int_attr pcmp_signed_mask
3261 [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")])
3262
3263 ;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw.
3264 ;; For signed comparisons handle only EQ (0) and NEQ (4); for unsigned ones
3265 ;; also handle LE (2) and NLE (6), which against 0 are the same as EQ and NEQ.
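;; For instance (an illustrative sketch assuming AVX512BW and AVX512VL),
;; the saturating-subtract-then-compare idiom
;;
;;   #include <immintrin.h>
;;   __mmask8 le_u16 (__m128i x, __m128i y)
;;   {
;;     return _mm_cmpeq_epi16_mask (_mm_subs_epu16 (x, y),
;;                                  _mm_setzero_si128 ());
;;   }
;;
;; is what the split below turns into a single vpcmpleuw (or vpcmpnltuw
;; when the memory operand has to go on the other side).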
3266
3267 (define_split
3268 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3269 (unspec:<avx512fmaskmode>
3270 [(us_minus:VI12_AVX512VL
3271 (match_operand:VI12_AVX512VL 1 "vector_operand")
3272 (match_operand:VI12_AVX512VL 2 "vector_operand"))
3273 (match_operand:VI12_AVX512VL 3 "const0_operand")
3274 (match_operand:SI 4 "const_0_to_7_operand")]
3275 UNSPEC_PCMP_ITER))]
3276 "TARGET_AVX512BW
3277 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)
3278 && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0"
3279 [(const_int 0)]
3280 {
3281 /* LE: 2, NLT: 5, NLE: 6, LT: 1 */
3282 int cmp_predicate = 2; /* LE */
3283 if (MEM_P (operands[1]))
3284 {
3285 std::swap (operands[1], operands[2]);
3286 cmp_predicate = 5; /* NLT (GE) */
3287 }
3288 if ((INTVAL (operands[4]) & 4) != 0)
3289 cmp_predicate ^= 4; /* Invert the comparison to NLE (GT) or LT. */
3290   emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1], operands[2],
3291 GEN_INT (cmp_predicate)));
3292 DONE;
3293 })
3294
3295 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
3296 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3297 (and:<avx512fmaskmode>
3298 (unspec:<avx512fmaskmode>
3299 [(match_operand:VF_128 1 "register_operand" "v")
3300 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3301 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3302 UNSPEC_PCMP)
3303 (const_int 1)))]
3304 "TARGET_AVX512F"
3305 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
3306 [(set_attr "type" "ssecmp")
3307 (set_attr "length_immediate" "1")
3308 (set_attr "prefix" "evex")
3309 (set_attr "mode" "<ssescalarmode>")])
3310
3311 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
3312 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3313 (and:<avx512fmaskmode>
3314 (unspec:<avx512fmaskmode>
3315 [(match_operand:VF_128 1 "register_operand" "v")
3316 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3317 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3318 UNSPEC_PCMP)
3319 (and:<avx512fmaskmode>
3320 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
3321 (const_int 1))))]
3322 "TARGET_AVX512F"
3323 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
3324 [(set_attr "type" "ssecmp")
3325 (set_attr "length_immediate" "1")
3326 (set_attr "prefix" "evex")
3327 (set_attr "mode" "<ssescalarmode>")])
3328
3329 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
3330 [(set (reg:CCFP FLAGS_REG)
3331 (compare:CCFP
3332 (vec_select:MODEF
3333 (match_operand:<ssevecmode> 0 "register_operand" "v")
3334 (parallel [(const_int 0)]))
3335 (vec_select:MODEF
3336 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3337 (parallel [(const_int 0)]))))]
3338 "SSE_FLOAT_MODE_P (<MODE>mode)"
3339 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
3340 [(set_attr "type" "ssecomi")
3341 (set_attr "prefix" "maybe_vex")
3342 (set_attr "prefix_rep" "0")
3343 (set (attr "prefix_data16")
3344 (if_then_else (eq_attr "mode" "DF")
3345 (const_string "1")
3346 (const_string "0")))
3347 (set_attr "mode" "<MODE>")])
3348
3349 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3350 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3351 (match_operator:<avx512fmaskmode> 1 ""
3352 [(match_operand:V48_AVX512VL 2 "register_operand")
3353 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
3354 "TARGET_AVX512F"
3355 {
3356 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3357 operands[2], operands[3]);
3358 gcc_assert (ok);
3359 DONE;
3360 })
3361
3362 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3363 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3364 (match_operator:<avx512fmaskmode> 1 ""
3365 [(match_operand:VI12_AVX512VL 2 "register_operand")
3366 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3367 "TARGET_AVX512BW"
3368 {
3369 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3370 operands[2], operands[3]);
3371 gcc_assert (ok);
3372 DONE;
3373 })
3374
3375 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3376 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3377 (match_operator:<sseintvecmode> 1 ""
3378 [(match_operand:VI_256 2 "register_operand")
3379 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3380 "TARGET_AVX2"
3381 {
3382 bool ok = ix86_expand_int_vec_cmp (operands);
3383 gcc_assert (ok);
3384 DONE;
3385 })
3386
3387 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3388 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3389 (match_operator:<sseintvecmode> 1 ""
3390 [(match_operand:VI124_128 2 "register_operand")
3391 (match_operand:VI124_128 3 "vector_operand")]))]
3392 "TARGET_SSE2"
3393 {
3394 bool ok = ix86_expand_int_vec_cmp (operands);
3395 gcc_assert (ok);
3396 DONE;
3397 })
3398
3399 (define_expand "vec_cmpv2div2di"
3400 [(set (match_operand:V2DI 0 "register_operand")
3401 (match_operator:V2DI 1 ""
3402 [(match_operand:V2DI 2 "register_operand")
3403 (match_operand:V2DI 3 "vector_operand")]))]
3404 "TARGET_SSE4_2"
3405 {
3406 bool ok = ix86_expand_int_vec_cmp (operands);
3407 gcc_assert (ok);
3408 DONE;
3409 })
3410
3411 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3412 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3413 (match_operator:<sseintvecmode> 1 ""
3414 [(match_operand:VF_256 2 "register_operand")
3415 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3416 "TARGET_AVX"
3417 {
3418 bool ok = ix86_expand_fp_vec_cmp (operands);
3419 gcc_assert (ok);
3420 DONE;
3421 })
3422
3423 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3424 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3425 (match_operator:<sseintvecmode> 1 ""
3426 [(match_operand:VF_128 2 "register_operand")
3427 (match_operand:VF_128 3 "vector_operand")]))]
3428 "TARGET_SSE"
3429 {
3430 bool ok = ix86_expand_fp_vec_cmp (operands);
3431 gcc_assert (ok);
3432 DONE;
3433 })
3434
3435 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3436 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3437 (match_operator:<avx512fmaskmode> 1 ""
3438 [(match_operand:VI48_AVX512VL 2 "register_operand")
3439 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3440 "TARGET_AVX512F"
3441 {
3442 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3443 operands[2], operands[3]);
3444 gcc_assert (ok);
3445 DONE;
3446 })
3447
3448 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3449 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3450 (match_operator:<avx512fmaskmode> 1 ""
3451 [(match_operand:VI12_AVX512VL 2 "register_operand")
3452 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3453 "TARGET_AVX512BW"
3454 {
3455 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3456 operands[2], operands[3]);
3457 gcc_assert (ok);
3458 DONE;
3459 })
3460
3461 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3462 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3463 (match_operator:<sseintvecmode> 1 ""
3464 [(match_operand:VI_256 2 "register_operand")
3465 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3466 "TARGET_AVX2"
3467 {
3468 bool ok = ix86_expand_int_vec_cmp (operands);
3469 gcc_assert (ok);
3470 DONE;
3471 })
3472
3473 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3474 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3475 (match_operator:<sseintvecmode> 1 ""
3476 [(match_operand:VI124_128 2 "register_operand")
3477 (match_operand:VI124_128 3 "vector_operand")]))]
3478 "TARGET_SSE2"
3479 {
3480 bool ok = ix86_expand_int_vec_cmp (operands);
3481 gcc_assert (ok);
3482 DONE;
3483 })
3484
3485 (define_expand "vec_cmpuv2div2di"
3486 [(set (match_operand:V2DI 0 "register_operand")
3487 (match_operator:V2DI 1 ""
3488 [(match_operand:V2DI 2 "register_operand")
3489 (match_operand:V2DI 3 "vector_operand")]))]
3490 "TARGET_SSE4_2"
3491 {
3492 bool ok = ix86_expand_int_vec_cmp (operands);
3493 gcc_assert (ok);
3494 DONE;
3495 })
3496
3497 (define_expand "vec_cmpeqv2div2di"
3498 [(set (match_operand:V2DI 0 "register_operand")
3499 (match_operator:V2DI 1 ""
3500 [(match_operand:V2DI 2 "register_operand")
3501 (match_operand:V2DI 3 "vector_operand")]))]
3502 "TARGET_SSE4_1"
3503 {
3504 bool ok = ix86_expand_int_vec_cmp (operands);
3505 gcc_assert (ok);
3506 DONE;
3507 })
3508
3509 (define_expand "vcond<V_512:mode><VF_512:mode>"
3510 [(set (match_operand:V_512 0 "register_operand")
3511 (if_then_else:V_512
3512 (match_operator 3 ""
3513 [(match_operand:VF_512 4 "nonimmediate_operand")
3514 (match_operand:VF_512 5 "nonimmediate_operand")])
3515 (match_operand:V_512 1 "general_operand")
3516 (match_operand:V_512 2 "general_operand")))]
3517 "TARGET_AVX512F
3518 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3519 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3520 {
3521 bool ok = ix86_expand_fp_vcond (operands);
3522 gcc_assert (ok);
3523 DONE;
3524 })
3525
3526 (define_expand "vcond<V_256:mode><VF_256:mode>"
3527 [(set (match_operand:V_256 0 "register_operand")
3528 (if_then_else:V_256
3529 (match_operator 3 ""
3530 [(match_operand:VF_256 4 "nonimmediate_operand")
3531 (match_operand:VF_256 5 "nonimmediate_operand")])
3532 (match_operand:V_256 1 "general_operand")
3533 (match_operand:V_256 2 "general_operand")))]
3534 "TARGET_AVX
3535 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3536 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3537 {
3538 bool ok = ix86_expand_fp_vcond (operands);
3539 gcc_assert (ok);
3540 DONE;
3541 })
3542
3543 (define_expand "vcond<V_128:mode><VF_128:mode>"
3544 [(set (match_operand:V_128 0 "register_operand")
3545 (if_then_else:V_128
3546 (match_operator 3 ""
3547 [(match_operand:VF_128 4 "vector_operand")
3548 (match_operand:VF_128 5 "vector_operand")])
3549 (match_operand:V_128 1 "general_operand")
3550 (match_operand:V_128 2 "general_operand")))]
3551 "TARGET_SSE
3552 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3553 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3554 {
3555 bool ok = ix86_expand_fp_vcond (operands);
3556 gcc_assert (ok);
3557 DONE;
3558 })
3559
3560 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3561 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3562 (vec_merge:V48_AVX512VL
3563 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3564 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3565 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3566 "TARGET_AVX512F")
3567
3568 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3569 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3570 (vec_merge:VI12_AVX512VL
3571 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3572 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3573 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3574 "TARGET_AVX512BW")
3575
3576 ;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
3577 ;; and their condition can be folded late into a constant, we need to
3578 ;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
3579 (define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
3580 V8SI V4DI])
3581
3582 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3583 [(set (match_operand:VI_256_AVX2 0 "register_operand")
3584 (vec_merge:VI_256_AVX2
3585 (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
3586 (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
3587 (match_operand:<sseintvecmode> 3 "register_operand")))]
3588 "TARGET_AVX"
3589 {
3590 ix86_expand_sse_movcc (operands[0], operands[3],
3591 operands[1], operands[2]);
3592 DONE;
3593 })
3594
3595 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3596 [(set (match_operand:VI124_128 0 "register_operand")
3597 (vec_merge:VI124_128
3598 (match_operand:VI124_128 1 "vector_operand")
3599 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3600 (match_operand:<sseintvecmode> 3 "register_operand")))]
3601 "TARGET_SSE2"
3602 {
3603 ix86_expand_sse_movcc (operands[0], operands[3],
3604 operands[1], operands[2]);
3605 DONE;
3606 })
3607
3608 (define_expand "vcond_mask_v2div2di"
3609 [(set (match_operand:V2DI 0 "register_operand")
3610 (vec_merge:V2DI
3611 (match_operand:V2DI 1 "vector_operand")
3612 (match_operand:V2DI 2 "nonimm_or_0_operand")
3613 (match_operand:V2DI 3 "register_operand")))]
3614 "TARGET_SSE4_2"
3615 {
3616 ix86_expand_sse_movcc (operands[0], operands[3],
3617 operands[1], operands[2]);
3618 DONE;
3619 })
3620
3621 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3622 [(set (match_operand:VF_256 0 "register_operand")
3623 (vec_merge:VF_256
3624 (match_operand:VF_256 1 "nonimmediate_operand")
3625 (match_operand:VF_256 2 "nonimm_or_0_operand")
3626 (match_operand:<sseintvecmode> 3 "register_operand")))]
3627 "TARGET_AVX"
3628 {
3629 ix86_expand_sse_movcc (operands[0], operands[3],
3630 operands[1], operands[2]);
3631 DONE;
3632 })
3633
3634 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3635 [(set (match_operand:VF_128 0 "register_operand")
3636 (vec_merge:VF_128
3637 (match_operand:VF_128 1 "vector_operand")
3638 (match_operand:VF_128 2 "nonimm_or_0_operand")
3639 (match_operand:<sseintvecmode> 3 "register_operand")))]
3640 "TARGET_SSE"
3641 {
3642 ix86_expand_sse_movcc (operands[0], operands[3],
3643 operands[1], operands[2]);
3644 DONE;
3645 })
3646
3647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3648 ;;
3649 ;; Parallel floating point logical operations
3650 ;;
3651 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3652
3653 (define_insn "<sse>_andnot<mode>3<mask_name>"
3654 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3655 (and:VF_128_256
3656 (not:VF_128_256
3657 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3658 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3659 "TARGET_SSE && <mask_avx512vl_condition>"
3660 {
3661 char buf[128];
3662 const char *ops;
3663 const char *suffix;
3664
3665 switch (which_alternative)
3666 {
3667 case 0:
3668 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3669 break;
3670 case 1:
3671 case 2:
3672 case 3:
3673 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3674 break;
3675 default:
3676 gcc_unreachable ();
3677 }
3678
3679 switch (get_attr_mode (insn))
3680 {
3681 case MODE_V8SF:
3682 case MODE_V4SF:
3683 suffix = "ps";
3684 break;
3685 case MODE_OI:
3686 case MODE_TI:
3687 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3688 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3689 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3690 break;
3691 default:
3692 suffix = "<ssemodesuffix>";
3693 }
3694
3695 snprintf (buf, sizeof (buf), ops, suffix);
3696 output_asm_insn (buf, operands);
3697 return "";
3698 }
3699 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3700 (set_attr "type" "sselog")
3701 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3702 (set (attr "mode")
3703 (cond [(and (match_test "<mask_applied>")
3704 (and (eq_attr "alternative" "1")
3705 (match_test "!TARGET_AVX512DQ")))
3706 (const_string "<sseintvecmode2>")
3707 (eq_attr "alternative" "3")
3708 (const_string "<sseintvecmode2>")
3709 (match_test "TARGET_AVX")
3710 (const_string "<MODE>")
3711 (match_test "optimize_function_for_size_p (cfun)")
3712 (const_string "V4SF")
3713 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3714 (const_string "V4SF")
3715 ]
3716 (const_string "<MODE>")))])
3717
3718 (define_insn "<sse>_andnot<mode>3<mask_name>"
3719 [(set (match_operand:VF_512 0 "register_operand" "=v")
3720 (and:VF_512
3721 (not:VF_512
3722 (match_operand:VF_512 1 "register_operand" "v"))
3723 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3724 "TARGET_AVX512F"
3725 {
3726 char buf[128];
3727 const char *ops;
3728 const char *suffix;
3729
3730 suffix = "<ssemodesuffix>";
3731 ops = "";
3732
3733 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3734 if (!TARGET_AVX512DQ)
3735 {
3736 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3737 ops = "p";
3738 }
3739
3740 snprintf (buf, sizeof (buf),
3741 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3742 ops, suffix);
3743 output_asm_insn (buf, operands);
3744 return "";
3745 }
3746 [(set_attr "type" "sselog")
3747 (set_attr "prefix" "evex")
3748 (set (attr "mode")
3749 (if_then_else (match_test "TARGET_AVX512DQ")
3750 (const_string "<sseinsnmode>")
3751 (const_string "XI")))])
3752
3753 (define_expand "<code><mode>3<mask_name>"
3754 [(set (match_operand:VF_128_256 0 "register_operand")
3755 (any_logic:VF_128_256
3756 (match_operand:VF_128_256 1 "vector_operand")
3757 (match_operand:VF_128_256 2 "vector_operand")))]
3758 "TARGET_SSE && <mask_avx512vl_condition>"
3759 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3760
3761 (define_expand "<code><mode>3<mask_name>"
3762 [(set (match_operand:VF_512 0 "register_operand")
3763 (any_logic:VF_512
3764 (match_operand:VF_512 1 "nonimmediate_operand")
3765 (match_operand:VF_512 2 "nonimmediate_operand")))]
3766 "TARGET_AVX512F"
3767 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3768
3769 (define_insn "*<code><mode>3<mask_name>"
3770 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3771 (any_logic:VF_128_256
3772 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3773 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3774 "TARGET_SSE && <mask_avx512vl_condition>
3775 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3776 {
3777 char buf[128];
3778 const char *ops;
3779 const char *suffix;
3780
3781 switch (which_alternative)
3782 {
3783 case 0:
3784 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3785 break;
3786 case 1:
3787 case 2:
3788 case 3:
3789 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3790 break;
3791 default:
3792 gcc_unreachable ();
3793 }
3794
3795 switch (get_attr_mode (insn))
3796 {
3797 case MODE_V8SF:
3798 case MODE_V4SF:
3799 suffix = "ps";
3800 break;
3801 case MODE_OI:
3802 case MODE_TI:
3803 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3804 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3805 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3806 break;
3807 default:
3808 suffix = "<ssemodesuffix>";
3809 }
3810
3811 snprintf (buf, sizeof (buf), ops, suffix);
3812 output_asm_insn (buf, operands);
3813 return "";
3814 }
3815 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3816 (set_attr "type" "sselog")
3817 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3818 (set (attr "mode")
3819 (cond [(and (match_test "<mask_applied>")
3820 (and (eq_attr "alternative" "1")
3821 (match_test "!TARGET_AVX512DQ")))
3822 (const_string "<sseintvecmode2>")
3823 (eq_attr "alternative" "3")
3824 (const_string "<sseintvecmode2>")
3825 (match_test "TARGET_AVX")
3826 (const_string "<MODE>")
3827 (match_test "optimize_function_for_size_p (cfun)")
3828 (const_string "V4SF")
3829 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3830 (const_string "V4SF")
3831 ]
3832 (const_string "<MODE>")))])
3833
3834 (define_insn "*<code><mode>3<mask_name>"
3835 [(set (match_operand:VF_512 0 "register_operand" "=v")
3836 (any_logic:VF_512
3837 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3838 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3839 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3840 {
3841 char buf[128];
3842 const char *ops;
3843 const char *suffix;
3844
3845 suffix = "<ssemodesuffix>";
3846 ops = "";
3847
3848 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3849 if (!TARGET_AVX512DQ)
3850 {
3851 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3852 ops = "p";
3853 }
3854
3855 snprintf (buf, sizeof (buf),
3856 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3857 ops, suffix);
3858 output_asm_insn (buf, operands);
3859 return "";
3860 }
3861 [(set_attr "type" "sselog")
3862 (set_attr "prefix" "evex")
3863 (set (attr "mode")
3864 (if_then_else (match_test "TARGET_AVX512DQ")
3865 (const_string "<sseinsnmode>")
3866 (const_string "XI")))])
3867
3868 (define_expand "copysign<mode>3"
3869 [(set (match_dup 4)
3870 (and:VF
3871 (not:VF (match_dup 3))
3872 (match_operand:VF 1 "vector_operand")))
3873 (set (match_dup 5)
3874 (and:VF (match_dup 3)
3875 (match_operand:VF 2 "vector_operand")))
3876 (set (match_operand:VF 0 "register_operand")
3877 (ior:VF (match_dup 4) (match_dup 5)))]
3878 "TARGET_SSE"
3879 {
3880 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3881
3882 operands[4] = gen_reg_rtx (<MODE>mode);
3883 operands[5] = gen_reg_rtx (<MODE>mode);
3884 })
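;; Per element the three sets above amount to the usual sign-transfer bit
;; trick (a scalar C sketch, not the emitted code):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float copysignf_sketch (float x, float y)
;;   {
;;     uint32_t ix, iy;
;;     memcpy (&ix, &x, sizeof ix);
;;     memcpy (&iy, &y, sizeof iy);
;;     /* operand 4 = x & ~SIGNMASK, operand 5 = y & SIGNMASK, result = ior.  */
;;     ix = (ix & 0x7fffffffu) | (iy & 0x80000000u);
;;     memcpy (&x, &ix, sizeof x);
;;     return x;
;;   }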
3885
3886 (define_expand "xorsign<mode>3"
3887 [(set (match_dup 4)
3888 (and:VF (match_dup 3)
3889 (match_operand:VF 2 "vector_operand")))
3890 (set (match_operand:VF 0 "register_operand")
3891 (xor:VF (match_dup 4)
3892 (match_operand:VF 1 "vector_operand")))]
3893 "TARGET_SSE"
3894 {
3895 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3896
3897 operands[4] = gen_reg_rtx (<MODE>mode);
3898 })
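;; xorsign<mode>3 is the same idea without clearing the sign of operand 1
;; first: per element it computes x ^ (y & SIGNMASK), i.e. x with its sign
;; flipped whenever y is negative (two instructions instead of three).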
3899
3900 (define_expand "signbit<mode>2"
3901 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3902 (lshiftrt:<sseintvecmode>
3903 (subreg:<sseintvecmode>
3904 (match_operand:VF1_AVX2 1 "register_operand") 0)
3905 (match_dup 2)))]
3906 "TARGET_SSE2"
3907 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
3908
3909 ;; Also define scalar versions. These are used for abs, neg, and
3910 ;; conditional move. Using subregs into vector modes causes register
3911 ;; allocation lossage. These patterns do not allow memory operands
3912 ;; because the native instructions read the full 128 bits.
3913
3914 (define_insn "*andnot<mode>3"
3915 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3916 (and:MODEF
3917 (not:MODEF
3918 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3919 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3920 "SSE_FLOAT_MODE_P (<MODE>mode)"
3921 {
3922 char buf[128];
3923 const char *ops;
3924 const char *suffix
3925 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3926
3927 switch (which_alternative)
3928 {
3929 case 0:
3930 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3931 break;
3932 case 1:
3933 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3934 break;
3935 case 2:
3936 if (TARGET_AVX512DQ)
3937 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3938 else
3939 {
3940 suffix = <MODE>mode == DFmode ? "q" : "d";
3941 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3942 }
3943 break;
3944 case 3:
3945 if (TARGET_AVX512DQ)
3946 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3947 else
3948 {
3949 suffix = <MODE>mode == DFmode ? "q" : "d";
3950 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3951 }
3952 break;
3953 default:
3954 gcc_unreachable ();
3955 }
3956
3957 snprintf (buf, sizeof (buf), ops, suffix);
3958 output_asm_insn (buf, operands);
3959 return "";
3960 }
3961 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3962 (set_attr "type" "sselog")
3963 (set_attr "prefix" "orig,vex,evex,evex")
3964 (set (attr "mode")
3965 (cond [(eq_attr "alternative" "2")
3966 (if_then_else (match_test "TARGET_AVX512DQ")
3967 (const_string "<ssevecmode>")
3968 (const_string "TI"))
3969 (eq_attr "alternative" "3")
3970 (if_then_else (match_test "TARGET_AVX512DQ")
3971 (const_string "<avx512fvecmode>")
3972 (const_string "XI"))
3973 (match_test "TARGET_AVX")
3974 (const_string "<ssevecmode>")
3975 (match_test "optimize_function_for_size_p (cfun)")
3976 (const_string "V4SF")
3977 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3978 (const_string "V4SF")
3979 ]
3980 (const_string "<ssevecmode>")))])
3981
3982 (define_insn "*andnottf3"
3983 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3984 (and:TF
3985 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3986 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3987 "TARGET_SSE"
3988 {
3989 char buf[128];
3990 const char *ops;
3991 const char *tmp
3992 = (which_alternative >= 2 ? "pandnq"
3993 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3994
3995 switch (which_alternative)
3996 {
3997 case 0:
3998 ops = "%s\t{%%2, %%0|%%0, %%2}";
3999 break;
4000 case 1:
4001 case 2:
4002 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4003 break;
4004 case 3:
4005 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4006 break;
4007 default:
4008 gcc_unreachable ();
4009 }
4010
4011 snprintf (buf, sizeof (buf), ops, tmp);
4012 output_asm_insn (buf, operands);
4013 return "";
4014 }
4015 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4016 (set_attr "type" "sselog")
4017 (set (attr "prefix_data16")
4018 (if_then_else
4019 (and (eq_attr "alternative" "0")
4020 (eq_attr "mode" "TI"))
4021 (const_string "1")
4022 (const_string "*")))
4023 (set_attr "prefix" "orig,vex,evex,evex")
4024 (set (attr "mode")
4025 (cond [(eq_attr "alternative" "2")
4026 (const_string "TI")
4027 (eq_attr "alternative" "3")
4028 (const_string "XI")
4029 (match_test "TARGET_AVX")
4030 (const_string "TI")
4031 (ior (not (match_test "TARGET_SSE2"))
4032 (match_test "optimize_function_for_size_p (cfun)"))
4033 (const_string "V4SF")
4034 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4035 (const_string "V4SF")
4036 ]
4037 (const_string "TI")))])
4038
4039 (define_insn "*<code><mode>3"
4040 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
4041 (any_logic:MODEF
4042 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
4043 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
4044 "SSE_FLOAT_MODE_P (<MODE>mode)"
4045 {
4046 char buf[128];
4047 const char *ops;
4048 const char *suffix
4049 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
4050
4051 switch (which_alternative)
4052 {
4053 case 0:
4054 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
4055 break;
4056 case 2:
4057 if (!TARGET_AVX512DQ)
4058 {
4059 suffix = <MODE>mode == DFmode ? "q" : "d";
4060 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4061 break;
4062 }
4063 /* FALLTHRU */
4064 case 1:
4065 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4066 break;
4067 case 3:
4068 if (TARGET_AVX512DQ)
4069 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4070 else
4071 {
4072 suffix = <MODE>mode == DFmode ? "q" : "d";
4073 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4074 }
4075 break;
4076 default:
4077 gcc_unreachable ();
4078 }
4079
4080 snprintf (buf, sizeof (buf), ops, suffix);
4081 output_asm_insn (buf, operands);
4082 return "";
4083 }
4084 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4085 (set_attr "type" "sselog")
4086 (set_attr "prefix" "orig,vex,evex,evex")
4087 (set (attr "mode")
4088 (cond [(eq_attr "alternative" "2")
4089 (if_then_else (match_test "TARGET_AVX512DQ")
4090 (const_string "<ssevecmode>")
4091 (const_string "TI"))
4092 (eq_attr "alternative" "3")
4093 (if_then_else (match_test "TARGET_AVX512DQ")
4094 (const_string "<avx512fvecmode>")
4095 (const_string "XI"))
4096 (match_test "TARGET_AVX")
4097 (const_string "<ssevecmode>")
4098 (match_test "optimize_function_for_size_p (cfun)")
4099 (const_string "V4SF")
4100 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4101 (const_string "V4SF")
4102 ]
4103 (const_string "<ssevecmode>")))])
4104
4105 (define_expand "<code>tf3"
4106 [(set (match_operand:TF 0 "register_operand")
4107 (any_logic:TF
4108 (match_operand:TF 1 "vector_operand")
4109 (match_operand:TF 2 "vector_operand")))]
4110 "TARGET_SSE"
4111 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
4112
4113 (define_insn "*<code>tf3"
4114 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
4115 (any_logic:TF
4116 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
4117 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
4118 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4119 {
4120 char buf[128];
4121 const char *ops;
4122 const char *tmp
4123 = (which_alternative >= 2 ? "p<logic>q"
4124 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
4125
4126 switch (which_alternative)
4127 {
4128 case 0:
4129 ops = "%s\t{%%2, %%0|%%0, %%2}";
4130 break;
4131 case 1:
4132 case 2:
4133 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4134 break;
4135 case 3:
4136 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4137 break;
4138 default:
4139 gcc_unreachable ();
4140 }
4141
4142 snprintf (buf, sizeof (buf), ops, tmp);
4143 output_asm_insn (buf, operands);
4144 return "";
4145 }
4146 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4147 (set_attr "type" "sselog")
4148 (set (attr "prefix_data16")
4149 (if_then_else
4150 (and (eq_attr "alternative" "0")
4151 (eq_attr "mode" "TI"))
4152 (const_string "1")
4153 (const_string "*")))
4154 (set_attr "prefix" "orig,vex,evex,evex")
4155 (set (attr "mode")
4156 (cond [(eq_attr "alternative" "2")
4157 (const_string "TI")
4158 (eq_attr "alternative" "3")
4159 (const_string "XI")
4160 (match_test "TARGET_AVX")
4161 (const_string "TI")
4162 (ior (not (match_test "TARGET_SSE2"))
4163 (match_test "optimize_function_for_size_p (cfun)"))
4164 (const_string "V4SF")
4165 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4166 (const_string "V4SF")
4167 ]
4168 (const_string "TI")))])
4169
4170 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4171 ;;
4172 ;; FMA floating point multiply/accumulate instructions. These include
4173 ;; scalar versions of the instructions as well as vector versions.
4174 ;;
4175 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4176
4177 ;; The standard names for scalar FMA are only available with SSE math enabled.
4178 ;; The AVX512F CPUID bit enables the EVEX-encoded scalar and 512-bit FMA
4179 ;; instructions regardless of the FMA CPUID bit, so we enable fma for
4180 ;; TARGET_AVX512F even when TARGET_FMA and TARGET_FMA4 are both false.
4181 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
4182 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
4183 ;; GAS to allow proper prefix selection. However, for the moment all hardware
4184 ;; that supports AVX512F also supports FMA so we can ignore this for now.
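;; For reference, the four standard names expanded below compute, per
;; element and with a single final rounding (illustrative C equivalents):
;;
;;   fma:   a * b + c        __builtin_fmaf (a, b, c)
;;   fms:   a * b - c        __builtin_fmaf (a, b, -c)
;;   fnma:  -(a * b) + c     __builtin_fmaf (-a, b, c)
;;   fnms:  -(a * b) - c     __builtin_fmaf (-a, b, -c)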
4185 (define_mode_iterator FMAMODEM
4186 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4187 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4188 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4189 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4190 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4191 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4192 (V16SF "TARGET_AVX512F")
4193 (V8DF "TARGET_AVX512F")])
4194
4195 (define_expand "fma<mode>4"
4196 [(set (match_operand:FMAMODEM 0 "register_operand")
4197 (fma:FMAMODEM
4198 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4199 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4200 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
4201
4202 (define_expand "fms<mode>4"
4203 [(set (match_operand:FMAMODEM 0 "register_operand")
4204 (fma:FMAMODEM
4205 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4206 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4207 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
4208
4209 (define_expand "fnma<mode>4"
4210 [(set (match_operand:FMAMODEM 0 "register_operand")
4211 (fma:FMAMODEM
4212 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4213 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4214 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
4215
4216 (define_expand "fnms<mode>4"
4217 [(set (match_operand:FMAMODEM 0 "register_operand")
4218 (fma:FMAMODEM
4219 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4220 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4221 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
4222
4223 ;; The builtins for intrinsics are not constrained by SSE math enabled.
4224 (define_mode_iterator FMAMODE_AVX512
4225 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4226 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4227 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4228 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4229 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4230 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4231 (V16SF "TARGET_AVX512F")
4232 (V8DF "TARGET_AVX512F")])
4233
4234 (define_mode_iterator FMAMODE
4235 [SF DF V4SF V2DF V8SF V4DF])
4236
4237 (define_expand "fma4i_fmadd_<mode>"
4238 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4239 (fma:FMAMODE_AVX512
4240 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4241 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4242 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
4243
4244 (define_expand "fma4i_fmsub_<mode>"
4245 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4246 (fma:FMAMODE_AVX512
4247 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4248 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4249 (neg:FMAMODE_AVX512
4250 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
4251
4252 (define_expand "fma4i_fnmadd_<mode>"
4253 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4254 (fma:FMAMODE_AVX512
4255 (neg:FMAMODE_AVX512
4256 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4257 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4258 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
4259
4260 (define_expand "fma4i_fnmsub_<mode>"
4261 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4262 (fma:FMAMODE_AVX512
4263 (neg:FMAMODE_AVX512
4264 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4265 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4266 (neg:FMAMODE_AVX512
4267 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
4268
4269 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
4270 [(match_operand:VF_AVX512VL 0 "register_operand")
4271 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4272 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4273 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4274 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4275 "TARGET_AVX512F && <round_mode512bit_condition>"
4276 {
4277 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
4278 operands[0], operands[1], operands[2], operands[3],
4279 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4280 DONE;
4281 })
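;; The _maskz expander above passes CONST0_RTX as the merge source of the
;; underlying *_maskz_1 pattern, i.e. lanes whose mask bit is clear become
;; 0.0 rather than keeping the old destination; this is the zero-masking
;; form exposed by e.g. _mm512_maskz_fmadd_ps.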
4282
4283 (define_insn "*fma_fmadd_<mode>"
4284 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4285 (fma:FMAMODE
4286 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4287 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4288 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4289 "TARGET_FMA || TARGET_FMA4"
4290 "@
4291 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4292 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4293 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4294 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4295 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4296 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4297 (set_attr "type" "ssemuladd")
4298 (set_attr "mode" "<MODE>")])
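;; Note on the FMA3 alternatives above (informative only): the digits in
;; vfmaddXYZ say which instruction operands are multiplied and which is
;; added, with the first operand doubling as the destination:
;;
;;   vfmadd132 dst, a, b:  dst = dst * b + a
;;   vfmadd213 dst, a, b:  dst = a * dst + b
;;   vfmadd231 dst, a, b:  dst = a * b + dst
;;
;; All three compute the same fma, so each alternative merely ties a
;; different input to the output and lets the register allocator choose.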
4299
4300 ;; Assume AVX-512F as the baseline.
4301 (define_mode_iterator VF_SF_AVX512VL
4302 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
4303 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
4304
4305 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
4306 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4307 (fma:VF_SF_AVX512VL
4308 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4309 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4310 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4311 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4312 "@
4313 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4314 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4315 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4316 [(set_attr "type" "ssemuladd")
4317 (set_attr "mode" "<MODE>")])
4318
4319 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
4320 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4321 (vec_merge:VF_AVX512VL
4322 (fma:VF_AVX512VL
4323 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4324 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4325 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4326 (match_dup 1)
4327 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4328 "TARGET_AVX512F && <round_mode512bit_condition>"
4329 "@
4330 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4331 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4332 [(set_attr "type" "ssemuladd")
4333 (set_attr "mode" "<MODE>")])
4334
4335 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
4336 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4337 (vec_merge:VF_AVX512VL
4338 (fma:VF_AVX512VL
4339 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4340 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4341 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4342 (match_dup 3)
4343 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4344 "TARGET_AVX512F"
4345 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4346 [(set_attr "type" "ssemuladd")
4347 (set_attr "mode" "<MODE>")])
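;; Masking note: the _mask pattern keeps operand 1 in masked-off lanes,
;; while the _mask3 pattern just above keeps operand 3 (the addend); these
;; correspond to the _mm512_mask_fmadd_ps and _mm512_mask3_fmadd_ps
;; intrinsics respectively.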
4348
4349 (define_insn "*fma_fmsub_<mode>"
4350 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4351 (fma:FMAMODE
4352 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4353 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4354 (neg:FMAMODE
4355 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4356 "TARGET_FMA || TARGET_FMA4"
4357 "@
4358 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4359 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4360 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4361 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4362 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4363 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4364 (set_attr "type" "ssemuladd")
4365 (set_attr "mode" "<MODE>")])
4366
4367 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4368 [(match_operand:VF_AVX512VL 0 "register_operand")
4369 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4370 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4371 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4372 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4373 "TARGET_AVX512F && <round_mode512bit_condition>"
4374 {
4375 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4376 operands[0], operands[1], operands[2], operands[3],
4377 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4378 DONE;
4379 })
4380
4381 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4382 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4383 (fma:VF_SF_AVX512VL
4384 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4385 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4386 (neg:VF_SF_AVX512VL
4387 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4388 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4389 "@
4390 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4391 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4392 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4393 [(set_attr "type" "ssemuladd")
4394 (set_attr "mode" "<MODE>")])
4395
4396 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4397 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4398 (vec_merge:VF_AVX512VL
4399 (fma:VF_AVX512VL
4400 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4401 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4402 (neg:VF_AVX512VL
4403 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4404 (match_dup 1)
4405 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4406 "TARGET_AVX512F"
4407 "@
4408 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4409 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4410 [(set_attr "type" "ssemuladd")
4411 (set_attr "mode" "<MODE>")])
4412
4413 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4414 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4415 (vec_merge:VF_AVX512VL
4416 (fma:VF_AVX512VL
4417 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4418 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4419 (neg:VF_AVX512VL
4420 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4421 (match_dup 3)
4422 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4423 "TARGET_AVX512F && <round_mode512bit_condition>"
4424 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4425 [(set_attr "type" "ssemuladd")
4426 (set_attr "mode" "<MODE>")])
4427
4428 (define_insn "*fma_fnmadd_<mode>"
4429 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4430 (fma:FMAMODE
4431 (neg:FMAMODE
4432 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4433 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4434 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4435 "TARGET_FMA || TARGET_FMA4"
4436 "@
4437 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4438 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4439 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4440 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4441 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4442 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4443 (set_attr "type" "ssemuladd")
4444 (set_attr "mode" "<MODE>")])
4445
4446 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4447 [(match_operand:VF_AVX512VL 0 "register_operand")
4448 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4449 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4450 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4451 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4452 "TARGET_AVX512F && <round_mode512bit_condition>"
4453 {
4454 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4455 operands[0], operands[1], operands[2], operands[3],
4456 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4457 DONE;
4458 })
4459
4460 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4461 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4462 (fma:VF_SF_AVX512VL
4463 (neg:VF_SF_AVX512VL
4464 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4465 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4466 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4467 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4468 "@
4469 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4470 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4471 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4472 [(set_attr "type" "ssemuladd")
4473 (set_attr "mode" "<MODE>")])
4474
4475 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4476 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4477 (vec_merge:VF_AVX512VL
4478 (fma:VF_AVX512VL
4479 (neg:VF_AVX512VL
4480 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4481 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4482 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4483 (match_dup 1)
4484 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4485 "TARGET_AVX512F && <round_mode512bit_condition>"
4486 "@
4487 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4488 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4489 [(set_attr "type" "ssemuladd")
4490 (set_attr "mode" "<MODE>")])
4491
4492 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4493 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4494 (vec_merge:VF_AVX512VL
4495 (fma:VF_AVX512VL
4496 (neg:VF_AVX512VL
4497 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4498 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4499 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4500 (match_dup 3)
4501 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4502 "TARGET_AVX512F && <round_mode512bit_condition>"
4503 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4504 [(set_attr "type" "ssemuladd")
4505 (set_attr "mode" "<MODE>")])
4506
4507 (define_insn "*fma_fnmsub_<mode>"
4508 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4509 (fma:FMAMODE
4510 (neg:FMAMODE
4511 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4512 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4513 (neg:FMAMODE
4514 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4515 "TARGET_FMA || TARGET_FMA4"
4516 "@
4517 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4518 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4519 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4520 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4521 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4522 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4523 (set_attr "type" "ssemuladd")
4524 (set_attr "mode" "<MODE>")])
4525
4526 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4527 [(match_operand:VF_AVX512VL 0 "register_operand")
4528 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4529 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4530 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4531 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4532 "TARGET_AVX512F && <round_mode512bit_condition>"
4533 {
4534 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4535 operands[0], operands[1], operands[2], operands[3],
4536 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4537 DONE;
4538 })
4539
4540 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4541 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4542 (fma:VF_SF_AVX512VL
4543 (neg:VF_SF_AVX512VL
4544 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4545 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4546 (neg:VF_SF_AVX512VL
4547 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4548 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4549 "@
4550 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4551 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4552 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4553 [(set_attr "type" "ssemuladd")
4554 (set_attr "mode" "<MODE>")])
4555
4556 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4557 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4558 (vec_merge:VF_AVX512VL
4559 (fma:VF_AVX512VL
4560 (neg:VF_AVX512VL
4561 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4562 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4563 (neg:VF_AVX512VL
4564 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4565 (match_dup 1)
4566 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4567 "TARGET_AVX512F && <round_mode512bit_condition>"
4568 "@
4569 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4570 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4571 [(set_attr "type" "ssemuladd")
4572 (set_attr "mode" "<MODE>")])
4573
4574 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4575 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4576 (vec_merge:VF_AVX512VL
4577 (fma:VF_AVX512VL
4578 (neg:VF_AVX512VL
4579 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4580 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4581 (neg:VF_AVX512VL
4582 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4583 (match_dup 3)
4584 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4585 "TARGET_AVX512F"
4586 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4587 [(set_attr "type" "ssemuladd")
4588 (set_attr "mode" "<MODE>")])
4589
4590 ;; FMA parallel floating point multiply addsub and subadd operations.
4591
4592 ;; It would be possible to represent these without the UNSPEC as
4593 ;;
4594 ;; (vec_merge
4595 ;; (fma op1 op2 op3)
4596 ;; (fma op1 op2 (neg op3))
4597 ;; (merge-const))
4598 ;;
4599 ;; But this doesn't seem useful in practice.
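;; Concretely, the per-element semantics are (lanes counted from 0):
;;
;;   fmaddsub: r[i] = odd (i) ? a[i] * b[i] + c[i] : a[i] * b[i] - c[i]
;;   fmsubadd: r[i] = odd (i) ? a[i] * b[i] - c[i] : a[i] * b[i] + c[i]
;;
;; matching _mm256_fmaddsub_ps and _mm256_fmsubadd_ps.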
4600
4601 (define_expand "fmaddsub_<mode>"
4602 [(set (match_operand:VF 0 "register_operand")
4603 (unspec:VF
4604 [(match_operand:VF 1 "nonimmediate_operand")
4605 (match_operand:VF 2 "nonimmediate_operand")
4606 (match_operand:VF 3 "nonimmediate_operand")]
4607 UNSPEC_FMADDSUB))]
4608 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4609
4610 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4611 [(match_operand:VF_AVX512VL 0 "register_operand")
4612 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4613 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4614 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4615 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4616 "TARGET_AVX512F"
4617 {
4618 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4619 operands[0], operands[1], operands[2], operands[3],
4620 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4621 DONE;
4622 })
4623
4624 (define_insn "*fma_fmaddsub_<mode>"
4625 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4626 (unspec:VF_128_256
4627 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4628 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4629 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4630 UNSPEC_FMADDSUB))]
4631 "TARGET_FMA || TARGET_FMA4"
4632 "@
4633 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4634 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4635 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4636 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4637 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4638 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4639 (set_attr "type" "ssemuladd")
4640 (set_attr "mode" "<MODE>")])
4641
4642 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4643 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4644 (unspec:VF_SF_AVX512VL
4645 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4646 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4647 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4648 UNSPEC_FMADDSUB))]
4649 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4650 "@
4651 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4652 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4653 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4654 [(set_attr "type" "ssemuladd")
4655 (set_attr "mode" "<MODE>")])
4656
4657 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4658 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4659 (vec_merge:VF_AVX512VL
4660 (unspec:VF_AVX512VL
4661 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4662 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4663 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4664 UNSPEC_FMADDSUB)
4665 (match_dup 1)
4666 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4667 "TARGET_AVX512F"
4668 "@
4669 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4670 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4671 [(set_attr "type" "ssemuladd")
4672 (set_attr "mode" "<MODE>")])
4673
4674 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4675 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4676 (vec_merge:VF_AVX512VL
4677 (unspec:VF_AVX512VL
4678 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4679 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4680 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4681 UNSPEC_FMADDSUB)
4682 (match_dup 3)
4683 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4684 "TARGET_AVX512F"
4685 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4686 [(set_attr "type" "ssemuladd")
4687 (set_attr "mode" "<MODE>")])
4688
4689 (define_insn "*fma_fmsubadd_<mode>"
4690 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4691 (unspec:VF_128_256
4692 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4693 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4694 (neg:VF_128_256
4695 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4696 UNSPEC_FMADDSUB))]
4697 "TARGET_FMA || TARGET_FMA4"
4698 "@
4699 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4700 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4701 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4702 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4703 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4704 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4705 (set_attr "type" "ssemuladd")
4706 (set_attr "mode" "<MODE>")])
4707
4708 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4709 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4710 (unspec:VF_SF_AVX512VL
4711 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4712 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4713 (neg:VF_SF_AVX512VL
4714 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4715 UNSPEC_FMADDSUB))]
4716 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4717 "@
4718 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4719 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4720 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4721 [(set_attr "type" "ssemuladd")
4722 (set_attr "mode" "<MODE>")])
4723
4724 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4725 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4726 (vec_merge:VF_AVX512VL
4727 (unspec:VF_AVX512VL
4728 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4729 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4730 (neg:VF_AVX512VL
4731 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
4732 UNSPEC_FMADDSUB)
4733 (match_dup 1)
4734 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4735 "TARGET_AVX512F"
4736 "@
4737 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4738 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4739 [(set_attr "type" "ssemuladd")
4740 (set_attr "mode" "<MODE>")])
4741
4742 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4743 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4744 (vec_merge:VF_AVX512VL
4745 (unspec:VF_AVX512VL
4746 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4747 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4748 (neg:VF_AVX512VL
4749 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4750 UNSPEC_FMADDSUB)
4751 (match_dup 3)
4752 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4753 "TARGET_AVX512F"
4754 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4755 [(set_attr "type" "ssemuladd")
4756 (set_attr "mode" "<MODE>")])
4757
4758 ;; FMA3 floating point scalar intrinsics. These merge the result with
4759 ;; the high-order elements of the destination register.
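;;
;; For illustration, the corresponding FMA3 intrinsic behaves roughly as
;; the C sketch below (hedged; element order per the Intel intrinsics):
;;
;;   #include <immintrin.h>
;;   __m128 f (__m128 a, __m128 b, __m128 c)
;;   {
;;     /* r[0] = a[0]*b[0] + c[0]; r[1..3] are copied from a.  */
;;     return _mm_fmadd_ss (a, b, c);
;;   }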
4760
4761 (define_expand "fmai_vmfmadd_<mode><round_name>"
4762 [(set (match_operand:VF_128 0 "register_operand")
4763 (vec_merge:VF_128
4764 (fma:VF_128
4765 (match_operand:VF_128 1 "register_operand")
4766 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
4767 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
4768 (match_dup 1)
4769 (const_int 1)))]
4770 "TARGET_FMA")
4771
4772 (define_expand "fmai_vmfmsub_<mode><round_name>"
4773 [(set (match_operand:VF_128 0 "register_operand")
4774 (vec_merge:VF_128
4775 (fma:VF_128
4776 (match_operand:VF_128 1 "register_operand")
4777 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
4778 (neg:VF_128
4779 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
4780 (match_dup 1)
4781 (const_int 1)))]
4782 "TARGET_FMA")
4783
4784 (define_expand "fmai_vmfnmadd_<mode><round_name>"
4785 [(set (match_operand:VF_128 0 "register_operand")
4786 (vec_merge:VF_128
4787 (fma:VF_128
4788 (neg:VF_128
4789 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
4790 (match_operand:VF_128 1 "register_operand")
4791 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
4792 (match_dup 1)
4793 (const_int 1)))]
4794 "TARGET_FMA")
4795
4796 (define_expand "fmai_vmfnmsub_<mode><round_name>"
4797 [(set (match_operand:VF_128 0 "register_operand")
4798 (vec_merge:VF_128
4799 (fma:VF_128
4800 (neg:VF_128
4801 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
4802 (match_operand:VF_128 1 "register_operand")
4803 (neg:VF_128
4804 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
4805 (match_dup 1)
4806 (const_int 1)))]
4807 "TARGET_FMA")
4808
4809 (define_insn "*fmai_fmadd_<mode>"
4810 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4811 (vec_merge:VF_128
4812 (fma:VF_128
4813 (match_operand:VF_128 1 "register_operand" "0,0")
4814 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
4815 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4816 (match_dup 1)
4817 (const_int 1)))]
4818 "TARGET_FMA || TARGET_AVX512F"
4819 "@
4820 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4821 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4822 [(set_attr "type" "ssemuladd")
4823 (set_attr "mode" "<MODE>")])
4824
4825 (define_insn "*fmai_fmsub_<mode>"
4826 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4827 (vec_merge:VF_128
4828 (fma:VF_128
4829 (match_operand:VF_128 1 "register_operand" "0,0")
4830 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4831 (neg:VF_128
4832 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4833 (match_dup 1)
4834 (const_int 1)))]
4835 "TARGET_FMA || TARGET_AVX512F"
4836 "@
4837 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4838 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4839 [(set_attr "type" "ssemuladd")
4840 (set_attr "mode" "<MODE>")])
4841
4842 (define_insn "*fmai_fnmadd_<mode><round_name>"
4843 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4844 (vec_merge:VF_128
4845 (fma:VF_128
4846 (neg:VF_128
4847 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4848 (match_operand:VF_128 1 "register_operand" "0,0")
4849 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4850 (match_dup 1)
4851 (const_int 1)))]
4852 "TARGET_FMA || TARGET_AVX512F"
4853 "@
4854 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4855 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4856 [(set_attr "type" "ssemuladd")
4857 (set_attr "mode" "<MODE>")])
4858
4859 (define_insn "*fmai_fnmsub_<mode><round_name>"
4860 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4861 (vec_merge:VF_128
4862 (fma:VF_128
4863 (neg:VF_128
4864 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4865 (match_operand:VF_128 1 "register_operand" "0,0")
4866 (neg:VF_128
4867 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4868 (match_dup 1)
4869 (const_int 1)))]
4870 "TARGET_FMA || TARGET_AVX512F"
4871 "@
4872 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4873 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4874 [(set_attr "type" "ssemuladd")
4875 (set_attr "mode" "<MODE>")])
4876
4877 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
4878 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4879 (vec_merge:VF_128
4880 (vec_merge:VF_128
4881 (fma:VF_128
4882 (match_operand:VF_128 1 "register_operand" "0,0")
4883 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4884 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4885 (match_dup 1)
4886 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4887 (match_dup 1)
4888 (const_int 1)))]
4889 "TARGET_AVX512F"
4890 "@
4891 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4892 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4893 [(set_attr "type" "ssemuladd")
4894 (set_attr "mode" "<MODE>")])
4895
4896 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
4897 [(set (match_operand:VF_128 0 "register_operand" "=v")
4898 (vec_merge:VF_128
4899 (vec_merge:VF_128
4900 (fma:VF_128
4901 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4902 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
4903 (match_operand:VF_128 3 "register_operand" "0"))
4904 (match_dup 3)
4905 (match_operand:QI 4 "register_operand" "Yk"))
4906 (match_dup 3)
4907 (const_int 1)))]
4908 "TARGET_AVX512F"
4909 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4910 [(set_attr "type" "ssemuladd")
4911 (set_attr "mode" "<MODE>")])
4912
4913 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
4914 [(match_operand:VF_128 0 "register_operand")
4915 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
4916 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
4917 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
4918 (match_operand:QI 4 "register_operand")]
4919 "TARGET_AVX512F"
4920 {
4921 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
4922 operands[0], operands[1], operands[2], operands[3],
4923 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4924 DONE;
4925 })
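;;
;; The _mask, _mask3 and _maskz scalar variants differ only in what the low
;; element becomes when mask bit 0 is clear: the first source, the addend,
;; or zero, respectively.  A hedged sketch using the standard AVX-512
;; intrinsics (illustrative only):
;;
;;   #include <immintrin.h>
;;   __m128 merge_src (__m128 a, __mmask8 k, __m128 b, __m128 c)
;;   { return _mm_mask_fmadd_ss (a, k, b, c); }   /* keep a[0] if !k0 */
;;   __m128 merge_addend (__m128 a, __m128 b, __m128 c, __mmask8 k)
;;   { return _mm_mask3_fmadd_ss (a, b, c, k); }  /* keep c[0] if !k0 */
;;   __m128 zero_lane (__mmask8 k, __m128 a, __m128 b, __m128 c)
;;   { return _mm_maskz_fmadd_ss (k, a, b, c); }  /* 0.0 if !k0 */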
4926
4927 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
4928 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4929 (vec_merge:VF_128
4930 (vec_merge:VF_128
4931 (fma:VF_128
4932 (match_operand:VF_128 1 "register_operand" "0,0")
4933 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4934 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4935 (match_operand:VF_128 4 "const0_operand" "C,C")
4936 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4937 (match_dup 1)
4938 (const_int 1)))]
4939 "TARGET_AVX512F"
4940 "@
4941 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4942 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4943 [(set_attr "type" "ssemuladd")
4944 (set_attr "mode" "<MODE>")])
4945
4946 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
4947 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4948 (vec_merge:VF_128
4949 (vec_merge:VF_128
4950 (fma:VF_128
4951 (match_operand:VF_128 1 "register_operand" "0,0")
4952 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4953 (neg:VF_128
4954 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4955 (match_dup 1)
4956 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4957 (match_dup 1)
4958 (const_int 1)))]
4959 "TARGET_AVX512F"
4960 "@
4961 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4962 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4963 [(set_attr "type" "ssemuladd")
4964 (set_attr "mode" "<MODE>")])
4965
4966 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
4967 [(set (match_operand:VF_128 0 "register_operand" "=v")
4968 (vec_merge:VF_128
4969 (vec_merge:VF_128
4970 (fma:VF_128
4971 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4972 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
4973 (neg:VF_128
4974 (match_operand:VF_128 3 "register_operand" "0")))
4975 (match_dup 3)
4976 (match_operand:QI 4 "register_operand" "Yk"))
4977 (match_dup 3)
4978 (const_int 1)))]
4979 "TARGET_AVX512F"
4980 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4981 [(set_attr "type" "ssemuladd")
4982 (set_attr "mode" "<MODE>")])
4983
4984 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
4985 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4986 (vec_merge:VF_128
4987 (vec_merge:VF_128
4988 (fma:VF_128
4989 (match_operand:VF_128 1 "register_operand" "0,0")
4990 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4991 (neg:VF_128
4992 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4993 (match_operand:VF_128 4 "const0_operand" "C,C")
4994 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4995 (match_dup 1)
4996 (const_int 1)))]
4997 "TARGET_AVX512F"
4998 "@
4999 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5000 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5001 [(set_attr "type" "ssemuladd")
5002 (set_attr "mode" "<MODE>")])
5003
5004 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
5005 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5006 (vec_merge:VF_128
5007 (vec_merge:VF_128
5008 (fma:VF_128
5009 (neg:VF_128
5010 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5011 (match_operand:VF_128 1 "register_operand" "0,0")
5012 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5013 (match_dup 1)
5014 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5015 (match_dup 1)
5016 (const_int 1)))]
5017 "TARGET_AVX512F"
5018 "@
5019 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5020 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5021 [(set_attr "type" "ssemuladd")
5022 (set_attr "mode" "<MODE>")])
5023
5024 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
5025 [(set (match_operand:VF_128 0 "register_operand" "=v")
5026 (vec_merge:VF_128
5027 (vec_merge:VF_128
5028 (fma:VF_128
5029 (neg:VF_128
5030 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
5031 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5032 (match_operand:VF_128 3 "register_operand" "0"))
5033 (match_dup 3)
5034 (match_operand:QI 4 "register_operand" "Yk"))
5035 (match_dup 3)
5036 (const_int 1)))]
5037 "TARGET_AVX512F"
5038 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5039 [(set_attr "type" "ssemuladd")
5040 (set_attr "mode" "<MODE>")])
5041
5042 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
5043 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5044 (vec_merge:VF_128
5045 (vec_merge:VF_128
5046 (fma:VF_128
5047 (neg:VF_128
5048 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5049 (match_operand:VF_128 1 "register_operand" "0,0")
5050 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5051 (match_operand:VF_128 4 "const0_operand" "C,C")
5052 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5053 (match_dup 1)
5054 (const_int 1)))]
5055 "TARGET_AVX512F"
5056 "@
5057 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5058 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5059 [(set_attr "type" "ssemuladd")
5060 (set_attr "mode" "<MODE>")])
5061
5062 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
5063 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5064 (vec_merge:VF_128
5065 (vec_merge:VF_128
5066 (fma:VF_128
5067 (neg:VF_128
5068 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5069 (match_operand:VF_128 1 "register_operand" "0,0")
5070 (neg:VF_128
5071 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5072 (match_dup 1)
5073 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5074 (match_dup 1)
5075 (const_int 1)))]
5076 "TARGET_AVX512F"
5077 "@
5078 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5079 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5080 [(set_attr "type" "ssemuladd")
5081 (set_attr "mode" "<MODE>")])
5082
5083 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
5084 [(set (match_operand:VF_128 0 "register_operand" "=v")
5085 (vec_merge:VF_128
5086 (vec_merge:VF_128
5087 (fma:VF_128
5088 (neg:VF_128
5089 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
5090 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5091 (neg:VF_128
5092 (match_operand:VF_128 3 "register_operand" "0")))
5093 (match_dup 3)
5094 (match_operand:QI 4 "register_operand" "Yk"))
5095 (match_dup 3)
5096 (const_int 1)))]
5097 "TARGET_AVX512F"
5098 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5099 [(set_attr "type" "ssemuladd")
5100 (set_attr "mode" "<MODE>")])
5101
5102 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
5103 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5104 (vec_merge:VF_128
5105 (vec_merge:VF_128
5106 (fma:VF_128
5107 (neg:VF_128
5108 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5109 (match_operand:VF_128 1 "register_operand" "0,0")
5110 (neg:VF_128
5111 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5112 (match_operand:VF_128 4 "const0_operand" "C,C")
5113 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5114 (match_dup 1)
5115 (const_int 1)))]
5116 "TARGET_AVX512F"
5117 "@
5118 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5119 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5120 [(set_attr "type" "ssemuladd")
5121 (set_attr "mode" "<MODE>")])
5122
5123 ;; FMA4 floating point scalar intrinsics. These write the
5124 ;; entire destination register, with the high-order elements zeroed.
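;;
;; In contrast with the FMA3 forms above, a hedged C model of the FMA4
;; scalar semantics (illustrative only, not taken from this file):
;;
;;   /* r[0] = a[0]*b[0] + c[0]; r[1..3] = 0.0 -- upper lanes zeroed.  */
;;   void fma4_ss (float *r, const float *a, const float *b, const float *c)
;;   {
;;     r[0] = a[0]*b[0] + c[0];
;;     r[1] = r[2] = r[3] = 0.0f;
;;   }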
5125
5126 (define_expand "fma4i_vmfmadd_<mode>"
5127 [(set (match_operand:VF_128 0 "register_operand")
5128 (vec_merge:VF_128
5129 (fma:VF_128
5130 (match_operand:VF_128 1 "nonimmediate_operand")
5131 (match_operand:VF_128 2 "nonimmediate_operand")
5132 (match_operand:VF_128 3 "nonimmediate_operand"))
5133 (match_dup 4)
5134 (const_int 1)))]
5135 "TARGET_FMA4"
5136 "operands[4] = CONST0_RTX (<MODE>mode);")
5137
5138 (define_insn "*fma4i_vmfmadd_<mode>"
5139 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5140 (vec_merge:VF_128
5141 (fma:VF_128
5142 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5143 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5144 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5145 (match_operand:VF_128 4 "const0_operand")
5146 (const_int 1)))]
5147 "TARGET_FMA4"
5148 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5149 [(set_attr "type" "ssemuladd")
5150 (set_attr "mode" "<MODE>")])
5151
5152 (define_insn "*fma4i_vmfmsub_<mode>"
5153 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5154 (vec_merge:VF_128
5155 (fma:VF_128
5156 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5157 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5158 (neg:VF_128
5159 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5160 (match_operand:VF_128 4 "const0_operand")
5161 (const_int 1)))]
5162 "TARGET_FMA4"
5163 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5164 [(set_attr "type" "ssemuladd")
5165 (set_attr "mode" "<MODE>")])
5166
5167 (define_insn "*fma4i_vmfnmadd_<mode>"
5168 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5169 (vec_merge:VF_128
5170 (fma:VF_128
5171 (neg:VF_128
5172 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5173 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5174 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5175 (match_operand:VF_128 4 "const0_operand")
5176 (const_int 1)))]
5177 "TARGET_FMA4"
5178 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5179 [(set_attr "type" "ssemuladd")
5180 (set_attr "mode" "<MODE>")])
5181
5182 (define_insn "*fma4i_vmfnmsub_<mode>"
5183 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5184 (vec_merge:VF_128
5185 (fma:VF_128
5186 (neg:VF_128
5187 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5188 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5189 (neg:VF_128
5190 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5191 (match_operand:VF_128 4 "const0_operand")
5192 (const_int 1)))]
5193 "TARGET_FMA4"
5194 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5195 [(set_attr "type" "ssemuladd")
5196 (set_attr "mode" "<MODE>")])
5197
5198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5199 ;;
5200 ;; Parallel single-precision floating point conversion operations
5201 ;;
5202 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5203
5204 (define_insn_and_split "sse_cvtpi2ps"
5205 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
5206 (vec_merge:V4SF
5207 (vec_duplicate:V4SF
5208 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5209 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5210 (const_int 3)))
5211 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5212 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5213 "@
5214 cvtpi2ps\t{%2, %0|%0, %2}
5215 #
5216 #"
5217 "TARGET_SSE2 && reload_completed
5218 && SSE_REG_P (operands[2])"
5219 [(const_int 0)]
5220 {
5221 rtx op2 = lowpart_subreg (V4SImode, operands[2],
5222 GET_MODE (operands[2]));
5223 /* Generate SSE2 cvtdq2ps. */
5224 emit_insn (gen_floatv4siv4sf2 (operands[3], op2));
5225
5226 /* Merge operands[3] with operands[0]. */
5227 rtx mask, op1;
5228 if (TARGET_AVX)
5229 {
5230 mask = gen_rtx_PARALLEL (VOIDmode,
5231 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5232 GEN_INT (6), GEN_INT (7)));
5233 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5234 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5235 emit_insn (gen_rtx_SET (operands[0], op2));
5236 }
5237 else
5238 {
5239 /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
5240 mask = gen_rtx_PARALLEL (VOIDmode,
5241 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5242 GEN_INT (4), GEN_INT (5)));
5243 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5244 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5245 emit_insn (gen_rtx_SET (operands[0], op2));
5246
5247 /* Swap bits 0:63 with bits 64:127. */
5248 mask = gen_rtx_PARALLEL (VOIDmode,
5249 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5250 GEN_INT (0), GEN_INT (1)));
5251 rtx dest = lowpart_subreg (V4SImode, operands[0],
5252 GET_MODE (operands[0]));
5253 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5254 emit_insn (gen_rtx_SET (dest, op1));
5255 }
5256 DONE;
5257 }
5258 [(set_attr "mmx_isa" "native,sse_noavx,avx")
5259 (set_attr "type" "ssecvt")
5260 (set_attr "mode" "V4SF")])
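;;
;; A hedged C model of what cvtpi2ps computes (the vec_merge mask of 3
;; above keeps elements 2 and 3 of operand 1); illustrative only:
;;
;;   void cvtpi2ps (float r[4], const float op1[4], const int src[2])
;;   {
;;     r[0] = (float) src[0];      /* converted low pair */
;;     r[1] = (float) src[1];
;;     r[2] = op1[2];              /* upper pair passes through */
;;     r[3] = op1[3];
;;   }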
5261
5262 (define_insn_and_split "sse_cvtps2pi"
5263 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5264 (vec_select:V2SI
5265 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
5266 UNSPEC_FIX_NOTRUNC)
5267 (parallel [(const_int 0) (const_int 1)])))]
5268 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5269 "@
5270 cvtps2pi\t{%1, %0|%0, %q1}
5271 #"
5272 "TARGET_SSE2 && reload_completed
5273 && SSE_REG_P (operands[0])"
5274 [(const_int 0)]
5275 {
5276 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5277 GET_MODE (operands[1]));
5278 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5279 GET_MODE (operands[0]));
5280
5281 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5282 emit_insn (gen_rtx_SET (tmp, op1));
5283
5284 rtx dest = lowpart_subreg (V4SImode, operands[0],
5285 GET_MODE (operands[0]));
5286 emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
5287 DONE;
5288 }
5289 [(set_attr "isa" "*,sse2")
5290 (set_attr "mmx_isa" "native,*")
5291 (set_attr "type" "ssecvt")
5292 (set_attr "unit" "mmx,*")
5293 (set_attr "mode" "DI")])
5294
5295 (define_insn_and_split "sse_cvttps2pi"
5296 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5297 (vec_select:V2SI
5298 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
5299 (parallel [(const_int 0) (const_int 1)])))]
5300 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5301 "@
5302 cvttps2pi\t{%1, %0|%0, %q1}
5303 #"
5304 "TARGET_SSE2 && reload_completed
5305 && SSE_REG_P (operands[0])"
5306 [(const_int 0)]
5307 {
5308 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5309 GET_MODE (operands[1]));
5310 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5311 GET_MODE (operands[0]));
5312
5313 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5314 emit_insn (gen_rtx_SET (tmp, op1));
5315
5316 rtx dest = lowpart_subreg (V4SImode, operands[0],
5317 GET_MODE (operands[0]));
5318 emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
5319 DONE;
5320 }
5321 [(set_attr "isa" "*,sse2")
5322 (set_attr "mmx_isa" "native,*")
5323 (set_attr "type" "ssecvt")
5324 (set_attr "unit" "mmx,*")
5325 (set_attr "prefix_rep" "0")
5326 (set_attr "mode" "SF")])
5327
5328 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5329 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5330 (vec_merge:V4SF
5331 (vec_duplicate:V4SF
5332 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5333 (match_operand:V4SF 1 "register_operand" "0,0,v")
5334 (const_int 1)))]
5335 "TARGET_SSE"
5336 "@
5337 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5338 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5339 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5340 [(set_attr "isa" "noavx,noavx,avx")
5341 (set_attr "type" "sseicvt")
5342 (set_attr "athlon_decode" "vector,double,*")
5343 (set_attr "amdfam10_decode" "vector,double,*")
5344 (set_attr "bdver1_decode" "double,direct,*")
5345 (set_attr "btver2_decode" "double,double,double")
5346 (set_attr "znver1_decode" "double,double,double")
5347 (set (attr "length_vex")
5348 (if_then_else
5349 (and (match_test "<MODE>mode == DImode")
5350 (eq_attr "alternative" "2"))
5351 (const_string "4")
5352 (const_string "*")))
5353 (set (attr "prefix_rex")
5354 (if_then_else
5355 (and (match_test "<MODE>mode == DImode")
5356 (eq_attr "alternative" "0,1"))
5357 (const_string "1")
5358 (const_string "*")))
5359 (set_attr "prefix" "orig,orig,maybe_evex")
5360 (set_attr "mode" "SF")])
5361
5362 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5363 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5364 (unspec:SWI48
5365 [(vec_select:SF
5366 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5367 (parallel [(const_int 0)]))]
5368 UNSPEC_FIX_NOTRUNC))]
5369 "TARGET_SSE"
5370 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5371 [(set_attr "type" "sseicvt")
5372 (set_attr "athlon_decode" "double,vector")
5373 (set_attr "bdver1_decode" "double,double")
5374 (set_attr "prefix_rep" "1")
5375 (set_attr "prefix" "maybe_vex")
5376 (set_attr "mode" "<MODE>")])
5377
5378 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5379 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5380 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5381 UNSPEC_FIX_NOTRUNC))]
5382 "TARGET_SSE"
5383 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
5384 [(set_attr "type" "sseicvt")
5385 (set_attr "athlon_decode" "double,vector")
5386 (set_attr "amdfam10_decode" "double,double")
5387 (set_attr "bdver1_decode" "double,double")
5388 (set_attr "prefix_rep" "1")
5389 (set_attr "prefix" "maybe_vex")
5390 (set_attr "mode" "<MODE>")])
5391
5392 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5393 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5394 (fix:SWI48
5395 (vec_select:SF
5396 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5397 (parallel [(const_int 0)]))))]
5398 "TARGET_SSE"
5399 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5400 [(set_attr "type" "sseicvt")
5401 (set_attr "athlon_decode" "double,vector")
5402 (set_attr "amdfam10_decode" "double,double")
5403 (set_attr "bdver1_decode" "double,double")
5404 (set_attr "prefix_rep" "1")
5405 (set_attr "prefix" "maybe_vex")
5406 (set_attr "mode" "<MODE>")])
5407
5408 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5409 [(set (match_operand:VF_128 0 "register_operand" "=v")
5410 (vec_merge:VF_128
5411 (vec_duplicate:VF_128
5412 (unsigned_float:<ssescalarmode>
5413 (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5414 (match_operand:VF_128 1 "register_operand" "v")
5415 (const_int 1)))]
5416 "TARGET_AVX512F && <round_modev4sf_condition>"
5417 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5418 [(set_attr "type" "sseicvt")
5419 (set_attr "prefix" "evex")
5420 (set_attr "mode" "<ssescalarmode>")])
5421
5422 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5423 [(set (match_operand:VF_128 0 "register_operand" "=v")
5424 (vec_merge:VF_128
5425 (vec_duplicate:VF_128
5426 (unsigned_float:<ssescalarmode>
5427 (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5428 (match_operand:VF_128 1 "register_operand" "v")
5429 (const_int 1)))]
5430 "TARGET_AVX512F && TARGET_64BIT"
5431 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5432 [(set_attr "type" "sseicvt")
5433 (set_attr "prefix" "evex")
5434 (set_attr "mode" "<ssescalarmode>")])
5435
5436 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5437 [(set (match_operand:VF1 0 "register_operand" "=x,v")
5438 (float:VF1
5439 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5440 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5441 "@
5442 cvtdq2ps\t{%1, %0|%0, %1}
5443 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5444 [(set_attr "isa" "noavx,avx")
5445 (set_attr "type" "ssecvt")
5446 (set_attr "prefix" "maybe_vex")
5447 (set_attr "mode" "<sseinsnmode>")])
5448
5449 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5450 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5451 (unsigned_float:VF1_AVX512VL
5452 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5453 "TARGET_AVX512F"
5454 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5455 [(set_attr "type" "ssecvt")
5456 (set_attr "prefix" "evex")
5457 (set_attr "mode" "<MODE>")])
5458
5459 (define_expand "floatuns<sseintvecmodelower><mode>2"
5460 [(match_operand:VF1 0 "register_operand")
5461 (match_operand:<sseintvecmode> 1 "register_operand")]
5462 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5463 {
5464 if (<MODE>mode == V16SFmode)
5465 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5466 else
5467 if (TARGET_AVX512VL)
5468 {
5469 if (<MODE>mode == V4SFmode)
5470 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5471 else
5472 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5473 }
5474 else
5475 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5476
5477 DONE;
5478 })
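;;
;; Without AVX-512 there is no unsigned-int-to-float instruction, so the
;; fallback expands a multi-insn sequence.  One common way to model the
;; conversion in C (a generic sketch, not necessarily the exact sequence
;; ix86_expand_vector_convert_uns_vsivsf emits) is to split each element
;; into halves that fit a signed conversion:
;;
;;   float uns_to_float (unsigned int x)
;;   {
;;     /* Both halves are < 2^16, so the signed conversions are exact.  */
;;     return (float) (int) (x >> 16) * 65536.0f + (float) (int) (x & 0xffff);
;;   }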
5479
5480
5481 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
5482 (define_mode_attr sf2simodelower
5483 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
5484
5485 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5486 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5487 (unspec:VI4_AVX
5488 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5489 UNSPEC_FIX_NOTRUNC))]
5490 "TARGET_SSE2 && <mask_mode512bit_condition>"
5491 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5492 [(set_attr "type" "ssecvt")
5493 (set (attr "prefix_data16")
5494 (if_then_else
5495 (match_test "TARGET_AVX")
5496 (const_string "*")
5497 (const_string "1")))
5498 (set_attr "prefix" "maybe_vex")
5499 (set_attr "mode" "<sseinsnmode>")])
5500
5501 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5502 [(set (match_operand:V16SI 0 "register_operand" "=v")
5503 (unspec:V16SI
5504 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5505 UNSPEC_FIX_NOTRUNC))]
5506 "TARGET_AVX512F"
5507 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5508 [(set_attr "type" "ssecvt")
5509 (set_attr "prefix" "evex")
5510 (set_attr "mode" "XI")])
5511
5512 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5513 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5514 (unspec:VI4_AVX512VL
5515 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5516 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5517 "TARGET_AVX512F"
5518 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5519 [(set_attr "type" "ssecvt")
5520 (set_attr "prefix" "evex")
5521 (set_attr "mode" "<sseinsnmode>")])
5522
5523 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5524 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5525 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5526 UNSPEC_FIX_NOTRUNC))]
5527 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5528 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5529 [(set_attr "type" "ssecvt")
5530 (set_attr "prefix" "evex")
5531 (set_attr "mode" "<sseinsnmode>")])
5532
5533 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5534 [(set (match_operand:V2DI 0 "register_operand" "=v")
5535 (unspec:V2DI
5536 [(vec_select:V2SF
5537 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5538 (parallel [(const_int 0) (const_int 1)]))]
5539 UNSPEC_FIX_NOTRUNC))]
5540 "TARGET_AVX512DQ && TARGET_AVX512VL"
5541 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5542 [(set_attr "type" "ssecvt")
5543 (set_attr "prefix" "evex")
5544 (set_attr "mode" "TI")])
5545
5546 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5547 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5548 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5549 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5550 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5551 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5552 [(set_attr "type" "ssecvt")
5553 (set_attr "prefix" "evex")
5554 (set_attr "mode" "<sseinsnmode>")])
5555
5556 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5557 [(set (match_operand:V2DI 0 "register_operand" "=v")
5558 (unspec:V2DI
5559 [(vec_select:V2SF
5560 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5561 (parallel [(const_int 0) (const_int 1)]))]
5562 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5563 "TARGET_AVX512DQ && TARGET_AVX512VL"
5564 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5565 [(set_attr "type" "ssecvt")
5566 (set_attr "prefix" "evex")
5567 (set_attr "mode" "TI")])
5568
5569 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5570 [(set (match_operand:V16SI 0 "register_operand" "=v")
5571 (any_fix:V16SI
5572 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5573 "TARGET_AVX512F"
5574 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5575 [(set_attr "type" "ssecvt")
5576 (set_attr "prefix" "evex")
5577 (set_attr "mode" "XI")])
5578
5579 (define_insn "fix_truncv8sfv8si2<mask_name>"
5580 [(set (match_operand:V8SI 0 "register_operand" "=v")
5581 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5582 "TARGET_AVX && <mask_avx512vl_condition>"
5583 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5584 [(set_attr "type" "ssecvt")
5585 (set_attr "prefix" "<mask_prefix>")
5586 (set_attr "mode" "OI")])
5587
5588 (define_insn "fix_truncv4sfv4si2<mask_name>"
5589 [(set (match_operand:V4SI 0 "register_operand" "=v")
5590 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5591 "TARGET_SSE2 && <mask_avx512vl_condition>"
5592 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5593 [(set_attr "type" "ssecvt")
5594 (set (attr "prefix_rep")
5595 (if_then_else
5596 (match_test "TARGET_AVX")
5597 (const_string "*")
5598 (const_string "1")))
5599 (set (attr "prefix_data16")
5600 (if_then_else
5601 (match_test "TARGET_AVX")
5602 (const_string "*")
5603 (const_string "0")))
5604 (set_attr "prefix_data16" "0")
5605 (set_attr "prefix" "<mask_prefix2>")
5606 (set_attr "mode" "TI")])
5607
5608 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5609 [(match_operand:<sseintvecmode> 0 "register_operand")
5610 (match_operand:VF1 1 "register_operand")]
5611 "TARGET_SSE2"
5612 {
5613 if (<MODE>mode == V16SFmode)
5614 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5615 operands[1]));
5616 else
5617 {
5618 rtx tmp[3];
5619 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5620 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5621 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5622 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5623 }
5624 DONE;
5625 })
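;;
;; The non-AVX512 path converts unsigned via a signed truncating conversion
;; plus a correction: values >= 2^31 are biased down before the conversion
;; and the sign bit is patched back with an XOR.  A hedged scalar sketch of
;; that idea (illustrative only; ix86_expand_adjust_ufix_to_sfix_si
;; produces the vectorized equivalent):
;;
;;   unsigned int fixuns (float x)
;;   {
;;     if (x < 2147483648.0f)            /* fits in the signed range */
;;       return (unsigned int) (int) x;
;;     /* bias into the signed range, then flip the top bit back */
;;     return (unsigned int) (int) (x - 2147483648.0f) ^ 0x80000000u;
;;   }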
5626
5627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5628 ;;
5629 ;; Parallel double-precision floating point conversion operations
5630 ;;
5631 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5632
5633 (define_insn "sse2_cvtpi2pd"
5634 [(set (match_operand:V2DF 0 "register_operand" "=v,?!x")
5635 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,yBm")))]
5636 "TARGET_SSE2"
5637 "@
5638 %vcvtdq2pd\t{%1, %0|%0, %1}
5639 cvtpi2pd\t{%1, %0|%0, %1}"
5640 [(set_attr "mmx_isa" "*,native")
5641 (set_attr "type" "ssecvt")
5642 (set_attr "unit" "*,mmx")
5643 (set_attr "prefix_data16" "*,1")
5644 (set_attr "prefix" "maybe_vex,*")
5645 (set_attr "mode" "V2DF")])
5646
5647 (define_expand "floatv2siv2df2"
5648 [(set (match_operand:V2DF 0 "register_operand")
5649 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand")))]
5650 "TARGET_MMX_WITH_SSE")
5651
5652 (define_insn "floatunsv2siv2df2"
5653 [(set (match_operand:V2DF 0 "register_operand" "=v")
5654 (unsigned_float:V2DF
5655 (match_operand:V2SI 1 "nonimmediate_operand" "vm")))]
5656 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5657 "vcvtudq2pd\t{%1, %0|%0, %1}"
5658 [(set_attr "type" "ssecvt")
5659 (set_attr "prefix" "evex")
5660 (set_attr "mode" "V2DF")])
5661
5662 (define_insn "sse2_cvtpd2pi"
5663 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5664 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
5665 UNSPEC_FIX_NOTRUNC))]
5666 "TARGET_SSE2"
5667 "@
5668 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5669 cvtpd2pi\t{%1, %0|%0, %1}"
5670 [(set_attr "mmx_isa" "*,native")
5671 (set_attr "type" "ssecvt")
5672 (set_attr "unit" "*,mmx")
5673 (set_attr "amdfam10_decode" "double")
5674 (set_attr "athlon_decode" "vector")
5675 (set_attr "bdver1_decode" "double")
5676 (set_attr "prefix_data16" "*,1")
5677 (set_attr "prefix" "maybe_vex,*")
5678 (set_attr "mode" "TI")])
5679
5680 (define_insn "sse2_cvttpd2pi"
5681 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5682 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
5683 "TARGET_SSE2"
5684 "@
5685 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5686 cvttpd2pi\t{%1, %0|%0, %1}"
5687 [(set_attr "mmx_isa" "*,native")
5688 (set_attr "type" "ssecvt")
5689 (set_attr "unit" "*,mmx")
5690 (set_attr "amdfam10_decode" "double")
5691 (set_attr "athlon_decode" "vector")
5692 (set_attr "bdver1_decode" "double")
5693 (set_attr "prefix_data16" "*,1")
5694 (set_attr "prefix" "maybe_vex,*")
5695 (set_attr "mode" "TI")])
5696
5697 (define_expand "fix_truncv2dfv2si2"
5698 [(set (match_operand:V2SI 0 "register_operand")
5699 (fix:V2SI (match_operand:V2DF 1 "vector_operand")))]
5700 "TARGET_MMX_WITH_SSE")
5701
5702 (define_insn "fixuns_truncv2dfv2si2"
5703 [(set (match_operand:V2SI 0 "register_operand" "=v")
5704 (unsigned_fix:V2SI
5705 (match_operand:V2DF 1 "nonimmediate_operand" "vm")))]
5706 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5707 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
5708 [(set_attr "type" "ssecvt")
5709 (set_attr "prefix" "evex")
5710 (set_attr "mode" "TI")])
5711
5712 (define_insn "sse2_cvtsi2sd"
5713 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5714 (vec_merge:V2DF
5715 (vec_duplicate:V2DF
5716 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5717 (match_operand:V2DF 1 "register_operand" "0,0,v")
5718 (const_int 1)))]
5719 "TARGET_SSE2"
5720 "@
5721 cvtsi2sd{l}\t{%2, %0|%0, %2}
5722 cvtsi2sd{l}\t{%2, %0|%0, %2}
5723 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
5724 [(set_attr "isa" "noavx,noavx,avx")
5725 (set_attr "type" "sseicvt")
5726 (set_attr "athlon_decode" "double,direct,*")
5727 (set_attr "amdfam10_decode" "vector,double,*")
5728 (set_attr "bdver1_decode" "double,direct,*")
5729 (set_attr "btver2_decode" "double,double,double")
5730 (set_attr "znver1_decode" "double,double,double")
5731 (set_attr "prefix" "orig,orig,maybe_evex")
5732 (set_attr "mode" "DF")])
5733
5734 (define_insn "sse2_cvtsi2sdq<round_name>"
5735 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5736 (vec_merge:V2DF
5737 (vec_duplicate:V2DF
5738 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5739 (match_operand:V2DF 1 "register_operand" "0,0,v")
5740 (const_int 1)))]
5741 "TARGET_SSE2 && TARGET_64BIT"
5742 "@
5743 cvtsi2sd{q}\t{%2, %0|%0, %2}
5744 cvtsi2sd{q}\t{%2, %0|%0, %2}
5745 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5746 [(set_attr "isa" "noavx,noavx,avx")
5747 (set_attr "type" "sseicvt")
5748 (set_attr "athlon_decode" "double,direct,*")
5749 (set_attr "amdfam10_decode" "vector,double,*")
5750 (set_attr "bdver1_decode" "double,direct,*")
5751 (set_attr "length_vex" "*,*,4")
5752 (set_attr "prefix_rex" "1,1,*")
5753 (set_attr "prefix" "orig,orig,maybe_evex")
5754 (set_attr "mode" "DF")])
5755
5756 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
5757 [(set (match_operand:SWI48 0 "register_operand" "=r")
5758 (unspec:SWI48
5759 [(vec_select:SF
5760 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
5761 (parallel [(const_int 0)]))]
5762 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5763 "TARGET_AVX512F"
5764 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5765 [(set_attr "type" "sseicvt")
5766 (set_attr "prefix" "evex")
5767 (set_attr "mode" "<MODE>")])
5768
5769 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
5770 [(set (match_operand:SWI48 0 "register_operand" "=r")
5771 (unsigned_fix:SWI48
5772 (vec_select:SF
5773 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5774 (parallel [(const_int 0)]))))]
5775 "TARGET_AVX512F"
5776 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5777 [(set_attr "type" "sseicvt")
5778 (set_attr "prefix" "evex")
5779 (set_attr "mode" "<MODE>")])
5780
5781 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
5782 [(set (match_operand:SWI48 0 "register_operand" "=r")
5783 (unspec:SWI48
5784 [(vec_select:DF
5785 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
5786 (parallel [(const_int 0)]))]
5787 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5788 "TARGET_AVX512F"
5789 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5790 [(set_attr "type" "sseicvt")
5791 (set_attr "prefix" "evex")
5792 (set_attr "mode" "<MODE>")])
5793
5794 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
5795 [(set (match_operand:SWI48 0 "register_operand" "=r")
5796 (unsigned_fix:SWI48
5797 (vec_select:DF
5798 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5799 (parallel [(const_int 0)]))))]
5800 "TARGET_AVX512F"
5801 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5802 [(set_attr "type" "sseicvt")
5803 (set_attr "prefix" "evex")
5804 (set_attr "mode" "<MODE>")])
5805
5806 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
5807 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5808 (unspec:SWI48
5809 [(vec_select:DF
5810 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5811 (parallel [(const_int 0)]))]
5812 UNSPEC_FIX_NOTRUNC))]
5813 "TARGET_SSE2"
5814 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5815 [(set_attr "type" "sseicvt")
5816 (set_attr "athlon_decode" "double,vector")
5817 (set_attr "bdver1_decode" "double,double")
5818 (set_attr "btver2_decode" "double,double")
5819 (set_attr "prefix_rep" "1")
5820 (set_attr "prefix" "maybe_vex")
5821 (set_attr "mode" "<MODE>")])
5822
5823 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
5824 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5825 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
5826 UNSPEC_FIX_NOTRUNC))]
5827 "TARGET_SSE2"
5828 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
5829 [(set_attr "type" "sseicvt")
5830 (set_attr "athlon_decode" "double,vector")
5831 (set_attr "amdfam10_decode" "double,double")
5832 (set_attr "bdver1_decode" "double,double")
5833 (set_attr "prefix_rep" "1")
5834 (set_attr "prefix" "maybe_vex")
5835 (set_attr "mode" "<MODE>")])
5836
5837 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
5838 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5839 (fix:SWI48
5840 (vec_select:DF
5841 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
5842 (parallel [(const_int 0)]))))]
5843 "TARGET_SSE2"
5844 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5845 [(set_attr "type" "sseicvt")
5846 (set_attr "athlon_decode" "double,vector")
5847 (set_attr "amdfam10_decode" "double,double")
5848 (set_attr "bdver1_decode" "double,double")
5849 (set_attr "btver2_decode" "double,double")
5850 (set_attr "prefix_rep" "1")
5851 (set_attr "prefix" "maybe_vex")
5852 (set_attr "mode" "<MODE>")])
5853
5854 ;; For float<si2dfmodelower><mode>2 insn pattern
5855 (define_mode_attr si2dfmode
5856 [(V8DF "V8SI") (V4DF "V4SI")])
5857 (define_mode_attr si2dfmodelower
5858 [(V8DF "v8si") (V4DF "v4si")])
5859
5860 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
5861 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5862 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5863 "TARGET_AVX && <mask_mode512bit_condition>"
5864 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5865 [(set_attr "type" "ssecvt")
5866 (set_attr "prefix" "maybe_vex")
5867 (set_attr "mode" "<MODE>")])
5868
5869 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
5870 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5871 (any_float:VF2_AVX512VL
5872 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5873 "TARGET_AVX512DQ"
5874 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5875 [(set_attr "type" "ssecvt")
5876 (set_attr "prefix" "evex")
5877 (set_attr "mode" "<MODE>")])
5878
5879 ;; For float<floatunssuffix><sselongvecmodelower><mode>2 insn patterns
5880 (define_mode_attr qq2pssuff
5881 [(V8SF "") (V4SF "{y}")])
5882
5883 (define_mode_attr sselongvecmode
5884 [(V8SF "V8DI") (V4SF "V4DI")])
5885
5886 (define_mode_attr sselongvecmodelower
5887 [(V8SF "v8di") (V4SF "v4di")])
5888
5889 (define_mode_attr sseintvecmode3
5890 [(V8SF "XI") (V4SF "OI")
5891 (V8DF "OI") (V4DF "TI")])
5892
5893 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5894 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5895 (any_float:VF1_128_256VL
5896 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5897 "TARGET_AVX512DQ && <round_modev8sf_condition>"
5898 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5899 [(set_attr "type" "ssecvt")
5900 (set_attr "prefix" "evex")
5901 (set_attr "mode" "<MODE>")])
5902
5903 (define_expand "avx512dq_float<floatunssuffix>v2div2sf2"
5904 [(set (match_operand:V4SF 0 "register_operand" "=v")
5905 (vec_concat:V4SF
5906 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5907 (match_dup 2)))]
5908 "TARGET_AVX512DQ && TARGET_AVX512VL"
5909 "operands[2] = CONST0_RTX (V2SFmode);")
5910
5911 (define_insn "*avx512dq_float<floatunssuffix>v2div2sf2"
5912 [(set (match_operand:V4SF 0 "register_operand" "=v")
5913 (vec_concat:V4SF
5914 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5915 (match_operand:V2SF 2 "const0_operand" "C")))]
5916 "TARGET_AVX512DQ && TARGET_AVX512VL"
5917 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5918 [(set_attr "type" "ssecvt")
5919 (set_attr "prefix" "evex")
5920 (set_attr "mode" "V4SF")])
5921
5922 (define_expand "float<floatunssuffix>v2div2sf2"
5923 [(set (match_operand:V2SF 0 "register_operand")
5924 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))]
5925 "TARGET_AVX512DQ && TARGET_AVX512VL"
5926 {
5927 operands[0] = simplify_gen_subreg (V4SFmode, operands[0], V2SFmode, 0);
5928 emit_insn (gen_avx512dq_float<floatunssuffix>v2div2sf2
5929 (operands[0], operands[1]));
5930 DONE;
5931 })
5932
5933 (define_mode_attr vpckfloat_concat_mode
5934 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5935 (define_mode_attr vpckfloat_temp_mode
5936 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5937 (define_mode_attr vpckfloat_op_mode
5938 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
5939
5940 (define_expand "vec_pack<floatprefix>_float_<mode>"
5941 [(match_operand:<ssePSmode> 0 "register_operand")
5942 (any_float:<ssePSmode>
5943 (match_operand:VI8_AVX512VL 1 "register_operand"))
5944 (match_operand:VI8_AVX512VL 2 "register_operand")]
5945 "TARGET_AVX512DQ"
5946 {
5947 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5948 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5949 rtx (*gen) (rtx, rtx);
5950
5951 if (<MODE>mode == V2DImode)
5952 gen = gen_avx512dq_float<floatunssuffix>v2div2sf2;
5953 else
5954 gen = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5955 emit_insn (gen (r1, operands[1]));
5956 emit_insn (gen (r2, operands[2]));
5957 if (<MODE>mode == V2DImode)
5958 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
5959 else
5960 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
5961 r1, r2));
5962 DONE;
5963 })
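;;
;; The expander converts each 64-bit-integer source vector to a
;; single-precision vector of half the result width and then concatenates
;; the two halves.  A hedged scalar model of the result layout
;; (illustrative only):
;;
;;   /* n is the element count of one input; r has 2*n elements.  */
;;   void pack_float (float *r, const long long *a, const long long *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       {
;;         r[i]     = (float) a[i];
;;         r[n + i] = (float) b[i];
;;       }
;;   }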
5964
5965 (define_expand "float<floatunssuffix>v2div2sf2_mask"
5966 [(set (match_operand:V4SF 0 "register_operand" "=v")
5967 (vec_concat:V4SF
5968 (vec_merge:V2SF
5969 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5970 (vec_select:V2SF
5971 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5972 (parallel [(const_int 0) (const_int 1)]))
5973 (match_operand:QI 3 "register_operand" "Yk"))
5974 (match_dup 4)))]
5975 "TARGET_AVX512DQ && TARGET_AVX512VL"
5976 "operands[4] = CONST0_RTX (V2SFmode);")
5977
5978 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
5979 [(set (match_operand:V4SF 0 "register_operand" "=v")
5980 (vec_concat:V4SF
5981 (vec_merge:V2SF
5982 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5983 (vec_select:V2SF
5984 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5985 (parallel [(const_int 0) (const_int 1)]))
5986 (match_operand:QI 3 "register_operand" "Yk"))
5987 (match_operand:V2SF 4 "const0_operand" "C")))]
5988 "TARGET_AVX512DQ && TARGET_AVX512VL"
5989 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5990 [(set_attr "type" "ssecvt")
5991 (set_attr "prefix" "evex")
5992 (set_attr "mode" "V4SF")])
5993
5994 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5995 [(set (match_operand:V4SF 0 "register_operand" "=v")
5996 (vec_concat:V4SF
5997 (vec_merge:V2SF
5998 (any_float:V2SF (match_operand:V2DI 1
5999 "nonimmediate_operand" "vm"))
6000 (match_operand:V2SF 3 "const0_operand" "C")
6001 (match_operand:QI 2 "register_operand" "Yk"))
6002 (match_operand:V2SF 4 "const0_operand" "C")))]
6003 "TARGET_AVX512DQ && TARGET_AVX512VL"
6004 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6005 [(set_attr "type" "ssecvt")
6006 (set_attr "prefix" "evex")
6007 (set_attr "mode" "V4SF")])
6008
6009 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
6010 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
6011 (unsigned_float:VF2_512_256VL
6012 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
6013 "TARGET_AVX512F"
6014 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6015 [(set_attr "type" "ssecvt")
6016 (set_attr "prefix" "evex")
6017 (set_attr "mode" "<MODE>")])
6018
6019 (define_insn "ufloatv2siv2df2<mask_name>"
6020 [(set (match_operand:V2DF 0 "register_operand" "=v")
6021 (unsigned_float:V2DF
6022 (vec_select:V2SI
6023 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
6024 (parallel [(const_int 0) (const_int 1)]))))]
6025 "TARGET_AVX512VL"
6026 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6027 [(set_attr "type" "ssecvt")
6028 (set_attr "prefix" "evex")
6029 (set_attr "mode" "V2DF")])
6030
6031 (define_insn "avx512f_cvtdq2pd512_2"
6032 [(set (match_operand:V8DF 0 "register_operand" "=v")
6033 (float:V8DF
6034 (vec_select:V8SI
6035 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
6036 (parallel [(const_int 0) (const_int 1)
6037 (const_int 2) (const_int 3)
6038 (const_int 4) (const_int 5)
6039 (const_int 6) (const_int 7)]))))]
6040 "TARGET_AVX512F"
6041 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
6042 [(set_attr "type" "ssecvt")
6043 (set_attr "prefix" "evex")
6044 (set_attr "mode" "V8DF")])
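;; As I read the i386 operand modifiers, %t1 above and %x1 below request the
;; 256-bit (ymm) resp. 128-bit (xmm) name of the source register, since only
;; the low half of the wider input vector is consumed by the conversion.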
6045
6046 (define_insn "avx_cvtdq2pd256_2"
6047 [(set (match_operand:V4DF 0 "register_operand" "=v")
6048 (float:V4DF
6049 (vec_select:V4SI
6050 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
6051 (parallel [(const_int 0) (const_int 1)
6052 (const_int 2) (const_int 3)]))))]
6053 "TARGET_AVX"
6054 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
6055 [(set_attr "type" "ssecvt")
6056 (set_attr "prefix" "maybe_evex")
6057 (set_attr "mode" "V4DF")])
6058
6059 (define_insn "sse2_cvtdq2pd<mask_name>"
6060 [(set (match_operand:V2DF 0 "register_operand" "=v")
6061 (float:V2DF
6062 (vec_select:V2SI
6063 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
6064 (parallel [(const_int 0) (const_int 1)]))))]
6065 "TARGET_SSE2 && <mask_avx512vl_condition>"
6066 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6067 [(set_attr "type" "ssecvt")
6068 (set_attr "prefix" "maybe_vex")
6069 (set_attr "mode" "V2DF")])
6070
6071 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
6072 [(set (match_operand:V8SI 0 "register_operand" "=v")
6073 (unspec:V8SI
6074 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
6075 UNSPEC_FIX_NOTRUNC))]
6076 "TARGET_AVX512F"
6077 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6078 [(set_attr "type" "ssecvt")
6079 (set_attr "prefix" "evex")
6080 (set_attr "mode" "OI")])
6081
6082 (define_insn "avx_cvtpd2dq256<mask_name>"
6083 [(set (match_operand:V4SI 0 "register_operand" "=v")
6084 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
6085 UNSPEC_FIX_NOTRUNC))]
6086 "TARGET_AVX && <mask_avx512vl_condition>"
6087 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6088 [(set_attr "type" "ssecvt")
6089 (set_attr "prefix" "<mask_prefix>")
6090 (set_attr "mode" "OI")])
6091
6092 (define_expand "avx_cvtpd2dq256_2"
6093 [(set (match_operand:V8SI 0 "register_operand")
6094 (vec_concat:V8SI
6095 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
6096 UNSPEC_FIX_NOTRUNC)
6097 (match_dup 2)))]
6098 "TARGET_AVX"
6099 "operands[2] = CONST0_RTX (V4SImode);")
6100
6101 (define_insn "*avx_cvtpd2dq256_2"
6102 [(set (match_operand:V8SI 0 "register_operand" "=v")
6103 (vec_concat:V8SI
6104 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
6105 UNSPEC_FIX_NOTRUNC)
6106 (match_operand:V4SI 2 "const0_operand")))]
6107 "TARGET_AVX"
6108 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
6109 [(set_attr "type" "ssecvt")
6110 (set_attr "prefix" "vex")
6111 (set_attr "btver2_decode" "vector")
6112 (set_attr "mode" "OI")])
6113
6114 (define_insn "sse2_cvtpd2dq"
6115 [(set (match_operand:V4SI 0 "register_operand" "=v")
6116 (vec_concat:V4SI
6117 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
6118 UNSPEC_FIX_NOTRUNC)
6119 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6120 "TARGET_SSE2"
6121 {
6122 if (TARGET_AVX)
6123 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
6124 else
6125 return "cvtpd2dq\t{%1, %0|%0, %1}";
6126 }
6127 [(set_attr "type" "ssecvt")
6128 (set_attr "prefix_rep" "1")
6129 (set_attr "prefix_data16" "0")
6130 (set_attr "prefix" "maybe_vex")
6131 (set_attr "mode" "TI")
6132 (set_attr "amdfam10_decode" "double")
6133 (set_attr "athlon_decode" "vector")
6134 (set_attr "bdver1_decode" "double")])
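;; cvtpd2dq yields only two 32-bit results, so the patterns in this group
;; model the destination as a V4SI whose upper half is explicitly zero.  The
;; {x}/{y} braces in the templates should emit the size suffix only for AT&T
;; syntax, where it disambiguates the 128-bit and 256-bit memory source forms.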
6135
6136 (define_insn "sse2_cvtpd2dq_mask"
6137 [(set (match_operand:V4SI 0 "register_operand" "=v")
6138 (vec_concat:V4SI
6139 (vec_merge:V2SI
6140 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6141 UNSPEC_FIX_NOTRUNC)
6142 (vec_select:V2SI
6143 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6144 (parallel [(const_int 0) (const_int 1)]))
6145 (match_operand:QI 3 "register_operand" "Yk"))
6146 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6147 "TARGET_AVX512VL"
6148 "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6149 [(set_attr "type" "ssecvt")
6150 (set_attr "prefix" "evex")
6151 (set_attr "mode" "TI")])
6152
6153 (define_insn "*sse2_cvtpd2dq_mask_1"
6154 [(set (match_operand:V4SI 0 "register_operand" "=v")
6155 (vec_concat:V4SI
6156 (vec_merge:V2SI
6157 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6158 UNSPEC_FIX_NOTRUNC)
6159 (const_vector:V2SI [(const_int 0) (const_int 0)])
6160 (match_operand:QI 2 "register_operand" "Yk"))
6161 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6162 "TARGET_AVX512VL"
6163 "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6164 [(set_attr "type" "ssecvt")
6165 (set_attr "prefix" "evex")
6166 (set_attr "mode" "TI")])
6167
6168 ;; For ufix_notrunc* insn patterns
6169 (define_mode_attr pd2udqsuff
6170 [(V8DF "") (V4DF "{y}")])
6171
6172 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
6173 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
6174 (unspec:<si2dfmode>
6175 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
6176 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6177 "TARGET_AVX512F"
6178 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6179 [(set_attr "type" "ssecvt")
6180 (set_attr "prefix" "evex")
6181 (set_attr "mode" "<sseinsnmode>")])
6182
6183 (define_insn "ufix_notruncv2dfv2si2"
6184 [(set (match_operand:V4SI 0 "register_operand" "=v")
6185 (vec_concat:V4SI
6186 (unspec:V2SI
6187 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6188 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6189 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6190 "TARGET_AVX512VL"
6191 "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
6192 [(set_attr "type" "ssecvt")
6193 (set_attr "prefix" "evex")
6194 (set_attr "mode" "TI")])
6195
6196 (define_insn "ufix_notruncv2dfv2si2_mask"
6197 [(set (match_operand:V4SI 0 "register_operand" "=v")
6198 (vec_concat:V4SI
6199 (vec_merge:V2SI
6200 (unspec:V2SI
6201 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6202 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6203 (vec_select:V2SI
6204 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6205 (parallel [(const_int 0) (const_int 1)]))
6206 (match_operand:QI 3 "register_operand" "Yk"))
6207 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6208 "TARGET_AVX512VL"
6209 "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6210 [(set_attr "type" "ssecvt")
6211 (set_attr "prefix" "evex")
6212 (set_attr "mode" "TI")])
6213
6214 (define_insn "*ufix_notruncv2dfv2si2_mask_1"
6215 [(set (match_operand:V4SI 0 "register_operand" "=v")
6216 (vec_concat:V4SI
6217 (vec_merge:V2SI
6218 (unspec:V2SI
6219 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6220 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6221 (const_vector:V2SI [(const_int 0) (const_int 0)])
6222 (match_operand:QI 2 "register_operand" "Yk"))
6223 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6224 "TARGET_AVX512VL"
6225 "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6226 [(set_attr "type" "ssecvt")
6227 (set_attr "prefix" "evex")
6228 (set_attr "mode" "TI")])
6229
6230 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
6231 [(set (match_operand:V8SI 0 "register_operand" "=v")
6232 (any_fix:V8SI
6233 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6234 "TARGET_AVX512F"
6235 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6236 [(set_attr "type" "ssecvt")
6237 (set_attr "prefix" "evex")
6238 (set_attr "mode" "OI")])
6239
6240 (define_insn "ufix_truncv2dfv2si2"
6241 [(set (match_operand:V4SI 0 "register_operand" "=v")
6242 (vec_concat:V4SI
6243 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6244 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6245 "TARGET_AVX512VL"
6246 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
6247 [(set_attr "type" "ssecvt")
6248 (set_attr "prefix" "evex")
6249 (set_attr "mode" "TI")])
6250
6251 (define_insn "ufix_truncv2dfv2si2_mask"
6252 [(set (match_operand:V4SI 0 "register_operand" "=v")
6253 (vec_concat:V4SI
6254 (vec_merge:V2SI
6255 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6256 (vec_select:V2SI
6257 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6258 (parallel [(const_int 0) (const_int 1)]))
6259 (match_operand:QI 3 "register_operand" "Yk"))
6260 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6261 "TARGET_AVX512VL"
6262 "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6263 [(set_attr "type" "ssecvt")
6264 (set_attr "prefix" "evex")
6265 (set_attr "mode" "TI")])
6266
6267 (define_insn "*ufix_truncv2dfv2si2_mask_1"
6268 [(set (match_operand:V4SI 0 "register_operand" "=v")
6269 (vec_concat:V4SI
6270 (vec_merge:V2SI
6271 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6272 (const_vector:V2SI [(const_int 0) (const_int 0)])
6273 (match_operand:QI 2 "register_operand" "Yk"))
6274 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6275 "TARGET_AVX512VL"
6276 "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6277 [(set_attr "type" "ssecvt")
6278 (set_attr "prefix" "evex")
6279 (set_attr "mode" "TI")])
6280
6281 (define_insn "fix_truncv4dfv4si2<mask_name>"
6282 [(set (match_operand:V4SI 0 "register_operand" "=v")
6283 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6284 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
6285 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6286 [(set_attr "type" "ssecvt")
6287 (set_attr "prefix" "maybe_evex")
6288 (set_attr "mode" "OI")])
6289
6290 (define_insn "ufix_truncv4dfv4si2<mask_name>"
6291 [(set (match_operand:V4SI 0 "register_operand" "=v")
6292 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6293 "TARGET_AVX512VL && TARGET_AVX512F"
6294 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6295 [(set_attr "type" "ssecvt")
6296 (set_attr "prefix" "maybe_evex")
6297 (set_attr "mode" "OI")])
6298
6299 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
6300 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6301 (any_fix:<sseintvecmode>
6302 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6303 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
6304 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6305 [(set_attr "type" "ssecvt")
6306 (set_attr "prefix" "evex")
6307 (set_attr "mode" "<sseintvecmode2>")])
6308
6309 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6310 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6311 (unspec:<sseintvecmode>
6312 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
6313 UNSPEC_FIX_NOTRUNC))]
6314 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6315 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6316 [(set_attr "type" "ssecvt")
6317 (set_attr "prefix" "evex")
6318 (set_attr "mode" "<sseintvecmode2>")])
6319
6320 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6321 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6322 (unspec:<sseintvecmode>
6323 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
6324 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6325 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6326 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6327 [(set_attr "type" "ssecvt")
6328 (set_attr "prefix" "evex")
6329 (set_attr "mode" "<sseintvecmode2>")])
6330
6331 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
6332 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
6333 (any_fix:<sselongvecmode>
6334 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6335 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
6336 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6337 [(set_attr "type" "ssecvt")
6338 (set_attr "prefix" "evex")
6339 (set_attr "mode" "<sseintvecmode3>")])
6340
6341 (define_insn "avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
6342 [(set (match_operand:V2DI 0 "register_operand" "=v")
6343 (any_fix:V2DI
6344 (vec_select:V2SF
6345 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6346 (parallel [(const_int 0) (const_int 1)]))))]
6347 "TARGET_AVX512DQ && TARGET_AVX512VL"
6348 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6349 [(set_attr "type" "ssecvt")
6350 (set_attr "prefix" "evex")
6351 (set_attr "mode" "TI")])
6352
6353 (define_expand "fix<fixunssuffix>_truncv2sfv2di2"
6354 [(set (match_operand:V2DI 0 "register_operand")
6355 (any_fix:V2DI
6356 (match_operand:V2SF 1 "register_operand")))]
6357 "TARGET_AVX512DQ && TARGET_AVX512VL"
6358 {
6359 operands[1] = simplify_gen_subreg (V4SFmode, operands[1], V2SFmode, 0);
6360 emit_insn (gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2
6361 (operands[0], operands[1]));
6362 DONE;
6363 })
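;; The expander above merely re-interprets its V2SF input as the low half of a
;; V4SF register via a paradoxical subreg; the AVX512DQ insn pattern it
;; forwards to already selects just elements 0 and 1 of that V4SF.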
6364
6365 (define_mode_attr vunpckfixt_mode
6366 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
6367 (define_mode_attr vunpckfixt_model
6368 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
6369 (define_mode_attr vunpckfixt_extract_mode
6370 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
6371
6372 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
6373 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6374 (any_fix:<vunpckfixt_mode>
6375 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6376 "TARGET_AVX512DQ"
6377 {
6378 rtx tem = operands[1];
6379 rtx (*gen) (rtx, rtx);
6380
6381 if (<MODE>mode != V4SFmode)
6382 {
6383 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6384 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
6385 operands[1]));
6386 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6387 }
6388 else
6389 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6390
6391 emit_insn (gen (operands[0], tem));
6392 DONE;
6393 })
6394
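;; In the _hi_ expander below, the V4SF case first moves the high pair of
;; elements into the low positions with vpermilps and immediate 0x4e (element
;; order 2,3,0,1), so the same v2sfv2di pattern as in the _lo_ case is reused.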
6395 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6396 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6397 (any_fix:<vunpckfixt_mode>
6398 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6399 "TARGET_AVX512DQ"
6400 {
6401 rtx tem;
6402 rtx (*gen) (rtx, rtx);
6403
6404 if (<MODE>mode != V4SFmode)
6405 {
6406 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6407 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6408 operands[1]));
6409 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6410 }
6411 else
6412 {
6413 tem = gen_reg_rtx (V4SFmode);
6414 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6415 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6416 }
6417
6418 emit_insn (gen (operands[0], tem));
6419 DONE;
6420 })
6421
6422 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6423 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6424 (unsigned_fix:<sseintvecmode>
6425 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6426 "TARGET_AVX512VL"
6427 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6428 [(set_attr "type" "ssecvt")
6429 (set_attr "prefix" "evex")
6430 (set_attr "mode" "<sseintvecmode2>")])
6431
6432 (define_expand "avx_cvttpd2dq256_2"
6433 [(set (match_operand:V8SI 0 "register_operand")
6434 (vec_concat:V8SI
6435 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6436 (match_dup 2)))]
6437 "TARGET_AVX"
6438 "operands[2] = CONST0_RTX (V4SImode);")
6439
6440 (define_insn "sse2_cvttpd2dq"
6441 [(set (match_operand:V4SI 0 "register_operand" "=v")
6442 (vec_concat:V4SI
6443 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6444 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6445 "TARGET_SSE2"
6446 {
6447 if (TARGET_AVX)
6448 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
6449 else
6450 return "cvttpd2dq\t{%1, %0|%0, %1}";
6451 }
6452 [(set_attr "type" "ssecvt")
6453 (set_attr "amdfam10_decode" "double")
6454 (set_attr "athlon_decode" "vector")
6455 (set_attr "bdver1_decode" "double")
6456 (set_attr "prefix" "maybe_vex")
6457 (set_attr "mode" "TI")])
6458
6459 (define_insn "sse2_cvttpd2dq_mask"
6460 [(set (match_operand:V4SI 0 "register_operand" "=v")
6461 (vec_concat:V4SI
6462 (vec_merge:V2SI
6463 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6464 (vec_select:V2SI
6465 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6466 (parallel [(const_int 0) (const_int 1)]))
6467 (match_operand:QI 3 "register_operand" "Yk"))
6468 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6469 "TARGET_AVX512VL"
6470 "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6471 [(set_attr "type" "ssecvt")
6472 (set_attr "prefix" "evex")
6473 (set_attr "mode" "TI")])
6474
6475 (define_insn "*sse2_cvttpd2dq_mask_1"
6476 [(set (match_operand:V4SI 0 "register_operand" "=v")
6477 (vec_concat:V4SI
6478 (vec_merge:V2SI
6479 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6480 (const_vector:V2SI [(const_int 0) (const_int 0)])
6481 (match_operand:QI 2 "register_operand" "Yk"))
6482 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6483 "TARGET_AVX512VL"
6484 "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6485 [(set_attr "type" "ssecvt")
6486 (set_attr "prefix" "evex")
6487 (set_attr "mode" "TI")])
6488
6489 (define_insn "sse2_cvtsd2ss<mask_name><round_name>"
6490 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6491 (vec_merge:V4SF
6492 (vec_duplicate:V4SF
6493 (float_truncate:V2SF
6494 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6495 (match_operand:V4SF 1 "register_operand" "0,0,v")
6496 (const_int 1)))]
6497 "TARGET_SSE2"
6498 "@
6499 cvtsd2ss\t{%2, %0|%0, %2}
6500 cvtsd2ss\t{%2, %0|%0, %q2}
6501 vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}"
6502 [(set_attr "isa" "noavx,noavx,avx")
6503 (set_attr "type" "ssecvt")
6504 (set_attr "athlon_decode" "vector,double,*")
6505 (set_attr "amdfam10_decode" "vector,double,*")
6506 (set_attr "bdver1_decode" "direct,direct,*")
6507 (set_attr "btver2_decode" "double,double,double")
6508 (set_attr "prefix" "orig,orig,<round_prefix>")
6509 (set_attr "mode" "SF")])
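;; In the scalar convert patterns here, the (vec_merge ... (const_int 1)) form
;; means only element 0 of the destination carries the converted value; the
;; remaining elements pass through from operand 1, matching the semantics of
;; the cvtsd2ss/cvtss2sd scalar instructions.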
6510
6511 (define_insn "*sse2_vd_cvtsd2ss"
6512 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6513 (vec_merge:V4SF
6514 (vec_duplicate:V4SF
6515 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6516 (match_operand:V4SF 1 "register_operand" "0,0,v")
6517 (const_int 1)))]
6518 "TARGET_SSE2"
6519 "@
6520 cvtsd2ss\t{%2, %0|%0, %2}
6521 cvtsd2ss\t{%2, %0|%0, %2}
6522 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6523 [(set_attr "isa" "noavx,noavx,avx")
6524 (set_attr "type" "ssecvt")
6525 (set_attr "athlon_decode" "vector,double,*")
6526 (set_attr "amdfam10_decode" "vector,double,*")
6527 (set_attr "bdver1_decode" "direct,direct,*")
6528 (set_attr "btver2_decode" "double,double,double")
6529 (set_attr "prefix" "orig,orig,vex")
6530 (set_attr "mode" "SF")])
6531
6532 (define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>"
6533 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6534 (vec_merge:V2DF
6535 (float_extend:V2DF
6536 (vec_select:V2SF
6537 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6538 (parallel [(const_int 0) (const_int 1)])))
6539 (match_operand:V2DF 1 "register_operand" "0,0,v")
6540 (const_int 1)))]
6541 "TARGET_SSE2"
6542 "@
6543 cvtss2sd\t{%2, %0|%0, %2}
6544 cvtss2sd\t{%2, %0|%0, %k2}
6545 vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}"
6546 [(set_attr "isa" "noavx,noavx,avx")
6547 (set_attr "type" "ssecvt")
6548 (set_attr "amdfam10_decode" "vector,double,*")
6549 (set_attr "athlon_decode" "direct,direct,*")
6550 (set_attr "bdver1_decode" "direct,direct,*")
6551 (set_attr "btver2_decode" "double,double,double")
6552 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6553 (set_attr "mode" "DF")])
6554
6555 (define_insn "*sse2_vd_cvtss2sd"
6556 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6557 (vec_merge:V2DF
6558 (vec_duplicate:V2DF
6559 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6560 (match_operand:V2DF 1 "register_operand" "0,0,v")
6561 (const_int 1)))]
6562 "TARGET_SSE2"
6563 "@
6564 cvtss2sd\t{%2, %0|%0, %2}
6565 cvtss2sd\t{%2, %0|%0, %2}
6566 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6567 [(set_attr "isa" "noavx,noavx,avx")
6568 (set_attr "type" "ssecvt")
6569 (set_attr "amdfam10_decode" "vector,double,*")
6570 (set_attr "athlon_decode" "direct,direct,*")
6571 (set_attr "bdver1_decode" "direct,direct,*")
6572 (set_attr "btver2_decode" "double,double,double")
6573 (set_attr "prefix" "orig,orig,vex")
6574 (set_attr "mode" "DF")])
6575
6576 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6577 [(set (match_operand:V8SF 0 "register_operand" "=v")
6578 (float_truncate:V8SF
6579 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6580 "TARGET_AVX512F"
6581 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6582 [(set_attr "type" "ssecvt")
6583 (set_attr "prefix" "evex")
6584 (set_attr "mode" "V8SF")])
6585
6586 (define_insn "avx_cvtpd2ps256<mask_name>"
6587 [(set (match_operand:V4SF 0 "register_operand" "=v")
6588 (float_truncate:V4SF
6589 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6590 "TARGET_AVX && <mask_avx512vl_condition>"
6591 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6592 [(set_attr "type" "ssecvt")
6593 (set_attr "prefix" "maybe_evex")
6594 (set_attr "btver2_decode" "vector")
6595 (set_attr "mode" "V4SF")])
6596
6597 (define_expand "sse2_cvtpd2ps"
6598 [(set (match_operand:V4SF 0 "register_operand")
6599 (vec_concat:V4SF
6600 (float_truncate:V2SF
6601 (match_operand:V2DF 1 "vector_operand"))
6602 (match_dup 2)))]
6603 "TARGET_SSE2"
6604 "operands[2] = CONST0_RTX (V2SFmode);")
6605
6606 (define_expand "sse2_cvtpd2ps_mask"
6607 [(set (match_operand:V4SF 0 "register_operand")
6608 (vec_concat:V4SF
6609 (vec_merge:V2SF
6610 (float_truncate:V2SF
6611 (match_operand:V2DF 1 "vector_operand"))
6612 (vec_select:V2SF
6613 (match_operand:V4SF 2 "nonimm_or_0_operand")
6614 (parallel [(const_int 0) (const_int 1)]))
6615 (match_operand:QI 3 "register_operand"))
6616 (match_dup 4)))]
6617 "TARGET_SSE2"
6618 "operands[4] = CONST0_RTX (V2SFmode);")
6619
6620 (define_insn "*sse2_cvtpd2ps"
6621 [(set (match_operand:V4SF 0 "register_operand" "=v")
6622 (vec_concat:V4SF
6623 (float_truncate:V2SF
6624 (match_operand:V2DF 1 "vector_operand" "vBm"))
6625 (match_operand:V2SF 2 "const0_operand" "C")))]
6626 "TARGET_SSE2"
6627 {
6628 if (TARGET_AVX)
6629 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6630 else
6631 return "cvtpd2ps\t{%1, %0|%0, %1}";
6632 }
6633 [(set_attr "type" "ssecvt")
6634 (set_attr "amdfam10_decode" "double")
6635 (set_attr "athlon_decode" "vector")
6636 (set_attr "bdver1_decode" "double")
6637 (set_attr "prefix_data16" "1")
6638 (set_attr "prefix" "maybe_vex")
6639 (set_attr "mode" "V4SF")])
6640
6641 (define_insn "truncv2dfv2sf2"
6642 [(set (match_operand:V2SF 0 "register_operand" "=v")
6643 (float_truncate:V2SF
6644 (match_operand:V2DF 1 "vector_operand" "vBm")))]
6645 "TARGET_MMX_WITH_SSE"
6646 {
6647 if (TARGET_AVX)
6648 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6649 else
6650 return "cvtpd2ps\t{%1, %0|%0, %1}";
6651 }
6652 [(set_attr "type" "ssecvt")
6653 (set_attr "amdfam10_decode" "double")
6654 (set_attr "athlon_decode" "vector")
6655 (set_attr "bdver1_decode" "double")
6656 (set_attr "prefix_data16" "1")
6657 (set_attr "prefix" "maybe_vex")
6658 (set_attr "mode" "V4SF")])
6659
6660 (define_insn "*sse2_cvtpd2ps_mask"
6661 [(set (match_operand:V4SF 0 "register_operand" "=v")
6662 (vec_concat:V4SF
6663 (vec_merge:V2SF
6664 (float_truncate:V2SF
6665 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6666 (vec_select:V2SF
6667 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6668 (parallel [(const_int 0) (const_int 1)]))
6669 (match_operand:QI 3 "register_operand" "Yk"))
6670 (match_operand:V2SF 4 "const0_operand" "C")))]
6671 "TARGET_AVX512VL"
6672 "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6673 [(set_attr "type" "ssecvt")
6674 (set_attr "prefix" "evex")
6675 (set_attr "mode" "V4SF")])
6676
6677 (define_insn "*sse2_cvtpd2ps_mask_1"
6678 [(set (match_operand:V4SF 0 "register_operand" "=v")
6679 (vec_concat:V4SF
6680 (vec_merge:V2SF
6681 (float_truncate:V2SF
6682 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6683 (match_operand:V2SF 3 "const0_operand" "C")
6684 (match_operand:QI 2 "register_operand" "Yk"))
6685 (match_operand:V2SF 4 "const0_operand" "C")))]
6686 "TARGET_AVX512VL"
6687 "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6688 [(set_attr "type" "ssecvt")
6689 (set_attr "prefix" "evex")
6690 (set_attr "mode" "V4SF")])
6691
6692 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
6693 (define_mode_attr sf2dfmode
6694 [(V8DF "V8SF") (V4DF "V4SF")])
6695 (define_mode_attr sf2dfmode_lower
6696 [(V8DF "v8sf") (V4DF "v4sf")])
6697
6698 (define_expand "trunc<mode><sf2dfmode_lower>2"
6699 [(set (match_operand:<sf2dfmode> 0 "register_operand")
6700 (float_truncate:<sf2dfmode>
6701 (match_operand:VF2_512_256 1 "vector_operand")))]
6702 "TARGET_AVX")
6703
6704 (define_expand "extend<sf2dfmode_lower><mode>2"
6705 [(set (match_operand:VF2_512_256 0 "register_operand")
6706 (float_extend:VF2_512_256
6707 (match_operand:<sf2dfmode> 1 "vector_operand")))]
6708 "TARGET_AVX")
6709
6710 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6711 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6712 (float_extend:VF2_512_256
6713 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6714 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6715 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6716 [(set_attr "type" "ssecvt")
6717 (set_attr "prefix" "maybe_vex")
6718 (set_attr "mode" "<MODE>")])
6719
6720 (define_insn "*avx_cvtps2pd256_2"
6721 [(set (match_operand:V4DF 0 "register_operand" "=v")
6722 (float_extend:V4DF
6723 (vec_select:V4SF
6724 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6725 (parallel [(const_int 0) (const_int 1)
6726 (const_int 2) (const_int 3)]))))]
6727 "TARGET_AVX"
6728 "vcvtps2pd\t{%x1, %0|%0, %x1}"
6729 [(set_attr "type" "ssecvt")
6730 (set_attr "prefix" "vex")
6731 (set_attr "mode" "V4DF")])
6732
6733 (define_insn "vec_unpacks_lo_v16sf"
6734 [(set (match_operand:V8DF 0 "register_operand" "=v")
6735 (float_extend:V8DF
6736 (vec_select:V8SF
6737 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6738 (parallel [(const_int 0) (const_int 1)
6739 (const_int 2) (const_int 3)
6740 (const_int 4) (const_int 5)
6741 (const_int 6) (const_int 7)]))))]
6742 "TARGET_AVX512F"
6743 "vcvtps2pd\t{%t1, %0|%0, %t1}"
6744 [(set_attr "type" "ssecvt")
6745 (set_attr "prefix" "evex")
6746 (set_attr "mode" "V8DF")])
6747
6748 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6749 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6750 (unspec:<avx512fmaskmode>
6751 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6752 UNSPEC_CVTINT2MASK))]
6753 "TARGET_AVX512BW"
6754 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6755 [(set_attr "prefix" "evex")
6756 (set_attr "mode" "<sseinsnmode>")])
6757
6758 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6759 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6760 (unspec:<avx512fmaskmode>
6761 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6762 UNSPEC_CVTINT2MASK))]
6763 "TARGET_AVX512DQ"
6764 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6765 [(set_attr "prefix" "evex")
6766 (set_attr "mode" "<sseinsnmode>")])
6767
6768 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6769 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6770 (vec_merge:VI12_AVX512VL
6771 (match_dup 2)
6772 (match_dup 3)
6773 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6774 "TARGET_AVX512BW"
6775 {
6776 operands[2] = CONSTM1_RTX (<MODE>mode);
6777 operands[3] = CONST0_RTX (<MODE>mode);
6778 })
6779
6780 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6781 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6782 (vec_merge:VI12_AVX512VL
6783 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6784 (match_operand:VI12_AVX512VL 3 "const0_operand")
6785 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6786 "TARGET_AVX512BW"
6787 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6788 [(set_attr "prefix" "evex")
6789 (set_attr "mode" "<sseinsnmode>")])
6790
6791 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6792 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6793 (vec_merge:VI48_AVX512VL
6794 (match_dup 2)
6795 (match_dup 3)
6796 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6797 "TARGET_AVX512F"
6798 "{
6799 operands[2] = CONSTM1_RTX (<MODE>mode);
6800 operands[3] = CONST0_RTX (<MODE>mode);
6801 }")
6802
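;; The second alternative below is, as far as I can tell, a DQ-free emulation
;; of vpmovm2d/vpmovm2q: with all three sources being the same register,
;; vpternlog immediate 0x81 (output 1 for minterms 000 and 111 only) produces
;; all-ones, and the zero-masking then clears exactly the elements whose mask
;; bit is 0.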
6803 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6804 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
6805 (vec_merge:VI48_AVX512VL
6806 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6807 (match_operand:VI48_AVX512VL 3 "const0_operand")
6808 (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
6809 "TARGET_AVX512F"
6810 "@
6811 vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
6812 vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
6813 [(set_attr "isa" "avx512dq,*")
6814 (set_attr "length_immediate" "0,1")
6815 (set_attr "prefix" "evex")
6816 (set_attr "mode" "<sseinsnmode>")])
6817
6818 (define_insn "sse2_cvtps2pd<mask_name>"
6819 [(set (match_operand:V2DF 0 "register_operand" "=v")
6820 (float_extend:V2DF
6821 (vec_select:V2SF
6822 (match_operand:V4SF 1 "vector_operand" "vm")
6823 (parallel [(const_int 0) (const_int 1)]))))]
6824 "TARGET_SSE2 && <mask_avx512vl_condition>"
6825 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6826 [(set_attr "type" "ssecvt")
6827 (set_attr "amdfam10_decode" "direct")
6828 (set_attr "athlon_decode" "double")
6829 (set_attr "bdver1_decode" "double")
6830 (set_attr "prefix_data16" "0")
6831 (set_attr "prefix" "maybe_vex")
6832 (set_attr "mode" "V2DF")])
6833
6834 (define_insn "extendv2sfv2df2"
6835 [(set (match_operand:V2DF 0 "register_operand" "=v")
6836 (float_extend:V2DF
6837 (match_operand:V2SF 1 "register_operand" "v")))]
6838 "TARGET_MMX_WITH_SSE"
6839 "%vcvtps2pd\t{%1, %0|%0, %1}"
6840 [(set_attr "type" "ssecvt")
6841 (set_attr "amdfam10_decode" "direct")
6842 (set_attr "athlon_decode" "double")
6843 (set_attr "bdver1_decode" "double")
6844 (set_attr "prefix_data16" "0")
6845 (set_attr "prefix" "maybe_vex")
6846 (set_attr "mode" "V2DF")])
6847
6848 (define_expand "vec_unpacks_hi_v4sf"
6849 [(set (match_dup 2)
6850 (vec_select:V4SF
6851 (vec_concat:V8SF
6852 (match_dup 2)
6853 (match_operand:V4SF 1 "vector_operand"))
6854 (parallel [(const_int 6) (const_int 7)
6855 (const_int 2) (const_int 3)])))
6856 (set (match_operand:V2DF 0 "register_operand")
6857 (float_extend:V2DF
6858 (vec_select:V2SF
6859 (match_dup 2)
6860 (parallel [(const_int 0) (const_int 1)]))))]
6861 "TARGET_SSE2"
6862 "operands[2] = gen_reg_rtx (V4SFmode);")
6863
6864 (define_expand "vec_unpacks_hi_v8sf"
6865 [(set (match_dup 2)
6866 (vec_select:V4SF
6867 (match_operand:V8SF 1 "register_operand")
6868 (parallel [(const_int 4) (const_int 5)
6869 (const_int 6) (const_int 7)])))
6870 (set (match_operand:V4DF 0 "register_operand")
6871 (float_extend:V4DF
6872 (match_dup 2)))]
6873 "TARGET_AVX"
6874 "operands[2] = gen_reg_rtx (V4SFmode);")
6875
6876 (define_expand "vec_unpacks_hi_v16sf"
6877 [(set (match_dup 2)
6878 (vec_select:V8SF
6879 (match_operand:V16SF 1 "register_operand")
6880 (parallel [(const_int 8) (const_int 9)
6881 (const_int 10) (const_int 11)
6882 (const_int 12) (const_int 13)
6883 (const_int 14) (const_int 15)])))
6884 (set (match_operand:V8DF 0 "register_operand")
6885 (float_extend:V8DF
6886 (match_dup 2)))]
6887 "TARGET_AVX512F"
6888 "operands[2] = gen_reg_rtx (V8SFmode);")
6889
6890 (define_expand "vec_unpacks_lo_v4sf"
6891 [(set (match_operand:V2DF 0 "register_operand")
6892 (float_extend:V2DF
6893 (vec_select:V2SF
6894 (match_operand:V4SF 1 "vector_operand")
6895 (parallel [(const_int 0) (const_int 1)]))))]
6896 "TARGET_SSE2")
6897
6898 (define_expand "vec_unpacks_lo_v8sf"
6899 [(set (match_operand:V4DF 0 "register_operand")
6900 (float_extend:V4DF
6901 (vec_select:V4SF
6902 (match_operand:V8SF 1 "nonimmediate_operand")
6903 (parallel [(const_int 0) (const_int 1)
6904 (const_int 2) (const_int 3)]))))]
6905 "TARGET_AVX")
6906
6907 (define_mode_attr sseunpackfltmode
6908 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
6909 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
6910
6911 (define_expand "vec_unpacks_float_hi_<mode>"
6912 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6913 (match_operand:VI2_AVX512F 1 "register_operand")]
6914 "TARGET_SSE2"
6915 {
6916 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6917
6918 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6919 emit_insn (gen_rtx_SET (operands[0],
6920 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6921 DONE;
6922 })
6923
6924 (define_expand "vec_unpacks_float_lo_<mode>"
6925 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6926 (match_operand:VI2_AVX512F 1 "register_operand")]
6927 "TARGET_SSE2"
6928 {
6929 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6930
6931 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6932 emit_insn (gen_rtx_SET (operands[0],
6933 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6934 DONE;
6935 })
6936
6937 (define_expand "vec_unpacku_float_hi_<mode>"
6938 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6939 (match_operand:VI2_AVX512F 1 "register_operand")]
6940 "TARGET_SSE2"
6941 {
6942 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6943
6944 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6945 emit_insn (gen_rtx_SET (operands[0],
6946 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6947 DONE;
6948 })
6949
6950 (define_expand "vec_unpacku_float_lo_<mode>"
6951 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6952 (match_operand:VI2_AVX512F 1 "register_operand")]
6953 "TARGET_SSE2"
6954 {
6955 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6956
6957 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6958 emit_insn (gen_rtx_SET (operands[0],
6959 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6960 DONE;
6961 })
6962
6963 (define_expand "vec_unpacks_float_hi_v4si"
6964 [(set (match_dup 2)
6965 (vec_select:V4SI
6966 (match_operand:V4SI 1 "vector_operand")
6967 (parallel [(const_int 2) (const_int 3)
6968 (const_int 2) (const_int 3)])))
6969 (set (match_operand:V2DF 0 "register_operand")
6970 (float:V2DF
6971 (vec_select:V2SI
6972 (match_dup 2)
6973 (parallel [(const_int 0) (const_int 1)]))))]
6974 "TARGET_SSE2"
6975 "operands[2] = gen_reg_rtx (V4SImode);")
6976
6977 (define_expand "vec_unpacks_float_lo_v4si"
6978 [(set (match_operand:V2DF 0 "register_operand")
6979 (float:V2DF
6980 (vec_select:V2SI
6981 (match_operand:V4SI 1 "vector_operand")
6982 (parallel [(const_int 0) (const_int 1)]))))]
6983 "TARGET_SSE2")
6984
6985 (define_expand "vec_unpacks_float_hi_v8si"
6986 [(set (match_dup 2)
6987 (vec_select:V4SI
6988 (match_operand:V8SI 1 "vector_operand")
6989 (parallel [(const_int 4) (const_int 5)
6990 (const_int 6) (const_int 7)])))
6991 (set (match_operand:V4DF 0 "register_operand")
6992 (float:V4DF
6993 (match_dup 2)))]
6994 "TARGET_AVX"
6995 "operands[2] = gen_reg_rtx (V4SImode);")
6996
6997 (define_expand "vec_unpacks_float_lo_v8si"
6998 [(set (match_operand:V4DF 0 "register_operand")
6999 (float:V4DF
7000 (vec_select:V4SI
7001 (match_operand:V8SI 1 "nonimmediate_operand")
7002 (parallel [(const_int 0) (const_int 1)
7003 (const_int 2) (const_int 3)]))))]
7004 "TARGET_AVX")
7005
7006 (define_expand "vec_unpacks_float_hi_v16si"
7007 [(set (match_dup 2)
7008 (vec_select:V8SI
7009 (match_operand:V16SI 1 "nonimmediate_operand")
7010 (parallel [(const_int 8) (const_int 9)
7011 (const_int 10) (const_int 11)
7012 (const_int 12) (const_int 13)
7013 (const_int 14) (const_int 15)])))
7014 (set (match_operand:V8DF 0 "register_operand")
7015 (float:V8DF
7016 (match_dup 2)))]
7017 "TARGET_AVX512F"
7018 "operands[2] = gen_reg_rtx (V8SImode);")
7019
7020 (define_expand "vec_unpacks_float_lo_v16si"
7021 [(set (match_operand:V8DF 0 "register_operand")
7022 (float:V8DF
7023 (vec_select:V8SI
7024 (match_operand:V16SI 1 "nonimmediate_operand")
7025 (parallel [(const_int 0) (const_int 1)
7026 (const_int 2) (const_int 3)
7027 (const_int 4) (const_int 5)
7028 (const_int 6) (const_int 7)]))))]
7029 "TARGET_AVX512F")
7030
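;; The vec_unpacku_float_* expanders below have no unsigned SImode->DFmode
;; conversion to use before AVX512F, so they convert as signed and then repair
;; lanes whose value had the sign bit set: wherever the signed result is
;; negative, 2^32 is added back (selected by a compare-and-and sequence, or by
;; a masked add on AVX512F).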
7031 (define_expand "vec_unpacku_float_hi_v4si"
7032 [(set (match_dup 5)
7033 (vec_select:V4SI
7034 (match_operand:V4SI 1 "vector_operand")
7035 (parallel [(const_int 2) (const_int 3)
7036 (const_int 2) (const_int 3)])))
7037 (set (match_dup 6)
7038 (float:V2DF
7039 (vec_select:V2SI
7040 (match_dup 5)
7041 (parallel [(const_int 0) (const_int 1)]))))
7042 (set (match_dup 7)
7043 (lt:V2DF (match_dup 6) (match_dup 3)))
7044 (set (match_dup 8)
7045 (and:V2DF (match_dup 7) (match_dup 4)))
7046 (set (match_operand:V2DF 0 "register_operand")
7047 (plus:V2DF (match_dup 6) (match_dup 8)))]
7048 "TARGET_SSE2"
7049 {
7050 REAL_VALUE_TYPE TWO32r;
7051 rtx x;
7052 int i;
7053
7054 real_ldexp (&TWO32r, &dconst1, 32);
7055 x = const_double_from_real_value (TWO32r, DFmode);
7056
7057 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
7058 operands[4] = force_reg (V2DFmode,
7059 ix86_build_const_vector (V2DFmode, 1, x));
7060
7061 operands[5] = gen_reg_rtx (V4SImode);
7062
7063 for (i = 6; i < 9; i++)
7064 operands[i] = gen_reg_rtx (V2DFmode);
7065 })
7066
7067 (define_expand "vec_unpacku_float_lo_v4si"
7068 [(set (match_dup 5)
7069 (float:V2DF
7070 (vec_select:V2SI
7071 (match_operand:V4SI 1 "vector_operand")
7072 (parallel [(const_int 0) (const_int 1)]))))
7073 (set (match_dup 6)
7074 (lt:V2DF (match_dup 5) (match_dup 3)))
7075 (set (match_dup 7)
7076 (and:V2DF (match_dup 6) (match_dup 4)))
7077 (set (match_operand:V2DF 0 "register_operand")
7078 (plus:V2DF (match_dup 5) (match_dup 7)))]
7079 "TARGET_SSE2"
7080 {
7081 REAL_VALUE_TYPE TWO32r;
7082 rtx x;
7083 int i;
7084
7085 real_ldexp (&TWO32r, &dconst1, 32);
7086 x = const_double_from_real_value (TWO32r, DFmode);
7087
7088 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
7089 operands[4] = force_reg (V2DFmode,
7090 ix86_build_const_vector (V2DFmode, 1, x));
7091
7092 for (i = 5; i < 8; i++)
7093 operands[i] = gen_reg_rtx (V2DFmode);
7094 })
7095
7096 (define_expand "vec_unpacku_float_hi_v8si"
7097 [(match_operand:V4DF 0 "register_operand")
7098 (match_operand:V8SI 1 "register_operand")]
7099 "TARGET_AVX"
7100 {
7101 REAL_VALUE_TYPE TWO32r;
7102 rtx x, tmp[6];
7103 int i;
7104
7105 real_ldexp (&TWO32r, &dconst1, 32);
7106 x = const_double_from_real_value (TWO32r, DFmode);
7107
7108 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7109 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7110 tmp[5] = gen_reg_rtx (V4SImode);
7111
7112 for (i = 2; i < 5; i++)
7113 tmp[i] = gen_reg_rtx (V4DFmode);
7114 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
7115 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
7116 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7117 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7118 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
7119 DONE;
7120 })
7121
7122 (define_expand "vec_unpacku_float_hi_v16si"
7123 [(match_operand:V8DF 0 "register_operand")
7124 (match_operand:V16SI 1 "register_operand")]
7125 "TARGET_AVX512F"
7126 {
7127 REAL_VALUE_TYPE TWO32r;
7128 rtx k, x, tmp[4];
7129
7130 real_ldexp (&TWO32r, &dconst1, 32);
7131 x = const_double_from_real_value (TWO32r, DFmode);
7132
7133 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7134 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7135 tmp[2] = gen_reg_rtx (V8DFmode);
7136 tmp[3] = gen_reg_rtx (V8SImode);
7137 k = gen_reg_rtx (QImode);
7138
7139 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
7140 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
7141 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7142 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7143 emit_move_insn (operands[0], tmp[2]);
7144 DONE;
7145 })
7146
7147 (define_expand "vec_unpacku_float_lo_v8si"
7148 [(match_operand:V4DF 0 "register_operand")
7149 (match_operand:V8SI 1 "nonimmediate_operand")]
7150 "TARGET_AVX"
7151 {
7152 REAL_VALUE_TYPE TWO32r;
7153 rtx x, tmp[5];
7154 int i;
7155
7156 real_ldexp (&TWO32r, &dconst1, 32);
7157 x = const_double_from_real_value (TWO32r, DFmode);
7158
7159 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7160 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7161
7162 for (i = 2; i < 5; i++)
7163 tmp[i] = gen_reg_rtx (V4DFmode);
7164 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
7165 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7166 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7167 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
7168 DONE;
7169 })
7170
7171 (define_expand "vec_unpacku_float_lo_v16si"
7172 [(match_operand:V8DF 0 "register_operand")
7173 (match_operand:V16SI 1 "nonimmediate_operand")]
7174 "TARGET_AVX512F"
7175 {
7176 REAL_VALUE_TYPE TWO32r;
7177 rtx k, x, tmp[3];
7178
7179 real_ldexp (&TWO32r, &dconst1, 32);
7180 x = const_double_from_real_value (TWO32r, DFmode);
7181
7182 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7183 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7184 tmp[2] = gen_reg_rtx (V8DFmode);
7185 k = gen_reg_rtx (QImode);
7186
7187 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
7188 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7189 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7190 emit_move_insn (operands[0], tmp[2]);
7191 DONE;
7192 })
7193
7194 (define_expand "vec_pack_trunc_<mode>"
7195 [(set (match_dup 3)
7196 (float_truncate:<sf2dfmode>
7197 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
7198 (set (match_dup 4)
7199 (float_truncate:<sf2dfmode>
7200 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
7201 (set (match_operand:<ssePSmode> 0 "register_operand")
7202 (vec_concat:<ssePSmode>
7203 (match_dup 3)
7204 (match_dup 4)))]
7205 "TARGET_AVX"
7206 {
7207 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
7208 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
7209 })
7210
7211 (define_expand "vec_pack_trunc_v2df"
7212 [(match_operand:V4SF 0 "register_operand")
7213 (match_operand:V2DF 1 "vector_operand")
7214 (match_operand:V2DF 2 "vector_operand")]
7215 "TARGET_SSE2"
7216 {
7217 rtx tmp0, tmp1;
7218
7219 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7220 {
7221 tmp0 = gen_reg_rtx (V4DFmode);
7222 tmp1 = force_reg (V2DFmode, operands[1]);
7223
7224 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7225 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
7226 }
7227 else
7228 {
7229 tmp0 = gen_reg_rtx (V4SFmode);
7230 tmp1 = gen_reg_rtx (V4SFmode);
7231
7232 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
7233 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
7234 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
7235 }
7236 DONE;
7237 })
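;; vec_pack_trunc_v2df prefers a single 256-bit vcvtpd2ps over two 128-bit
;; conversions plus a movlhps merge when AVX is available, 256-bit vectors are
;; not discouraged and the insn is being optimized for speed.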
7238
7239 (define_expand "vec_pack_sfix_trunc_v8df"
7240 [(match_operand:V16SI 0 "register_operand")
7241 (match_operand:V8DF 1 "nonimmediate_operand")
7242 (match_operand:V8DF 2 "nonimmediate_operand")]
7243 "TARGET_AVX512F"
7244 {
7245 rtx r1, r2;
7246
7247 r1 = gen_reg_rtx (V8SImode);
7248 r2 = gen_reg_rtx (V8SImode);
7249
7250 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
7251 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
7252 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7253 DONE;
7254 })
7255
7256 (define_expand "vec_pack_sfix_trunc_v4df"
7257 [(match_operand:V8SI 0 "register_operand")
7258 (match_operand:V4DF 1 "nonimmediate_operand")
7259 (match_operand:V4DF 2 "nonimmediate_operand")]
7260 "TARGET_AVX"
7261 {
7262 rtx r1, r2;
7263
7264 r1 = gen_reg_rtx (V4SImode);
7265 r2 = gen_reg_rtx (V4SImode);
7266
7267 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
7268 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
7269 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7270 DONE;
7271 })
7272
7273 (define_expand "vec_pack_sfix_trunc_v2df"
7274 [(match_operand:V4SI 0 "register_operand")
7275 (match_operand:V2DF 1 "vector_operand")
7276 (match_operand:V2DF 2 "vector_operand")]
7277 "TARGET_SSE2"
7278 {
7279 rtx tmp0, tmp1, tmp2;
7280
7281 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7282 {
7283 tmp0 = gen_reg_rtx (V4DFmode);
7284 tmp1 = force_reg (V2DFmode, operands[1]);
7285
7286 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7287 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
7288 }
7289 else
7290 {
7291 tmp0 = gen_reg_rtx (V4SImode);
7292 tmp1 = gen_reg_rtx (V4SImode);
7293 tmp2 = gen_reg_rtx (V2DImode);
7294
7295 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
7296 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
7297 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7298 gen_lowpart (V2DImode, tmp0),
7299 gen_lowpart (V2DImode, tmp1)));
7300 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7301 }
7302 DONE;
7303 })
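;; In the SSE2 fallback above, each cvttpd2dq leaves its two results in the
;; low 64 bits of the register, so interleaving the low quadwords of the two
;; partial results produces the packed V4SI destination directly.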
7304
7305 (define_mode_attr ssepackfltmode
7306 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
7307
7308 (define_expand "vec_pack_ufix_trunc_<mode>"
7309 [(match_operand:<ssepackfltmode> 0 "register_operand")
7310 (match_operand:VF2 1 "register_operand")
7311 (match_operand:VF2 2 "register_operand")]
7312 "TARGET_SSE2"
7313 {
7314 if (<MODE>mode == V8DFmode)
7315 {
7316 rtx r1, r2;
7317
7318 r1 = gen_reg_rtx (V8SImode);
7319 r2 = gen_reg_rtx (V8SImode);
7320
7321 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
7322 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
7323 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7324 }
7325 else
7326 {
7327 rtx tmp[7];
7328 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
7329 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
7330 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
7331 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
7332 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
7333 {
7334 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
7335 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
7336 }
7337 else
7338 {
7339 tmp[5] = gen_reg_rtx (V8SFmode);
7340 ix86_expand_vec_extract_even_odd (tmp[5],
7341 gen_lowpart (V8SFmode, tmp[2]),
7342 gen_lowpart (V8SFmode, tmp[3]), 0);
7343 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
7344 }
7345 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
7346 operands[0], 0, OPTAB_DIRECT);
7347 if (tmp[6] != operands[0])
7348 emit_move_insn (operands[0], tmp[6]);
7349 }
7350
7351 DONE;
7352 })
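;; For the narrower modes of vec_pack_ufix_trunc_<mode> there is no unsigned
;; truncating conversion; roughly speaking, ix86_expand_adjust_ufix_to_sfix_si
;; biases lanes that do not fit in a signed int into signed range and returns
;; a correction vector, which is XORed into the result after the signed pack
;; to restore the original unsigned values.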
7353
7354 (define_expand "avx512f_vec_pack_sfix_v8df"
7355 [(match_operand:V16SI 0 "register_operand")
7356 (match_operand:V8DF 1 "nonimmediate_operand")
7357 (match_operand:V8DF 2 "nonimmediate_operand")]
7358 "TARGET_AVX512F"
7359 {
7360 rtx r1, r2;
7361
7362 r1 = gen_reg_rtx (V8SImode);
7363 r2 = gen_reg_rtx (V8SImode);
7364
7365 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
7366 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
7367 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7368 DONE;
7369 })
7370
7371 (define_expand "vec_pack_sfix_v4df"
7372 [(match_operand:V8SI 0 "register_operand")
7373 (match_operand:V4DF 1 "nonimmediate_operand")
7374 (match_operand:V4DF 2 "nonimmediate_operand")]
7375 "TARGET_AVX"
7376 {
7377 rtx r1, r2;
7378
7379 r1 = gen_reg_rtx (V4SImode);
7380 r2 = gen_reg_rtx (V4SImode);
7381
7382 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
7383 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
7384 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7385 DONE;
7386 })
7387
7388 (define_expand "vec_pack_sfix_v2df"
7389 [(match_operand:V4SI 0 "register_operand")
7390 (match_operand:V2DF 1 "vector_operand")
7391 (match_operand:V2DF 2 "vector_operand")]
7392 "TARGET_SSE2"
7393 {
7394 rtx tmp0, tmp1, tmp2;
7395
7396 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7397 {
7398 tmp0 = gen_reg_rtx (V4DFmode);
7399 tmp1 = force_reg (V2DFmode, operands[1]);
7400
7401 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7402 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
7403 }
7404 else
7405 {
7406 tmp0 = gen_reg_rtx (V4SImode);
7407 tmp1 = gen_reg_rtx (V4SImode);
7408 tmp2 = gen_reg_rtx (V2DImode);
7409
7410 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
7411 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
7412 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7413 gen_lowpart (V2DImode, tmp0),
7414 gen_lowpart (V2DImode, tmp1)));
7415 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7416 }
7417 DONE;
7418 })
7419
7420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7421 ;;
7422 ;; Parallel single-precision floating point element swizzling
7423 ;;
7424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7425
7426 (define_expand "sse_movhlps_exp"
7427 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7428 (vec_select:V4SF
7429 (vec_concat:V8SF
7430 (match_operand:V4SF 1 "nonimmediate_operand")
7431 (match_operand:V4SF 2 "nonimmediate_operand"))
7432 (parallel [(const_int 6)
7433 (const_int 7)
7434 (const_int 2)
7435 (const_int 3)])))]
7436 "TARGET_SSE"
7437 {
7438 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7439
7440 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
7441
7442 /* Fix up the destination if needed. */
7443 if (dst != operands[0])
7444 emit_move_insn (operands[0], dst);
7445
7446 DONE;
7447 })
7448
7449 (define_insn "sse_movhlps"
7450 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7451 (vec_select:V4SF
7452 (vec_concat:V8SF
7453 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7454 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
7455 (parallel [(const_int 6)
7456 (const_int 7)
7457 (const_int 2)
7458 (const_int 3)])))]
7459 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7460 "@
7461 movhlps\t{%2, %0|%0, %2}
7462 vmovhlps\t{%2, %1, %0|%0, %1, %2}
7463 movlps\t{%H2, %0|%0, %H2}
7464 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
7465 %vmovhps\t{%2, %0|%q0, %2}"
7466 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7467 (set_attr "type" "ssemov")
7468 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7469 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
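;; In the pattern above, the selection {6,7,2,3} from the concatenation means
;; the destination gets the high pair of operand 2 in its low half while its
;; own high pair (operand 1) is preserved; that is the movhlps "high of source
;; to low of destination" behaviour.  movlhps below is the counterpart:
;; {0,1,4,5} keeps operand 1's low pair and copies operand 2's low pair into
;; the high half.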
7470
7471 (define_expand "sse_movlhps_exp"
7472 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7473 (vec_select:V4SF
7474 (vec_concat:V8SF
7475 (match_operand:V4SF 1 "nonimmediate_operand")
7476 (match_operand:V4SF 2 "nonimmediate_operand"))
7477 (parallel [(const_int 0)
7478 (const_int 1)
7479 (const_int 4)
7480 (const_int 5)])))]
7481 "TARGET_SSE"
7482 {
7483 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7484
7485 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
7486
7487 /* Fix up the destination if needed. */
7488 if (dst != operands[0])
7489 emit_move_insn (operands[0], dst);
7490
7491 DONE;
7492 })
7493
7494 (define_insn "sse_movlhps"
7495 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7496 (vec_select:V4SF
7497 (vec_concat:V8SF
7498 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7499 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
7500 (parallel [(const_int 0)
7501 (const_int 1)
7502 (const_int 4)
7503 (const_int 5)])))]
7504 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
7505 "@
7506 movlhps\t{%2, %0|%0, %2}
7507 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7508 movhps\t{%2, %0|%0, %q2}
7509 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7510 %vmovlps\t{%2, %H0|%H0, %2}"
7511 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7512 (set_attr "type" "ssemov")
7513 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7514 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7515
7516 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
7517 [(set (match_operand:V16SF 0 "register_operand" "=v")
7518 (vec_select:V16SF
7519 (vec_concat:V32SF
7520 (match_operand:V16SF 1 "register_operand" "v")
7521 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7522 (parallel [(const_int 2) (const_int 18)
7523 (const_int 3) (const_int 19)
7524 (const_int 6) (const_int 22)
7525 (const_int 7) (const_int 23)
7526 (const_int 10) (const_int 26)
7527 (const_int 11) (const_int 27)
7528 (const_int 14) (const_int 30)
7529 (const_int 15) (const_int 31)])))]
7530 "TARGET_AVX512F"
7531 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7532 [(set_attr "type" "sselog")
7533 (set_attr "prefix" "evex")
7534 (set_attr "mode" "V16SF")])
7535
7536 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
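;; For example, with ymm1 = {a0,...,a7} and ymm2 = {b0,...,b7}, vunpckhps
;; produces {a2,b2,a3,b3, a6,b6,a7,b7}: the high pairs of each 128-bit lane,
;; never elements crossing from one lane into the other.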
7537 (define_insn "avx_unpckhps256<mask_name>"
7538 [(set (match_operand:V8SF 0 "register_operand" "=v")
7539 (vec_select:V8SF
7540 (vec_concat:V16SF
7541 (match_operand:V8SF 1 "register_operand" "v")
7542 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7543 (parallel [(const_int 2) (const_int 10)
7544 (const_int 3) (const_int 11)
7545 (const_int 6) (const_int 14)
7546 (const_int 7) (const_int 15)])))]
7547 "TARGET_AVX && <mask_avx512vl_condition>"
7548 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7549 [(set_attr "type" "sselog")
7550 (set_attr "prefix" "vex")
7551 (set_attr "mode" "V8SF")])
7552
7553 (define_expand "vec_interleave_highv8sf"
7554 [(set (match_dup 3)
7555 (vec_select:V8SF
7556 (vec_concat:V16SF
7557 (match_operand:V8SF 1 "register_operand")
7558 (match_operand:V8SF 2 "nonimmediate_operand"))
7559 (parallel [(const_int 0) (const_int 8)
7560 (const_int 1) (const_int 9)
7561 (const_int 4) (const_int 12)
7562 (const_int 5) (const_int 13)])))
7563 (set (match_dup 4)
7564 (vec_select:V8SF
7565 (vec_concat:V16SF
7566 (match_dup 1)
7567 (match_dup 2))
7568 (parallel [(const_int 2) (const_int 10)
7569 (const_int 3) (const_int 11)
7570 (const_int 6) (const_int 14)
7571 (const_int 7) (const_int 15)])))
7572 (set (match_operand:V8SF 0 "register_operand")
7573 (vec_select:V8SF
7574 (vec_concat:V16SF
7575 (match_dup 3)
7576 (match_dup 4))
7577 (parallel [(const_int 4) (const_int 5)
7578 (const_int 6) (const_int 7)
7579 (const_int 12) (const_int 13)
7580 (const_int 14) (const_int 15)])))]
7581 "TARGET_AVX"
7582 {
7583 operands[3] = gen_reg_rtx (V8SFmode);
7584 operands[4] = gen_reg_rtx (V8SFmode);
7585 })
7586
7587 (define_insn "vec_interleave_highv4sf<mask_name>"
7588 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7589 (vec_select:V4SF
7590 (vec_concat:V8SF
7591 (match_operand:V4SF 1 "register_operand" "0,v")
7592 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7593 (parallel [(const_int 2) (const_int 6)
7594 (const_int 3) (const_int 7)])))]
7595 "TARGET_SSE && <mask_avx512vl_condition>"
7596 "@
7597 unpckhps\t{%2, %0|%0, %2}
7598 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7599 [(set_attr "isa" "noavx,avx")
7600 (set_attr "type" "sselog")
7601 (set_attr "prefix" "orig,vex")
7602 (set_attr "mode" "V4SF")])
7603
7604 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7605 [(set (match_operand:V16SF 0 "register_operand" "=v")
7606 (vec_select:V16SF
7607 (vec_concat:V32SF
7608 (match_operand:V16SF 1 "register_operand" "v")
7609 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7610 (parallel [(const_int 0) (const_int 16)
7611 (const_int 1) (const_int 17)
7612 (const_int 4) (const_int 20)
7613 (const_int 5) (const_int 21)
7614 (const_int 8) (const_int 24)
7615 (const_int 9) (const_int 25)
7616 (const_int 12) (const_int 28)
7617 (const_int 13) (const_int 29)])))]
7618 "TARGET_AVX512F"
7619 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7620 [(set_attr "type" "sselog")
7621 (set_attr "prefix" "evex")
7622 (set_attr "mode" "V16SF")])
7623
7624 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7625 (define_insn "avx_unpcklps256<mask_name>"
7626 [(set (match_operand:V8SF 0 "register_operand" "=v")
7627 (vec_select:V8SF
7628 (vec_concat:V16SF
7629 (match_operand:V8SF 1 "register_operand" "v")
7630 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7631 (parallel [(const_int 0) (const_int 8)
7632 (const_int 1) (const_int 9)
7633 (const_int 4) (const_int 12)
7634 (const_int 5) (const_int 13)])))]
7635 "TARGET_AVX && <mask_avx512vl_condition>"
7636 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7637 [(set_attr "type" "sselog")
7638 (set_attr "prefix" "vex")
7639 (set_attr "mode" "V8SF")])
7640
7641 (define_insn "unpcklps128_mask"
7642 [(set (match_operand:V4SF 0 "register_operand" "=v")
7643 (vec_merge:V4SF
7644 (vec_select:V4SF
7645 (vec_concat:V8SF
7646 (match_operand:V4SF 1 "register_operand" "v")
7647 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7648 (parallel [(const_int 0) (const_int 4)
7649 (const_int 1) (const_int 5)]))
7650 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7651 (match_operand:QI 4 "register_operand" "Yk")))]
7652 "TARGET_AVX512VL"
7653 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7654 [(set_attr "type" "sselog")
7655 (set_attr "prefix" "evex")
7656 (set_attr "mode" "V4SF")])
7657
7658 (define_expand "vec_interleave_lowv8sf"
7659 [(set (match_dup 3)
7660 (vec_select:V8SF
7661 (vec_concat:V16SF
7662 (match_operand:V8SF 1 "register_operand")
7663 (match_operand:V8SF 2 "nonimmediate_operand"))
7664 (parallel [(const_int 0) (const_int 8)
7665 (const_int 1) (const_int 9)
7666 (const_int 4) (const_int 12)
7667 (const_int 5) (const_int 13)])))
7668 (set (match_dup 4)
7669 (vec_select:V8SF
7670 (vec_concat:V16SF
7671 (match_dup 1)
7672 (match_dup 2))
7673 (parallel [(const_int 2) (const_int 10)
7674 (const_int 3) (const_int 11)
7675 (const_int 6) (const_int 14)
7676 (const_int 7) (const_int 15)])))
7677 (set (match_operand:V8SF 0 "register_operand")
7678 (vec_select:V8SF
7679 (vec_concat:V16SF
7680 (match_dup 3)
7681 (match_dup 4))
7682 (parallel [(const_int 0) (const_int 1)
7683 (const_int 2) (const_int 3)
7684 (const_int 8) (const_int 9)
7685 (const_int 10) (const_int 11)])))]
7686 "TARGET_AVX"
7687 {
7688 operands[3] = gen_reg_rtx (V8SFmode);
7689 operands[4] = gen_reg_rtx (V8SFmode);
7690 })
7691
7692 (define_insn "vec_interleave_lowv4sf"
7693 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7694 (vec_select:V4SF
7695 (vec_concat:V8SF
7696 (match_operand:V4SF 1 "register_operand" "0,v")
7697 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7698 (parallel [(const_int 0) (const_int 4)
7699 (const_int 1) (const_int 5)])))]
7700 "TARGET_SSE"
7701 "@
7702 unpcklps\t{%2, %0|%0, %2}
7703 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7704 [(set_attr "isa" "noavx,avx")
7705 (set_attr "type" "sselog")
7706 (set_attr "prefix" "orig,maybe_evex")
7707 (set_attr "mode" "V4SF")])
7708
7709 ;; These are modeled with the same vec_concat as the others so that we
7710 ;; capture users of shufps that can use the new instructions.
7711 (define_insn "avx_movshdup256<mask_name>"
7712 [(set (match_operand:V8SF 0 "register_operand" "=v")
7713 (vec_select:V8SF
7714 (vec_concat:V16SF
7715 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7716 (match_dup 1))
7717 (parallel [(const_int 1) (const_int 1)
7718 (const_int 3) (const_int 3)
7719 (const_int 5) (const_int 5)
7720 (const_int 7) (const_int 7)])))]
7721 "TARGET_AVX && <mask_avx512vl_condition>"
7722 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7723 [(set_attr "type" "sse")
7724 (set_attr "prefix" "vex")
7725 (set_attr "mode" "V8SF")])
7726
7727 (define_insn "sse3_movshdup<mask_name>"
7728 [(set (match_operand:V4SF 0 "register_operand" "=v")
7729 (vec_select:V4SF
7730 (vec_concat:V8SF
7731 (match_operand:V4SF 1 "vector_operand" "vBm")
7732 (match_dup 1))
7733 (parallel [(const_int 1)
7734 (const_int 1)
7735 (const_int 7)
7736 (const_int 7)])))]
7737 "TARGET_SSE3 && <mask_avx512vl_condition>"
7738 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7739 [(set_attr "type" "sse")
7740 (set_attr "prefix_rep" "1")
7741 (set_attr "prefix" "maybe_vex")
7742 (set_attr "mode" "V4SF")])
7743
7744 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7745 [(set (match_operand:V16SF 0 "register_operand" "=v")
7746 (vec_select:V16SF
7747 (vec_concat:V32SF
7748 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7749 (match_dup 1))
7750 (parallel [(const_int 1) (const_int 1)
7751 (const_int 3) (const_int 3)
7752 (const_int 5) (const_int 5)
7753 (const_int 7) (const_int 7)
7754 (const_int 9) (const_int 9)
7755 (const_int 11) (const_int 11)
7756 (const_int 13) (const_int 13)
7757 (const_int 15) (const_int 15)])))]
7758 "TARGET_AVX512F"
7759 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7760 [(set_attr "type" "sse")
7761 (set_attr "prefix" "evex")
7762 (set_attr "mode" "V16SF")])
7763
7764 (define_insn "avx_movsldup256<mask_name>"
7765 [(set (match_operand:V8SF 0 "register_operand" "=v")
7766 (vec_select:V8SF
7767 (vec_concat:V16SF
7768 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7769 (match_dup 1))
7770 (parallel [(const_int 0) (const_int 0)
7771 (const_int 2) (const_int 2)
7772 (const_int 4) (const_int 4)
7773 (const_int 6) (const_int 6)])))]
7774 "TARGET_AVX && <mask_avx512vl_condition>"
7775 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7776 [(set_attr "type" "sse")
7777 (set_attr "prefix" "vex")
7778 (set_attr "mode" "V8SF")])
7779
7780 (define_insn "sse3_movsldup<mask_name>"
7781 [(set (match_operand:V4SF 0 "register_operand" "=v")
7782 (vec_select:V4SF
7783 (vec_concat:V8SF
7784 (match_operand:V4SF 1 "vector_operand" "vBm")
7785 (match_dup 1))
7786 (parallel [(const_int 0)
7787 (const_int 0)
7788 (const_int 6)
7789 (const_int 6)])))]
7790 "TARGET_SSE3 && <mask_avx512vl_condition>"
7791 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7792 [(set_attr "type" "sse")
7793 (set_attr "prefix_rep" "1")
7794 (set_attr "prefix" "maybe_vex")
7795 (set_attr "mode" "V4SF")])
7796
7797 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7798 [(set (match_operand:V16SF 0 "register_operand" "=v")
7799 (vec_select:V16SF
7800 (vec_concat:V32SF
7801 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7802 (match_dup 1))
7803 (parallel [(const_int 0) (const_int 0)
7804 (const_int 2) (const_int 2)
7805 (const_int 4) (const_int 4)
7806 (const_int 6) (const_int 6)
7807 (const_int 8) (const_int 8)
7808 (const_int 10) (const_int 10)
7809 (const_int 12) (const_int 12)
7810 (const_int 14) (const_int 14)])))]
7811 "TARGET_AVX512F"
7812 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7813 [(set_attr "type" "sse")
7814 (set_attr "prefix" "evex")
7815 (set_attr "mode" "V16SF")])
7816
7817 (define_expand "avx_shufps256<mask_expand4_name>"
7818 [(match_operand:V8SF 0 "register_operand")
7819 (match_operand:V8SF 1 "register_operand")
7820 (match_operand:V8SF 2 "nonimmediate_operand")
7821 (match_operand:SI 3 "const_int_operand")]
7822 "TARGET_AVX"
7823 {
7824 int mask = INTVAL (operands[3]);
7825 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7826 operands[1],
7827 operands[2],
7828 GEN_INT ((mask >> 0) & 3),
7829 GEN_INT ((mask >> 2) & 3),
7830 GEN_INT (((mask >> 4) & 3) + 8),
7831 GEN_INT (((mask >> 6) & 3) + 8),
7832 GEN_INT (((mask >> 0) & 3) + 4),
7833 GEN_INT (((mask >> 2) & 3) + 4),
7834 GEN_INT (((mask >> 4) & 3) + 12),
7835 GEN_INT (((mask >> 6) & 3) + 12)
7836 <mask_expand4_args>));
7837 DONE;
7838 })
7839
7840 ;; Each 2-bit field of the mask selects 2 elements, one per 128-bit lane.
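;; A worked example (illustration only): with immediate 0xe4 the expander
;; above passes the selectors {0, 1, 10, 11, 4, 5, 14, 15}, so the result is
;;   { a0, a1, b2, b3,  a4, a5, b6, b7 }
;; where a is operand 1 and b is operand 2; result elements k and k+4 are
;; chosen by the same 2-bit field, one from each 128-bit lane.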
7841 (define_insn "avx_shufps256_1<mask_name>"
7842 [(set (match_operand:V8SF 0 "register_operand" "=v")
7843 (vec_select:V8SF
7844 (vec_concat:V16SF
7845 (match_operand:V8SF 1 "register_operand" "v")
7846 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7847 (parallel [(match_operand 3 "const_0_to_3_operand" )
7848 (match_operand 4 "const_0_to_3_operand" )
7849 (match_operand 5 "const_8_to_11_operand" )
7850 (match_operand 6 "const_8_to_11_operand" )
7851 (match_operand 7 "const_4_to_7_operand" )
7852 (match_operand 8 "const_4_to_7_operand" )
7853 (match_operand 9 "const_12_to_15_operand")
7854 (match_operand 10 "const_12_to_15_operand")])))]
7855 "TARGET_AVX
7856 && <mask_avx512vl_condition>
7857 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7858 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7859 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7860 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7861 {
7862 int mask;
7863 mask = INTVAL (operands[3]);
7864 mask |= INTVAL (operands[4]) << 2;
7865 mask |= (INTVAL (operands[5]) - 8) << 4;
7866 mask |= (INTVAL (operands[6]) - 8) << 6;
7867 operands[3] = GEN_INT (mask);
7868
7869 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7870 }
7871 [(set_attr "type" "sseshuf")
7872 (set_attr "length_immediate" "1")
7873 (set_attr "prefix" "<mask_prefix>")
7874 (set_attr "mode" "V8SF")])
7875
7876 (define_expand "sse_shufps<mask_expand4_name>"
7877 [(match_operand:V4SF 0 "register_operand")
7878 (match_operand:V4SF 1 "register_operand")
7879 (match_operand:V4SF 2 "vector_operand")
7880 (match_operand:SI 3 "const_int_operand")]
7881 "TARGET_SSE"
7882 {
7883 int mask = INTVAL (operands[3]);
7884 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
7885 operands[1],
7886 operands[2],
7887 GEN_INT ((mask >> 0) & 3),
7888 GEN_INT ((mask >> 2) & 3),
7889 GEN_INT (((mask >> 4) & 3) + 4),
7890 GEN_INT (((mask >> 6) & 3) + 4)
7891 <mask_expand4_args>));
7892 DONE;
7893 })
7894
7895 (define_insn "sse_shufps_v4sf_mask"
7896 [(set (match_operand:V4SF 0 "register_operand" "=v")
7897 (vec_merge:V4SF
7898 (vec_select:V4SF
7899 (vec_concat:V8SF
7900 (match_operand:V4SF 1 "register_operand" "v")
7901 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7902 (parallel [(match_operand 3 "const_0_to_3_operand")
7903 (match_operand 4 "const_0_to_3_operand")
7904 (match_operand 5 "const_4_to_7_operand")
7905 (match_operand 6 "const_4_to_7_operand")]))
7906 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
7907 (match_operand:QI 8 "register_operand" "Yk")))]
7908 "TARGET_AVX512VL"
7909 {
7910 int mask = 0;
7911 mask |= INTVAL (operands[3]) << 0;
7912 mask |= INTVAL (operands[4]) << 2;
7913 mask |= (INTVAL (operands[5]) - 4) << 4;
7914 mask |= (INTVAL (operands[6]) - 4) << 6;
7915 operands[3] = GEN_INT (mask);
7916
7917 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
7918 }
7919 [(set_attr "type" "sseshuf")
7920 (set_attr "length_immediate" "1")
7921 (set_attr "prefix" "evex")
7922 (set_attr "mode" "V4SF")])
7923
7924 (define_insn "sse_shufps_<mode>"
7925 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
7926 (vec_select:VI4F_128
7927 (vec_concat:<ssedoublevecmode>
7928 (match_operand:VI4F_128 1 "register_operand" "0,v")
7929 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
7930 (parallel [(match_operand 3 "const_0_to_3_operand")
7931 (match_operand 4 "const_0_to_3_operand")
7932 (match_operand 5 "const_4_to_7_operand")
7933 (match_operand 6 "const_4_to_7_operand")])))]
7934 "TARGET_SSE"
7935 {
7936 int mask = 0;
7937 mask |= INTVAL (operands[3]) << 0;
7938 mask |= INTVAL (operands[4]) << 2;
7939 mask |= (INTVAL (operands[5]) - 4) << 4;
7940 mask |= (INTVAL (operands[6]) - 4) << 6;
7941 operands[3] = GEN_INT (mask);
7942
7943 switch (which_alternative)
7944 {
7945 case 0:
7946 return "shufps\t{%3, %2, %0|%0, %2, %3}";
7947 case 1:
7948 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7949 default:
7950 gcc_unreachable ();
7951 }
7952 }
7953 [(set_attr "isa" "noavx,avx")
7954 (set_attr "type" "sseshuf")
7955 (set_attr "length_immediate" "1")
7956 (set_attr "prefix" "orig,maybe_evex")
7957 (set_attr "mode" "V4SF")])
7958
7959 (define_insn "sse_storehps"
7960 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7961 (vec_select:V2SF
7962 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7963 (parallel [(const_int 2) (const_int 3)])))]
7964 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7965 "@
7966 %vmovhps\t{%1, %0|%q0, %1}
7967 %vmovhlps\t{%1, %d0|%d0, %1}
7968 %vmovlps\t{%H1, %d0|%d0, %H1}"
7969 [(set_attr "type" "ssemov")
7970 (set_attr "prefix" "maybe_vex")
7971 (set_attr "mode" "V2SF,V4SF,V2SF")])
7972
7973 (define_expand "sse_loadhps_exp"
7974 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7975 (vec_concat:V4SF
7976 (vec_select:V2SF
7977 (match_operand:V4SF 1 "nonimmediate_operand")
7978 (parallel [(const_int 0) (const_int 1)]))
7979 (match_operand:V2SF 2 "nonimmediate_operand")))]
7980 "TARGET_SSE"
7981 {
7982 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7983
7984 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7985
7986 /* Fix up the destination if needed. */
7987 if (dst != operands[0])
7988 emit_move_insn (operands[0], dst);
7989
7990 DONE;
7991 })
7992
7993 (define_insn "sse_loadhps"
7994 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7995 (vec_concat:V4SF
7996 (vec_select:V2SF
7997 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7998 (parallel [(const_int 0) (const_int 1)]))
7999 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
8000 "TARGET_SSE"
8001 "@
8002 movhps\t{%2, %0|%0, %q2}
8003 vmovhps\t{%2, %1, %0|%0, %1, %q2}
8004 movlhps\t{%2, %0|%0, %2}
8005 vmovlhps\t{%2, %1, %0|%0, %1, %2}
8006 %vmovlps\t{%2, %H0|%H0, %2}"
8007 [(set_attr "isa" "noavx,avx,noavx,avx,*")
8008 (set_attr "type" "ssemov")
8009 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
8010 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
8011
8012 (define_insn "sse_storelps"
8013 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
8014 (vec_select:V2SF
8015 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
8016 (parallel [(const_int 0) (const_int 1)])))]
8017 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8018 "@
8019 %vmovlps\t{%1, %0|%q0, %1}
8020 %vmovaps\t{%1, %0|%0, %1}
8021 %vmovlps\t{%1, %d0|%d0, %q1}"
8022 [(set_attr "type" "ssemov")
8023 (set_attr "prefix" "maybe_vex")
8024 (set_attr "mode" "V2SF,V4SF,V2SF")])
8025
8026 (define_expand "sse_loadlps_exp"
8027 [(set (match_operand:V4SF 0 "nonimmediate_operand")
8028 (vec_concat:V4SF
8029 (match_operand:V2SF 2 "nonimmediate_operand")
8030 (vec_select:V2SF
8031 (match_operand:V4SF 1 "nonimmediate_operand")
8032 (parallel [(const_int 2) (const_int 3)]))))]
8033 "TARGET_SSE"
8034 {
8035 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
8036
8037 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
8038
8039 /* Fix up the destination if needed. */
8040 if (dst != operands[0])
8041 emit_move_insn (operands[0], dst);
8042
8043 DONE;
8044 })
8045
8046 (define_insn "sse_loadlps"
8047 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
8048 (vec_concat:V4SF
8049 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
8050 (vec_select:V2SF
8051 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
8052 (parallel [(const_int 2) (const_int 3)]))))]
8053 "TARGET_SSE"
8054 "@
8055 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
8056 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
8057 movlps\t{%2, %0|%0, %q2}
8058 vmovlps\t{%2, %1, %0|%0, %1, %q2}
8059 %vmovlps\t{%2, %0|%q0, %2}"
8060 [(set_attr "isa" "noavx,avx,noavx,avx,*")
8061 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
8062 (set (attr "length_immediate")
8063 (if_then_else (eq_attr "alternative" "0,1")
8064 (const_string "1")
8065 (const_string "*")))
8066 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
8067 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
8068
8069 (define_insn "sse_movss"
8070 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
8071 (vec_merge:V4SF
8072 (match_operand:V4SF 2 "register_operand" " x,v")
8073 (match_operand:V4SF 1 "register_operand" " 0,v")
8074 (const_int 1)))]
8075 "TARGET_SSE"
8076 "@
8077 movss\t{%2, %0|%0, %2}
8078 vmovss\t{%2, %1, %0|%0, %1, %2}"
8079 [(set_attr "isa" "noavx,avx")
8080 (set_attr "type" "ssemov")
8081 (set_attr "prefix" "orig,maybe_evex")
8082 (set_attr "mode" "SF")])
8083
8084 (define_insn "avx2_vec_dup<mode>"
8085 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
8086 (vec_duplicate:VF1_128_256
8087 (vec_select:SF
8088 (match_operand:V4SF 1 "register_operand" "v")
8089 (parallel [(const_int 0)]))))]
8090 "TARGET_AVX2"
8091 "vbroadcastss\t{%1, %0|%0, %1}"
8092 [(set_attr "type" "sselog1")
8093 (set_attr "prefix" "maybe_evex")
8094 (set_attr "mode" "<MODE>")])
8095
8096 (define_insn "avx2_vec_dupv8sf_1"
8097 [(set (match_operand:V8SF 0 "register_operand" "=v")
8098 (vec_duplicate:V8SF
8099 (vec_select:SF
8100 (match_operand:V8SF 1 "register_operand" "v")
8101 (parallel [(const_int 0)]))))]
8102 "TARGET_AVX2"
8103 "vbroadcastss\t{%x1, %0|%0, %x1}"
8104 [(set_attr "type" "sselog1")
8105 (set_attr "prefix" "maybe_evex")
8106 (set_attr "mode" "V8SF")])
8107
8108 (define_insn "avx512f_vec_dup<mode>_1"
8109 [(set (match_operand:VF_512 0 "register_operand" "=v")
8110 (vec_duplicate:VF_512
8111 (vec_select:<ssescalarmode>
8112 (match_operand:VF_512 1 "register_operand" "v")
8113 (parallel [(const_int 0)]))))]
8114 "TARGET_AVX512F"
8115 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
8116 [(set_attr "type" "sselog1")
8117 (set_attr "prefix" "evex")
8118 (set_attr "mode" "<MODE>")])
8119
8120 ;; Although insertps also accepts a register source, we prefer
8121 ;; unpcklps with a register source since it is shorter.
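;; (Rough size comparison, assuming the legacy encodings: unpcklps is
;; 0F 14 /r, 3 bytes in its reg-reg form, while insertps is
;; 66 0F 3A 21 /r ib, 6 bytes.)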
8122 (define_insn "*vec_concatv2sf_sse4_1"
8123 [(set (match_operand:V2SF 0 "register_operand"
8124 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
8125 (vec_concat:V2SF
8126 (match_operand:SF 1 "nonimmediate_operand"
8127 " 0, 0,Yv, 0,0, v,m, 0 , m")
8128 (match_operand:SF 2 "nonimm_or_0_operand"
8129 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
8130 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8131 "@
8132 unpcklps\t{%2, %0|%0, %2}
8133 unpcklps\t{%2, %0|%0, %2}
8134 vunpcklps\t{%2, %1, %0|%0, %1, %2}
8135 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
8136 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
8137 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
8138 %vmovss\t{%1, %0|%0, %1}
8139 punpckldq\t{%2, %0|%0, %2}
8140 movd\t{%1, %0|%0, %1}"
8141 [(set (attr "isa")
8142 (cond [(eq_attr "alternative" "0,1,3,4")
8143 (const_string "noavx")
8144 (eq_attr "alternative" "2,5")
8145 (const_string "avx")
8146 ]
8147 (const_string "*")))
8148 (set (attr "type")
8149 (cond [(eq_attr "alternative" "6")
8150 (const_string "ssemov")
8151 (eq_attr "alternative" "7")
8152 (const_string "mmxcvt")
8153 (eq_attr "alternative" "8")
8154 (const_string "mmxmov")
8155 ]
8156 (const_string "sselog")))
8157 (set (attr "mmx_isa")
8158 (if_then_else (eq_attr "alternative" "7,8")
8159 (const_string "native")
8160 (const_string "*")))
8161 (set (attr "prefix_data16")
8162 (if_then_else (eq_attr "alternative" "3,4")
8163 (const_string "1")
8164 (const_string "*")))
8165 (set (attr "prefix_extra")
8166 (if_then_else (eq_attr "alternative" "3,4,5")
8167 (const_string "1")
8168 (const_string "*")))
8169 (set (attr "length_immediate")
8170 (if_then_else (eq_attr "alternative" "3,4,5")
8171 (const_string "1")
8172 (const_string "*")))
8173 (set (attr "prefix")
8174 (cond [(eq_attr "alternative" "2,5")
8175 (const_string "maybe_evex")
8176 (eq_attr "alternative" "6")
8177 (const_string "maybe_vex")
8178 ]
8179 (const_string "orig")))
8180 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
8181
8182 ;; ??? In theory we can match memory for the MMX alternative, but allowing
8183 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
8184 ;; alternatives pretty much forces the MMX alternative to be chosen.
8185 (define_insn "*vec_concatv2sf_sse"
8186 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
8187 (vec_concat:V2SF
8188 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
8189 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
8190 "TARGET_SSE"
8191 "@
8192 unpcklps\t{%2, %0|%0, %2}
8193 movss\t{%1, %0|%0, %1}
8194 punpckldq\t{%2, %0|%0, %2}
8195 movd\t{%1, %0|%0, %1}"
8196 [(set_attr "mmx_isa" "*,*,native,native")
8197 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
8198 (set_attr "mode" "V4SF,SF,DI,DI")])
8199
8200 (define_insn "*vec_concatv4sf"
8201 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
8202 (vec_concat:V4SF
8203 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
8204 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
8205 "TARGET_SSE"
8206 "@
8207 movlhps\t{%2, %0|%0, %2}
8208 vmovlhps\t{%2, %1, %0|%0, %1, %2}
8209 movhps\t{%2, %0|%0, %q2}
8210 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
8211 [(set_attr "isa" "noavx,avx,noavx,avx")
8212 (set_attr "type" "ssemov")
8213 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
8214 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
8215
8216 (define_insn "*vec_concatv4sf_0"
8217 [(set (match_operand:V4SF 0 "register_operand" "=v")
8218 (vec_concat:V4SF
8219 (match_operand:V2SF 1 "nonimmediate_operand" "vm")
8220 (match_operand:V2SF 2 "const0_operand" " C")))]
8221 "TARGET_SSE2"
8222 "%vmovq\t{%1, %0|%0, %1}"
8223 [(set_attr "type" "ssemov")
8224 (set_attr "prefix" "maybe_vex")
8225 (set_attr "mode" "DF")])
8226
8227 ;; Avoid combining registers from different units in a single alternative;
8228 ;; see the comment above the inline_secondary_memory_needed function in i386.c.
8229 (define_insn "vec_set<mode>_0"
8230 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
8231 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
8232 (vec_merge:VI4F_128
8233 (vec_duplicate:VI4F_128
8234 (match_operand:<ssescalarmode> 2 "general_operand"
8235 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
8236 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
8237 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
8238 (const_int 1)))]
8239 "TARGET_SSE"
8240 "@
8241 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8242 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8243 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
8244 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
8245 %vmovd\t{%2, %0|%0, %2}
8246 movss\t{%2, %0|%0, %2}
8247 movss\t{%2, %0|%0, %2}
8248 vmovss\t{%2, %1, %0|%0, %1, %2}
8249 pinsrd\t{$0, %2, %0|%0, %2, 0}
8250 pinsrd\t{$0, %2, %0|%0, %2, 0}
8251 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
8252 #
8253 #
8254 #"
8255 [(set (attr "isa")
8256 (cond [(eq_attr "alternative" "0,1,8,9")
8257 (const_string "sse4_noavx")
8258 (eq_attr "alternative" "2,7,10")
8259 (const_string "avx")
8260 (eq_attr "alternative" "3,4")
8261 (const_string "sse2")
8262 (eq_attr "alternative" "5,6")
8263 (const_string "noavx")
8264 ]
8265 (const_string "*")))
8266 (set (attr "type")
8267 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
8268 (const_string "sselog")
8269 (eq_attr "alternative" "12")
8270 (const_string "imov")
8271 (eq_attr "alternative" "13")
8272 (const_string "fmov")
8273 ]
8274 (const_string "ssemov")))
8275 (set (attr "prefix_extra")
8276 (if_then_else (eq_attr "alternative" "8,9,10")
8277 (const_string "1")
8278 (const_string "*")))
8279 (set (attr "length_immediate")
8280 (if_then_else (eq_attr "alternative" "8,9,10")
8281 (const_string "1")
8282 (const_string "*")))
8283 (set (attr "prefix")
8284 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
8285 (const_string "orig")
8286 (eq_attr "alternative" "2")
8287 (const_string "maybe_evex")
8288 (eq_attr "alternative" "3,4")
8289 (const_string "maybe_vex")
8290 (eq_attr "alternative" "7,10")
8291 (const_string "vex")
8292 ]
8293 (const_string "*")))
8294 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
8295 (set (attr "preferred_for_speed")
8296 (cond [(eq_attr "alternative" "4")
8297 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8298 ]
8299 (symbol_ref "true")))])
8300
8301 ;; A subset is vec_setv4sf.
8302 (define_insn "*vec_setv4sf_sse4_1"
8303 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8304 (vec_merge:V4SF
8305 (vec_duplicate:V4SF
8306 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
8307 (match_operand:V4SF 1 "register_operand" "0,0,v")
8308 (match_operand:SI 3 "const_int_operand")))]
8309 "TARGET_SSE4_1
8310 && ((unsigned) exact_log2 (INTVAL (operands[3]))
8311 < GET_MODE_NUNITS (V4SFmode))"
8312 {
8313 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
8314 switch (which_alternative)
8315 {
8316 case 0:
8317 case 1:
8318 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8319 case 2:
8320 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8321 default:
8322 gcc_unreachable ();
8323 }
8324 }
8325 [(set_attr "isa" "noavx,noavx,avx")
8326 (set_attr "type" "sselog")
8327 (set_attr "prefix_data16" "1,1,*")
8328 (set_attr "prefix_extra" "1")
8329 (set_attr "length_immediate" "1")
8330 (set_attr "prefix" "orig,orig,maybe_evex")
8331 (set_attr "mode" "V4SF")])
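;; For instance (illustration only): operands[3] == 4 means bit 2 of the
;; merge mask is set, exact_log2 yields 2, and the insertps immediate becomes
;; 0x20, i.e. insert the scalar into element 2 with an empty zero mask.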
8332
8333 ;; All of vinsertps, vmovss and vmovd also clear the higher bits.
8334 (define_insn "vec_set<mode>_0"
8335 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
8336 (vec_merge:VI4F_256_512
8337 (vec_duplicate:VI4F_256_512
8338 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
8339 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
8340 (const_int 1)))]
8341 "TARGET_AVX"
8342 "@
8343 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
8344 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
8345 vmovd\t{%2, %x0|%x0, %2}"
8346 [(set (attr "type")
8347 (if_then_else (eq_attr "alternative" "0")
8348 (const_string "sselog")
8349 (const_string "ssemov")))
8350 (set_attr "prefix" "maybe_evex")
8351 (set_attr "mode" "SF,<ssescalarmode>,SI")
8352 (set (attr "preferred_for_speed")
8353 (cond [(eq_attr "alternative" "2")
8354 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8355 ]
8356 (symbol_ref "true")))])
8357
8358 (define_insn "sse4_1_insertps"
8359 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8360 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
8361 (match_operand:V4SF 1 "register_operand" "0,0,v")
8362 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
8363 UNSPEC_INSERTPS))]
8364 "TARGET_SSE4_1"
8365 {
8366 if (MEM_P (operands[2]))
8367 {
8368 unsigned count_s = INTVAL (operands[3]) >> 6;
8369 if (count_s)
8370 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
8371 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
8372 }
8373 switch (which_alternative)
8374 {
8375 case 0:
8376 case 1:
8377 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8378 case 2:
8379 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8380 default:
8381 gcc_unreachable ();
8382 }
8383 }
8384 [(set_attr "isa" "noavx,noavx,avx")
8385 (set_attr "type" "sselog")
8386 (set_attr "prefix_data16" "1,1,*")
8387 (set_attr "prefix_extra" "1")
8388 (set_attr "length_immediate" "1")
8389 (set_attr "prefix" "orig,orig,maybe_evex")
8390 (set_attr "mode" "V4SF")])
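;; Illustration of the memory-operand adjustment above (example values only):
;; with a memory source and imm8 0xB0, count_s is 2, so the SFmode load is
;; taken from offset 2 * 4 == 8 and the immediate is rewritten to 0x30,
;; keeping the same destination slot and zero mask.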
8391
8392 (define_split
8393 [(set (match_operand:VI4F_128 0 "memory_operand")
8394 (vec_merge:VI4F_128
8395 (vec_duplicate:VI4F_128
8396 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
8397 (match_dup 0)
8398 (const_int 1)))]
8399 "TARGET_SSE && reload_completed"
8400 [(set (match_dup 0) (match_dup 1))]
8401 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8402
8403 ;; Standard scalar operation patterns which preserve the rest of the
8404 ;; vector for the combiner.
8405 (define_insn "vec_setv2df_0"
8406 [(set (match_operand:V2DF 0 "register_operand" "=x,v,x,v")
8407 (vec_merge:V2DF
8408 (vec_duplicate:V2DF
8409 (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
8410 (match_operand:V2DF 1 "register_operand" " 0,v,0,v")
8411 (const_int 1)))]
8412 "TARGET_SSE2"
8413 "@
8414 movsd\t{%2, %0|%0, %2}
8415 vmovsd\t{%2, %1, %0|%0, %1, %2}
8416 movlpd\t{%2, %0|%0, %2}
8417 vmovlpd\t{%2, %1, %0|%0, %1, %2}"
8418 [(set_attr "isa" "noavx,avx,noavx,avx")
8419 (set_attr "type" "ssemov")
8420 (set_attr "mode" "DF")])
8421
8422 (define_expand "vec_set<mode>"
8423 [(match_operand:V 0 "register_operand")
8424 (match_operand:<ssescalarmode> 1 "register_operand")
8425 (match_operand 2 "vec_setm_operand")]
8426 "TARGET_SSE"
8427 {
8428 if (CONST_INT_P (operands[2]))
8429 ix86_expand_vector_set (false, operands[0], operands[1],
8430 INTVAL (operands[2]));
8431 else
8432 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
8433 DONE;
8434 })
8435
8436 (define_insn_and_split "*vec_extractv4sf_0"
8437 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
8438 (vec_select:SF
8439 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
8440 (parallel [(const_int 0)])))]
8441 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8442 "#"
8443 "&& reload_completed"
8444 [(set (match_dup 0) (match_dup 1))]
8445 "operands[1] = gen_lowpart (SFmode, operands[1]);")
8446
8447 (define_insn_and_split "*sse4_1_extractps"
8448 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
8449 (vec_select:SF
8450 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
8451 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
8452 "TARGET_SSE4_1"
8453 "@
8454 extractps\t{%2, %1, %0|%0, %1, %2}
8455 extractps\t{%2, %1, %0|%0, %1, %2}
8456 vextractps\t{%2, %1, %0|%0, %1, %2}
8457 #
8458 #"
8459 "&& reload_completed && SSE_REG_P (operands[0])"
8460 [(const_int 0)]
8461 {
8462 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
8463 switch (INTVAL (operands[2]))
8464 {
8465 case 1:
8466 case 3:
8467 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
8468 operands[2], operands[2],
8469 GEN_INT (INTVAL (operands[2]) + 4),
8470 GEN_INT (INTVAL (operands[2]) + 4)));
8471 break;
8472 case 2:
8473 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
8474 break;
8475 default:
8476 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
8477 gcc_unreachable ();
8478 }
8479 DONE;
8480 }
8481 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
8482 (set_attr "type" "sselog,sselog,sselog,*,*")
8483 (set_attr "prefix_data16" "1,1,1,*,*")
8484 (set_attr "prefix_extra" "1,1,1,*,*")
8485 (set_attr "length_immediate" "1,1,1,*,*")
8486 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
8487 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
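;; A short illustration of the split above: for an SSE-register destination
;; and operands[2] == 2, interleaving operand 1 with itself via
;; vec_interleave_highv4sf gives { x2, x2, x3, x3 }, leaving the requested
;; element in the low slot; for indices 1 and 3 the sse_shufps_v4sf form
;; instead replicates that element into every slot.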
8488
8489 (define_insn_and_split "*vec_extractv4sf_mem"
8490 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
8491 (vec_select:SF
8492 (match_operand:V4SF 1 "memory_operand" "o,o,o")
8493 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
8494 "TARGET_SSE"
8495 "#"
8496 "&& reload_completed"
8497 [(set (match_dup 0) (match_dup 1))]
8498 {
8499 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
8500 })
8501
8502 (define_mode_attr extract_type
8503 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
8504
8505 (define_mode_attr extract_suf
8506 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
8507
8508 (define_mode_iterator AVX512_VEC
8509 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
8510
8511 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
8512 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
8513 (match_operand:AVX512_VEC 1 "register_operand")
8514 (match_operand:SI 2 "const_0_to_3_operand")
8515 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
8516 (match_operand:QI 4 "register_operand")]
8517 "TARGET_AVX512F"
8518 {
8519 int mask;
8520 mask = INTVAL (operands[2]);
8521 rtx dest = operands[0];
8522
8523 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
8524 dest = gen_reg_rtx (<ssequartermode>mode);
8525
8526 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
8527 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
8528 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
8529 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
8530 operands[4]));
8531 else
8532 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
8533 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
8534 operands[4]));
8535 if (dest != operands[0])
8536 emit_move_insn (operands[0], dest);
8537 DONE;
8538 })
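;; Example of the expander above (illustration only): for V16SF with
;; operands[2] == 2, mask * 4 == 8, so the generated 32x4 extract selects
;; elements 8..11, i.e. the third 128-bit chunk of the source vector.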
8539
8540 (define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
8541 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8542 (vec_merge:<ssequartermode>
8543 (vec_select:<ssequartermode>
8544 (match_operand:V8FI 1 "register_operand" "v,v")
8545 (parallel [(match_operand 2 "const_0_to_7_operand")
8546 (match_operand 3 "const_0_to_7_operand")]))
8547 (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
8548 (match_operand:QI 5 "register_operand" "Yk,Yk")))]
8549 "TARGET_AVX512DQ
8550 && INTVAL (operands[2]) % 2 == 0
8551 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8552 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
8553 {
8554 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8555 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2}";
8556 }
8557 [(set_attr "type" "sselog1")
8558 (set_attr "prefix_extra" "1")
8559 (set_attr "length_immediate" "1")
8560 (set_attr "prefix" "evex")
8561 (set_attr "mode" "<sseinsnmode>")])
8562
8563 (define_insn "*avx512dq_vextract<shuffletype>64x2_1"
8564 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8565 (vec_select:<ssequartermode>
8566 (match_operand:V8FI 1 "register_operand" "v")
8567 (parallel [(match_operand 2 "const_0_to_7_operand")
8568 (match_operand 3 "const_0_to_7_operand")])))]
8569 "TARGET_AVX512DQ
8570 && INTVAL (operands[2]) % 2 == 0
8571 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8572 {
8573 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8574 return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
8575 }
8576 [(set_attr "type" "sselog1")
8577 (set_attr "prefix_extra" "1")
8578 (set_attr "length_immediate" "1")
8579 (set_attr "prefix" "evex")
8580 (set_attr "mode" "<sseinsnmode>")])
8581
8582 (define_split
8583 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8584 (vec_select:<ssequartermode>
8585 (match_operand:V8FI 1 "register_operand")
8586 (parallel [(const_int 0) (const_int 1)])))]
8587 "TARGET_AVX512DQ
8588 && reload_completed
8589 && (TARGET_AVX512VL
8590 || REG_P (operands[0])
8591 || !EXT_REX_SSE_REG_P (operands[1]))"
8592 [(set (match_dup 0) (match_dup 1))]
8593 {
8594 if (!TARGET_AVX512VL
8595 && REG_P (operands[0])
8596 && EXT_REX_SSE_REG_P (operands[1]))
8597 operands[0]
8598 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8599 else
8600 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8601 })
8602
8603 (define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
8604 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8605 (vec_merge:<ssequartermode>
8606 (vec_select:<ssequartermode>
8607 (match_operand:V16FI 1 "register_operand" "v,v")
8608 (parallel [(match_operand 2 "const_0_to_15_operand")
8609 (match_operand 3 "const_0_to_15_operand")
8610 (match_operand 4 "const_0_to_15_operand")
8611 (match_operand 5 "const_0_to_15_operand")]))
8612 (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
8613 (match_operand:QI 7 "register_operand" "Yk,Yk")))]
8614 "TARGET_AVX512F
8615 && INTVAL (operands[2]) % 4 == 0
8616 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8617 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8618 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8619 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
8620 {
8621 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8622 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, %2}";
8623 }
8624 [(set_attr "type" "sselog1")
8625 (set_attr "prefix_extra" "1")
8626 (set_attr "length_immediate" "1")
8627 (set_attr "prefix" "evex")
8628 (set_attr "mode" "<sseinsnmode>")])
8629
8630 (define_insn "*avx512f_vextract<shuffletype>32x4_1"
8631 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8632 (vec_select:<ssequartermode>
8633 (match_operand:V16FI 1 "register_operand" "v")
8634 (parallel [(match_operand 2 "const_0_to_15_operand")
8635 (match_operand 3 "const_0_to_15_operand")
8636 (match_operand 4 "const_0_to_15_operand")
8637 (match_operand 5 "const_0_to_15_operand")])))]
8638 "TARGET_AVX512F
8639 && INTVAL (operands[2]) % 4 == 0
8640 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8641 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8642 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8643 {
8644 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8645 return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
8646 }
8647 [(set_attr "type" "sselog1")
8648 (set_attr "prefix_extra" "1")
8649 (set_attr "length_immediate" "1")
8650 (set_attr "prefix" "evex")
8651 (set_attr "mode" "<sseinsnmode>")])
8652
8653 (define_split
8654 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8655 (vec_select:<ssequartermode>
8656 (match_operand:V16FI 1 "register_operand")
8657 (parallel [(const_int 0) (const_int 1)
8658 (const_int 2) (const_int 3)])))]
8659 "TARGET_AVX512F
8660 && reload_completed
8661 && (TARGET_AVX512VL
8662 || REG_P (operands[0])
8663 || !EXT_REX_SSE_REG_P (operands[1]))"
8664 [(set (match_dup 0) (match_dup 1))]
8665 {
8666 if (!TARGET_AVX512VL
8667 && REG_P (operands[0])
8668 && EXT_REX_SSE_REG_P (operands[1]))
8669 operands[0]
8670 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8671 else
8672 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8673 })
8674
8675 (define_mode_attr extract_type_2
8676 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8677
8678 (define_mode_attr extract_suf_2
8679 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8680
8681 (define_mode_iterator AVX512_VEC_2
8682 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
8683
8684 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8685 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8686 (match_operand:AVX512_VEC_2 1 "register_operand")
8687 (match_operand:SI 2 "const_0_to_1_operand")
8688 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8689 (match_operand:QI 4 "register_operand")]
8690 "TARGET_AVX512F"
8691 {
8692 rtx (*insn)(rtx, rtx, rtx, rtx);
8693 rtx dest = operands[0];
8694
8695 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8696 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8697
8698 switch (INTVAL (operands[2]))
8699 {
8700 case 0:
8701 insn = gen_vec_extract_lo_<mode>_mask;
8702 break;
8703 case 1:
8704 insn = gen_vec_extract_hi_<mode>_mask;
8705 break;
8706 default:
8707 gcc_unreachable ();
8708 }
8709
8710 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8711 if (dest != operands[0])
8712 emit_move_insn (operands[0], dest);
8713 DONE;
8714 })
8715
8716 (define_split
8717 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8718 (vec_select:<ssehalfvecmode>
8719 (match_operand:V8FI 1 "nonimmediate_operand")
8720 (parallel [(const_int 0) (const_int 1)
8721 (const_int 2) (const_int 3)])))]
8722 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8723 && reload_completed
8724 && (TARGET_AVX512VL
8725 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8726 [(set (match_dup 0) (match_dup 1))]
8727 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8728
8729 (define_insn "vec_extract_lo_<mode>_mask"
8730 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8731 (vec_merge:<ssehalfvecmode>
8732 (vec_select:<ssehalfvecmode>
8733 (match_operand:V8FI 1 "register_operand" "v,v")
8734 (parallel [(const_int 0) (const_int 1)
8735 (const_int 2) (const_int 3)]))
8736 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8737 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8738 "TARGET_AVX512F
8739 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8740 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8741 [(set_attr "type" "sselog1")
8742 (set_attr "prefix_extra" "1")
8743 (set_attr "length_immediate" "1")
8744 (set_attr "memory" "none,store")
8745 (set_attr "prefix" "evex")
8746 (set_attr "mode" "<sseinsnmode>")])
8747
8748 (define_insn "vec_extract_lo_<mode>"
8749 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
8750 (vec_select:<ssehalfvecmode>
8751 (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
8752 (parallel [(const_int 0) (const_int 1)
8753 (const_int 2) (const_int 3)])))]
8754 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8755 {
8756 if (!TARGET_AVX512VL && !MEM_P (operands[1]))
8757 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8758 else
8759 return "#";
8760 }
8761 [(set_attr "type" "sselog1")
8762 (set_attr "prefix_extra" "1")
8763 (set_attr "length_immediate" "1")
8764 (set_attr "memory" "none,store,load")
8765 (set_attr "prefix" "evex")
8766 (set_attr "mode" "<sseinsnmode>")])
8767
8768 (define_insn "vec_extract_hi_<mode>_mask"
8769 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8770 (vec_merge:<ssehalfvecmode>
8771 (vec_select:<ssehalfvecmode>
8772 (match_operand:V8FI 1 "register_operand" "v,v")
8773 (parallel [(const_int 4) (const_int 5)
8774 (const_int 6) (const_int 7)]))
8775 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8776 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8777 "TARGET_AVX512F
8778 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8779 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8780 [(set_attr "type" "sselog1")
8781 (set_attr "prefix_extra" "1")
8782 (set_attr "length_immediate" "1")
8783 (set_attr "prefix" "evex")
8784 (set_attr "mode" "<sseinsnmode>")])
8785
8786 (define_insn "vec_extract_hi_<mode>"
8787 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
8788 (vec_select:<ssehalfvecmode>
8789 (match_operand:V8FI 1 "register_operand" "v")
8790 (parallel [(const_int 4) (const_int 5)
8791 (const_int 6) (const_int 7)])))]
8792 "TARGET_AVX512F"
8793 "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8794 [(set_attr "type" "sselog1")
8795 (set_attr "prefix_extra" "1")
8796 (set_attr "length_immediate" "1")
8797 (set_attr "prefix" "evex")
8798 (set_attr "mode" "<sseinsnmode>")])
8799
8800 (define_insn "vec_extract_hi_<mode>_mask"
8801 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8802 (vec_merge:<ssehalfvecmode>
8803 (vec_select:<ssehalfvecmode>
8804 (match_operand:V16FI 1 "register_operand" "v,v")
8805 (parallel [(const_int 8) (const_int 9)
8806 (const_int 10) (const_int 11)
8807 (const_int 12) (const_int 13)
8808 (const_int 14) (const_int 15)]))
8809 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8810 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8811 "TARGET_AVX512DQ
8812 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8813 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8814 [(set_attr "type" "sselog1")
8815 (set_attr "prefix_extra" "1")
8816 (set_attr "length_immediate" "1")
8817 (set_attr "prefix" "evex")
8818 (set_attr "mode" "<sseinsnmode>")])
8819
8820 (define_insn "vec_extract_hi_<mode>"
8821 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
8822 (vec_select:<ssehalfvecmode>
8823 (match_operand:V16FI 1 "register_operand" "v,v")
8824 (parallel [(const_int 8) (const_int 9)
8825 (const_int 10) (const_int 11)
8826 (const_int 12) (const_int 13)
8827 (const_int 14) (const_int 15)])))]
8828 "TARGET_AVX512F"
8829 "@
8830 vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
8831 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8832 [(set_attr "type" "sselog1")
8833 (set_attr "prefix_extra" "1")
8834 (set_attr "isa" "avx512dq,noavx512dq")
8835 (set_attr "length_immediate" "1")
8836 (set_attr "prefix" "evex")
8837 (set_attr "mode" "<sseinsnmode>")])
8838
8839 (define_mode_iterator VI48F_256_DQ
8840 [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
8841
8842 (define_expand "avx512vl_vextractf128<mode>"
8843 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8844 (match_operand:VI48F_256_DQ 1 "register_operand")
8845 (match_operand:SI 2 "const_0_to_1_operand")
8846 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8847 (match_operand:QI 4 "register_operand")]
8848 "TARGET_AVX512VL"
8849 {
8850 rtx (*insn)(rtx, rtx, rtx, rtx);
8851 rtx dest = operands[0];
8852
8853 if (MEM_P (dest)
8854 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8855 /* For V8S[IF]mode there are maskm insns with =m and 0
8856 constraints. */
8857 ? !rtx_equal_p (dest, operands[3])
8858 /* For V4D[IF]mode, hi insns don't allow memory, and
8859 lo insns have =m and 0C constraints. */
8860 : (operands[2] != const0_rtx
8861 || (!rtx_equal_p (dest, operands[3])
8862 && GET_CODE (operands[3]) != CONST_VECTOR))))
8863 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8864 switch (INTVAL (operands[2]))
8865 {
8866 case 0:
8867 insn = gen_vec_extract_lo_<mode>_mask;
8868 break;
8869 case 1:
8870 insn = gen_vec_extract_hi_<mode>_mask;
8871 break;
8872 default:
8873 gcc_unreachable ();
8874 }
8875
8876 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8877 if (dest != operands[0])
8878 emit_move_insn (operands[0], dest);
8879 DONE;
8880 })
8881
8882 (define_expand "avx_vextractf128<mode>"
8883 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8884 (match_operand:V_256 1 "register_operand")
8885 (match_operand:SI 2 "const_0_to_1_operand")]
8886 "TARGET_AVX"
8887 {
8888 rtx (*insn)(rtx, rtx);
8889
8890 switch (INTVAL (operands[2]))
8891 {
8892 case 0:
8893 insn = gen_vec_extract_lo_<mode>;
8894 break;
8895 case 1:
8896 insn = gen_vec_extract_hi_<mode>;
8897 break;
8898 default:
8899 gcc_unreachable ();
8900 }
8901
8902 emit_insn (insn (operands[0], operands[1]));
8903 DONE;
8904 })
8905
8906 (define_insn "vec_extract_lo_<mode>_mask"
8907 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8908 (vec_merge:<ssehalfvecmode>
8909 (vec_select:<ssehalfvecmode>
8910 (match_operand:V16FI 1 "register_operand" "v,v")
8911 (parallel [(const_int 0) (const_int 1)
8912 (const_int 2) (const_int 3)
8913 (const_int 4) (const_int 5)
8914 (const_int 6) (const_int 7)]))
8915 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8916 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8917 "TARGET_AVX512DQ
8918 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8919 "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8920 [(set_attr "type" "sselog1")
8921 (set_attr "prefix_extra" "1")
8922 (set_attr "length_immediate" "1")
8923 (set_attr "memory" "none,store")
8924 (set_attr "prefix" "evex")
8925 (set_attr "mode" "<sseinsnmode>")])
8926
8927 (define_insn "vec_extract_lo_<mode>"
8928 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
8929 (vec_select:<ssehalfvecmode>
8930 (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
8931 (parallel [(const_int 0) (const_int 1)
8932 (const_int 2) (const_int 3)
8933 (const_int 4) (const_int 5)
8934 (const_int 6) (const_int 7)])))]
8935 "TARGET_AVX512F
8936 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8937 {
8938 if (!TARGET_AVX512VL
8939 && !REG_P (operands[0])
8940 && EXT_REX_SSE_REG_P (operands[1]))
8941 {
8942 if (TARGET_AVX512DQ)
8943 return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
8944 else
8945 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8946 }
8947 else
8948 return "#";
8949 }
8950 [(set_attr "type" "sselog1")
8951 (set_attr "prefix_extra" "1")
8952 (set_attr "length_immediate" "1")
8953 (set_attr "memory" "none,load,store")
8954 (set_attr "prefix" "evex")
8955 (set_attr "mode" "<sseinsnmode>")])
8956
8957 (define_split
8958 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8959 (vec_select:<ssehalfvecmode>
8960 (match_operand:V16FI 1 "nonimmediate_operand")
8961 (parallel [(const_int 0) (const_int 1)
8962 (const_int 2) (const_int 3)
8963 (const_int 4) (const_int 5)
8964 (const_int 6) (const_int 7)])))]
8965 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8966 && reload_completed
8967 && (TARGET_AVX512VL
8968 || REG_P (operands[0])
8969 || !EXT_REX_SSE_REG_P (operands[1]))"
8970 [(set (match_dup 0) (match_dup 1))]
8971 {
8972 if (!TARGET_AVX512VL
8973 && REG_P (operands[0])
8974 && EXT_REX_SSE_REG_P (operands[1]))
8975 operands[0]
8976 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8977 else
8978 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
8979 })
8980
8981 (define_insn "vec_extract_lo_<mode>_mask"
8982 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8983 (vec_merge:<ssehalfvecmode>
8984 (vec_select:<ssehalfvecmode>
8985 (match_operand:VI8F_256 1 "register_operand" "v,v")
8986 (parallel [(const_int 0) (const_int 1)]))
8987 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8988 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8989 "TARGET_AVX512DQ
8990 && TARGET_AVX512VL
8991 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8992 "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8993 [(set_attr "type" "sselog1")
8994 (set_attr "prefix_extra" "1")
8995 (set_attr "length_immediate" "1")
8996 (set_attr "memory" "none,store")
8997 (set_attr "prefix" "evex")
8998 (set_attr "mode" "XI")])
8999
9000 (define_insn "vec_extract_lo_<mode>"
9001 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
9002 (vec_select:<ssehalfvecmode>
9003 (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
9004 (parallel [(const_int 0) (const_int 1)])))]
9005 "TARGET_AVX
9006 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9007 "#")
9008
9009 (define_split
9010 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9011 (vec_select:<ssehalfvecmode>
9012 (match_operand:VI8F_256 1 "nonimmediate_operand")
9013 (parallel [(const_int 0) (const_int 1)])))]
9014 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9015 && reload_completed"
9016 [(set (match_dup 0) (match_dup 1))]
9017 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
9018
9019 (define_insn "vec_extract_hi_<mode>_mask"
9020 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9021 (vec_merge:<ssehalfvecmode>
9022 (vec_select:<ssehalfvecmode>
9023 (match_operand:VI8F_256 1 "register_operand" "v,v")
9024 (parallel [(const_int 2) (const_int 3)]))
9025 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9026 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9027 "TARGET_AVX512DQ
9028 && TARGET_AVX512VL
9029 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9030 "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9031 [(set_attr "type" "sselog1")
9032 (set_attr "prefix_extra" "1")
9033 (set_attr "length_immediate" "1")
9034 (set_attr "prefix" "vex")
9035 (set_attr "mode" "<sseinsnmode>")])
9036
9037 (define_insn "vec_extract_hi_<mode>"
9038 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
9039 (vec_select:<ssehalfvecmode>
9040 (match_operand:VI8F_256 1 "register_operand" "v")
9041 (parallel [(const_int 2) (const_int 3)])))]
9042 "TARGET_AVX"
9043 {
9044 if (TARGET_AVX512VL)
9045 {
9046 if (TARGET_AVX512DQ)
9047 return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
9048 else
9049 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
9050 }
9051 else
9052 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
9053 }
9054 [(set_attr "type" "sselog1")
9055 (set_attr "prefix_extra" "1")
9056 (set_attr "length_immediate" "1")
9057 (set_attr "prefix" "vex")
9058 (set_attr "mode" "<sseinsnmode>")])
9059
9060 (define_split
9061 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9062 (vec_select:<ssehalfvecmode>
9063 (match_operand:VI4F_256 1 "nonimmediate_operand")
9064 (parallel [(const_int 0) (const_int 1)
9065 (const_int 2) (const_int 3)])))]
9066 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9067 && reload_completed"
9068 [(set (match_dup 0) (match_dup 1))]
9069 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
9070
9071 (define_insn "vec_extract_lo_<mode>_mask"
9072 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9073 (vec_merge:<ssehalfvecmode>
9074 (vec_select:<ssehalfvecmode>
9075 (match_operand:VI4F_256 1 "register_operand" "v,v")
9076 (parallel [(const_int 0) (const_int 1)
9077 (const_int 2) (const_int 3)]))
9078 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9079 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9080 "TARGET_AVX512VL
9081 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9082 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9083 [(set_attr "type" "sselog1")
9084 (set_attr "prefix_extra" "1")
9085 (set_attr "length_immediate" "1")
9086 (set_attr "prefix" "evex")
9087 (set_attr "mode" "<sseinsnmode>")])
9088
9089 (define_insn "vec_extract_lo_<mode>"
9090 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
9091 (vec_select:<ssehalfvecmode>
9092 (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
9093 (parallel [(const_int 0) (const_int 1)
9094 (const_int 2) (const_int 3)])))]
9095 "TARGET_AVX
9096 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9097 "#"
9098 [(set_attr "type" "sselog1")
9099 (set_attr "prefix_extra" "1")
9100 (set_attr "length_immediate" "1")
9101 (set_attr "prefix" "evex")
9102 (set_attr "mode" "<sseinsnmode>")])
9103
9104 (define_insn "vec_extract_hi_<mode>_mask"
9105 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v,m")
9106 (vec_merge:<ssehalfvecmode>
9107 (vec_select:<ssehalfvecmode>
9108 (match_operand:VI4F_256 1 "register_operand" "v,v")
9109 (parallel [(const_int 4) (const_int 5)
9110 (const_int 6) (const_int 7)]))
9111 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9112 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9113 "TARGET_AVX512VL
9114 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9115 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9116 [(set_attr "type" "sselog1")
9117 (set_attr "length_immediate" "1")
9118 (set_attr "prefix" "evex")
9119 (set_attr "mode" "<sseinsnmode>")])
9120
9121 (define_insn "vec_extract_hi_<mode>"
9122 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
9123 (vec_select:<ssehalfvecmode>
9124 (match_operand:VI4F_256 1 "register_operand" "x, v")
9125 (parallel [(const_int 4) (const_int 5)
9126 (const_int 6) (const_int 7)])))]
9127 "TARGET_AVX"
9128 "@
9129 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
9130 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9131 [(set_attr "isa" "*, avx512vl")
9132 (set_attr "prefix" "vex, evex")
9133 (set_attr "type" "sselog1")
9134 (set_attr "length_immediate" "1")
9135 (set_attr "mode" "<sseinsnmode>")])
9136
9137 (define_insn_and_split "vec_extract_lo_v32hi"
9138 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
9139 (vec_select:V16HI
9140 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
9141 (parallel [(const_int 0) (const_int 1)
9142 (const_int 2) (const_int 3)
9143 (const_int 4) (const_int 5)
9144 (const_int 6) (const_int 7)
9145 (const_int 8) (const_int 9)
9146 (const_int 10) (const_int 11)
9147 (const_int 12) (const_int 13)
9148 (const_int 14) (const_int 15)])))]
9149 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9150 {
9151 if (TARGET_AVX512VL
9152 || REG_P (operands[0])
9153 || !EXT_REX_SSE_REG_P (operands[1]))
9154 return "#";
9155 else
9156 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9157 }
9158 "&& reload_completed
9159 && (TARGET_AVX512VL
9160 || REG_P (operands[0])
9161 || !EXT_REX_SSE_REG_P (operands[1]))"
9162 [(set (match_dup 0) (match_dup 1))]
9163 {
9164 if (!TARGET_AVX512VL
9165 && REG_P (operands[0])
9166 && EXT_REX_SSE_REG_P (operands[1]))
9167 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
9168 else
9169 operands[1] = gen_lowpart (V16HImode, operands[1]);
9170 }
9171 [(set_attr "type" "sselog1")
9172 (set_attr "prefix_extra" "1")
9173 (set_attr "length_immediate" "1")
9174 (set_attr "memory" "none,load,store")
9175 (set_attr "prefix" "evex")
9176 (set_attr "mode" "XI")])
9177
9178 (define_insn "vec_extract_hi_v32hi"
9179 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
9180 (vec_select:V16HI
9181 (match_operand:V32HI 1 "register_operand" "v")
9182 (parallel [(const_int 16) (const_int 17)
9183 (const_int 18) (const_int 19)
9184 (const_int 20) (const_int 21)
9185 (const_int 22) (const_int 23)
9186 (const_int 24) (const_int 25)
9187 (const_int 26) (const_int 27)
9188 (const_int 28) (const_int 29)
9189 (const_int 30) (const_int 31)])))]
9190 "TARGET_AVX512F"
9191 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9192 [(set_attr "type" "sselog1")
9193 (set_attr "prefix_extra" "1")
9194 (set_attr "length_immediate" "1")
9195 (set_attr "prefix" "evex")
9196 (set_attr "mode" "XI")])
9197
9198 (define_insn_and_split "vec_extract_lo_v16hi"
9199 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
9200 (vec_select:V8HI
9201 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
9202 (parallel [(const_int 0) (const_int 1)
9203 (const_int 2) (const_int 3)
9204 (const_int 4) (const_int 5)
9205 (const_int 6) (const_int 7)])))]
9206 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9207 "#"
9208 "&& reload_completed"
9209 [(set (match_dup 0) (match_dup 1))]
9210 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
9211
9212 (define_insn "vec_extract_hi_v16hi"
9213 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
9214 (vec_select:V8HI
9215 (match_operand:V16HI 1 "register_operand" "x,v,v")
9216 (parallel [(const_int 8) (const_int 9)
9217 (const_int 10) (const_int 11)
9218 (const_int 12) (const_int 13)
9219 (const_int 14) (const_int 15)])))]
9220 "TARGET_AVX"
9221 "@
9222 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9223 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9224 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9225 [(set_attr "type" "sselog1")
9226 (set_attr "prefix_extra" "1")
9227 (set_attr "length_immediate" "1")
9228 (set_attr "isa" "*,avx512dq,avx512f")
9229 (set_attr "prefix" "vex,evex,evex")
9230 (set_attr "mode" "OI")])
9231
9232 (define_insn_and_split "vec_extract_lo_v64qi"
9233 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
9234 (vec_select:V32QI
9235 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
9236 (parallel [(const_int 0) (const_int 1)
9237 (const_int 2) (const_int 3)
9238 (const_int 4) (const_int 5)
9239 (const_int 6) (const_int 7)
9240 (const_int 8) (const_int 9)
9241 (const_int 10) (const_int 11)
9242 (const_int 12) (const_int 13)
9243 (const_int 14) (const_int 15)
9244 (const_int 16) (const_int 17)
9245 (const_int 18) (const_int 19)
9246 (const_int 20) (const_int 21)
9247 (const_int 22) (const_int 23)
9248 (const_int 24) (const_int 25)
9249 (const_int 26) (const_int 27)
9250 (const_int 28) (const_int 29)
9251 (const_int 30) (const_int 31)])))]
9252 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9253 {
9254 if (TARGET_AVX512VL
9255 || REG_P (operands[0])
9256 || !EXT_REX_SSE_REG_P (operands[1]))
9257 return "#";
9258 else
9259 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9260 }
9261 "&& reload_completed
9262 && (TARGET_AVX512VL
9263 || REG_P (operands[0])
9264 || !EXT_REX_SSE_REG_P (operands[1]))"
9265 [(set (match_dup 0) (match_dup 1))]
9266 {
9267 if (!TARGET_AVX512VL
9268 && REG_P (operands[0])
9269 && EXT_REX_SSE_REG_P (operands[1]))
9270 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
9271 else
9272 operands[1] = gen_lowpart (V32QImode, operands[1]);
9273 }
9274 [(set_attr "type" "sselog1")
9275 (set_attr "prefix_extra" "1")
9276 (set_attr "length_immediate" "1")
9277 (set_attr "memory" "none,load,store")
9278 (set_attr "prefix" "evex")
9279 (set_attr "mode" "XI")])
9280
9281 (define_insn "vec_extract_hi_v64qi"
9282 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
9283 (vec_select:V32QI
9284 (match_operand:V64QI 1 "register_operand" "v")
9285 (parallel [(const_int 32) (const_int 33)
9286 (const_int 34) (const_int 35)
9287 (const_int 36) (const_int 37)
9288 (const_int 38) (const_int 39)
9289 (const_int 40) (const_int 41)
9290 (const_int 42) (const_int 43)
9291 (const_int 44) (const_int 45)
9292 (const_int 46) (const_int 47)
9293 (const_int 48) (const_int 49)
9294 (const_int 50) (const_int 51)
9295 (const_int 52) (const_int 53)
9296 (const_int 54) (const_int 55)
9297 (const_int 56) (const_int 57)
9298 (const_int 58) (const_int 59)
9299 (const_int 60) (const_int 61)
9300 (const_int 62) (const_int 63)])))]
9301 "TARGET_AVX512F"
9302 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9303 [(set_attr "type" "sselog1")
9304 (set_attr "prefix_extra" "1")
9305 (set_attr "length_immediate" "1")
9306 (set_attr "prefix" "evex")
9307 (set_attr "mode" "XI")])
9308
9309 (define_insn_and_split "vec_extract_lo_v32qi"
9310 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
9311 (vec_select:V16QI
9312 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
9313 (parallel [(const_int 0) (const_int 1)
9314 (const_int 2) (const_int 3)
9315 (const_int 4) (const_int 5)
9316 (const_int 6) (const_int 7)
9317 (const_int 8) (const_int 9)
9318 (const_int 10) (const_int 11)
9319 (const_int 12) (const_int 13)
9320 (const_int 14) (const_int 15)])))]
9321 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9322 "#"
9323 "&& reload_completed"
9324 [(set (match_dup 0) (match_dup 1))]
9325 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
9326
9327 (define_insn "vec_extract_hi_v32qi"
9328 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
9329 (vec_select:V16QI
9330 (match_operand:V32QI 1 "register_operand" "x,v,v")
9331 (parallel [(const_int 16) (const_int 17)
9332 (const_int 18) (const_int 19)
9333 (const_int 20) (const_int 21)
9334 (const_int 22) (const_int 23)
9335 (const_int 24) (const_int 25)
9336 (const_int 26) (const_int 27)
9337 (const_int 28) (const_int 29)
9338 (const_int 30) (const_int 31)])))]
9339 "TARGET_AVX"
9340 "@
9341 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9342 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9343 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9344 [(set_attr "type" "sselog1")
9345 (set_attr "prefix_extra" "1")
9346 (set_attr "length_immediate" "1")
9347 (set_attr "isa" "*,avx512dq,avx512f")
9348 (set_attr "prefix" "vex,evex,evex")
9349 (set_attr "mode" "OI")])
9350
9351 ;; Modes handled by vec_extract patterns.
9352 (define_mode_iterator VEC_EXTRACT_MODE
9353 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
9354 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
9355 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
9356 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
9357 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
9358 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
9359 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
9360
9361 (define_expand "vec_extract<mode><ssescalarmodelower>"
9362 [(match_operand:<ssescalarmode> 0 "register_operand")
9363 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
9364 (match_operand 2 "const_int_operand")]
9365 "TARGET_SSE"
9366 {
9367 ix86_expand_vector_extract (false, operands[0], operands[1],
9368 INTVAL (operands[2]));
9369 DONE;
9370 })
9371
9372 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
9373 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9374 (match_operand:V_256_512 1 "register_operand")
9375 (match_operand 2 "const_0_to_1_operand")]
9376 "TARGET_AVX"
9377 {
9378 if (INTVAL (operands[2]))
9379 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
9380 else
9381 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
9382 DONE;
9383 })
9384
9385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9386 ;;
9387 ;; Parallel double-precision floating point element swizzling
9388 ;;
9389 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9390
9391 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
9392 [(set (match_operand:V8DF 0 "register_operand" "=v")
9393 (vec_select:V8DF
9394 (vec_concat:V16DF
9395 (match_operand:V8DF 1 "register_operand" "v")
9396 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9397 (parallel [(const_int 1) (const_int 9)
9398 (const_int 3) (const_int 11)
9399 (const_int 5) (const_int 13)
9400 (const_int 7) (const_int 15)])))]
9401 "TARGET_AVX512F"
9402 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9403 [(set_attr "type" "sselog")
9404 (set_attr "prefix" "evex")
9405 (set_attr "mode" "V8DF")])
9406
9407 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9408 (define_insn "avx_unpckhpd256<mask_name>"
9409 [(set (match_operand:V4DF 0 "register_operand" "=v")
9410 (vec_select:V4DF
9411 (vec_concat:V8DF
9412 (match_operand:V4DF 1 "register_operand" "v")
9413 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9414 (parallel [(const_int 1) (const_int 5)
9415 (const_int 3) (const_int 7)])))]
9416 "TARGET_AVX && <mask_avx512vl_condition>"
9417 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9418 [(set_attr "type" "sselog")
9419 (set_attr "prefix" "vex")
9420 (set_attr "mode" "V4DF")])
9421
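;; vunpck{l,h}pd on 256-bit operands interleaves only within each 128-bit
;; lane, so the full V4DF interleave-high is composed from an in-lane
;; unpcklpd, an in-lane unpckhpd and a final cross-lane select of the two
;; high halves.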
9422 (define_expand "vec_interleave_highv4df"
9423 [(set (match_dup 3)
9424 (vec_select:V4DF
9425 (vec_concat:V8DF
9426 (match_operand:V4DF 1 "register_operand")
9427 (match_operand:V4DF 2 "nonimmediate_operand"))
9428 (parallel [(const_int 0) (const_int 4)
9429 (const_int 2) (const_int 6)])))
9430 (set (match_dup 4)
9431 (vec_select:V4DF
9432 (vec_concat:V8DF
9433 (match_dup 1)
9434 (match_dup 2))
9435 (parallel [(const_int 1) (const_int 5)
9436 (const_int 3) (const_int 7)])))
9437 (set (match_operand:V4DF 0 "register_operand")
9438 (vec_select:V4DF
9439 (vec_concat:V8DF
9440 (match_dup 3)
9441 (match_dup 4))
9442 (parallel [(const_int 2) (const_int 3)
9443 (const_int 6) (const_int 7)])))]
9444 "TARGET_AVX"
9445 {
9446 operands[3] = gen_reg_rtx (V4DFmode);
9447 operands[4] = gen_reg_rtx (V4DFmode);
9448 })
9449
9450
9451 (define_insn "avx512vl_unpckhpd128_mask"
9452 [(set (match_operand:V2DF 0 "register_operand" "=v")
9453 (vec_merge:V2DF
9454 (vec_select:V2DF
9455 (vec_concat:V4DF
9456 (match_operand:V2DF 1 "register_operand" "v")
9457 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9458 (parallel [(const_int 1) (const_int 3)]))
9459 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9460 (match_operand:QI 4 "register_operand" "Yk")))]
9461 "TARGET_AVX512VL"
9462 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9463 [(set_attr "type" "sselog")
9464 (set_attr "prefix" "evex")
9465 (set_attr "mode" "V2DF")])
9466
9467 (define_expand "vec_interleave_highv2df"
9468 [(set (match_operand:V2DF 0 "register_operand")
9469 (vec_select:V2DF
9470 (vec_concat:V4DF
9471 (match_operand:V2DF 1 "nonimmediate_operand")
9472 (match_operand:V2DF 2 "nonimmediate_operand"))
9473 (parallel [(const_int 1)
9474 (const_int 3)])))]
9475 "TARGET_SSE2"
9476 {
9477 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
9478 operands[2] = force_reg (V2DFmode, operands[2]);
9479 })
9480
9481 (define_insn "*vec_interleave_highv2df"
9482 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
9483 (vec_select:V2DF
9484 (vec_concat:V4DF
9485 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
9486 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
9487 (parallel [(const_int 1)
9488 (const_int 3)])))]
9489 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
9490 "@
9491 unpckhpd\t{%2, %0|%0, %2}
9492 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
9493 %vmovddup\t{%H1, %0|%0, %H1}
9494 movlpd\t{%H1, %0|%0, %H1}
9495 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
9496 %vmovhpd\t{%1, %0|%q0, %1}"
9497 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9498 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9499 (set (attr "prefix_data16")
9500 (if_then_else (eq_attr "alternative" "3,5")
9501 (const_string "1")
9502 (const_string "*")))
9503 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9504 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9505
9506 (define_expand "avx512f_movddup512<mask_name>"
9507 [(set (match_operand:V8DF 0 "register_operand")
9508 (vec_select:V8DF
9509 (vec_concat:V16DF
9510 (match_operand:V8DF 1 "nonimmediate_operand")
9511 (match_dup 1))
9512 (parallel [(const_int 0) (const_int 8)
9513 (const_int 2) (const_int 10)
9514 (const_int 4) (const_int 12)
9515 (const_int 6) (const_int 14)])))]
9516 "TARGET_AVX512F")
9517
9518 (define_expand "avx512f_unpcklpd512<mask_name>"
9519 [(set (match_operand:V8DF 0 "register_operand")
9520 (vec_select:V8DF
9521 (vec_concat:V16DF
9522 (match_operand:V8DF 1 "register_operand")
9523 (match_operand:V8DF 2 "nonimmediate_operand"))
9524 (parallel [(const_int 0) (const_int 8)
9525 (const_int 2) (const_int 10)
9526 (const_int 4) (const_int 12)
9527 (const_int 6) (const_int 14)])))]
9528 "TARGET_AVX512F")
9529
9530 (define_insn "*avx512f_unpcklpd512<mask_name>"
9531 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
9532 (vec_select:V8DF
9533 (vec_concat:V16DF
9534 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
9535 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
9536 (parallel [(const_int 0) (const_int 8)
9537 (const_int 2) (const_int 10)
9538 (const_int 4) (const_int 12)
9539 (const_int 6) (const_int 14)])))]
9540 "TARGET_AVX512F"
9541 "@
9542 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
9543 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9544 [(set_attr "type" "sselog")
9545 (set_attr "prefix" "evex")
9546 (set_attr "mode" "V8DF")])
9547
9548 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9549 (define_expand "avx_movddup256<mask_name>"
9550 [(set (match_operand:V4DF 0 "register_operand")
9551 (vec_select:V4DF
9552 (vec_concat:V8DF
9553 (match_operand:V4DF 1 "nonimmediate_operand")
9554 (match_dup 1))
9555 (parallel [(const_int 0) (const_int 4)
9556 (const_int 2) (const_int 6)])))]
9557 "TARGET_AVX && <mask_avx512vl_condition>")
9558
9559 (define_expand "avx_unpcklpd256<mask_name>"
9560 [(set (match_operand:V4DF 0 "register_operand")
9561 (vec_select:V4DF
9562 (vec_concat:V8DF
9563 (match_operand:V4DF 1 "register_operand")
9564 (match_operand:V4DF 2 "nonimmediate_operand"))
9565 (parallel [(const_int 0) (const_int 4)
9566 (const_int 2) (const_int 6)])))]
9567 "TARGET_AVX && <mask_avx512vl_condition>")
9568
9569 (define_insn "*avx_unpcklpd256<mask_name>"
9570 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9571 (vec_select:V4DF
9572 (vec_concat:V8DF
9573 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9574 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9575 (parallel [(const_int 0) (const_int 4)
9576 (const_int 2) (const_int 6)])))]
9577 "TARGET_AVX && <mask_avx512vl_condition>"
9578 "@
9579 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9580 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9581 [(set_attr "type" "sselog")
9582 (set_attr "prefix" "vex")
9583 (set_attr "mode" "V4DF")])
9584
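;; Likewise, the V4DF interleave-low combines the two in-lane unpck results
;; with a cross-lane select of their low halves.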
9585 (define_expand "vec_interleave_lowv4df"
9586 [(set (match_dup 3)
9587 (vec_select:V4DF
9588 (vec_concat:V8DF
9589 (match_operand:V4DF 1 "register_operand")
9590 (match_operand:V4DF 2 "nonimmediate_operand"))
9591 (parallel [(const_int 0) (const_int 4)
9592 (const_int 2) (const_int 6)])))
9593 (set (match_dup 4)
9594 (vec_select:V4DF
9595 (vec_concat:V8DF
9596 (match_dup 1)
9597 (match_dup 2))
9598 (parallel [(const_int 1) (const_int 5)
9599 (const_int 3) (const_int 7)])))
9600 (set (match_operand:V4DF 0 "register_operand")
9601 (vec_select:V4DF
9602 (vec_concat:V8DF
9603 (match_dup 3)
9604 (match_dup 4))
9605 (parallel [(const_int 0) (const_int 1)
9606 (const_int 4) (const_int 5)])))]
9607 "TARGET_AVX"
9608 {
9609 operands[3] = gen_reg_rtx (V4DFmode);
9610 operands[4] = gen_reg_rtx (V4DFmode);
9611 })
9612
9613 (define_insn "avx512vl_unpcklpd128_mask"
9614 [(set (match_operand:V2DF 0 "register_operand" "=v")
9615 (vec_merge:V2DF
9616 (vec_select:V2DF
9617 (vec_concat:V4DF
9618 (match_operand:V2DF 1 "register_operand" "v")
9619 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9620 (parallel [(const_int 0) (const_int 2)]))
9621 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9622 (match_operand:QI 4 "register_operand" "Yk")))]
9623 "TARGET_AVX512VL"
9624 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9625 [(set_attr "type" "sselog")
9626 (set_attr "prefix" "evex")
9627 (set_attr "mode" "V2DF")])
9628
9629 (define_expand "vec_interleave_lowv2df"
9630 [(set (match_operand:V2DF 0 "register_operand")
9631 (vec_select:V2DF
9632 (vec_concat:V4DF
9633 (match_operand:V2DF 1 "nonimmediate_operand")
9634 (match_operand:V2DF 2 "nonimmediate_operand"))
9635 (parallel [(const_int 0)
9636 (const_int 2)])))]
9637 "TARGET_SSE2"
9638 {
9639 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9640 operands[1] = force_reg (V2DFmode, operands[1]);
9641 })
9642
9643 (define_insn "*vec_interleave_lowv2df"
9644 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9645 (vec_select:V2DF
9646 (vec_concat:V4DF
9647 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9648 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9649 (parallel [(const_int 0)
9650 (const_int 2)])))]
9651 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9652 "@
9653 unpcklpd\t{%2, %0|%0, %2}
9654 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9655 %vmovddup\t{%1, %0|%0, %q1}
9656 movhpd\t{%2, %0|%0, %q2}
9657 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9658 %vmovlpd\t{%2, %H0|%H0, %2}"
9659 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9660 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9661 (set (attr "prefix_data16")
9662 (if_then_else (eq_attr "alternative" "3,5")
9663 (const_string "1")
9664 (const_string "*")))
9665 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9666 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9667
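;; A duplicated low element that is only stored can be split after reload
;; into two scalar DFmode stores of that element.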
9668 (define_split
9669 [(set (match_operand:V2DF 0 "memory_operand")
9670 (vec_select:V2DF
9671 (vec_concat:V4DF
9672 (match_operand:V2DF 1 "register_operand")
9673 (match_dup 1))
9674 (parallel [(const_int 0)
9675 (const_int 2)])))]
9676 "TARGET_SSE3 && reload_completed"
9677 [(const_int 0)]
9678 {
9679 rtx low = gen_lowpart (DFmode, operands[1]);
9680
9681 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9682 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
9683 DONE;
9684 })
9685
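;; Selecting the same element twice from a V2DF memory operand is a
;; vec_duplicate (movddup) of that element, whichever of the two it is.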
9686 (define_split
9687 [(set (match_operand:V2DF 0 "register_operand")
9688 (vec_select:V2DF
9689 (vec_concat:V4DF
9690 (match_operand:V2DF 1 "memory_operand")
9691 (match_dup 1))
9692 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9693 (match_operand:SI 3 "const_int_operand")])))]
9694 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9695 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9696 {
9697 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
9698 })
9699
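;; In the scalar patterns below, the (vec_merge ... (const_int 1)) wrapper
;; expresses the scalar-insn semantics: only element 0 is computed, the
;; remaining elements are copied from operand 1.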
9700 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9701 [(set (match_operand:VF_128 0 "register_operand" "=v")
9702 (vec_merge:VF_128
9703 (unspec:VF_128
9704 [(match_operand:VF_128 1 "register_operand" "v")
9705 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9706 UNSPEC_SCALEF)
9707 (match_dup 1)
9708 (const_int 1)))]
9709 "TARGET_AVX512F"
9710 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9711 [(set_attr "prefix" "evex")
9712 (set_attr "mode" "<ssescalarmode>")])
9713
9714 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9715 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9716 (unspec:VF_AVX512VL
9717 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9718 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9719 UNSPEC_SCALEF))]
9720 "TARGET_AVX512F"
9721 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9722 [(set_attr "prefix" "evex")
9723 (set_attr "mode" "<MODE>")])
9724
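;; The *_maskz expanders implement zero-masking by passing a zero vector as
;; the merge operand of the corresponding *_maskz_1 pattern.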
9725 (define_expand "<avx512>_vternlog<mode>_maskz"
9726 [(match_operand:VI48_AVX512VL 0 "register_operand")
9727 (match_operand:VI48_AVX512VL 1 "register_operand")
9728 (match_operand:VI48_AVX512VL 2 "register_operand")
9729 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9730 (match_operand:SI 4 "const_0_to_255_operand")
9731 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9732 "TARGET_AVX512F"
9733 {
9734 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9735 operands[0], operands[1], operands[2], operands[3],
9736 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
9737 DONE;
9738 })
9739
9740 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9741 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9742 (unspec:VI48_AVX512VL
9743 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9744 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9745 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9746 (match_operand:SI 4 "const_0_to_255_operand")]
9747 UNSPEC_VTERNLOG))]
9748 "TARGET_AVX512F"
9749 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9750 [(set_attr "type" "sselog")
9751 (set_attr "prefix" "evex")
9752 (set_attr "mode" "<sseinsnmode>")])
9753
9754 (define_insn "<avx512>_vternlog<mode>_mask"
9755 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9756 (vec_merge:VI48_AVX512VL
9757 (unspec:VI48_AVX512VL
9758 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9759 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9760 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9761 (match_operand:SI 4 "const_0_to_255_operand")]
9762 UNSPEC_VTERNLOG)
9763 (match_dup 1)
9764 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9765 "TARGET_AVX512F"
9766 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9767 [(set_attr "type" "sselog")
9768 (set_attr "prefix" "evex")
9769 (set_attr "mode" "<sseinsnmode>")])
9770
9771 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9772 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9773 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9774 UNSPEC_GETEXP))]
9775 "TARGET_AVX512F"
9776 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9777 [(set_attr "prefix" "evex")
9778 (set_attr "mode" "<MODE>")])
9779
9780 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9781 [(set (match_operand:VF_128 0 "register_operand" "=v")
9782 (vec_merge:VF_128
9783 (unspec:VF_128
9784 [(match_operand:VF_128 1 "register_operand" "v")
9785 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9786 UNSPEC_GETEXP)
9787 (match_dup 1)
9788 (const_int 1)))]
9789 "TARGET_AVX512F"
9790 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9791 [(set_attr "prefix" "evex")
9792 (set_attr "mode" "<ssescalarmode>")])
9793
9794 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9795 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9796 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9797 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9798 (match_operand:SI 3 "const_0_to_255_operand")]
9799 UNSPEC_ALIGN))]
9800 "TARGET_AVX512F"
9801 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9802 [(set_attr "prefix" "evex")
9803 (set_attr "mode" "<sseinsnmode>")])
9804
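;; The 8-bit shufps immediate is applied per 128-bit lane: bits [1:0] and
;; [3:2] pick two elements of operand 1, bits [5:4] and [7:6] pick two
;; elements of operand 2.  The expander rewrites it as the sixteen explicit
;; selector indices expected by avx512f_shufps512_1 (offset by 4, 8 and 12
;; for the higher lanes, and by 16 for operand 2).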
9805 (define_expand "avx512f_shufps512_mask"
9806 [(match_operand:V16SF 0 "register_operand")
9807 (match_operand:V16SF 1 "register_operand")
9808 (match_operand:V16SF 2 "nonimmediate_operand")
9809 (match_operand:SI 3 "const_0_to_255_operand")
9810 (match_operand:V16SF 4 "register_operand")
9811 (match_operand:HI 5 "register_operand")]
9812 "TARGET_AVX512F"
9813 {
9814 int mask = INTVAL (operands[3]);
9815 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9816 GEN_INT ((mask >> 0) & 3),
9817 GEN_INT ((mask >> 2) & 3),
9818 GEN_INT (((mask >> 4) & 3) + 16),
9819 GEN_INT (((mask >> 6) & 3) + 16),
9820 GEN_INT (((mask >> 0) & 3) + 4),
9821 GEN_INT (((mask >> 2) & 3) + 4),
9822 GEN_INT (((mask >> 4) & 3) + 20),
9823 GEN_INT (((mask >> 6) & 3) + 20),
9824 GEN_INT (((mask >> 0) & 3) + 8),
9825 GEN_INT (((mask >> 2) & 3) + 8),
9826 GEN_INT (((mask >> 4) & 3) + 24),
9827 GEN_INT (((mask >> 6) & 3) + 24),
9828 GEN_INT (((mask >> 0) & 3) + 12),
9829 GEN_INT (((mask >> 2) & 3) + 12),
9830 GEN_INT (((mask >> 4) & 3) + 28),
9831 GEN_INT (((mask >> 6) & 3) + 28),
9832 operands[4], operands[5]));
9833 DONE;
9834 })
9835
9836
9837 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9838 [(match_operand:VF_AVX512VL 0 "register_operand")
9839 (match_operand:VF_AVX512VL 1 "register_operand")
9840 (match_operand:VF_AVX512VL 2 "register_operand")
9841 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9842 (match_operand:SI 4 "const_0_to_255_operand")
9843 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9844 "TARGET_AVX512F"
9845 {
9846 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9847 operands[0], operands[1], operands[2], operands[3],
9848 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9849 <round_saeonly_expand_operand6>));
9850 DONE;
9851 })
9852
9853 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9854 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9855 (unspec:VF_AVX512VL
9856 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9857 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9858 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9859 (match_operand:SI 4 "const_0_to_255_operand")]
9860 UNSPEC_FIXUPIMM))]
9861 "TARGET_AVX512F"
9862 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9863 [(set_attr "prefix" "evex")
9864 (set_attr "mode" "<MODE>")])
9865
9866 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9867 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9868 (vec_merge:VF_AVX512VL
9869 (unspec:VF_AVX512VL
9870 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9871 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9872 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9873 (match_operand:SI 4 "const_0_to_255_operand")]
9874 UNSPEC_FIXUPIMM)
9875 (match_dup 1)
9876 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9877 "TARGET_AVX512F"
9878 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9879 [(set_attr "prefix" "evex")
9880 (set_attr "mode" "<MODE>")])
9881
9882 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9883 [(match_operand:VF_128 0 "register_operand")
9884 (match_operand:VF_128 1 "register_operand")
9885 (match_operand:VF_128 2 "register_operand")
9886 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9887 (match_operand:SI 4 "const_0_to_255_operand")
9888 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9889 "TARGET_AVX512F"
9890 {
9891 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9892 operands[0], operands[1], operands[2], operands[3],
9893 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9894 <round_saeonly_expand_operand6>));
9895 DONE;
9896 })
9897
9898 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9899 [(set (match_operand:VF_128 0 "register_operand" "=v")
9900 (vec_merge:VF_128
9901 (unspec:VF_128
9902 [(match_operand:VF_128 1 "register_operand" "0")
9903 (match_operand:VF_128 2 "register_operand" "v")
9904 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9905 (match_operand:SI 4 "const_0_to_255_operand")]
9906 UNSPEC_FIXUPIMM)
9907 (match_dup 1)
9908 (const_int 1)))]
9909 "TARGET_AVX512F"
9910 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9911 [(set_attr "prefix" "evex")
9912 (set_attr "mode" "<ssescalarmode>")])
9913
9914 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9915 [(set (match_operand:VF_128 0 "register_operand" "=v")
9916 (vec_merge:VF_128
9917 (vec_merge:VF_128
9918 (unspec:VF_128
9919 [(match_operand:VF_128 1 "register_operand" "0")
9920 (match_operand:VF_128 2 "register_operand" "v")
9921 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9922 (match_operand:SI 4 "const_0_to_255_operand")]
9923 UNSPEC_FIXUPIMM)
9924 (match_dup 1)
9925 (const_int 1))
9926 (match_dup 1)
9927 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9928 "TARGET_AVX512F"
9929 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9930 [(set_attr "prefix" "evex")
9931 (set_attr "mode" "<ssescalarmode>")])
9932
9933 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9934 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9935 (unspec:VF_AVX512VL
9936 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9937 (match_operand:SI 2 "const_0_to_255_operand")]
9938 UNSPEC_ROUND))]
9939 "TARGET_AVX512F"
9940 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9941 [(set_attr "length_immediate" "1")
9942 (set_attr "prefix" "evex")
9943 (set_attr "mode" "<MODE>")])
9944
9945 (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
9946 [(set (match_operand:VF_128 0 "register_operand" "=v")
9947 (vec_merge:VF_128
9948 (unspec:VF_128
9949 [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
9950 (match_operand:SI 3 "const_0_to_255_operand")]
9951 UNSPEC_ROUND)
9952 (match_operand:VF_128 1 "register_operand" "v")
9953 (const_int 1)))]
9954 "TARGET_AVX512F"
9955 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
9956 [(set_attr "length_immediate" "1")
9957 (set_attr "prefix" "evex")
9958 (set_attr "mode" "<MODE>")])
9959
9960 (define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
9961 [(set (match_operand:VF_128 0 "register_operand" "=v")
9962 (vec_merge:VF_128
9963 (vec_duplicate:VF_128
9964 (unspec:<ssescalarmode>
9965 [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9966 (match_operand:SI 3 "const_0_to_255_operand")]
9967 UNSPEC_ROUND))
9968 (match_operand:VF_128 1 "register_operand" "v")
9969 (const_int 1)))]
9970 "TARGET_AVX512F"
9971 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
9972 [(set_attr "length_immediate" "1")
9973 (set_attr "prefix" "evex")
9974 (set_attr "mode" "<MODE>")])
9975
9976 ;; One bit of the 8-bit shufps immediate covers two destination elements.
9977 (define_insn "avx512f_shufps512_1<mask_name>"
9978 [(set (match_operand:V16SF 0 "register_operand" "=v")
9979 (vec_select:V16SF
9980 (vec_concat:V32SF
9981 (match_operand:V16SF 1 "register_operand" "v")
9982 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9983 (parallel [(match_operand 3 "const_0_to_3_operand")
9984 (match_operand 4 "const_0_to_3_operand")
9985 (match_operand 5 "const_16_to_19_operand")
9986 (match_operand 6 "const_16_to_19_operand")
9987 (match_operand 7 "const_4_to_7_operand")
9988 (match_operand 8 "const_4_to_7_operand")
9989 (match_operand 9 "const_20_to_23_operand")
9990 (match_operand 10 "const_20_to_23_operand")
9991 (match_operand 11 "const_8_to_11_operand")
9992 (match_operand 12 "const_8_to_11_operand")
9993 (match_operand 13 "const_24_to_27_operand")
9994 (match_operand 14 "const_24_to_27_operand")
9995 (match_operand 15 "const_12_to_15_operand")
9996 (match_operand 16 "const_12_to_15_operand")
9997 (match_operand 17 "const_28_to_31_operand")
9998 (match_operand 18 "const_28_to_31_operand")])))]
9999 "TARGET_AVX512F
10000 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
10001 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
10002 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
10003 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
10004 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
10005 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
10006 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
10007 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
10008 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
10009 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
10010 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
10011 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
10012 {
10013 int mask;
10014 mask = INTVAL (operands[3]);
10015 mask |= INTVAL (operands[4]) << 2;
10016 mask |= (INTVAL (operands[5]) - 16) << 4;
10017 mask |= (INTVAL (operands[6]) - 16) << 6;
10018 operands[3] = GEN_INT (mask);
10019
10020 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
10021 }
10022 [(set_attr "type" "sselog")
10023 (set_attr "length_immediate" "1")
10024 (set_attr "prefix" "evex")
10025 (set_attr "mode" "V16SF")])
10026
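;; For shufpd each immediate bit selects the low or high double of one
;; source pair: even destination elements come from operand 1 (indices
;; 0-7), odd ones from operand 2 (indices 8-15).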
10027 (define_expand "avx512f_shufpd512_mask"
10028 [(match_operand:V8DF 0 "register_operand")
10029 (match_operand:V8DF 1 "register_operand")
10030 (match_operand:V8DF 2 "nonimmediate_operand")
10031 (match_operand:SI 3 "const_0_to_255_operand")
10032 (match_operand:V8DF 4 "register_operand")
10033 (match_operand:QI 5 "register_operand")]
10034 "TARGET_AVX512F"
10035 {
10036 int mask = INTVAL (operands[3]);
10037 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
10038 GEN_INT (mask & 1),
10039 GEN_INT (mask & 2 ? 9 : 8),
10040 GEN_INT (mask & 4 ? 3 : 2),
10041 GEN_INT (mask & 8 ? 11 : 10),
10042 GEN_INT (mask & 16 ? 5 : 4),
10043 GEN_INT (mask & 32 ? 13 : 12),
10044 GEN_INT (mask & 64 ? 7 : 6),
10045 GEN_INT (mask & 128 ? 15 : 14),
10046 operands[4], operands[5]));
10047 DONE;
10048 })
10049
10050 (define_insn "avx512f_shufpd512_1<mask_name>"
10051 [(set (match_operand:V8DF 0 "register_operand" "=v")
10052 (vec_select:V8DF
10053 (vec_concat:V16DF
10054 (match_operand:V8DF 1 "register_operand" "v")
10055 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
10056 (parallel [(match_operand 3 "const_0_to_1_operand")
10057 (match_operand 4 "const_8_to_9_operand")
10058 (match_operand 5 "const_2_to_3_operand")
10059 (match_operand 6 "const_10_to_11_operand")
10060 (match_operand 7 "const_4_to_5_operand")
10061 (match_operand 8 "const_12_to_13_operand")
10062 (match_operand 9 "const_6_to_7_operand")
10063 (match_operand 10 "const_14_to_15_operand")])))]
10064 "TARGET_AVX512F"
10065 {
10066 int mask;
10067 mask = INTVAL (operands[3]);
10068 mask |= (INTVAL (operands[4]) - 8) << 1;
10069 mask |= (INTVAL (operands[5]) - 2) << 2;
10070 mask |= (INTVAL (operands[6]) - 10) << 3;
10071 mask |= (INTVAL (operands[7]) - 4) << 4;
10072 mask |= (INTVAL (operands[8]) - 12) << 5;
10073 mask |= (INTVAL (operands[9]) - 6) << 6;
10074 mask |= (INTVAL (operands[10]) - 14) << 7;
10075 operands[3] = GEN_INT (mask);
10076
10077 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
10078 }
10079 [(set_attr "type" "sselog")
10080 (set_attr "length_immediate" "1")
10081 (set_attr "prefix" "evex")
10082 (set_attr "mode" "V8DF")])
10083
10084 (define_expand "avx_shufpd256<mask_expand4_name>"
10085 [(match_operand:V4DF 0 "register_operand")
10086 (match_operand:V4DF 1 "register_operand")
10087 (match_operand:V4DF 2 "nonimmediate_operand")
10088 (match_operand:SI 3 "const_int_operand")]
10089 "TARGET_AVX"
10090 {
10091 int mask = INTVAL (operands[3]);
10092 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
10093 operands[1],
10094 operands[2],
10095 GEN_INT (mask & 1),
10096 GEN_INT (mask & 2 ? 5 : 4),
10097 GEN_INT (mask & 4 ? 3 : 2),
10098 GEN_INT (mask & 8 ? 7 : 6)
10099 <mask_expand4_args>));
10100 DONE;
10101 })
10102
10103 (define_insn "avx_shufpd256_1<mask_name>"
10104 [(set (match_operand:V4DF 0 "register_operand" "=v")
10105 (vec_select:V4DF
10106 (vec_concat:V8DF
10107 (match_operand:V4DF 1 "register_operand" "v")
10108 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
10109 (parallel [(match_operand 3 "const_0_to_1_operand")
10110 (match_operand 4 "const_4_to_5_operand")
10111 (match_operand 5 "const_2_to_3_operand")
10112 (match_operand 6 "const_6_to_7_operand")])))]
10113 "TARGET_AVX && <mask_avx512vl_condition>"
10114 {
10115 int mask;
10116 mask = INTVAL (operands[3]);
10117 mask |= (INTVAL (operands[4]) - 4) << 1;
10118 mask |= (INTVAL (operands[5]) - 2) << 2;
10119 mask |= (INTVAL (operands[6]) - 6) << 3;
10120 operands[3] = GEN_INT (mask);
10121
10122 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
10123 }
10124 [(set_attr "type" "sseshuf")
10125 (set_attr "length_immediate" "1")
10126 (set_attr "prefix" "vex")
10127 (set_attr "mode" "V4DF")])
10128
10129 (define_expand "sse2_shufpd<mask_expand4_name>"
10130 [(match_operand:V2DF 0 "register_operand")
10131 (match_operand:V2DF 1 "register_operand")
10132 (match_operand:V2DF 2 "vector_operand")
10133 (match_operand:SI 3 "const_int_operand")]
10134 "TARGET_SSE2"
10135 {
10136 int mask = INTVAL (operands[3]);
10137 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
10138 operands[2], GEN_INT (mask & 1),
10139 GEN_INT (mask & 2 ? 3 : 2)
10140 <mask_expand4_args>));
10141 DONE;
10142 })
10143
10144 (define_insn "sse2_shufpd_v2df_mask"
10145 [(set (match_operand:V2DF 0 "register_operand" "=v")
10146 (vec_merge:V2DF
10147 (vec_select:V2DF
10148 (vec_concat:V4DF
10149 (match_operand:V2DF 1 "register_operand" "v")
10150 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
10151 (parallel [(match_operand 3 "const_0_to_1_operand")
10152 (match_operand 4 "const_2_to_3_operand")]))
10153 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
10154 (match_operand:QI 6 "register_operand" "Yk")))]
10155 "TARGET_AVX512VL"
10156 {
10157 int mask;
10158 mask = INTVAL (operands[3]);
10159 mask |= (INTVAL (operands[4]) - 2) << 1;
10160 operands[3] = GEN_INT (mask);
10161
10162 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
10163 }
10164 [(set_attr "type" "sseshuf")
10165 (set_attr "length_immediate" "1")
10166 (set_attr "prefix" "evex")
10167 (set_attr "mode" "V2DF")])
10168
10169 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
10170 (define_insn "avx2_interleave_highv4di<mask_name>"
10171 [(set (match_operand:V4DI 0 "register_operand" "=v")
10172 (vec_select:V4DI
10173 (vec_concat:V8DI
10174 (match_operand:V4DI 1 "register_operand" "v")
10175 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10176 (parallel [(const_int 1)
10177 (const_int 5)
10178 (const_int 3)
10179 (const_int 7)])))]
10180 "TARGET_AVX2 && <mask_avx512vl_condition>"
10181 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10182 [(set_attr "type" "sselog")
10183 (set_attr "prefix" "vex")
10184 (set_attr "mode" "OI")])
10185
10186 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
10187 [(set (match_operand:V8DI 0 "register_operand" "=v")
10188 (vec_select:V8DI
10189 (vec_concat:V16DI
10190 (match_operand:V8DI 1 "register_operand" "v")
10191 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10192 (parallel [(const_int 1) (const_int 9)
10193 (const_int 3) (const_int 11)
10194 (const_int 5) (const_int 13)
10195 (const_int 7) (const_int 15)])))]
10196 "TARGET_AVX512F"
10197 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10198 [(set_attr "type" "sselog")
10199 (set_attr "prefix" "evex")
10200 (set_attr "mode" "XI")])
10201
10202 (define_insn "vec_interleave_highv2di<mask_name>"
10203 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10204 (vec_select:V2DI
10205 (vec_concat:V4DI
10206 (match_operand:V2DI 1 "register_operand" "0,v")
10207 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10208 (parallel [(const_int 1)
10209 (const_int 3)])))]
10210 "TARGET_SSE2 && <mask_avx512vl_condition>"
10211 "@
10212 punpckhqdq\t{%2, %0|%0, %2}
10213 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10214 [(set_attr "isa" "noavx,avx")
10215 (set_attr "type" "sselog")
10216 (set_attr "prefix_data16" "1,*")
10217 (set_attr "prefix" "orig,<mask_prefix>")
10218 (set_attr "mode" "TI")])
10219
10220 (define_insn "avx2_interleave_lowv4di<mask_name>"
10221 [(set (match_operand:V4DI 0 "register_operand" "=v")
10222 (vec_select:V4DI
10223 (vec_concat:V8DI
10224 (match_operand:V4DI 1 "register_operand" "v")
10225 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10226 (parallel [(const_int 0)
10227 (const_int 4)
10228 (const_int 2)
10229 (const_int 6)])))]
10230 "TARGET_AVX2 && <mask_avx512vl_condition>"
10231 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10232 [(set_attr "type" "sselog")
10233 (set_attr "prefix" "vex")
10234 (set_attr "mode" "OI")])
10235
10236 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
10237 [(set (match_operand:V8DI 0 "register_operand" "=v")
10238 (vec_select:V8DI
10239 (vec_concat:V16DI
10240 (match_operand:V8DI 1 "register_operand" "v")
10241 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10242 (parallel [(const_int 0) (const_int 8)
10243 (const_int 2) (const_int 10)
10244 (const_int 4) (const_int 12)
10245 (const_int 6) (const_int 14)])))]
10246 "TARGET_AVX512F"
10247 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10248 [(set_attr "type" "sselog")
10249 (set_attr "prefix" "evex")
10250 (set_attr "mode" "XI")])
10251
10252 (define_insn "vec_interleave_lowv2di<mask_name>"
10253 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10254 (vec_select:V2DI
10255 (vec_concat:V4DI
10256 (match_operand:V2DI 1 "register_operand" "0,v")
10257 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10258 (parallel [(const_int 0)
10259 (const_int 2)])))]
10260 "TARGET_SSE2 && <mask_avx512vl_condition>"
10261 "@
10262 punpcklqdq\t{%2, %0|%0, %2}
10263 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10264 [(set_attr "isa" "noavx,avx")
10265 (set_attr "type" "sselog")
10266 (set_attr "prefix_data16" "1,*")
10267 (set_attr "prefix" "orig,vex")
10268 (set_attr "mode" "TI")])
10269
10270 (define_insn "sse2_shufpd_<mode>"
10271 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
10272 (vec_select:VI8F_128
10273 (vec_concat:<ssedoublevecmode>
10274 (match_operand:VI8F_128 1 "register_operand" "0,v")
10275 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
10276 (parallel [(match_operand 3 "const_0_to_1_operand")
10277 (match_operand 4 "const_2_to_3_operand")])))]
10278 "TARGET_SSE2"
10279 {
10280 int mask;
10281 mask = INTVAL (operands[3]);
10282 mask |= (INTVAL (operands[4]) - 2) << 1;
10283 operands[3] = GEN_INT (mask);
10284
10285 switch (which_alternative)
10286 {
10287 case 0:
10288 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
10289 case 1:
10290 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10291 default:
10292 gcc_unreachable ();
10293 }
10294 }
10295 [(set_attr "isa" "noavx,avx")
10296 (set_attr "type" "sseshuf")
10297 (set_attr "length_immediate" "1")
10298 (set_attr "prefix" "orig,maybe_evex")
10299 (set_attr "mode" "V2DF")])
10300
10301 ;; Avoid combining registers from different units in a single alternative,
10302 ;; see comment above inline_secondary_memory_needed function in i386.c
10303 (define_insn "sse2_storehpd"
10304 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
10305 (vec_select:DF
10306 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
10307 (parallel [(const_int 1)])))]
10308 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10309 "@
10310 %vmovhpd\t{%1, %0|%0, %1}
10311 unpckhpd\t%0, %0
10312 vunpckhpd\t{%d1, %0|%0, %d1}
10313 #
10314 #
10315 #"
10316 [(set_attr "isa" "*,noavx,avx,*,*,*")
10317 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
10318 (set (attr "prefix_data16")
10319 (if_then_else
10320 (and (eq_attr "alternative" "0")
10321 (not (match_test "TARGET_AVX")))
10322 (const_string "1")
10323 (const_string "*")))
10324 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
10325 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
10326
10327 (define_split
10328 [(set (match_operand:DF 0 "register_operand")
10329 (vec_select:DF
10330 (match_operand:V2DF 1 "memory_operand")
10331 (parallel [(const_int 1)])))]
10332 "TARGET_SSE2 && reload_completed"
10333 [(set (match_dup 0) (match_dup 1))]
10334 "operands[1] = adjust_address (operands[1], DFmode, 8);")
10335
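;; Without SSE2 the DFmode halves are moved with single-precision moves,
;; which copy the same bit pattern.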
10336 (define_insn "*vec_extractv2df_1_sse"
10337 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10338 (vec_select:DF
10339 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
10340 (parallel [(const_int 1)])))]
10341 "!TARGET_SSE2 && TARGET_SSE
10342 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10343 "@
10344 movhps\t{%1, %0|%0, %1}
10345 movhlps\t{%1, %0|%0, %1}
10346 movlps\t{%H1, %0|%0, %H1}"
10347 [(set_attr "type" "ssemov")
10348 (set_attr "mode" "V2SF,V4SF,V2SF")])
10349
10350 ;; Avoid combining registers from different units in a single alternative,
10351 ;; see comment above inline_secondary_memory_needed function in i386.c
10352 (define_insn "sse2_storelpd"
10353 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
10354 (vec_select:DF
10355 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
10356 (parallel [(const_int 0)])))]
10357 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10358 "@
10359 %vmovlpd\t{%1, %0|%0, %1}
10360 #
10361 #
10362 #
10363 #"
10364 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
10365 (set (attr "prefix_data16")
10366 (if_then_else (eq_attr "alternative" "0")
10367 (const_string "1")
10368 (const_string "*")))
10369 (set_attr "prefix" "maybe_vex")
10370 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
10371
10372 (define_split
10373 [(set (match_operand:DF 0 "register_operand")
10374 (vec_select:DF
10375 (match_operand:V2DF 1 "nonimmediate_operand")
10376 (parallel [(const_int 0)])))]
10377 "TARGET_SSE2 && reload_completed"
10378 [(set (match_dup 0) (match_dup 1))]
10379 "operands[1] = gen_lowpart (DFmode, operands[1]);")
10380
10381 (define_insn "*vec_extractv2df_0_sse"
10382 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10383 (vec_select:DF
10384 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
10385 (parallel [(const_int 0)])))]
10386 "!TARGET_SSE2 && TARGET_SSE
10387 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10388 "@
10389 movlps\t{%1, %0|%0, %1}
10390 movaps\t{%1, %0|%0, %1}
10391 movlps\t{%1, %0|%0, %q1}"
10392 [(set_attr "type" "ssemov")
10393 (set_attr "mode" "V2SF,V4SF,V2SF")])
10394
10395 (define_expand "sse2_loadhpd_exp"
10396 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10397 (vec_concat:V2DF
10398 (vec_select:DF
10399 (match_operand:V2DF 1 "nonimmediate_operand")
10400 (parallel [(const_int 0)]))
10401 (match_operand:DF 2 "nonimmediate_operand")))]
10402 "TARGET_SSE2"
10403 {
10404 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10405
10406 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
10407
10408 /* Fix up the destination if needed. */
10409 if (dst != operands[0])
10410 emit_move_insn (operands[0], dst);
10411
10412 DONE;
10413 })
10414
10415 ;; Avoid combining registers from different units in a single alternative,
10416 ;; see comment above inline_secondary_memory_needed function in i386.c
10417 (define_insn "sse2_loadhpd"
10418 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10419 "=x,v,x,v ,o,o ,o")
10420 (vec_concat:V2DF
10421 (vec_select:DF
10422 (match_operand:V2DF 1 "nonimmediate_operand"
10423 " 0,v,0,v ,0,0 ,0")
10424 (parallel [(const_int 0)]))
10425 (match_operand:DF 2 "nonimmediate_operand"
10426 " m,m,x,Yv,x,*f,r")))]
10427 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10428 "@
10429 movhpd\t{%2, %0|%0, %2}
10430 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10431 unpcklpd\t{%2, %0|%0, %2}
10432 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10433 #
10434 #
10435 #"
10436 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10437 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
10438 (set (attr "prefix_data16")
10439 (if_then_else (eq_attr "alternative" "0")
10440 (const_string "1")
10441 (const_string "*")))
10442 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
10443 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
10444
10445 (define_split
10446 [(set (match_operand:V2DF 0 "memory_operand")
10447 (vec_concat:V2DF
10448 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
10449 (match_operand:DF 1 "register_operand")))]
10450 "TARGET_SSE2 && reload_completed"
10451 [(set (match_dup 0) (match_dup 1))]
10452 "operands[0] = adjust_address (operands[0], DFmode, 8);")
10453
10454 (define_expand "sse2_loadlpd_exp"
10455 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10456 (vec_concat:V2DF
10457 (match_operand:DF 2 "nonimmediate_operand")
10458 (vec_select:DF
10459 (match_operand:V2DF 1 "nonimmediate_operand")
10460 (parallel [(const_int 1)]))))]
10461 "TARGET_SSE2"
10462 {
10463 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10464
10465 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
10466
10467 /* Fix up the destination if needed. */
10468 if (dst != operands[0])
10469 emit_move_insn (operands[0], dst);
10470
10471 DONE;
10472 })
10473
10474 ;; Avoid combining registers from different units in a single alternative,
10475 ;; see comment above inline_secondary_memory_needed function in i386.c
10476 (define_insn "sse2_loadlpd"
10477 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10478 "=v,x,v,x,v,x,x,v,m,m ,m")
10479 (vec_concat:V2DF
10480 (match_operand:DF 2 "nonimmediate_operand"
10481 "vm,m,m,x,v,0,0,v,x,*f,r")
10482 (vec_select:DF
10483 (match_operand:V2DF 1 "nonimm_or_0_operand"
10484 " C,0,v,0,v,x,o,o,0,0 ,0")
10485 (parallel [(const_int 1)]))))]
10486 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10487 "@
10488 %vmovq\t{%2, %0|%0, %2}
10489 movlpd\t{%2, %0|%0, %2}
10490 vmovlpd\t{%2, %1, %0|%0, %1, %2}
10491 movsd\t{%2, %0|%0, %2}
10492 vmovsd\t{%2, %1, %0|%0, %1, %2}
10493 shufpd\t{$2, %1, %0|%0, %1, 2}
10494 movhpd\t{%H1, %0|%0, %H1}
10495 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
10496 #
10497 #
10498 #"
10499 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
10500 (set (attr "type")
10501 (cond [(eq_attr "alternative" "5")
10502 (const_string "sselog")
10503 (eq_attr "alternative" "9")
10504 (const_string "fmov")
10505 (eq_attr "alternative" "10")
10506 (const_string "imov")
10507 ]
10508 (const_string "ssemov")))
10509 (set (attr "prefix_data16")
10510 (if_then_else (eq_attr "alternative" "1,6")
10511 (const_string "1")
10512 (const_string "*")))
10513 (set (attr "length_immediate")
10514 (if_then_else (eq_attr "alternative" "5")
10515 (const_string "1")
10516 (const_string "*")))
10517 (set (attr "prefix")
10518 (cond [(eq_attr "alternative" "0")
10519 (const_string "maybe_vex")
10520 (eq_attr "alternative" "1,3,5,6")
10521 (const_string "orig")
10522 (eq_attr "alternative" "2,4,7")
10523 (const_string "maybe_evex")
10524 ]
10525 (const_string "*")))
10526 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
10527
10528 (define_split
10529 [(set (match_operand:V2DF 0 "memory_operand")
10530 (vec_concat:V2DF
10531 (match_operand:DF 1 "register_operand")
10532 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
10533 "TARGET_SSE2 && reload_completed"
10534 [(set (match_dup 0) (match_dup 1))]
10535 "operands[0] = adjust_address (operands[0], DFmode, 0);")
10536
10537 (define_insn "sse2_movsd"
10538 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
10539 (vec_merge:V2DF
10540 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
10541 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
10542 (const_int 1)))]
10543 "TARGET_SSE2"
10544 "@
10545 movsd\t{%2, %0|%0, %2}
10546 vmovsd\t{%2, %1, %0|%0, %1, %2}
10547 movlpd\t{%2, %0|%0, %q2}
10548 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
10549 %vmovlpd\t{%2, %0|%q0, %2}
10550 shufpd\t{$2, %1, %0|%0, %1, 2}
10551 movhps\t{%H1, %0|%0, %H1}
10552 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
10553 %vmovhps\t{%1, %H0|%H0, %1}"
10554 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
10555 (set (attr "type")
10556 (if_then_else
10557 (eq_attr "alternative" "5")
10558 (const_string "sselog")
10559 (const_string "ssemov")))
10560 (set (attr "prefix_data16")
10561 (if_then_else
10562 (and (eq_attr "alternative" "2,4")
10563 (not (match_test "TARGET_AVX")))
10564 (const_string "1")
10565 (const_string "*")))
10566 (set (attr "length_immediate")
10567 (if_then_else (eq_attr "alternative" "5")
10568 (const_string "1")
10569 (const_string "*")))
10570 (set (attr "prefix")
10571 (cond [(eq_attr "alternative" "1,3,7")
10572 (const_string "maybe_evex")
10573 (eq_attr "alternative" "4,8")
10574 (const_string "maybe_vex")
10575 ]
10576 (const_string "orig")))
10577 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
10578
10579 (define_insn "vec_dupv2df<mask_name>"
10580 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
10581 (vec_duplicate:V2DF
10582 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10583 "TARGET_SSE2 && <mask_avx512vl_condition>"
10584 "@
10585 unpcklpd\t%0, %0
10586 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10587 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10588 [(set_attr "isa" "noavx,sse3,avx512vl")
10589 (set_attr "type" "sselog1")
10590 (set_attr "prefix" "orig,maybe_vex,evex")
10591 (set_attr "mode" "V2DF,DF,DF")])
10592
10593 (define_insn "vec_concatv2df"
10594 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
10595 (vec_concat:V2DF
10596 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10597 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
10598 "TARGET_SSE
10599 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10600 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10601 "@
10602 unpcklpd\t{%2, %0|%0, %2}
10603 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10604 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10605 %vmovddup\t{%1, %0|%0, %1}
10606 vmovddup\t{%1, %0|%0, %1}
10607 movhpd\t{%2, %0|%0, %2}
10608 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10609 %vmovq\t{%1, %0|%0, %1}
10610 movlhps\t{%2, %0|%0, %2}
10611 movhps\t{%2, %0|%0, %2}"
10612 [(set (attr "isa")
10613 (cond [(eq_attr "alternative" "0,5")
10614 (const_string "sse2_noavx")
10615 (eq_attr "alternative" "1,6")
10616 (const_string "avx")
10617 (eq_attr "alternative" "2,4")
10618 (const_string "avx512vl")
10619 (eq_attr "alternative" "3")
10620 (const_string "sse3")
10621 (eq_attr "alternative" "7")
10622 (const_string "sse2")
10623 ]
10624 (const_string "noavx")))
10625 (set (attr "type")
10626 (if_then_else
10627 (eq_attr "alternative" "0,1,2,3,4")
10628 (const_string "sselog")
10629 (const_string "ssemov")))
10630 (set (attr "prefix_data16")
10631 (if_then_else (eq_attr "alternative" "5")
10632 (const_string "1")
10633 (const_string "*")))
10634 (set (attr "prefix")
10635 (cond [(eq_attr "alternative" "1,6")
10636 (const_string "vex")
10637 (eq_attr "alternative" "2,4")
10638 (const_string "evex")
10639 (eq_attr "alternative" "3,7")
10640 (const_string "maybe_vex")
10641 ]
10642 (const_string "orig")))
10643 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10644
10645 ;; vmovq also clears the upper elements, so no separate zeroing is needed.
10646 (define_insn "vec_set<mode>_0"
10647 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10648 (vec_merge:VF2_512_256
10649 (vec_duplicate:VF2_512_256
10650 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
10651 (match_operand:VF2_512_256 1 "const0_operand" "C")
10652 (const_int 1)))]
10653 "TARGET_AVX"
10654 "vmovq\t{%2, %x0|%x0, %2}"
10655 [(set_attr "type" "ssemov")
10656 (set_attr "prefix" "maybe_evex")
10657 (set_attr "mode" "DF")])
10658
10659 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10660 ;;
10661 ;; Parallel integer down-conversion operations
10662 ;;
10663 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10664
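;; The patterns below model the AVX512 vpmov[s|us]* down-converting moves.
;; The code iterator any_truncate covers plain truncation as well as the
;; signed and unsigned saturating forms, selected via <trunsuffix>
;; ("", "s" or "us").  Register destinations zero the bits above the
;; narrowed result; memory destinations store only the narrowed vector.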
10665 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10666 (define_mode_attr pmov_src_mode
10667 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10668 (define_mode_attr pmov_src_lower
10669 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10670 (define_mode_attr pmov_suff_1
10671 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
10672
10673 (define_expand "trunc<pmov_src_lower><mode>2"
10674 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
10675 (truncate:PMOV_DST_MODE_1
10676 (match_operand:<pmov_src_mode> 1 "register_operand")))]
10677 "TARGET_AVX512F")
10678
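;; The destination may be a register or memory; either way a single
;; vpmov instruction is emitted, e.g. V16SI -> V16QI uses vpmovdb
;; (vpmovsdb / vpmovusdb for the saturating forms).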
10679 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10680 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10681 (any_truncate:PMOV_DST_MODE_1
10682 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10683 "TARGET_AVX512F"
10684 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10685 [(set_attr "type" "ssemov")
10686 (set_attr "memory" "none,store")
10687 (set_attr "prefix" "evex")
10688 (set_attr "mode" "<sseinsnmode>")])
10689
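;; Masked variant: elements whose mask bit is clear are taken from
;; operand 2 (merge masking), or zeroed when operand 2 is a zero
;; constant (zero masking).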
10690 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10691 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10692 (vec_merge:PMOV_DST_MODE_1
10693 (any_truncate:PMOV_DST_MODE_1
10694 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10695 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10696 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10697 "TARGET_AVX512F"
10698 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10699 [(set_attr "type" "ssemov")
10700 (set_attr "memory" "none,store")
10701 (set_attr "prefix" "evex")
10702 (set_attr "mode" "<sseinsnmode>")])
10703
10704 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10705 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10706 (vec_merge:PMOV_DST_MODE_1
10707 (any_truncate:PMOV_DST_MODE_1
10708 (match_operand:<pmov_src_mode> 1 "register_operand"))
10709 (match_dup 0)
10710 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10711 "TARGET_AVX512F")
10712
10713 (define_expand "truncv32hiv32qi2"
10714 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10715 (truncate:V32QI
10716 (match_operand:V32HI 1 "register_operand")))]
10717 "TARGET_AVX512BW")
10718
10719 (define_insn "avx512bw_<code>v32hiv32qi2"
10720 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10721 (any_truncate:V32QI
10722 (match_operand:V32HI 1 "register_operand" "v,v")))]
10723 "TARGET_AVX512BW"
10724 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
10725 [(set_attr "type" "ssemov")
10726 (set_attr "memory" "none,store")
10727 (set_attr "prefix" "evex")
10728 (set_attr "mode" "XI")])
10729
10730 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
10731 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10732 (vec_merge:V32QI
10733 (any_truncate:V32QI
10734 (match_operand:V32HI 1 "register_operand" "v,v"))
10735 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
10736 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
10737 "TARGET_AVX512BW"
10738 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10739 [(set_attr "type" "ssemov")
10740 (set_attr "memory" "none,store")
10741 (set_attr "prefix" "evex")
10742 (set_attr "mode" "XI")])
10743
10744 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
10745 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10746 (vec_merge:V32QI
10747 (any_truncate:V32QI
10748 (match_operand:V32HI 1 "register_operand"))
10749 (match_dup 0)
10750 (match_operand:SI 2 "register_operand")))]
10751 "TARGET_AVX512BW")
10752
10753 (define_mode_iterator PMOV_DST_MODE_2
10754 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
10755 (define_mode_attr pmov_suff_2
10756 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
10757
10758 (define_expand "trunc<ssedoublemodelower><mode>2"
10759 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10760 (truncate:PMOV_DST_MODE_2
10761 (match_operand:<ssedoublemode> 1 "register_operand")))]
10762 "TARGET_AVX512VL")
10763
10764 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
10765 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10766 (any_truncate:PMOV_DST_MODE_2
10767 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
10768 "TARGET_AVX512VL"
10769 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
10770 [(set_attr "type" "ssemov")
10771 (set_attr "memory" "none,store")
10772 (set_attr "prefix" "evex")
10773 (set_attr "mode" "<sseinsnmode>")])
10774
10775 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
10776 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10777 (vec_merge:PMOV_DST_MODE_2
10778 (any_truncate:PMOV_DST_MODE_2
10779 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
10780 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
10781 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10782 "TARGET_AVX512VL"
10783 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10784 [(set_attr "type" "ssemov")
10785 (set_attr "memory" "none,store")
10786 (set_attr "prefix" "evex")
10787 (set_attr "mode" "<sseinsnmode>")])
10788
10789 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
10790 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10791 (vec_merge:PMOV_DST_MODE_2
10792 (any_truncate:PMOV_DST_MODE_2
10793 (match_operand:<ssedoublemode> 1 "register_operand"))
10794 (match_dup 0)
10795 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10796 "TARGET_AVX512VL")
10797
10798 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
10799 (define_mode_attr pmov_dst_3_lower
10800 [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
10801 (define_mode_attr pmov_dst_3
10802 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
10803 (define_mode_attr pmov_dst_zeroed_3
10804 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
10805 (define_mode_attr pmov_suff_3
10806 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
10807
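;; Truncations whose natural destination (V2QI, V4QI, V8QI) is narrower
;; than a full XMM register.  The register-destination patterns produce
;; a V16QI in which the truncated elements occupy the low part and the
;; remaining elements are zero, matching vpmov's zeroing of the upper
;; destination bits; the expander rewrites operand 0 as a V16QI subreg.
;; The store patterns write only the narrow result to memory.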
10808 (define_expand "trunc<mode><pmov_dst_3_lower>2"
10809 [(set (match_operand:<pmov_dst_3> 0 "register_operand")
10810 (truncate:<pmov_dst_3>
10811 (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
10812 "TARGET_AVX512VL"
10813 {
10814 operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0);
10815 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0],
10816 operands[1],
10817 CONST0_RTX (<pmov_dst_zeroed_3>mode)));
10818 DONE;
10819 })
10820
10821 (define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
10822 [(set (match_operand:V16QI 0 "register_operand" "=v")
10823 (vec_concat:V16QI
10824 (any_truncate:<pmov_dst_3>
10825 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
10826 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
10827 "TARGET_AVX512VL"
10828 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10829 [(set_attr "type" "ssemov")
10830 (set_attr "prefix" "evex")
10831 (set_attr "mode" "TI")])
10832
10833 (define_insn "*avx512vl_<code>v2div2qi2_store_1"
10834 [(set (match_operand:V2QI 0 "memory_operand" "=m")
10835 (any_truncate:V2QI
10836 (match_operand:V2DI 1 "register_operand" "v")))]
10837 "TARGET_AVX512VL"
10838 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10839 [(set_attr "type" "ssemov")
10840 (set_attr "memory" "store")
10841 (set_attr "prefix" "evex")
10842 (set_attr "mode" "TI")])
10843
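;; The same store may reach us as a scalar HImode store of a subreg of
;; the V2QI truncation; before reload it is split back into a direct
;; V2QImode store to the same address.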
10844 (define_insn_and_split "*avx512vl_<code>v2div2qi2_store_2"
10845 [(set (match_operand:HI 0 "memory_operand")
10846 (subreg:HI
10847 (any_truncate:V2QI
10848 (match_operand:V2DI 1 "register_operand")) 0))]
10849 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10850 "#"
10851 "&& 1"
10852 [(set (match_dup 0)
10853 (any_truncate:V2QI (match_dup 1)))]
10854 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
10855
10856 (define_insn "avx512vl_<code>v2div2qi2_mask"
10857 [(set (match_operand:V16QI 0 "register_operand" "=v")
10858 (vec_concat:V16QI
10859 (vec_merge:V2QI
10860 (any_truncate:V2QI
10861 (match_operand:V2DI 1 "register_operand" "v"))
10862 (vec_select:V2QI
10863 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10864 (parallel [(const_int 0) (const_int 1)]))
10865 (match_operand:QI 3 "register_operand" "Yk"))
10866 (const_vector:V14QI [(const_int 0) (const_int 0)
10867 (const_int 0) (const_int 0)
10868 (const_int 0) (const_int 0)
10869 (const_int 0) (const_int 0)
10870 (const_int 0) (const_int 0)
10871 (const_int 0) (const_int 0)
10872 (const_int 0) (const_int 0)])))]
10873 "TARGET_AVX512VL"
10874 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10875 [(set_attr "type" "ssemov")
10876 (set_attr "prefix" "evex")
10877 (set_attr "mode" "TI")])
10878
10879 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
10880 [(set (match_operand:V16QI 0 "register_operand" "=v")
10881 (vec_concat:V16QI
10882 (vec_merge:V2QI
10883 (any_truncate:V2QI
10884 (match_operand:V2DI 1 "register_operand" "v"))
10885 (const_vector:V2QI [(const_int 0) (const_int 0)])
10886 (match_operand:QI 2 "register_operand" "Yk"))
10887 (const_vector:V14QI [(const_int 0) (const_int 0)
10888 (const_int 0) (const_int 0)
10889 (const_int 0) (const_int 0)
10890 (const_int 0) (const_int 0)
10891 (const_int 0) (const_int 0)
10892 (const_int 0) (const_int 0)
10893 (const_int 0) (const_int 0)])))]
10894 "TARGET_AVX512VL"
10895 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10896 [(set_attr "type" "ssemov")
10897 (set_attr "prefix" "evex")
10898 (set_attr "mode" "TI")])
10899
10900 (define_insn "*avx512vl_<code>v2div2qi2_mask_store_1"
10901 [(set (match_operand:V2QI 0 "memory_operand" "=m")
10902 (vec_merge:V2QI
10903 (any_truncate:V2QI
10904 (match_operand:V2DI 1 "register_operand" "v"))
10905 (match_dup 0)
10906 (match_operand:QI 2 "register_operand" "Yk")))]
10907 "TARGET_AVX512VL"
10908 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
10909 [(set_attr "type" "ssemov")
10910 (set_attr "memory" "store")
10911 (set_attr "prefix" "evex")
10912 (set_attr "mode" "TI")])
10913
10914 (define_insn_and_split "avx512vl_<code>v2div2qi2_mask_store_2"
10915 [(set (match_operand:HI 0 "memory_operand")
10916 (subreg:HI
10917 (vec_merge:V2QI
10918 (any_truncate:V2QI
10919 (match_operand:V2DI 1 "register_operand"))
10920 (vec_select:V2QI
10921 (subreg:V4QI
10922 (vec_concat:V2HI
10923 (match_dup 0)
10924 (const_int 0)) 0)
10925 (parallel [(const_int 0) (const_int 1)]))
10926 (match_operand:QI 2 "register_operand")) 0))]
10927 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10928 "#"
10929 "&& 1"
10930 [(set (match_dup 0)
10931 (vec_merge:V2QI
10932 (any_truncate:V2QI (match_dup 1))
10933 (match_dup 0)
10934 (match_dup 2)))]
10935 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
10936
10937 (define_insn "*avx512vl_<code><mode>v4qi2_store_1"
10938 [(set (match_operand:V4QI 0 "memory_operand" "=m")
10939 (any_truncate:V4QI
10940 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
10941 "TARGET_AVX512VL"
10942 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10943 [(set_attr "type" "ssemov")
10944 (set_attr "memory" "store")
10945 (set_attr "prefix" "evex")
10946 (set_attr "mode" "TI")])
10947
10948 (define_insn_and_split "*avx512vl_<code><mode>v4qi2_store_2"
10949 [(set (match_operand:SI 0 "memory_operand")
10950 (subreg:SI
10951 (any_truncate:V4QI
10952 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
10953 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10954 "#"
10955 "&& 1"
10956 [(set (match_dup 0)
10957 (any_truncate:V4QI (match_dup 1)))]
10958 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
10959
10960 (define_insn "avx512vl_<code><mode>v4qi2_mask"
10961 [(set (match_operand:V16QI 0 "register_operand" "=v")
10962 (vec_concat:V16QI
10963 (vec_merge:V4QI
10964 (any_truncate:V4QI
10965 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10966 (vec_select:V4QI
10967 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10968 (parallel [(const_int 0) (const_int 1)
10969 (const_int 2) (const_int 3)]))
10970 (match_operand:QI 3 "register_operand" "Yk"))
10971 (const_vector:V12QI [(const_int 0) (const_int 0)
10972 (const_int 0) (const_int 0)
10973 (const_int 0) (const_int 0)
10974 (const_int 0) (const_int 0)
10975 (const_int 0) (const_int 0)
10976 (const_int 0) (const_int 0)])))]
10977 "TARGET_AVX512VL"
10978 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10979 [(set_attr "type" "ssemov")
10980 (set_attr "prefix" "evex")
10981 (set_attr "mode" "TI")])
10982
10983 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
10984 [(set (match_operand:V16QI 0 "register_operand" "=v")
10985 (vec_concat:V16QI
10986 (vec_merge:V4QI
10987 (any_truncate:V4QI
10988 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10989 (const_vector:V4QI [(const_int 0) (const_int 0)
10990 (const_int 0) (const_int 0)])
10991 (match_operand:QI 2 "register_operand" "Yk"))
10992 (const_vector:V12QI [(const_int 0) (const_int 0)
10993 (const_int 0) (const_int 0)
10994 (const_int 0) (const_int 0)
10995 (const_int 0) (const_int 0)
10996 (const_int 0) (const_int 0)
10997 (const_int 0) (const_int 0)])))]
10998 "TARGET_AVX512VL"
10999 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11000 [(set_attr "type" "ssemov")
11001 (set_attr "prefix" "evex")
11002 (set_attr "mode" "TI")])
11003
11004 (define_insn "*avx512vl_<code><mode>v4qi2_mask_store_1"
11005 [(set (match_operand:V4QI 0 "memory_operand" "=m")
11006 (vec_merge:V4QI
11007 (any_truncate:V4QI
11008 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11009 (match_dup 0)
11010 (match_operand:QI 2 "register_operand" "Yk")))]
11011 "TARGET_AVX512VL"
11012 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11013 [(set_attr "type" "ssemov")
11014 (set_attr "memory" "store")
11015 (set_attr "prefix" "evex")
11016 (set_attr "mode" "TI")])
11017
11018 (define_insn_and_split "avx512vl_<code><mode>v4qi2_mask_store_2"
11019 [(set (match_operand:SI 0 "memory_operand")
11020 (subreg:SI
11021 (vec_merge:V4QI
11022 (any_truncate:V4QI
11023 (match_operand:VI4_128_8_256 1 "register_operand"))
11024 (vec_select:V4QI
11025 (subreg:V8QI
11026 (vec_concat:V2SI
11027 (match_dup 0)
11028 (const_int 0)) 0)
11029 (parallel [(const_int 0) (const_int 1)
11030 (const_int 2) (const_int 3)]))
11031 (match_operand:QI 2 "register_operand")) 0))]
11032 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11033 "#"
11034 "&& 1"
11035 [(set (match_dup 0)
11036 (vec_merge:V4QI
11037 (any_truncate:V4QI (match_dup 1))
11038 (match_dup 0)
11039 (match_dup 2)))]
11040 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
11041
11042 (define_mode_iterator VI2_128_BW_4_256
11043 [(V8HI "TARGET_AVX512BW") V8SI])
11044
11045 (define_insn "*avx512vl_<code><mode>v8qi2_store_1"
11046 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11047 (any_truncate:V8QI
11048 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")))]
11049 "TARGET_AVX512VL"
11050 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
11051 [(set_attr "type" "ssemov")
11052 (set_attr "memory" "store")
11053 (set_attr "prefix" "evex")
11054 (set_attr "mode" "TI")])
11055
11056 (define_insn_and_split "*avx512vl_<code><mode>v8qi2_store_2"
11057 [(set (match_operand:DI 0 "memory_operand" "=m")
11058 (subreg:DI
11059 (any_truncate:V8QI
11060 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) 0))]
11061 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11062 "#"
11063 "&& 1"
11064 [(set (match_dup 0)
11065 (any_truncate:V8QI (match_dup 1)))]
11066 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11067
11068 (define_insn "avx512vl_<code><mode>v8qi2_mask"
11069 [(set (match_operand:V16QI 0 "register_operand" "=v")
11070 (vec_concat:V16QI
11071 (vec_merge:V8QI
11072 (any_truncate:V8QI
11073 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11074 (vec_select:V8QI
11075 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11076 (parallel [(const_int 0) (const_int 1)
11077 (const_int 2) (const_int 3)
11078 (const_int 4) (const_int 5)
11079 (const_int 6) (const_int 7)]))
11080 (match_operand:QI 3 "register_operand" "Yk"))
11081 (const_vector:V8QI [(const_int 0) (const_int 0)
11082 (const_int 0) (const_int 0)
11083 (const_int 0) (const_int 0)
11084 (const_int 0) (const_int 0)])))]
11085 "TARGET_AVX512VL"
11086 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11087 [(set_attr "type" "ssemov")
11088 (set_attr "prefix" "evex")
11089 (set_attr "mode" "TI")])
11090
11091 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
11092 [(set (match_operand:V16QI 0 "register_operand" "=v")
11093 (vec_concat:V16QI
11094 (vec_merge:V8QI
11095 (any_truncate:V8QI
11096 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11097 (const_vector:V8QI [(const_int 0) (const_int 0)
11098 (const_int 0) (const_int 0)
11099 (const_int 0) (const_int 0)
11100 (const_int 0) (const_int 0)])
11101 (match_operand:QI 2 "register_operand" "Yk"))
11102 (const_vector:V8QI [(const_int 0) (const_int 0)
11103 (const_int 0) (const_int 0)
11104 (const_int 0) (const_int 0)
11105 (const_int 0) (const_int 0)])))]
11106 "TARGET_AVX512VL"
11107 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11108 [(set_attr "type" "ssemov")
11109 (set_attr "prefix" "evex")
11110 (set_attr "mode" "TI")])
11111
11112 (define_insn "*avx512vl_<code><mode>v8qi2_mask_store_1"
11113 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11114 (vec_merge:V8QI
11115 (any_truncate:V8QI
11116 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11117 (match_dup 0)
11118 (match_operand:QI 2 "register_operand" "Yk")))]
11119 "TARGET_AVX512VL"
11120 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11121 [(set_attr "type" "ssemov")
11122 (set_attr "memory" "store")
11123 (set_attr "prefix" "evex")
11124 (set_attr "mode" "TI")])
11125
11126 (define_insn_and_split "avx512vl_<code><mode>v8qi2_mask_store_2"
11127 [(set (match_operand:DI 0 "memory_operand")
11128 (subreg:DI
11129 (vec_merge:V8QI
11130 (any_truncate:V8QI
11131 (match_operand:VI2_128_BW_4_256 1 "register_operand"))
11132 (vec_select:V8QI
11133 (subreg:V16QI
11134 (vec_concat:V2DI
11135 (match_dup 0)
11136 (const_int 0)) 0)
11137 (parallel [(const_int 0) (const_int 1)
11138 (const_int 2) (const_int 3)
11139 (const_int 4) (const_int 5)
11140 (const_int 6) (const_int 7)]))
11141 (match_operand:QI 2 "register_operand")) 0))]
11142 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11143 "#"
11144 "&& 1"
11145 [(set (match_dup 0)
11146 (vec_merge:V8QI
11147 (any_truncate:V8QI (match_dup 1))
11148 (match_dup 0)
11149 (match_dup 2)))]
11150 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11151
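;; Likewise for truncations to HImode elements (vpmov<trunsuffix>qw and
;; vpmov<trunsuffix>dw) whose destinations are narrower than 128 bits.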
11152 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
11153 (define_mode_attr pmov_dst_4
11154 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
11155 (define_mode_attr pmov_dst_zeroed_4
11156 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
11157 (define_mode_attr pmov_suff_4
11158 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
11159
11160 (define_expand "trunc<mode><pmov_dst_4>2"
11161 [(set (match_operand:<pmov_dst_4> 0 "register_operand")
11162 (truncate:<pmov_dst_4>
11163 (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
11164 "TARGET_AVX512VL"
11165 {
11166 operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0);
11167 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0],
11168 operands[1],
11169 CONST0_RTX (<pmov_dst_zeroed_4>mode)));
11170 DONE;
11171
11173
11174 (define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
11175 [(set (match_operand:V8HI 0 "register_operand" "=v")
11176 (vec_concat:V8HI
11177 (any_truncate:<pmov_dst_4>
11178 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
11179 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
11180 "TARGET_AVX512VL"
11181 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11182 [(set_attr "type" "ssemov")
11183 (set_attr "prefix" "evex")
11184 (set_attr "mode" "TI")])
11185
11186 (define_insn "*avx512vl_<code><mode>v4hi2_store_1"
11187 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11188 (any_truncate:V4HI
11189 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
11190 "TARGET_AVX512VL"
11191 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11192 [(set_attr "type" "ssemov")
11193 (set_attr "memory" "store")
11194 (set_attr "prefix" "evex")
11195 (set_attr "mode" "TI")])
11196
11197 (define_insn_and_split "*avx512vl_<code><mode>v4hi2_store_2"
11198 [(set (match_operand:DI 0 "memory_operand")
11199 (subreg:DI
11200 (any_truncate:V4HI
11201 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
11202 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11203 "#"
11204 "&& 1"
11205 [(set (match_dup 0)
11206 (any_truncate:V4HI (match_dup 1)))]
11207 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
11208
11209 (define_insn "avx512vl_<code><mode>v4hi2_mask"
11210 [(set (match_operand:V8HI 0 "register_operand" "=v")
11211 (vec_concat:V8HI
11212 (vec_merge:V4HI
11213 (any_truncate:V4HI
11214 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11215 (vec_select:V4HI
11216 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11217 (parallel [(const_int 0) (const_int 1)
11218 (const_int 2) (const_int 3)]))
11219 (match_operand:QI 3 "register_operand" "Yk"))
11220 (const_vector:V4HI [(const_int 0) (const_int 0)
11221 (const_int 0) (const_int 0)])))]
11222 "TARGET_AVX512VL"
11223 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11224 [(set_attr "type" "ssemov")
11225 (set_attr "prefix" "evex")
11226 (set_attr "mode" "TI")])
11227
11228 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
11229 [(set (match_operand:V8HI 0 "register_operand" "=v")
11230 (vec_concat:V8HI
11231 (vec_merge:V4HI
11232 (any_truncate:V4HI
11233 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11234 (const_vector:V4HI [(const_int 0) (const_int 0)
11235 (const_int 0) (const_int 0)])
11236 (match_operand:QI 2 "register_operand" "Yk"))
11237 (const_vector:V4HI [(const_int 0) (const_int 0)
11238 (const_int 0) (const_int 0)])))]
11239 "TARGET_AVX512VL"
11240 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11241 [(set_attr "type" "ssemov")
11242 (set_attr "prefix" "evex")
11243 (set_attr "mode" "TI")])
11244
11245 (define_insn "*avx512vl_<code><mode>v4hi2_mask_store_1"
11246 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11247 (vec_merge:V4HI
11248 (any_truncate:V4HI
11249 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11250 (match_dup 0)
11251 (match_operand:QI 2 "register_operand" "Yk")))]
11252 "TARGET_AVX512VL"
11253 {
11254 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
11255 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
11256 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
11257 }
11258 [(set_attr "type" "ssemov")
11259 (set_attr "memory" "store")
11260 (set_attr "prefix" "evex")
11261 (set_attr "mode" "TI")])
11262
11263 (define_insn_and_split "avx512vl_<code><mode>v4hi2_mask_store_2"
11264 [(set (match_operand:DI 0 "memory_operand")
11265 (subreg:DI
11266 (vec_merge:V4HI
11267 (any_truncate:V4HI
11268 (match_operand:VI4_128_8_256 1 "register_operand"))
11269 (vec_select:V4HI
11270 (subreg:V8HI
11271 (vec_concat:V2DI
11272 (match_dup 0)
11273 (const_int 0)) 0)
11274 (parallel [(const_int 0) (const_int 1)
11275 (const_int 2) (const_int 3)]))
11276 (match_operand:QI 2 "register_operand")) 0))]
11277 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11278 "#"
11279 "&& 1"
11280 [(set (match_dup 0)
11281 (vec_merge:V4HI
11282 (any_truncate:V4HI (match_dup 1))
11283 (match_dup 0)
11284 (match_dup 2)))]
11285 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
11286
11287
11288 (define_insn "*avx512vl_<code>v2div2hi2_store_1"
11289 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11290 (any_truncate:V2HI
11291 (match_operand:V2DI 1 "register_operand" "v")))]
11292 "TARGET_AVX512VL"
11293 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
11294 [(set_attr "type" "ssemov")
11295 (set_attr "memory" "store")
11296 (set_attr "prefix" "evex")
11297 (set_attr "mode" "TI")])
11298
11299 (define_insn_and_split "*avx512vl_<code>v2div2hi2_store_2"
11300 [(set (match_operand:SI 0 "memory_operand")
11301 (subreg:SI
11302 (any_truncate:V2HI
11303 (match_operand:V2DI 1 "register_operand")) 0))]
11304 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11305 "#"
11306 "&& 1"
11307 [(set (match_dup 0)
11308 (any_truncate:V2HI (match_dup 1)))]
11309 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
11310
11311 (define_insn "avx512vl_<code>v2div2hi2_mask"
11312 [(set (match_operand:V8HI 0 "register_operand" "=v")
11313 (vec_concat:V8HI
11314 (vec_merge:V2HI
11315 (any_truncate:V2HI
11316 (match_operand:V2DI 1 "register_operand" "v"))
11317 (vec_select:V2HI
11318 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11319 (parallel [(const_int 0) (const_int 1)]))
11320 (match_operand:QI 3 "register_operand" "Yk"))
11321 (const_vector:V6HI [(const_int 0) (const_int 0)
11322 (const_int 0) (const_int 0)
11323 (const_int 0) (const_int 0)])))]
11324 "TARGET_AVX512VL"
11325 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11326 [(set_attr "type" "ssemov")
11327 (set_attr "prefix" "evex")
11328 (set_attr "mode" "TI")])
11329
11330 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
11331 [(set (match_operand:V8HI 0 "register_operand" "=v")
11332 (vec_concat:V8HI
11333 (vec_merge:V2HI
11334 (any_truncate:V2HI
11335 (match_operand:V2DI 1 "register_operand" "v"))
11336 (const_vector:V2HI [(const_int 0) (const_int 0)])
11337 (match_operand:QI 2 "register_operand" "Yk"))
11338 (const_vector:V6HI [(const_int 0) (const_int 0)
11339 (const_int 0) (const_int 0)
11340 (const_int 0) (const_int 0)])))]
11341 "TARGET_AVX512VL"
11342 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11343 [(set_attr "type" "ssemov")
11344 (set_attr "prefix" "evex")
11345 (set_attr "mode" "TI")])
11346
11347 (define_insn "*avx512vl_<code>v2div2hi2_mask_store_1"
11348 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11349 (vec_merge:V2HI
11350 (any_truncate:V2HI
11351 (match_operand:V2DI 1 "register_operand" "v"))
11352 (match_dup 0)
11353 (match_operand:QI 2 "register_operand" "Yk")))]
11354 "TARGET_AVX512VL"
11355 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
11356 [(set_attr "type" "ssemov")
11357 (set_attr "memory" "store")
11358 (set_attr "prefix" "evex")
11359 (set_attr "mode" "TI")])
11360
11361 (define_insn_and_split "avx512vl_<code>v2div2hi2_mask_store_2"
11362 [(set (match_operand:SI 0 "memory_operand")
11363 (subreg:SI
11364 (vec_merge:V2HI
11365 (any_truncate:V2HI
11366 (match_operand:V2DI 1 "register_operand"))
11367 (vec_select:V2HI
11368 (subreg:V4HI
11369 (vec_concat:V2SI
11370 (match_dup 0)
11371 (const_int 0)) 0)
11372 (parallel [(const_int 0) (const_int 1)]))
11373 (match_operand:QI 2 "register_operand")) 0))]
11374 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11375 "#"
11376 "&& 1"
11377 [(set (match_dup 0)
11378 (vec_merge:V2HI
11379 (any_truncate:V2HI (match_dup 1))
11380 (match_dup 0)
11381 (match_dup 2)))]
11382 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
11383
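;; V2DI -> V2SI: the result occupies the low half of an XMM register,
;; modeled as a vec_concat with a zero V2SI.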
11384 (define_expand "truncv2div2si2"
11385 [(set (match_operand:V2SI 0 "register_operand")
11386 (truncate:V2SI
11387 (match_operand:V2DI 1 "register_operand")))]
11388 "TARGET_AVX512VL"
11389 {
11390 operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0);
11391 emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0],
11392 operands[1],
11393 CONST0_RTX (V2SImode)));
11394 DONE;
11395 })
11396
11397 (define_insn "avx512vl_<code>v2div2si2"
11398 [(set (match_operand:V4SI 0 "register_operand" "=v")
11399 (vec_concat:V4SI
11400 (any_truncate:V2SI
11401 (match_operand:V2DI 1 "register_operand" "v"))
11402 (match_operand:V2SI 2 "const0_operand")))]
11403 "TARGET_AVX512VL"
11404 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11405 [(set_attr "type" "ssemov")
11406 (set_attr "prefix" "evex")
11407 (set_attr "mode" "TI")])
11408
11409 (define_insn "*avx512vl_<code>v2div2si2_store_1"
11410 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11411 (any_truncate:V2SI
11412 (match_operand:V2DI 1 "register_operand" "v")))]
11413 "TARGET_AVX512VL"
11414 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11415 [(set_attr "type" "ssemov")
11416 (set_attr "memory" "store")
11417 (set_attr "prefix" "evex")
11418 (set_attr "mode" "TI")])
11419
11420 (define_insn_and_split "*avx512vl_<code>v2div2si2_store_2"
11421 [(set (match_operand:DI 0 "memory_operand")
11422 (subreg:DI
11423 (any_truncate:V2SI
11424 (match_operand:V2DI 1 "register_operand")) 0))]
11425 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11426 "#"
11427 "&& 1"
11428 [(set (match_dup 0)
11429 (any_truncate:V2SI (match_dup 1)))]
11430 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
11431
11432 (define_insn "avx512vl_<code>v2div2si2_mask"
11433 [(set (match_operand:V4SI 0 "register_operand" "=v")
11434 (vec_concat:V4SI
11435 (vec_merge:V2SI
11436 (any_truncate:V2SI
11437 (match_operand:V2DI 1 "register_operand" "v"))
11438 (vec_select:V2SI
11439 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
11440 (parallel [(const_int 0) (const_int 1)]))
11441 (match_operand:QI 3 "register_operand" "Yk"))
11442 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11443 "TARGET_AVX512VL"
11444 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11445 [(set_attr "type" "ssemov")
11446 (set_attr "prefix" "evex")
11447 (set_attr "mode" "TI")])
11448
11449 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
11450 [(set (match_operand:V4SI 0 "register_operand" "=v")
11451 (vec_concat:V4SI
11452 (vec_merge:V2SI
11453 (any_truncate:V2SI
11454 (match_operand:V2DI 1 "register_operand" "v"))
11455 (const_vector:V2SI [(const_int 0) (const_int 0)])
11456 (match_operand:QI 2 "register_operand" "Yk"))
11457 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11458 "TARGET_AVX512VL"
11459 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11460 [(set_attr "type" "ssemov")
11461 (set_attr "prefix" "evex")
11462 (set_attr "mode" "TI")])
11463
11464 (define_insn "*avx512vl_<code>v2div2si2_mask_store_1"
11465 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11466 (vec_merge:V2SI
11467 (any_truncate:V2SI
11468 (match_operand:V2DI 1 "register_operand" "v"))
11469 (match_dup 0)
11470 (match_operand:QI 2 "register_operand" "Yk")))]
11471 "TARGET_AVX512VL"
11472 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11473 [(set_attr "type" "ssemov")
11474 (set_attr "memory" "store")
11475 (set_attr "prefix" "evex")
11476 (set_attr "mode" "TI")])
11477
11478 (define_insn_and_split "avx512vl_<code>v2div2si2_mask_store_2"
11479 [(set (match_operand:DI 0 "memory_operand")
11480 (subreg:DI
11481 (vec_merge:V2SI
11482 (any_truncate:V2SI
11483 (match_operand:V2DI 1 "register_operand"))
11484 (vec_select:V2SI
11485 (subreg:V4SI
11486 (vec_concat:V2DI
11487 (match_dup 0)
11488 (const_int 0)) 0)
11489 (parallel [(const_int 0) (const_int 1)]))
11490 (match_operand:QI 2 "register_operand")) 0))]
11491 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11492 "#"
11493 "&& 1"
11494 [(set (match_dup 0)
11495 (vec_merge:V2SI
11496 (any_truncate:V2SI (match_dup 1))
11497 (match_dup 0)
11498 (match_dup 2)))]
11499 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
11500
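;; V8DI -> V8QI truncation; the eight result bytes land in the low half
;; of an XMM register.  As an illustration (not taken from this file),
;; scalar code such as
;;   void f (signed char *d, const long long *s)
;;   { for (int i = 0; i < 8; i++) d[i] = (signed char) s[i]; }
;; may be vectorized into a single vpmovqb store when AVX512F is enabled.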
11501 (define_expand "truncv8div8qi2"
11502 [(set (match_operand:V8QI 0 "register_operand")
11503 (truncate:V8QI
11504 (match_operand:V8DI 1 "register_operand")))]
11505 "TARGET_AVX512F"
11506 {
11507 operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0);
11508 emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1]));
11509 DONE;
11510 })
11511
11512 (define_insn "avx512f_<code>v8div16qi2"
11513 [(set (match_operand:V16QI 0 "register_operand" "=v")
11514 (vec_concat:V16QI
11515 (any_truncate:V8QI
11516 (match_operand:V8DI 1 "register_operand" "v"))
11517 (const_vector:V8QI [(const_int 0) (const_int 0)
11518 (const_int 0) (const_int 0)
11519 (const_int 0) (const_int 0)
11520 (const_int 0) (const_int 0)])))]
11521 "TARGET_AVX512F"
11522 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11523 [(set_attr "type" "ssemov")
11524 (set_attr "prefix" "evex")
11525 (set_attr "mode" "TI")])
11526
11527 (define_insn "*avx512f_<code>v8div16qi2_store_1"
11528 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11529 (any_truncate:V8QI
11530 (match_operand:V8DI 1 "register_operand" "v")))]
11531 "TARGET_AVX512F"
11532 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11533 [(set_attr "type" "ssemov")
11534 (set_attr "memory" "store")
11535 (set_attr "prefix" "evex")
11536 (set_attr "mode" "TI")])
11537
11538 (define_insn_and_split "*avx512f_<code>v8div16qi2_store_2"
11539 [(set (match_operand:DI 0 "memory_operand")
11540 (subreg:DI
11541 (any_truncate:V8QI
11542 (match_operand:V8DI 1 "register_operand")) 0))]
11543 "TARGET_AVX512F && ix86_pre_reload_split ()"
11544 "#"
11545 "&& 1"
11546 [(set (match_dup 0)
11547 (any_truncate:V8QI (match_dup 1)))]
11548 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11549
11550 (define_insn "avx512f_<code>v8div16qi2_mask"
11551 [(set (match_operand:V16QI 0 "register_operand" "=v")
11552 (vec_concat:V16QI
11553 (vec_merge:V8QI
11554 (any_truncate:V8QI
11555 (match_operand:V8DI 1 "register_operand" "v"))
11556 (vec_select:V8QI
11557 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11558 (parallel [(const_int 0) (const_int 1)
11559 (const_int 2) (const_int 3)
11560 (const_int 4) (const_int 5)
11561 (const_int 6) (const_int 7)]))
11562 (match_operand:QI 3 "register_operand" "Yk"))
11563 (const_vector:V8QI [(const_int 0) (const_int 0)
11564 (const_int 0) (const_int 0)
11565 (const_int 0) (const_int 0)
11566 (const_int 0) (const_int 0)])))]
11567 "TARGET_AVX512F"
11568 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11569 [(set_attr "type" "ssemov")
11570 (set_attr "prefix" "evex")
11571 (set_attr "mode" "TI")])
11572
11573 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
11574 [(set (match_operand:V16QI 0 "register_operand" "=v")
11575 (vec_concat:V16QI
11576 (vec_merge:V8QI
11577 (any_truncate:V8QI
11578 (match_operand:V8DI 1 "register_operand" "v"))
11579 (const_vector:V8QI [(const_int 0) (const_int 0)
11580 (const_int 0) (const_int 0)
11581 (const_int 0) (const_int 0)
11582 (const_int 0) (const_int 0)])
11583 (match_operand:QI 2 "register_operand" "Yk"))
11584 (const_vector:V8QI [(const_int 0) (const_int 0)
11585 (const_int 0) (const_int 0)
11586 (const_int 0) (const_int 0)
11587 (const_int 0) (const_int 0)])))]
11588 "TARGET_AVX512F"
11589 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11590 [(set_attr "type" "ssemov")
11591 (set_attr "prefix" "evex")
11592 (set_attr "mode" "TI")])
11593
11594 (define_insn "*avx512f_<code>v8div16qi2_mask_store_1"
11595 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11596 (vec_merge:V8QI
11597 (any_truncate:V8QI
11598 (match_operand:V8DI 1 "register_operand" "v"))
11599 (match_dup 0)
11600 (match_operand:QI 2 "register_operand" "Yk")))]
11601 "TARGET_AVX512F"
11602 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11603 [(set_attr "type" "ssemov")
11604 (set_attr "memory" "store")
11605 (set_attr "prefix" "evex")
11606 (set_attr "mode" "TI")])
11607
11608 (define_insn_and_split "avx512f_<code>v8div16qi2_mask_store_2"
11609 [(set (match_operand:DI 0 "memory_operand")
11610 (subreg:DI
11611 (vec_merge:V8QI
11612 (any_truncate:V8QI
11613 (match_operand:V8DI 1 "register_operand"))
11614 (vec_select:V8QI
11615 (subreg:V16QI
11616 (vec_concat:V2DI
11617 (match_dup 0)
11618 (const_int 0)) 0)
11619 (parallel [(const_int 0) (const_int 1)
11620 (const_int 2) (const_int 3)
11621 (const_int 4) (const_int 5)
11622 (const_int 6) (const_int 7)]))
11623 (match_operand:QI 2 "register_operand")) 0))]
11624 "TARGET_AVX512F && ix86_pre_reload_split ()"
11625 "#"
11626 "&& 1"
11627 [(set (match_dup 0)
11628 (vec_merge:V8QI
11629 (any_truncate:V8QI (match_dup 1))
11630 (match_dup 0)
11631 (match_dup 2)))]
11632 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11633
11634 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11635 ;;
11636 ;; Parallel integral arithmetic
11637 ;;
11638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11639
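;; There is no vector integer negate instruction, so negation is
;; expanded as a subtraction from zero.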
11640 (define_expand "neg<mode>2"
11641 [(set (match_operand:VI_AVX2 0 "register_operand")
11642 (minus:VI_AVX2
11643 (match_dup 2)
11644 (match_operand:VI_AVX2 1 "vector_operand")))]
11645 "TARGET_SSE2"
11646 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
11647
11648 (define_expand "<insn><mode>3"
11649 [(set (match_operand:VI_AVX2 0 "register_operand")
11650 (plusminus:VI_AVX2
11651 (match_operand:VI_AVX2 1 "vector_operand")
11652 (match_operand:VI_AVX2 2 "vector_operand")))]
11653 "TARGET_SSE2"
11654 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11655
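;; Masked add/sub: doubleword and quadword elements need only AVX512F
;; (plus AVX512VL for the narrower vector widths), while byte and word
;; elements additionally require AVX512BW.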
11656 (define_expand "<insn><mode>3_mask"
11657 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11658 (vec_merge:VI48_AVX512VL
11659 (plusminus:VI48_AVX512VL
11660 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11661 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11662 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11663 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11664 "TARGET_AVX512F"
11665 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11666
11667 (define_expand "<insn><mode>3_mask"
11668 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11669 (vec_merge:VI12_AVX512VL
11670 (plusminus:VI12_AVX512VL
11671 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11672 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11673 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
11674 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11675 "TARGET_AVX512BW"
11676 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11677
11678 (define_insn "*<insn><mode>3"
11679 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
11680 (plusminus:VI_AVX2
11681 (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
11682 (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
11683 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11684 "@
11685 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11686 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11687 [(set_attr "isa" "noavx,avx")
11688 (set_attr "type" "sseiadd")
11689 (set_attr "prefix_data16" "1,*")
11690 (set_attr "prefix" "orig,maybe_evex")
11691 (set_attr "mode" "<sseinsnmode>")])
11692
11693 (define_insn "*<insn><mode>3_mask"
11694 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11695 (vec_merge:VI48_AVX512VL
11696 (plusminus:VI48_AVX512VL
11697 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11698 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11699 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
11700 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11701 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11702 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11703 [(set_attr "type" "sseiadd")
11704 (set_attr "prefix" "evex")
11705 (set_attr "mode" "<sseinsnmode>")])
11706
11707 (define_insn "*<insn><mode>3_mask"
11708 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11709 (vec_merge:VI12_AVX512VL
11710 (plusminus:VI12_AVX512VL
11711 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11712 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
11713 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
11714 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11715 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11716 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11717 [(set_attr "type" "sseiadd")
11718 (set_attr "prefix" "evex")
11719 (set_attr "mode" "<sseinsnmode>")])
11720
11721 (define_expand "<sse2_avx2>_<insn><mode>3<mask_name>"
11722 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
11723 (sat_plusminus:VI12_AVX2_AVX512BW
11724 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
11725 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
11726 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11727 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11728
11729 (define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
11730 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
11731 (sat_plusminus:VI12_AVX2_AVX512BW
11732 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
11733 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
11734 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
11735 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11736 "@
11737 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11738 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11739 [(set_attr "isa" "noavx,avx")
11740 (set_attr "type" "sseiadd")
11741 (set_attr "prefix_data16" "1,*")
11742 (set_attr "prefix" "orig,maybe_evex")
11743 (set_attr "mode" "TI")])
11744
11745 ;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
11746 (define_split
11747 [(set (match_operand:VI12_AVX2 0 "register_operand")
11748 (eq:VI12_AVX2
11749 (us_minus:VI12_AVX2
11750 (match_operand:VI12_AVX2 1 "vector_operand")
11751 (match_operand:VI12_AVX2 2 "vector_operand"))
11752 (match_operand:VI12_AVX2 3 "const0_operand")))]
11753 "TARGET_SSE2
11754 && (<MODE>mode != V8HImode || TARGET_SSE4_1)
11755 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
11756 [(set (match_dup 4)
11757 (umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
11758 (set (match_dup 0)
11759 (eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
11760 "operands[4] = gen_reg_rtx (<MODE>mode);")
11761
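;; There is no byte-element multiply instruction; QImode vector
;; multiplications are synthesized from word-element multiplies
;; (ix86_expand_vecmul_qihi, falling back to ix86_expand_vecop_qihi).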
11762 (define_expand "mulv8qi3"
11763 [(set (match_operand:V8QI 0 "register_operand")
11764 (mult:V8QI (match_operand:V8QI 1 "register_operand")
11765 (match_operand:V8QI 2 "register_operand")))]
11766 "TARGET_AVX512VL && TARGET_AVX512BW"
11767 {
11768 gcc_assert (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]));
11769 DONE;
11770 })
11771
11772 (define_expand "mul<mode>3"
11773 [(set (match_operand:VI1_AVX512 0 "register_operand")
11774 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
11775 (match_operand:VI1_AVX512 2 "register_operand")))]
11776 "TARGET_SSE2"
11777 {
11778 if (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]))
11779 DONE;
11780 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
11781 DONE;
11782 })
11783
11784 (define_expand "mul<mode>3<mask_name>"
11785 [(set (match_operand:VI2_AVX2 0 "register_operand")
11786 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
11787 (match_operand:VI2_AVX2 2 "vector_operand")))]
11788 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11789 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11790
11791 (define_insn "*mul<mode>3<mask_name>"
11792 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11793 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11794 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11795 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11796 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11797 "@
11798 pmullw\t{%2, %0|%0, %2}
11799 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11800 [(set_attr "isa" "noavx,avx")
11801 (set_attr "type" "sseimul")
11802 (set_attr "prefix_data16" "1,*")
11803 (set_attr "prefix" "orig,vex")
11804 (set_attr "mode" "<sseinsnmode>")])
11805
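;; High-part multiply: the RTL is the full widening product shifted
;; right by 16 and truncated back to the element width, which matches
;; pmulhw (signed) and pmulhuw (unsigned).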
11806 (define_expand "<s>mul<mode>3_highpart<mask_name>"
11807 [(set (match_operand:VI2_AVX2 0 "register_operand")
11808 (truncate:VI2_AVX2
11809 (lshiftrt:<ssedoublemode>
11810 (mult:<ssedoublemode>
11811 (any_extend:<ssedoublemode>
11812 (match_operand:VI2_AVX2 1 "vector_operand"))
11813 (any_extend:<ssedoublemode>
11814 (match_operand:VI2_AVX2 2 "vector_operand")))
11815 (const_int 16))))]
11816 "TARGET_SSE2
11817 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11818 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11819
11820 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
11821 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11822 (truncate:VI2_AVX2
11823 (lshiftrt:<ssedoublemode>
11824 (mult:<ssedoublemode>
11825 (any_extend:<ssedoublemode>
11826 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11827 (any_extend:<ssedoublemode>
11828 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11829 (const_int 16))))]
11830 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11831 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11832 "@
11833 pmulh<u>w\t{%2, %0|%0, %2}
11834 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11835 [(set_attr "isa" "noavx,avx")
11836 (set_attr "type" "sseimul")
11837 (set_attr "prefix_data16" "1,*")
11838 (set_attr "prefix" "orig,vex")
11839 (set_attr "mode" "<sseinsnmode>")])
11840
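;; Widening multiplies of the even elements map to vpmuludq/vpmuldq,
;; which multiply the low doubleword of each quadword lane and produce
;; a full quadword result.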
11841 (define_expand "vec_widen_umult_even_v16si<mask_name>"
11842 [(set (match_operand:V8DI 0 "register_operand")
11843 (mult:V8DI
11844 (zero_extend:V8DI
11845 (vec_select:V8SI
11846 (match_operand:V16SI 1 "nonimmediate_operand")
11847 (parallel [(const_int 0) (const_int 2)
11848 (const_int 4) (const_int 6)
11849 (const_int 8) (const_int 10)
11850 (const_int 12) (const_int 14)])))
11851 (zero_extend:V8DI
11852 (vec_select:V8SI
11853 (match_operand:V16SI 2 "nonimmediate_operand")
11854 (parallel [(const_int 0) (const_int 2)
11855 (const_int 4) (const_int 6)
11856 (const_int 8) (const_int 10)
11857 (const_int 12) (const_int 14)])))))]
11858 "TARGET_AVX512F"
11859 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11860
11861 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
11862 [(set (match_operand:V8DI 0 "register_operand" "=v")
11863 (mult:V8DI
11864 (zero_extend:V8DI
11865 (vec_select:V8SI
11866 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11867 (parallel [(const_int 0) (const_int 2)
11868 (const_int 4) (const_int 6)
11869 (const_int 8) (const_int 10)
11870 (const_int 12) (const_int 14)])))
11871 (zero_extend:V8DI
11872 (vec_select:V8SI
11873 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11874 (parallel [(const_int 0) (const_int 2)
11875 (const_int 4) (const_int 6)
11876 (const_int 8) (const_int 10)
11877 (const_int 12) (const_int 14)])))))]
11878 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11879 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11880 [(set_attr "type" "sseimul")
11881 (set_attr "prefix_extra" "1")
11882 (set_attr "prefix" "evex")
11883 (set_attr "mode" "XI")])
11884
11885 (define_expand "vec_widen_umult_even_v8si<mask_name>"
11886 [(set (match_operand:V4DI 0 "register_operand")
11887 (mult:V4DI
11888 (zero_extend:V4DI
11889 (vec_select:V4SI
11890 (match_operand:V8SI 1 "nonimmediate_operand")
11891 (parallel [(const_int 0) (const_int 2)
11892 (const_int 4) (const_int 6)])))
11893 (zero_extend:V4DI
11894 (vec_select:V4SI
11895 (match_operand:V8SI 2 "nonimmediate_operand")
11896 (parallel [(const_int 0) (const_int 2)
11897 (const_int 4) (const_int 6)])))))]
11898 "TARGET_AVX2 && <mask_avx512vl_condition>"
11899 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11900
11901 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
11902 [(set (match_operand:V4DI 0 "register_operand" "=v")
11903 (mult:V4DI
11904 (zero_extend:V4DI
11905 (vec_select:V4SI
11906 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11907 (parallel [(const_int 0) (const_int 2)
11908 (const_int 4) (const_int 6)])))
11909 (zero_extend:V4DI
11910 (vec_select:V4SI
11911 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11912 (parallel [(const_int 0) (const_int 2)
11913 (const_int 4) (const_int 6)])))))]
11914 "TARGET_AVX2 && <mask_avx512vl_condition>
11915 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11916 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11917 [(set_attr "type" "sseimul")
11918 (set_attr "prefix" "maybe_evex")
11919 (set_attr "mode" "OI")])
11920
11921 (define_expand "vec_widen_umult_even_v4si<mask_name>"
11922 [(set (match_operand:V2DI 0 "register_operand")
11923 (mult:V2DI
11924 (zero_extend:V2DI
11925 (vec_select:V2SI
11926 (match_operand:V4SI 1 "vector_operand")
11927 (parallel [(const_int 0) (const_int 2)])))
11928 (zero_extend:V2DI
11929 (vec_select:V2SI
11930 (match_operand:V4SI 2 "vector_operand")
11931 (parallel [(const_int 0) (const_int 2)])))))]
11932 "TARGET_SSE2 && <mask_avx512vl_condition>"
11933 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11934
11935 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
11936 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11937 (mult:V2DI
11938 (zero_extend:V2DI
11939 (vec_select:V2SI
11940 (match_operand:V4SI 1 "vector_operand" "%0,v")
11941 (parallel [(const_int 0) (const_int 2)])))
11942 (zero_extend:V2DI
11943 (vec_select:V2SI
11944 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11945 (parallel [(const_int 0) (const_int 2)])))))]
11946 "TARGET_SSE2 && <mask_avx512vl_condition>
11947 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11948 "@
11949 pmuludq\t{%2, %0|%0, %2}
11950 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11951 [(set_attr "isa" "noavx,avx")
11952 (set_attr "type" "sseimul")
11953 (set_attr "prefix_data16" "1,*")
11954 (set_attr "prefix" "orig,maybe_evex")
11955 (set_attr "mode" "TI")])
11956
11957 (define_expand "vec_widen_smult_even_v16si<mask_name>"
11958 [(set (match_operand:V8DI 0 "register_operand")
11959 (mult:V8DI
11960 (sign_extend:V8DI
11961 (vec_select:V8SI
11962 (match_operand:V16SI 1 "nonimmediate_operand")
11963 (parallel [(const_int 0) (const_int 2)
11964 (const_int 4) (const_int 6)
11965 (const_int 8) (const_int 10)
11966 (const_int 12) (const_int 14)])))
11967 (sign_extend:V8DI
11968 (vec_select:V8SI
11969 (match_operand:V16SI 2 "nonimmediate_operand")
11970 (parallel [(const_int 0) (const_int 2)
11971 (const_int 4) (const_int 6)
11972 (const_int 8) (const_int 10)
11973 (const_int 12) (const_int 14)])))))]
11974 "TARGET_AVX512F"
11975 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11976
11977 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
11978 [(set (match_operand:V8DI 0 "register_operand" "=v")
11979 (mult:V8DI
11980 (sign_extend:V8DI
11981 (vec_select:V8SI
11982 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11983 (parallel [(const_int 0) (const_int 2)
11984 (const_int 4) (const_int 6)
11985 (const_int 8) (const_int 10)
11986 (const_int 12) (const_int 14)])))
11987 (sign_extend:V8DI
11988 (vec_select:V8SI
11989 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11990 (parallel [(const_int 0) (const_int 2)
11991 (const_int 4) (const_int 6)
11992 (const_int 8) (const_int 10)
11993 (const_int 12) (const_int 14)])))))]
11994 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11995 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11996 [(set_attr "type" "sseimul")
11997 (set_attr "prefix_extra" "1")
11998 (set_attr "prefix" "evex")
11999 (set_attr "mode" "XI")])
12000
12001 (define_expand "vec_widen_smult_even_v8si<mask_name>"
12002 [(set (match_operand:V4DI 0 "register_operand")
12003 (mult:V4DI
12004 (sign_extend:V4DI
12005 (vec_select:V4SI
12006 (match_operand:V8SI 1 "nonimmediate_operand")
12007 (parallel [(const_int 0) (const_int 2)
12008 (const_int 4) (const_int 6)])))
12009 (sign_extend:V4DI
12010 (vec_select:V4SI
12011 (match_operand:V8SI 2 "nonimmediate_operand")
12012 (parallel [(const_int 0) (const_int 2)
12013 (const_int 4) (const_int 6)])))))]
12014 "TARGET_AVX2 && <mask_avx512vl_condition>"
12015 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
12016
12017 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
12018 [(set (match_operand:V4DI 0 "register_operand" "=v")
12019 (mult:V4DI
12020 (sign_extend:V4DI
12021 (vec_select:V4SI
12022 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
12023 (parallel [(const_int 0) (const_int 2)
12024 (const_int 4) (const_int 6)])))
12025 (sign_extend:V4DI
12026 (vec_select:V4SI
12027 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
12028 (parallel [(const_int 0) (const_int 2)
12029 (const_int 4) (const_int 6)])))))]
12030   "TARGET_AVX2 && <mask_avx512vl_condition> && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12031 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12032 [(set_attr "type" "sseimul")
12033 (set_attr "prefix_extra" "1")
12034 (set_attr "prefix" "vex")
12035 (set_attr "mode" "OI")])
12036
12037 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
12038 [(set (match_operand:V2DI 0 "register_operand")
12039 (mult:V2DI
12040 (sign_extend:V2DI
12041 (vec_select:V2SI
12042 (match_operand:V4SI 1 "vector_operand")
12043 (parallel [(const_int 0) (const_int 2)])))
12044 (sign_extend:V2DI
12045 (vec_select:V2SI
12046 (match_operand:V4SI 2 "vector_operand")
12047 (parallel [(const_int 0) (const_int 2)])))))]
12048 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
12049 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
12050
12051 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
12052 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
12053 (mult:V2DI
12054 (sign_extend:V2DI
12055 (vec_select:V2SI
12056 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
12057 (parallel [(const_int 0) (const_int 2)])))
12058 (sign_extend:V2DI
12059 (vec_select:V2SI
12060 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
12061 (parallel [(const_int 0) (const_int 2)])))))]
12062 "TARGET_SSE4_1 && <mask_avx512vl_condition>
12063 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12064 "@
12065 pmuldq\t{%2, %0|%0, %2}
12066 pmuldq\t{%2, %0|%0, %2}
12067 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12068 [(set_attr "isa" "noavx,noavx,avx")
12069 (set_attr "type" "sseimul")
12070 (set_attr "prefix_data16" "1,1,*")
12071 (set_attr "prefix_extra" "1")
12072 (set_attr "prefix" "orig,orig,vex")
12073 (set_attr "mode" "TI")])
12074
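;; vpmaddwd multiplies adjacent pairs of signed words and adds each
;; pair, producing doubleword sums.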
12075 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
12076 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
12077 (unspec:<sseunpackmode>
12078 [(match_operand:VI2_AVX2 1 "register_operand" "v")
12079 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
12080 UNSPEC_PMADDWD512))]
12081 "TARGET_AVX512BW && <mask_mode512bit_condition>"
12082   "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12083 [(set_attr "type" "sseiadd")
12084 (set_attr "prefix" "evex")
12085 (set_attr "mode" "XI")])
12086
12087 (define_expand "avx2_pmaddwd"
12088 [(set (match_operand:V8SI 0 "register_operand")
12089 (plus:V8SI
12090 (mult:V8SI
12091 (sign_extend:V8SI
12092 (vec_select:V8HI
12093 (match_operand:V16HI 1 "nonimmediate_operand")
12094 (parallel [(const_int 0) (const_int 2)
12095 (const_int 4) (const_int 6)
12096 (const_int 8) (const_int 10)
12097 (const_int 12) (const_int 14)])))
12098 (sign_extend:V8SI
12099 (vec_select:V8HI
12100 (match_operand:V16HI 2 "nonimmediate_operand")
12101 (parallel [(const_int 0) (const_int 2)
12102 (const_int 4) (const_int 6)
12103 (const_int 8) (const_int 10)
12104 (const_int 12) (const_int 14)]))))
12105 (mult:V8SI
12106 (sign_extend:V8SI
12107 (vec_select:V8HI (match_dup 1)
12108 (parallel [(const_int 1) (const_int 3)
12109 (const_int 5) (const_int 7)
12110 (const_int 9) (const_int 11)
12111 (const_int 13) (const_int 15)])))
12112 (sign_extend:V8SI
12113 (vec_select:V8HI (match_dup 2)
12114 (parallel [(const_int 1) (const_int 3)
12115 (const_int 5) (const_int 7)
12116 (const_int 9) (const_int 11)
12117 (const_int 13) (const_int 15)]))))))]
12118 "TARGET_AVX2"
12119 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
12120
12121 (define_insn "*avx2_pmaddwd"
12122 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
12123 (plus:V8SI
12124 (mult:V8SI
12125 (sign_extend:V8SI
12126 (vec_select:V8HI
12127 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
12128 (parallel [(const_int 0) (const_int 2)
12129 (const_int 4) (const_int 6)
12130 (const_int 8) (const_int 10)
12131 (const_int 12) (const_int 14)])))
12132 (sign_extend:V8SI
12133 (vec_select:V8HI
12134 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
12135 (parallel [(const_int 0) (const_int 2)
12136 (const_int 4) (const_int 6)
12137 (const_int 8) (const_int 10)
12138 (const_int 12) (const_int 14)]))))
12139 (mult:V8SI
12140 (sign_extend:V8SI
12141 (vec_select:V8HI (match_dup 1)
12142 (parallel [(const_int 1) (const_int 3)
12143 (const_int 5) (const_int 7)
12144 (const_int 9) (const_int 11)
12145 (const_int 13) (const_int 15)])))
12146 (sign_extend:V8SI
12147 (vec_select:V8HI (match_dup 2)
12148 (parallel [(const_int 1) (const_int 3)
12149 (const_int 5) (const_int 7)
12150 (const_int 9) (const_int 11)
12151 (const_int 13) (const_int 15)]))))))]
12152 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12153 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
12154 [(set_attr "type" "sseiadd")
12155 (set_attr "isa" "*,avx512bw")
12156 (set_attr "prefix" "vex,evex")
12157 (set_attr "mode" "OI")])
12158
12159 (define_expand "sse2_pmaddwd"
12160 [(set (match_operand:V4SI 0 "register_operand")
12161 (plus:V4SI
12162 (mult:V4SI
12163 (sign_extend:V4SI
12164 (vec_select:V4HI
12165 (match_operand:V8HI 1 "vector_operand")
12166 (parallel [(const_int 0) (const_int 2)
12167 (const_int 4) (const_int 6)])))
12168 (sign_extend:V4SI
12169 (vec_select:V4HI
12170 (match_operand:V8HI 2 "vector_operand")
12171 (parallel [(const_int 0) (const_int 2)
12172 (const_int 4) (const_int 6)]))))
12173 (mult:V4SI
12174 (sign_extend:V4SI
12175 (vec_select:V4HI (match_dup 1)
12176 (parallel [(const_int 1) (const_int 3)
12177 (const_int 5) (const_int 7)])))
12178 (sign_extend:V4SI
12179 (vec_select:V4HI (match_dup 2)
12180 (parallel [(const_int 1) (const_int 3)
12181 (const_int 5) (const_int 7)]))))))]
12182 "TARGET_SSE2"
12183 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
12184
12185 (define_insn "*sse2_pmaddwd"
12186 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
12187 (plus:V4SI
12188 (mult:V4SI
12189 (sign_extend:V4SI
12190 (vec_select:V4HI
12191 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
12192 (parallel [(const_int 0) (const_int 2)
12193 (const_int 4) (const_int 6)])))
12194 (sign_extend:V4SI
12195 (vec_select:V4HI
12196 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
12197 (parallel [(const_int 0) (const_int 2)
12198 (const_int 4) (const_int 6)]))))
12199 (mult:V4SI
12200 (sign_extend:V4SI
12201 (vec_select:V4HI (match_dup 1)
12202 (parallel [(const_int 1) (const_int 3)
12203 (const_int 5) (const_int 7)])))
12204 (sign_extend:V4SI
12205 (vec_select:V4HI (match_dup 2)
12206 (parallel [(const_int 1) (const_int 3)
12207 (const_int 5) (const_int 7)]))))))]
12208 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12209 "@
12210 pmaddwd\t{%2, %0|%0, %2}
12211 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
12212 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
12213 [(set_attr "isa" "noavx,avx,avx512bw")
12214 (set_attr "type" "sseiadd")
12215 (set_attr "atom_unit" "simul")
12216 (set_attr "prefix_data16" "1,*,*")
12217 (set_attr "prefix" "orig,vex,evex")
12218 (set_attr "mode" "TI")])
12219
12220 (define_insn "avx512dq_mul<mode>3<mask_name>"
12221 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
12222 (mult:VI8_AVX512VL
12223 (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v")
12224 (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
12225 "TARGET_AVX512DQ && <mask_mode512bit_condition>
12226 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
12227 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12228 [(set_attr "type" "sseimul")
12229 (set_attr "prefix" "evex")
12230 (set_attr "mode" "<sseinsnmode>")])
12231
12232 (define_expand "mul<mode>3<mask_name>"
12233 [(set (match_operand:VI4_AVX512F 0 "register_operand")
12234 (mult:VI4_AVX512F
12235 (match_operand:VI4_AVX512F 1 "general_vector_operand")
12236 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
12237 "TARGET_SSE2 && <mask_mode512bit_condition>"
12238 {
12239 if (TARGET_SSE4_1)
12240 {
12241 if (!vector_operand (operands[1], <MODE>mode))
12242 operands[1] = force_reg (<MODE>mode, operands[1]);
12243 if (!vector_operand (operands[2], <MODE>mode))
12244 operands[2] = force_reg (<MODE>mode, operands[2]);
12245 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
12246 }
12247 else
12248 {
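/* Without SSE4.1 there is no pmulld; the helper below roughly forms
   the low 32-bit products from two pmuludq operations on the even and
   odd elements and shuffles the halves back together.  */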
12249 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
12250 DONE;
12251 }
12252 })
12253
12254 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
12255 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
12256 (mult:VI4_AVX512F
12257 (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v")
12258 (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))]
12259 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
12260 && <mask_mode512bit_condition>"
12261 "@
12262 pmulld\t{%2, %0|%0, %2}
12263 pmulld\t{%2, %0|%0, %2}
12264 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12265 [(set_attr "isa" "noavx,noavx,avx")
12266 (set_attr "type" "sseimul")
12267 (set_attr "prefix_extra" "1")
12268 (set_attr "prefix" "<bcst_mask_prefix4>")
12269 (set_attr "btver2_decode" "vector,vector,vector")
12270 (set_attr "mode" "<sseinsnmode>")])
12271
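;; Full 64-bit element multiply.  Only AVX512DQ provides a vpmullq
;; instruction (pattern above); otherwise ix86_expand_sse2_mulvxdi3
;; emulates it, typically with pmuludq on the 32-bit halves plus shifts
;; and adds.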
12272 (define_expand "mul<mode>3"
12273 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12274 (mult:VI8_AVX2_AVX512F
12275 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12276 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12277 "TARGET_SSE2"
12278 {
12279 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
12280 DONE;
12281 })
12282
12283 (define_expand "vec_widen_<s>mult_hi_<mode>"
12284 [(match_operand:<sseunpackmode> 0 "register_operand")
12285 (any_extend:<sseunpackmode>
12286 (match_operand:VI124_AVX2 1 "register_operand"))
12287 (match_operand:VI124_AVX2 2 "register_operand")]
12288 "TARGET_SSE2"
12289 {
12290 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
12291 <u_bool>, true);
12292 DONE;
12293 })
12294
12295 (define_expand "vec_widen_<s>mult_lo_<mode>"
12296 [(match_operand:<sseunpackmode> 0 "register_operand")
12297 (any_extend:<sseunpackmode>
12298 (match_operand:VI124_AVX2 1 "register_operand"))
12299 (match_operand:VI124_AVX2 2 "register_operand")]
12300 "TARGET_SSE2"
12301 {
12302 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
12303 <u_bool>, false);
12304 DONE;
12305 })
12306
12307 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
12308 ;; named patterns, but signed V4SI needs special help for plain SSE2.
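;; Without SSE4.1 there is no pmuldq, only the unsigned pmuludq, so the
;; signed product has to be reconstructed.  The usual correction is roughly
;;   (int64) a * b == (uint64) a * b - (((a < 0 ? b : 0) + (b < 0 ? a : 0)) << 32)
;; the exact sequence is emitted by ix86_expand_mul_widen_evenodd.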
12309 (define_expand "vec_widen_smult_even_v4si"
12310 [(match_operand:V2DI 0 "register_operand")
12311 (match_operand:V4SI 1 "vector_operand")
12312 (match_operand:V4SI 2 "vector_operand")]
12313 "TARGET_SSE2"
12314 {
12315 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
12316 false, false);
12317 DONE;
12318 })
12319
12320 (define_expand "vec_widen_<s>mult_odd_<mode>"
12321 [(match_operand:<sseunpackmode> 0 "register_operand")
12322 (any_extend:<sseunpackmode>
12323 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
12324 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
12325 "TARGET_SSE2"
12326 {
12327 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
12328 <u_bool>, true);
12329 DONE;
12330 })
12331
12332 (define_mode_attr SDOT_PMADD_SUF
12333 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
12334
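;; Dot product of signed words: pmaddwd already multiplies adjacent word
;; pairs and sums each pair into a dword, so only the addition of the
;; accumulator (operand 3) remains.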
12335 (define_expand "sdot_prod<mode>"
12336 [(match_operand:<sseunpackmode> 0 "register_operand")
12337 (match_operand:VI2_AVX2 1 "register_operand")
12338 (match_operand:VI2_AVX2 2 "register_operand")
12339 (match_operand:<sseunpackmode> 3 "register_operand")]
12340 "TARGET_SSE2"
12341 {
12342 rtx t = gen_reg_rtx (<sseunpackmode>mode);
12343 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
12344 emit_insn (gen_rtx_SET (operands[0],
12345 gen_rtx_PLUS (<sseunpackmode>mode,
12346 operands[3], t)));
12347 DONE;
12348 })
12349
12350 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
12351 ;; back together when madd is available.
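;; XOP's vpmacsdql/vpmacsdqh multiply the low/high signed dwords of each
;; qword lane and accumulate into a V2DI operand, so chaining the two
;; instructions yields the whole dot product.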
12352 (define_expand "sdot_prodv4si"
12353 [(match_operand:V2DI 0 "register_operand")
12354 (match_operand:V4SI 1 "register_operand")
12355 (match_operand:V4SI 2 "register_operand")
12356 (match_operand:V2DI 3 "register_operand")]
12357 "TARGET_XOP"
12358 {
12359 rtx t = gen_reg_rtx (V2DImode);
12360 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
12361 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
12362 DONE;
12363 })
12364
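;; Averaging with rounding up: (a + b + 1) >> 1 computed in a double-width
;; mode, which is exactly the behaviour of pavgb/pavgw.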
12365 (define_expand "uavg<mode>3_ceil"
12366 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
12367 (truncate:VI12_AVX2_AVX512BW
12368 (lshiftrt:<ssedoublemode>
12369 (plus:<ssedoublemode>
12370 (plus:<ssedoublemode>
12371 (zero_extend:<ssedoublemode>
12372 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
12373 (zero_extend:<ssedoublemode>
12374 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
12375 (match_dup 3))
12376 (const_int 1))))]
12377 "TARGET_SSE2"
12378 {
12379 operands[3] = CONST1_RTX (<ssedoublemode>mode);
12380 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
12381 })
12382
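;; Sum of absolute differences.  psadbw leaves each 8-byte group's sum
;; zero-extended in a 64-bit lane, so the result can be reinterpreted as
;; a vector of dwords (the upper dwords are zero) and added to the
;; accumulator directly.  The 256-bit and 512-bit variants below follow
;; the same scheme.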
12383 (define_expand "usadv16qi"
12384 [(match_operand:V4SI 0 "register_operand")
12385 (match_operand:V16QI 1 "register_operand")
12386 (match_operand:V16QI 2 "vector_operand")
12387 (match_operand:V4SI 3 "vector_operand")]
12388 "TARGET_SSE2"
12389 {
12390 rtx t1 = gen_reg_rtx (V2DImode);
12391 rtx t2 = gen_reg_rtx (V4SImode);
12392 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
12393 convert_move (t2, t1, 0);
12394 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
12395 DONE;
12396 })
12397
12398 (define_expand "usadv32qi"
12399 [(match_operand:V8SI 0 "register_operand")
12400 (match_operand:V32QI 1 "register_operand")
12401 (match_operand:V32QI 2 "nonimmediate_operand")
12402 (match_operand:V8SI 3 "nonimmediate_operand")]
12403 "TARGET_AVX2"
12404 {
12405 rtx t1 = gen_reg_rtx (V4DImode);
12406 rtx t2 = gen_reg_rtx (V8SImode);
12407 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
12408 convert_move (t2, t1, 0);
12409 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
12410 DONE;
12411 })
12412
12413 (define_expand "usadv64qi"
12414 [(match_operand:V16SI 0 "register_operand")
12415 (match_operand:V64QI 1 "register_operand")
12416 (match_operand:V64QI 2 "nonimmediate_operand")
12417 (match_operand:V16SI 3 "nonimmediate_operand")]
12418 "TARGET_AVX512BW"
12419 {
12420 rtx t1 = gen_reg_rtx (V8DImode);
12421 rtx t2 = gen_reg_rtx (V16SImode);
12422 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
12423 convert_move (t2, t1, 0);
12424 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
12425 DONE;
12426 })
12427
12428 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
12429 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
12430 (ashiftrt:VI248_AVX512BW_1
12431 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
12432 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12433 "TARGET_AVX512VL"
12434 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12435 [(set_attr "type" "sseishft")
12436 (set (attr "length_immediate")
12437 (if_then_else (match_operand 2 "const_int_operand")
12438 (const_string "1")
12439 (const_string "0")))
12440 (set_attr "mode" "<sseinsnmode>")])
12441
12442 (define_insn "ashr<mode>3"
12443 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
12444 (ashiftrt:VI24_AVX2
12445 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
12446 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12447 "TARGET_SSE2"
12448 "@
12449 psra<ssemodesuffix>\t{%2, %0|%0, %2}
12450 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12451 [(set_attr "isa" "noavx,avx")
12452 (set_attr "type" "sseishft")
12453 (set (attr "length_immediate")
12454 (if_then_else (match_operand 2 "const_int_operand")
12455 (const_string "1")
12456 (const_string "0")))
12457 (set_attr "prefix_data16" "1,*")
12458 (set_attr "prefix" "orig,vex")
12459 (set_attr "mode" "<sseinsnmode>")])
12460
12461 (define_insn "ashr<mode>3<mask_name>"
12462 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
12463 (ashiftrt:VI248_AVX512BW_AVX512VL
12464 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
12465 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12466 "TARGET_AVX512F"
12467 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12468 [(set_attr "type" "sseishft")
12469 (set (attr "length_immediate")
12470 (if_then_else (match_operand 2 "const_int_operand")
12471 (const_string "1")
12472 (const_string "0")))
12473 (set_attr "mode" "<sseinsnmode>")])
12474
12475 (define_insn "<mask_codefor><insn><mode>3<mask_name>"
12476 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
12477 (any_lshift:VI248_AVX512BW_2
12478 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
12479 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12480 "TARGET_AVX512VL"
12481 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12482 [(set_attr "type" "sseishft")
12483 (set (attr "length_immediate")
12484 (if_then_else (match_operand 2 "const_int_operand")
12485 (const_string "1")
12486 (const_string "0")))
12487 (set_attr "mode" "<sseinsnmode>")])
12488
12489 (define_insn "<insn><mode>3"
12490 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
12491 (any_lshift:VI248_AVX2
12492 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
12493 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12494 "TARGET_SSE2"
12495 "@
12496 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
12497 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12498 [(set_attr "isa" "noavx,avx")
12499 (set_attr "type" "sseishft")
12500 (set (attr "length_immediate")
12501 (if_then_else (match_operand 2 "const_int_operand")
12502 (const_string "1")
12503 (const_string "0")))
12504 (set_attr "prefix_data16" "1,*")
12505 (set_attr "prefix" "orig,vex")
12506 (set_attr "mode" "<sseinsnmode>")])
12507
12508 (define_insn "<insn><mode>3<mask_name>"
12509 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
12510 (any_lshift:VI248_AVX512BW
12511 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
12512 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
12513 "TARGET_AVX512F"
12514 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12515 [(set_attr "type" "sseishft")
12516 (set (attr "length_immediate")
12517 (if_then_else (match_operand 2 "const_int_operand")
12518 (const_string "1")
12519 (const_string "0")))
12520 (set_attr "mode" "<sseinsnmode>")])
12521
12522
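;; Whole-vector shifts.  The shift is done in V1TImode so that it maps to
;; the byte-granular pslldq/psrldq; operand 2 is a bit count that must be
;; a multiple of 8 (const_0_to_255_mul_8_operand), divided by 8 in the
;; insn patterns below.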
12523 (define_expand "vec_shl_<mode>"
12524 [(set (match_dup 3)
12525 (ashift:V1TI
12526 (match_operand:V_128 1 "register_operand")
12527 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12528 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12529 "TARGET_SSE2"
12530 {
12531 operands[1] = gen_lowpart (V1TImode, operands[1]);
12532 operands[3] = gen_reg_rtx (V1TImode);
12533 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
12534 })
12535
12536 (define_expand "vec_shr_<mode>"
12537 [(set (match_dup 3)
12538 (lshiftrt:V1TI
12539 (match_operand:V_128 1 "register_operand")
12540 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12541 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12542 "TARGET_SSE2"
12543 {
12544 operands[1] = gen_lowpart (V1TImode, operands[1]);
12545 operands[3] = gen_reg_rtx (V1TImode);
12546 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
12547 })
12548
12549 (define_insn "avx512bw_<insn><mode>3"
12550 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
12551 (any_lshift:VIMAX_AVX512VL
12552 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
12553 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
12554 "TARGET_AVX512BW"
12555 {
12556 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12557 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12558 }
12559 [(set_attr "type" "sseishft")
12560 (set_attr "length_immediate" "1")
12561 (set_attr "prefix" "maybe_evex")
12562 (set_attr "mode" "<sseinsnmode>")])
12563
12564 (define_insn "<sse2_avx2>_<insn><mode>3"
12565 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
12566 (any_lshift:VIMAX_AVX2
12567 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
12568 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
12569 "TARGET_SSE2"
12570 {
12571 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12572
12573 switch (which_alternative)
12574 {
12575 case 0:
12576 return "p<vshift>dq\t{%2, %0|%0, %2}";
12577 case 1:
12578 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12579 default:
12580 gcc_unreachable ();
12581 }
12582 }
12583 [(set_attr "isa" "noavx,avx")
12584 (set_attr "type" "sseishft")
12585 (set_attr "length_immediate" "1")
12586 (set_attr "atom_unit" "sishuf")
12587 (set_attr "prefix_data16" "1,*")
12588 (set_attr "prefix" "orig,vex")
12589 (set_attr "mode" "<sseinsnmode>")])
12590
12591 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
12592 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12593 (any_rotate:VI48_AVX512VL
12594 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
12595 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12596 "TARGET_AVX512F"
12597 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12598 [(set_attr "prefix" "evex")
12599 (set_attr "mode" "<sseinsnmode>")])
12600
12601 (define_insn "<avx512>_<rotate><mode><mask_name>"
12602 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12603 (any_rotate:VI48_AVX512VL
12604 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
12605 (match_operand:SI 2 "const_0_to_255_operand")))]
12606 "TARGET_AVX512F"
12607 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12608 [(set_attr "prefix" "evex")
12609 (set_attr "mode" "<sseinsnmode>")])
12610
12611 (define_expand "<code><mode>3"
12612 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
12613 (maxmin:VI124_256_AVX512F_AVX512BW
12614 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
12615 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
12616 "TARGET_AVX2"
12617 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12618
12619 (define_insn "*avx2_<code><mode>3"
12620 [(set (match_operand:VI124_256 0 "register_operand" "=v")
12621 (maxmin:VI124_256
12622 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
12623 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
12624 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12625 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12626 [(set_attr "type" "sseiadd")
12627 (set_attr "prefix_extra" "1")
12628 (set_attr "prefix" "vex")
12629 (set_attr "mode" "OI")])
12630
12631 (define_expand "<code><mode>3_mask"
12632 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12633 (vec_merge:VI48_AVX512VL
12634 (maxmin:VI48_AVX512VL
12635 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12636 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12637 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12638 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12639 "TARGET_AVX512F"
12640 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12641
12642 (define_insn "*avx512f_<code><mode>3<mask_name>"
12643 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12644 (maxmin:VI48_AVX512VL
12645 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
12646 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12647 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12648 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12649 [(set_attr "type" "sseiadd")
12650 (set_attr "prefix_extra" "1")
12651 (set_attr "prefix" "maybe_evex")
12652 (set_attr "mode" "<sseinsnmode>")])
12653
12654 (define_insn "<mask_codefor><code><mode>3<mask_name>"
12655 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
12656 (maxmin:VI12_AVX512VL
12657 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
12658 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
12659 "TARGET_AVX512BW"
12660 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12661 [(set_attr "type" "sseiadd")
12662 (set_attr "prefix" "evex")
12663 (set_attr "mode" "<sseinsnmode>")])
12664
12665 (define_expand "<code><mode>3"
12666 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12667 (maxmin:VI8_AVX2_AVX512F
12668 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12669 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12670 "TARGET_SSE4_2"
12671 {
12672 if (TARGET_AVX512F
12673 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
12674 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12675 else
12676 {
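/* No 64-bit element min/max instructions exist before AVX512
   (vpminsq/vpmaxsq and friends), so synthesize the operation as a
   compare and blend.  */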
12677 enum rtx_code code;
12678 rtx xops[6];
12679 bool ok;
12680
12681
12682 xops[0] = operands[0];
12683
12684 if (<CODE> == SMAX || <CODE> == UMAX)
12685 {
12686 xops[1] = operands[1];
12687 xops[2] = operands[2];
12688 }
12689 else
12690 {
12691 xops[1] = operands[2];
12692 xops[2] = operands[1];
12693 }
12694
12695 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
12696
12697 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
12698 xops[4] = operands[1];
12699 xops[5] = operands[2];
12700
12701 ok = ix86_expand_int_vcond (xops);
12702 gcc_assert (ok);
12703 DONE;
12704 }
12705 })
12706
12707 (define_expand "<code><mode>3"
12708 [(set (match_operand:VI124_128 0 "register_operand")
12709 (smaxmin:VI124_128
12710 (match_operand:VI124_128 1 "vector_operand")
12711 (match_operand:VI124_128 2 "vector_operand")))]
12712 "TARGET_SSE2"
12713 {
12714 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
12715 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12716 else
12717 {
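/* Before SSE4.1 only the signed word forms (pminsw/pmaxsw) exist,
   handled above; signed byte and dword min/max are synthesized with a
   compare and blend.  */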
12718 rtx xops[6];
12719 bool ok;
12720
12721 xops[0] = operands[0];
12722 operands[1] = force_reg (<MODE>mode, operands[1]);
12723 operands[2] = force_reg (<MODE>mode, operands[2]);
12724
12725 if (<CODE> == SMAX)
12726 {
12727 xops[1] = operands[1];
12728 xops[2] = operands[2];
12729 }
12730 else
12731 {
12732 xops[1] = operands[2];
12733 xops[2] = operands[1];
12734 }
12735
12736 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
12737 xops[4] = operands[1];
12738 xops[5] = operands[2];
12739
12740 ok = ix86_expand_int_vcond (xops);
12741 gcc_assert (ok);
12742 DONE;
12743 }
12744 })
12745
12746 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12747 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
12748 (smaxmin:VI14_128
12749 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
12750 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12751 "TARGET_SSE4_1
12752 && <mask_mode512bit_condition>
12753 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12754 "@
12755 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12756 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12757 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12758 [(set_attr "isa" "noavx,noavx,avx")
12759 (set_attr "type" "sseiadd")
12760 (set_attr "prefix_extra" "1,1,*")
12761 (set_attr "prefix" "orig,orig,vex")
12762 (set_attr "mode" "TI")])
12763
12764 (define_insn "*<code>v8hi3"
12765 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
12766 (smaxmin:V8HI
12767 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
12768 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
12769 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12770 "@
12771 p<maxmin_int>w\t{%2, %0|%0, %2}
12772 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
12773 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
12774 [(set_attr "isa" "noavx,avx,avx512bw")
12775 (set_attr "type" "sseiadd")
12776 (set_attr "prefix_data16" "1,*,*")
12777 (set_attr "prefix_extra" "*,1,1")
12778 (set_attr "prefix" "orig,vex,evex")
12779 (set_attr "mode" "TI")])
12780
12781 (define_expand "<code><mode>3"
12782 [(set (match_operand:VI124_128 0 "register_operand")
12783 (umaxmin:VI124_128
12784 (match_operand:VI124_128 1 "vector_operand")
12785 (match_operand:VI124_128 2 "vector_operand")))]
12786 "TARGET_SSE2"
12787 {
12788 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
12789 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12790 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
12791 {
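/* Plain SSE2 has no pmaxuw; use the identity
   umax (a, b) == (a - b saturated at 0) + b,
   i.e. psubusw followed by paddw.  */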
12792 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
12793 operands[1] = force_reg (<MODE>mode, operands[1]);
12794 if (rtx_equal_p (op3, op2))
12795 op3 = gen_reg_rtx (V8HImode);
12796 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
12797 emit_insn (gen_addv8hi3 (op0, op3, op2));
12798 DONE;
12799 }
12800 else
12801 {
12802 rtx xops[6];
12803 bool ok;
12804
12805 operands[1] = force_reg (<MODE>mode, operands[1]);
12806 operands[2] = force_reg (<MODE>mode, operands[2]);
12807
12808 xops[0] = operands[0];
12809
12810 if (<CODE> == UMAX)
12811 {
12812 xops[1] = operands[1];
12813 xops[2] = operands[2];
12814 }
12815 else
12816 {
12817 xops[1] = operands[2];
12818 xops[2] = operands[1];
12819 }
12820
12821 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
12822 xops[4] = operands[1];
12823 xops[5] = operands[2];
12824
12825 ok = ix86_expand_int_vcond (xops);
12826 gcc_assert (ok);
12827 DONE;
12828 }
12829 })
12830
12831 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12832 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
12833 (umaxmin:VI24_128
12834 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
12835 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12836 "TARGET_SSE4_1
12837 && <mask_mode512bit_condition>
12838 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12839 "@
12840 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12841 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12842 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12843 [(set_attr "isa" "noavx,noavx,avx")
12844 (set_attr "type" "sseiadd")
12845 (set_attr "prefix_extra" "1,1,*")
12846 (set_attr "prefix" "orig,orig,vex")
12847 (set_attr "mode" "TI")])
12848
12849 (define_insn "*<code>v16qi3"
12850 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
12851 (umaxmin:V16QI
12852 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
12853 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
12854 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12855 "@
12856 p<maxmin_int>b\t{%2, %0|%0, %2}
12857 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
12858 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
12859 [(set_attr "isa" "noavx,avx,avx512bw")
12860 (set_attr "type" "sseiadd")
12861 (set_attr "prefix_data16" "1,*,*")
12862 (set_attr "prefix_extra" "*,1,1")
12863 (set_attr "prefix" "orig,vex,evex")
12864 (set_attr "mode" "TI")])
12865
12866 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12867 ;;
12868 ;; Parallel integral comparisons
12869 ;;
12870 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12871
12872 (define_expand "avx2_eq<mode>3"
12873 [(set (match_operand:VI_256 0 "register_operand")
12874 (eq:VI_256
12875 (match_operand:VI_256 1 "nonimmediate_operand")
12876 (match_operand:VI_256 2 "nonimmediate_operand")))]
12877 "TARGET_AVX2"
12878 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12879
12880 (define_insn "*avx2_eq<mode>3"
12881 [(set (match_operand:VI_256 0 "register_operand" "=x")
12882 (eq:VI_256
12883 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
12884 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12885 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12886 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12887 [(set_attr "type" "ssecmp")
12888 (set_attr "prefix_extra" "1")
12889 (set_attr "prefix" "vex")
12890 (set_attr "mode" "OI")])
12891
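;; If the result of a masked EQ comparison is only used to build an
;; all-ones / all-zeros vector (vec_merge of constm1 and const0 by the
;; mask), split it back into the plain vector EQ pattern so the
;; AVX2/SSE forms can be used.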
12892 (define_insn_and_split "*avx2_eq<mode>3"
12893 [(set (match_operand:VI_128_256 0 "register_operand")
12894 (vec_merge:VI_128_256
12895 (match_operand:VI_128_256 1 "vector_all_ones_operand")
12896 (match_operand:VI_128_256 2 "const0_operand")
12897 (unspec:<avx512fmaskmode>
12898 [(match_operand:VI_128_256 3 "nonimmediate_operand")
12899 (match_operand:VI_128_256 4 "nonimmediate_operand")]
12900 UNSPEC_MASKED_EQ)))]
12901 "TARGET_AVX512VL && ix86_pre_reload_split ()
12902 && !(MEM_P (operands[3]) && MEM_P (operands[4]))"
12903 "#"
12904 "&& 1"
12905 [(set (match_dup 0)
12906 (eq:VI_128_256
12907 (match_dup 3)
12908 (match_dup 4)))])
12909
12910 (define_insn_and_split "*avx2_pcmp<mode>3_1"
12911 [(set (match_operand:VI_128_256 0 "register_operand")
12912 (vec_merge:VI_128_256
12913 (match_operand:VI_128_256 1 "vector_all_ones_operand")
12914 (match_operand:VI_128_256 2 "const0_operand")
12915 (unspec:<avx512fmaskmode>
12916 [(match_operand:VI_128_256 3 "nonimmediate_operand")
12917 (match_operand:VI_128_256 4 "nonimmediate_operand")
12918 (match_operand:SI 5 "const_0_to_7_operand")]
12919 UNSPEC_PCMP)))]
12920 "TARGET_AVX512VL && ix86_pre_reload_split ()
12921 /* EQ is commutative. */
12922 && ((INTVAL (operands[5]) == 0
12923 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
12924 /* NLE aka GT, 3 must be register. */
12925 || (INTVAL (operands[5]) == 6
12926 && !MEM_P (operands[3]))
12927 /* LT, 4 must be register and we swap operands. */
12928 || (INTVAL (operands[5]) == 1
12929 && !MEM_P (operands[4])))"
12930 "#"
12931 "&& 1"
12932 [(const_int 0)]
12933 {
12934 if (INTVAL (operands[5]) == 1)
12935 std::swap (operands[3], operands[4]);
12936 enum rtx_code code = INTVAL (operands[5]) ? GT : EQ;
12937 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
12938 operands[3], operands[4]));
12939 DONE;
12940 })
12941
12942 (define_insn_and_split "*avx2_pcmp<mode>3_2"
12943 [(set (match_operand:VI_128_256 0 "register_operand")
12944 (vec_merge:VI_128_256
12945 (match_operand:VI_128_256 1 "vector_all_ones_operand")
12946 (match_operand:VI_128_256 2 "const0_operand")
12947 (not:<avx512fmaskmode>
12948 (unspec:<avx512fmaskmode>
12949 [(match_operand:VI_128_256 3 "nonimmediate_operand")
12950 (match_operand:VI_128_256 4 "nonimmediate_operand")
12951 (match_operand:SI 5 "const_0_to_7_operand")]
12952 UNSPEC_PCMP))))]
12953 "TARGET_AVX512VL && ix86_pre_reload_split ()
12954 /* NE is commutative. */
12955 && ((INTVAL (operands[5]) == 4
12956 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
12957 /* LE, 3 must be register. */
12958 || (INTVAL (operands[5]) == 2
12959 && !MEM_P (operands[3]))
12960 /* NLT aka GE, 4 must be register and we swap operands. */
12961 || (INTVAL (operands[5]) == 5
12962 && !MEM_P (operands[4])))"
12963 "#"
12964 "&& 1"
12965 [(const_int 0)]
12966 {
12967 if (INTVAL (operands[5]) == 5)
12968 std::swap (operands[3], operands[4]);
12969 enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ;
12970 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
12971 operands[3], operands[4]));
12972 DONE;
12973 })
12974
12975 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12976 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12977 (unspec:<avx512fmaskmode>
12978 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12979 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
12980 UNSPEC_MASKED_EQ))]
12981 "TARGET_AVX512BW"
12982 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12983
12984 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12985 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12986 (unspec:<avx512fmaskmode>
12987 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12988 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
12989 UNSPEC_MASKED_EQ))]
12990 "TARGET_AVX512F"
12991 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12992
12993 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12994 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12995 (unspec:<avx512fmaskmode>
12996 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12997 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12998 UNSPEC_MASKED_EQ))]
12999 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13000 "@
13001 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
13002 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
13003 [(set_attr "type" "ssecmp")
13004 (set_attr "prefix_extra" "1")
13005 (set_attr "prefix" "evex")
13006 (set_attr "mode" "<sseinsnmode>")])
13007
13008 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
13009 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
13010 (unspec:<avx512fmaskmode>
13011 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
13012 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
13013 UNSPEC_MASKED_EQ))]
13014 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13015 "@
13016 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
13017 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
13018 [(set_attr "type" "ssecmp")
13019 (set_attr "prefix_extra" "1")
13020 (set_attr "prefix" "evex")
13021 (set_attr "mode" "<sseinsnmode>")])
13022
13023 (define_insn "*sse4_1_eqv2di3"
13024 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
13025 (eq:V2DI
13026 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
13027 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
13028 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13029 "@
13030 pcmpeqq\t{%2, %0|%0, %2}
13031 pcmpeqq\t{%2, %0|%0, %2}
13032 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
13033 [(set_attr "isa" "noavx,noavx,avx")
13034 (set_attr "type" "ssecmp")
13035 (set_attr "prefix_extra" "1")
13036 (set_attr "prefix" "orig,orig,vex")
13037 (set_attr "mode" "TI")])
13038
13039 (define_insn "*sse2_eq<mode>3"
13040 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
13041 (eq:VI124_128
13042 (match_operand:VI124_128 1 "vector_operand" "%0,x")
13043 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
13044 "TARGET_SSE2 && !TARGET_XOP
13045 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13046 "@
13047 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
13048 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13049 [(set_attr "isa" "noavx,avx")
13050 (set_attr "type" "ssecmp")
13051 (set_attr "prefix_data16" "1,*")
13052 (set_attr "prefix" "orig,vex")
13053 (set_attr "mode" "TI")])
13054
13055 (define_expand "sse2_eq<mode>3"
13056 [(set (match_operand:VI124_128 0 "register_operand")
13057 (eq:VI124_128
13058 (match_operand:VI124_128 1 "vector_operand")
13059 (match_operand:VI124_128 2 "vector_operand")))]
13060 "TARGET_SSE2 && !TARGET_XOP "
13061 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
13062
13063 (define_expand "sse4_1_eqv2di3"
13064 [(set (match_operand:V2DI 0 "register_operand")
13065 (eq:V2DI
13066 (match_operand:V2DI 1 "vector_operand")
13067 (match_operand:V2DI 2 "vector_operand")))]
13068 "TARGET_SSE4_1"
13069 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
13070
13071 (define_insn "sse4_2_gtv2di3"
13072 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
13073 (gt:V2DI
13074 (match_operand:V2DI 1 "register_operand" "0,0,x")
13075 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
13076 "TARGET_SSE4_2"
13077 "@
13078 pcmpgtq\t{%2, %0|%0, %2}
13079 pcmpgtq\t{%2, %0|%0, %2}
13080 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
13081 [(set_attr "isa" "noavx,noavx,avx")
13082 (set_attr "type" "ssecmp")
13083 (set_attr "prefix_extra" "1")
13084 (set_attr "prefix" "orig,orig,vex")
13085 (set_attr "mode" "TI")])
13086
13087 (define_insn "avx2_gt<mode>3"
13088 [(set (match_operand:VI_256 0 "register_operand" "=x")
13089 (gt:VI_256
13090 (match_operand:VI_256 1 "register_operand" "x")
13091 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
13092 "TARGET_AVX2"
13093 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13094 [(set_attr "type" "ssecmp")
13095 (set_attr "prefix_extra" "1")
13096 (set_attr "prefix" "vex")
13097 (set_attr "mode" "OI")])
13098
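;; Likewise, lower a masked GT whose result is materialized as a vector
;; back to the plain vector GT pattern.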
13099 (define_insn_and_split "*avx2_gt<mode>3"
13100 [(set (match_operand:VI_128_256 0 "register_operand")
13101 (vec_merge:VI_128_256
13102 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13103 (match_operand:VI_128_256 2 "const0_operand")
13104 (unspec:<avx512fmaskmode>
13105 [(match_operand:VI_128_256 3 "register_operand")
13106 (match_operand:VI_128_256 4 "nonimmediate_operand")]
13107 UNSPEC_MASKED_GT)))]
13108 "TARGET_AVX512VL
13109 && ix86_pre_reload_split ()"
13110 "#"
13111 "&& 1"
13112 [(set (match_dup 0)
13113 (gt:VI_128_256
13114 (match_dup 3)
13115 (match_dup 4)))])
13116
13117 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
13118 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13119 (unspec:<avx512fmaskmode>
13120 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
13121 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
13122 "TARGET_AVX512F"
13123 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13124 [(set_attr "type" "ssecmp")
13125 (set_attr "prefix_extra" "1")
13126 (set_attr "prefix" "evex")
13127 (set_attr "mode" "<sseinsnmode>")])
13128
13129 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
13130 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13131 (unspec:<avx512fmaskmode>
13132 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
13133 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
13134 "TARGET_AVX512BW"
13135 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13136 [(set_attr "type" "ssecmp")
13137 (set_attr "prefix_extra" "1")
13138 (set_attr "prefix" "evex")
13139 (set_attr "mode" "<sseinsnmode>")])
13140
13141 (define_insn "sse2_gt<mode>3"
13142 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
13143 (gt:VI124_128
13144 (match_operand:VI124_128 1 "register_operand" "0,x")
13145 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
13146 "TARGET_SSE2 && !TARGET_XOP"
13147 "@
13148 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
13149 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13150 [(set_attr "isa" "noavx,avx")
13151 (set_attr "type" "ssecmp")
13152 (set_attr "prefix_data16" "1,*")
13153 (set_attr "prefix" "orig,vex")
13154 (set_attr "mode" "TI")])
13155
13156 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
13157 [(set (match_operand:V_512 0 "register_operand")
13158 (if_then_else:V_512
13159 (match_operator 3 ""
13160 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
13161 (match_operand:VI_AVX512BW 5 "general_operand")])
13162 (match_operand:V_512 1)
13163 (match_operand:V_512 2)))]
13164 "TARGET_AVX512F
13165 && (GET_MODE_NUNITS (<V_512:MODE>mode)
13166 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
13167 {
13168 bool ok = ix86_expand_int_vcond (operands);
13169 gcc_assert (ok);
13170 DONE;
13171 })
13172
13173 (define_expand "vcond<V_256:mode><VI_256:mode>"
13174 [(set (match_operand:V_256 0 "register_operand")
13175 (if_then_else:V_256
13176 (match_operator 3 ""
13177 [(match_operand:VI_256 4 "nonimmediate_operand")
13178 (match_operand:VI_256 5 "general_operand")])
13179 (match_operand:V_256 1)
13180 (match_operand:V_256 2)))]
13181 "TARGET_AVX2
13182 && (GET_MODE_NUNITS (<V_256:MODE>mode)
13183 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
13184 {
13185 bool ok = ix86_expand_int_vcond (operands);
13186 gcc_assert (ok);
13187 DONE;
13188 })
13189
13190 (define_expand "vcond<V_128:mode><VI124_128:mode>"
13191 [(set (match_operand:V_128 0 "register_operand")
13192 (if_then_else:V_128
13193 (match_operator 3 ""
13194 [(match_operand:VI124_128 4 "vector_operand")
13195 (match_operand:VI124_128 5 "general_operand")])
13196 (match_operand:V_128 1)
13197 (match_operand:V_128 2)))]
13198 "TARGET_SSE2
13199 && (GET_MODE_NUNITS (<V_128:MODE>mode)
13200 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
13201 {
13202 bool ok = ix86_expand_int_vcond (operands);
13203 gcc_assert (ok);
13204 DONE;
13205 })
13206
13207 (define_expand "vcond<VI8F_128:mode>v2di"
13208 [(set (match_operand:VI8F_128 0 "register_operand")
13209 (if_then_else:VI8F_128
13210 (match_operator 3 ""
13211 [(match_operand:V2DI 4 "vector_operand")
13212 (match_operand:V2DI 5 "general_operand")])
13213 (match_operand:VI8F_128 1)
13214 (match_operand:VI8F_128 2)))]
13215 "TARGET_SSE4_2"
13216 {
13217 bool ok = ix86_expand_int_vcond (operands);
13218 gcc_assert (ok);
13219 DONE;
13220 })
13221
13222 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
13223 [(set (match_operand:V_512 0 "register_operand")
13224 (if_then_else:V_512
13225 (match_operator 3 ""
13226 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
13227 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
13228 (match_operand:V_512 1 "general_operand")
13229 (match_operand:V_512 2 "general_operand")))]
13230 "TARGET_AVX512F
13231 && (GET_MODE_NUNITS (<V_512:MODE>mode)
13232 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
13233 {
13234 bool ok = ix86_expand_int_vcond (operands);
13235 gcc_assert (ok);
13236 DONE;
13237 })
13238
13239 (define_expand "vcondu<V_256:mode><VI_256:mode>"
13240 [(set (match_operand:V_256 0 "register_operand")
13241 (if_then_else:V_256
13242 (match_operator 3 ""
13243 [(match_operand:VI_256 4 "nonimmediate_operand")
13244 (match_operand:VI_256 5 "nonimmediate_operand")])
13245 (match_operand:V_256 1 "general_operand")
13246 (match_operand:V_256 2 "general_operand")))]
13247 "TARGET_AVX2
13248 && (GET_MODE_NUNITS (<V_256:MODE>mode)
13249 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
13250 {
13251 bool ok = ix86_expand_int_vcond (operands);
13252 gcc_assert (ok);
13253 DONE;
13254 })
13255
13256 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
13257 [(set (match_operand:V_128 0 "register_operand")
13258 (if_then_else:V_128
13259 (match_operator 3 ""
13260 [(match_operand:VI124_128 4 "vector_operand")
13261 (match_operand:VI124_128 5 "vector_operand")])
13262 (match_operand:V_128 1 "general_operand")
13263 (match_operand:V_128 2 "general_operand")))]
13264 "TARGET_SSE2
13265 && (GET_MODE_NUNITS (<V_128:MODE>mode)
13266 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
13267 {
13268 bool ok = ix86_expand_int_vcond (operands);
13269 gcc_assert (ok);
13270 DONE;
13271 })
13272
13273 (define_expand "vcondu<VI8F_128:mode>v2di"
13274 [(set (match_operand:VI8F_128 0 "register_operand")
13275 (if_then_else:VI8F_128
13276 (match_operator 3 ""
13277 [(match_operand:V2DI 4 "vector_operand")
13278 (match_operand:V2DI 5 "vector_operand")])
13279 (match_operand:VI8F_128 1 "general_operand")
13280 (match_operand:VI8F_128 2 "general_operand")))]
13281 "TARGET_SSE4_2"
13282 {
13283 bool ok = ix86_expand_int_vcond (operands);
13284 gcc_assert (ok);
13285 DONE;
13286 })
13287
13288 (define_expand "vcondeq<VI8F_128:mode>v2di"
13289 [(set (match_operand:VI8F_128 0 "register_operand")
13290 (if_then_else:VI8F_128
13291 (match_operator 3 ""
13292 [(match_operand:V2DI 4 "vector_operand")
13293 (match_operand:V2DI 5 "general_operand")])
13294 (match_operand:VI8F_128 1)
13295 (match_operand:VI8F_128 2)))]
13296 "TARGET_SSE4_1"
13297 {
13298 bool ok = ix86_expand_int_vcond (operands);
13299 gcc_assert (ok);
13300 DONE;
13301 })
13302
13303 (define_mode_iterator VEC_PERM_AVX2
13304 [V16QI V8HI V4SI V2DI V4SF V2DF
13305 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
13306 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
13307 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
13308 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
13309 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
13310 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
13311
13312 (define_expand "vec_perm<mode>"
13313 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
13314 (match_operand:VEC_PERM_AVX2 1 "register_operand")
13315 (match_operand:VEC_PERM_AVX2 2 "register_operand")
13316 (match_operand:<sseintvecmode> 3 "register_operand")]
13317 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
13318 {
13319 ix86_expand_vec_perm (operands);
13320 DONE;
13321 })
13322
13323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13324 ;;
13325 ;; Parallel bitwise logical operations
13326 ;;
13327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13328
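;; One's complement is open-coded as XOR with all-ones.  Without AVX512F
;; the constant is forced into a register so the generic xor patterns can
;; match; with AVX512F it is kept as a constant so the vpternlog pattern
;; below applies.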
13329 (define_expand "one_cmpl<mode>2"
13330 [(set (match_operand:VI 0 "register_operand")
13331 (xor:VI (match_operand:VI 1 "vector_operand")
13332 (match_dup 2)))]
13333 "TARGET_SSE"
13334 {
13335 if (!TARGET_AVX512F)
13336 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
13337 else
13338 operands[2] = CONSTM1_RTX (<MODE>mode);
13339 })
13340
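;; vpternlog with immediate 0x55 implements NOT of the last source
;; operand, so no all-ones constant is needed at run time.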
13341 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
13342 [(set (match_operand:VI 0 "register_operand" "=v,v")
13343 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
13344 (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
13345 "TARGET_AVX512F
13346 && (!<mask_applied>
13347 || <ssescalarmode>mode == SImode
13348 || <ssescalarmode>mode == DImode)"
13349 {
13350 if (TARGET_AVX512VL)
13351 return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
13352 else
13353 return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
13354 }
13355 [(set_attr "type" "sselog")
13356 (set_attr "prefix" "evex")
13357 (set (attr "mode")
13358 (if_then_else (match_test "TARGET_AVX512VL")
13359 (const_string "<sseinsnmode>")
13360 (const_string "XI")))
13361 (set (attr "enabled")
13362 (if_then_else (eq_attr "alternative" "1")
13363 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
13364 (const_int 1)))])
13365
13366 (define_expand "<sse2_avx2>_andnot<mode>3"
13367 [(set (match_operand:VI_AVX2 0 "register_operand")
13368 (and:VI_AVX2
13369 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
13370 (match_operand:VI_AVX2 2 "vector_operand")))]
13371 "TARGET_SSE2")
13372
13373 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13374 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
13375 (vec_merge:VI48_AVX512VL
13376 (and:VI48_AVX512VL
13377 (not:VI48_AVX512VL
13378 (match_operand:VI48_AVX512VL 1 "register_operand"))
13379 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
13380 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
13381 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13382 "TARGET_AVX512F")
13383
13384 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13385 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
13386 (vec_merge:VI12_AVX512VL
13387 (and:VI12_AVX512VL
13388 (not:VI12_AVX512VL
13389 (match_operand:VI12_AVX512VL 1 "register_operand"))
13390 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
13391 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
13392 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13393 "TARGET_AVX512BW")
13394
13395 (define_insn "*andnot<mode>3"
13396 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
13397 (and:VI
13398 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
13399 (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13400 "TARGET_SSE"
13401 {
13402 char buf[64];
13403 const char *ops;
13404 const char *tmp;
13405 const char *ssesuffix;
13406
13407 switch (get_attr_mode (insn))
13408 {
13409 case MODE_XI:
13410 gcc_assert (TARGET_AVX512F);
13411 /* FALLTHRU */
13412 case MODE_OI:
13413 gcc_assert (TARGET_AVX2);
13414 /* FALLTHRU */
13415 case MODE_TI:
13416 gcc_assert (TARGET_SSE2);
13417 tmp = "pandn";
13418 switch (<MODE>mode)
13419 {
13420 case E_V64QImode:
13421 case E_V32HImode:
13422 /* There is no vpandnb or vpandnw instruction, nor vpandn for
13423 512-bit vectors. Use vpandnq instead. */
13424 ssesuffix = "q";
13425 break;
13426 case E_V16SImode:
13427 case E_V8DImode:
13428 ssesuffix = "<ssemodesuffix>";
13429 break;
13430 case E_V8SImode:
13431 case E_V4DImode:
13432 case E_V4SImode:
13433 case E_V2DImode:
13434 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
13435 ? "<ssemodesuffix>" : "");
13436 break;
13437 default:
13438 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13439 }
13440 break;
13441
13442 case MODE_V16SF:
13443 gcc_assert (TARGET_AVX512F);
13444 /* FALLTHRU */
13445 case MODE_V8SF:
13446 gcc_assert (TARGET_AVX);
13447 /* FALLTHRU */
13448 case MODE_V4SF:
13449 gcc_assert (TARGET_SSE);
13450 tmp = "andn";
13451 ssesuffix = "ps";
13452 break;
13453
13454 default:
13455 gcc_unreachable ();
13456 }
13457
13458 switch (which_alternative)
13459 {
13460 case 0:
13461 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13462 break;
13463 case 1:
13464 case 2:
13465 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13466 break;
13467 default:
13468 gcc_unreachable ();
13469 }
13470
13471 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13472 output_asm_insn (buf, operands);
13473 return "";
13474 }
13475 [(set_attr "isa" "noavx,avx,avx")
13476 (set_attr "type" "sselog")
13477 (set (attr "prefix_data16")
13478 (if_then_else
13479 (and (eq_attr "alternative" "0")
13480 (eq_attr "mode" "TI"))
13481 (const_string "1")
13482 (const_string "*")))
13483 (set_attr "prefix" "orig,vex,evex")
13484 (set (attr "mode")
13485 (cond [(match_test "TARGET_AVX2")
13486 (const_string "<sseinsnmode>")
13487 (match_test "TARGET_AVX")
13488 (if_then_else
13489 (match_test "<MODE_SIZE> > 16")
13490 (const_string "V8SF")
13491 (const_string "<sseinsnmode>"))
13492 (ior (not (match_test "TARGET_SSE2"))
13493 (match_test "optimize_function_for_size_p (cfun)"))
13494 (const_string "V4SF")
13495 ]
13496 (const_string "<sseinsnmode>")))])
13497
13498 (define_insn "*andnot<mode>3_mask"
13499 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13500 (vec_merge:VI48_AVX512VL
13501 (and:VI48_AVX512VL
13502 (not:VI48_AVX512VL
13503 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
13504 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
13505 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
13506 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
13507 "TARGET_AVX512F"
13508 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
13509 [(set_attr "type" "sselog")
13510 (set_attr "prefix" "evex")
13511 (set_attr "mode" "<sseinsnmode>")])
13512
13513 (define_expand "<code><mode>3"
13514 [(set (match_operand:VI 0 "register_operand")
13515 (any_logic:VI
13516 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
13517 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
13518 "TARGET_SSE"
13519 {
13520 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
13521 DONE;
13522 })
13523
13524 (define_insn "<mask_codefor><code><mode>3<mask_name>"
13525 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
13526 (any_logic:VI48_AVX_AVX512F
13527 (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v")
13528 (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13529 "TARGET_SSE && <mask_mode512bit_condition>
13530 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
13531 {
13532 char buf[64];
13533 const char *ops;
13534 const char *tmp;
13535 const char *ssesuffix;
13536
13537 switch (get_attr_mode (insn))
13538 {
13539 case MODE_XI:
13540 gcc_assert (TARGET_AVX512F);
13541 /* FALLTHRU */
13542 case MODE_OI:
13543 gcc_assert (TARGET_AVX2);
13544 /* FALLTHRU */
13545 case MODE_TI:
13546 gcc_assert (TARGET_SSE2);
13547 tmp = "p<logic>";
13548 switch (<MODE>mode)
13549 {
13550 case E_V16SImode:
13551 case E_V8DImode:
13552 ssesuffix = "<ssemodesuffix>";
13553 break;
13554 case E_V8SImode:
13555 case E_V4DImode:
13556 case E_V4SImode:
13557 case E_V2DImode:
13558 ssesuffix = (TARGET_AVX512VL
13559 && (<mask_applied> || which_alternative == 2)
13560 ? "<ssemodesuffix>" : "");
13561 break;
13562 default:
13563 gcc_unreachable ();
13564 }
13565 break;
13566
13567 case MODE_V8SF:
13568 gcc_assert (TARGET_AVX);
13569 /* FALLTHRU */
13570 case MODE_V4SF:
13571 gcc_assert (TARGET_SSE);
13572 tmp = "<logic>";
13573 ssesuffix = "ps";
13574 break;
13575
13576 default:
13577 gcc_unreachable ();
13578 }
13579
13580 switch (which_alternative)
13581 {
13582 case 0:
13583 if (<mask_applied>)
13584 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
13585 else
13586 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13587 break;
13588 case 1:
13589 case 2:
13590 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
13591 break;
13592 default:
13593 gcc_unreachable ();
13594 }
13595
13596 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13597 output_asm_insn (buf, operands);
13598 return "";
13599 }
13600 [(set_attr "isa" "noavx,avx,avx")
13601 (set_attr "type" "sselog")
13602 (set (attr "prefix_data16")
13603 (if_then_else
13604 (and (eq_attr "alternative" "0")
13605 (eq_attr "mode" "TI"))
13606 (const_string "1")
13607 (const_string "*")))
13608 (set_attr "prefix" "<mask_prefix3>,<mask_prefix3>,evex")
13609 (set (attr "mode")
13610 (cond [(match_test "TARGET_AVX2")
13611 (const_string "<sseinsnmode>")
13612 (match_test "TARGET_AVX")
13613 (if_then_else
13614 (match_test "<MODE_SIZE> > 16")
13615 (const_string "V8SF")
13616 (const_string "<sseinsnmode>"))
13617 (ior (not (match_test "TARGET_SSE2"))
13618 (match_test "optimize_function_for_size_p (cfun)"))
13619 (const_string "V4SF")
13620 ]
13621 (const_string "<sseinsnmode>")))])
13622
13623 (define_insn "*<code><mode>3"
13624 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
13625 (any_logic:VI12_AVX_AVX512F
13626 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
13627 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
13628 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13629 {
13630 char buf[64];
13631 const char *ops;
13632 const char *tmp;
13633 const char *ssesuffix;
13634
13635 switch (get_attr_mode (insn))
13636 {
13637 case MODE_XI:
13638 gcc_assert (TARGET_AVX512F);
13639 /* FALLTHRU */
13640 case MODE_OI:
13641 gcc_assert (TARGET_AVX2);
13642 /* FALLTHRU */
13643 case MODE_TI:
13644 gcc_assert (TARGET_SSE2);
13645 tmp = "p<logic>";
13646 switch (<MODE>mode)
13647 {
13648 case E_V64QImode:
13649 case E_V32HImode:
13650 ssesuffix = "q";
13651 break;
13652 case E_V32QImode:
13653 case E_V16HImode:
13654 case E_V16QImode:
13655 case E_V8HImode:
13656 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13657 break;
13658 default:
13659 gcc_unreachable ();
13660 }
13661 break;
13662
13663 case MODE_V8SF:
13664 gcc_assert (TARGET_AVX);
13665 /* FALLTHRU */
13666 case MODE_V4SF:
13667 gcc_assert (TARGET_SSE);
13668 tmp = "<logic>";
13669 ssesuffix = "ps";
13670 break;
13671
13672 default:
13673 gcc_unreachable ();
13674 }
13675
13676 switch (which_alternative)
13677 {
13678 case 0:
13679 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13680 break;
13681 case 1:
13682 case 2:
13683 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13684 break;
13685 default:
13686 gcc_unreachable ();
13687 }
13688
13689 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13690 output_asm_insn (buf, operands);
13691 return "";
13692 }
13693 [(set_attr "isa" "noavx,avx,avx")
13694 (set_attr "type" "sselog")
13695 (set (attr "prefix_data16")
13696 (if_then_else
13697 (and (eq_attr "alternative" "0")
13698 (eq_attr "mode" "TI"))
13699 (const_string "1")
13700 (const_string "*")))
13701 (set_attr "prefix" "orig,vex,evex")
13702 (set (attr "mode")
13703 (cond [(match_test "TARGET_AVX2")
13704 (const_string "<sseinsnmode>")
13705 (match_test "TARGET_AVX")
13706 (if_then_else
13707 (match_test "<MODE_SIZE> > 16")
13708 (const_string "V8SF")
13709 (const_string "<sseinsnmode>"))
13710 (ior (not (match_test "TARGET_SSE2"))
13711 (match_test "optimize_function_for_size_p (cfun)"))
13712 (const_string "V4SF")
13713 ]
13714 (const_string "<sseinsnmode>")))])
13715
13716 (define_mode_iterator VI1248_AVX512VLBW
13717 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
13718 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
13719 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
13720 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
13721 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
13722 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
13723
13724 (define_mode_iterator AVX512ZEXTMASK
13725 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
13726
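;; vptestm sets a mask bit when the AND of the two source elements is
;; nonzero; vptestnm sets it when the AND is zero.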
13727 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
13728 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13729 (unspec:<avx512fmaskmode>
13730 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13731 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13732 UNSPEC_TESTM))]
13733 "TARGET_AVX512F"
13734 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13735 [(set_attr "prefix" "evex")
13736 (set_attr "mode" "<sseinsnmode>")])
13737
13738 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
13739 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13740 (unspec:<avx512fmaskmode>
13741 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13742 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13743 UNSPEC_TESTNM))]
13744 "TARGET_AVX512F"
13745 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13746 [(set_attr "prefix" "evex")
13747 (set_attr "mode" "<sseinsnmode>")])
13748
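;; Variants of the vptestm/vptestnm patterns above whose mask result is
;; zero-extended into a wider mask mode; the _mask forms additionally
;; AND the result with an existing mask register.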
13749 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
13750 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13751 (zero_extend:AVX512ZEXTMASK
13752 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13753 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13754 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13755 UNSPEC_TESTM)))]
13756 "TARGET_AVX512BW
13757 && (<AVX512ZEXTMASK:MODE_SIZE>
13758 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13759 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13760 [(set_attr "prefix" "evex")
13761 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13762
13763 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
13764 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13765 (zero_extend:AVX512ZEXTMASK
13766 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13767 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13768 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13769 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13770 UNSPEC_TESTM)
13771 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13772 "TARGET_AVX512BW
13773 && (<AVX512ZEXTMASK:MODE_SIZE>
13774 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13775 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13776 [(set_attr "prefix" "evex")
13777 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13778
13779 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
13780 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13781 (zero_extend:AVX512ZEXTMASK
13782 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13783 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13784 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13785 UNSPEC_TESTNM)))]
13786 "TARGET_AVX512BW
13787 && (<AVX512ZEXTMASK:MODE_SIZE>
13788 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13789 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13790 [(set_attr "prefix" "evex")
13791 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13792
13793 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
13794 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13795 (zero_extend:AVX512ZEXTMASK
13796 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13797 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13798 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13799 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13800 UNSPEC_TESTNM)
13801 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13802 "TARGET_AVX512BW
13803 && (<AVX512ZEXTMASK:MODE_SIZE>
13804 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13805 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13806 [(set_attr "prefix" "evex")
13807 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13808
13809 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13810 ;;
13811 ;; Parallel integral element swizzling
13812 ;;
13813 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13814
13815 (define_expand "vec_pack_trunc_<mode>"
13816 [(match_operand:<ssepackmode> 0 "register_operand")
13817 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
13818 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
13819 "TARGET_SSE2"
13820 {
13821 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
13822 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
13823 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
13824 DONE;
13825 })
13826
13827 (define_expand "vec_pack_trunc_qi"
13828 [(set (match_operand:HI 0 "register_operand")
13829 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
13830 (const_int 8))
13831 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
13832 "TARGET_AVX512F")
13833
13834 (define_expand "vec_pack_trunc_<mode>"
13835 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
13836 (ior:<DOUBLEMASKMODE>
13837 (ashift:<DOUBLEMASKMODE>
13838 (zero_extend:<DOUBLEMASKMODE>
13839 (match_operand:SWI24 2 "register_operand"))
13840 (match_dup 3))
13841 (zero_extend:<DOUBLEMASKMODE>
13842 (match_operand:SWI24 1 "register_operand"))))]
13843 "TARGET_AVX512BW"
13844 {
13845 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
13846 })
13847
13848 (define_expand "vec_pack_sbool_trunc_qi"
13849 [(match_operand:QI 0 "register_operand")
13850 (match_operand:QI 1 "register_operand")
13851 (match_operand:QI 2 "register_operand")
13852 (match_operand:QI 3 "const_int_operand")]
13853 "TARGET_AVX512F"
13854 {
13855 HOST_WIDE_INT nunits = INTVAL (operands[3]);
13856 rtx mask, tem1, tem2;
13857 if (nunits != 8 && nunits != 4)
13858 FAIL;
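  /* Concatenate the two nunits/2-bit masks: mask out the high bits of
     operands[1], shift operands[2] left by nunits/2 (via an HImode
     kshiftlw when the AVX512DQ kshiftlb is unavailable), and IOR the
     two pieces together.  */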
13859 mask = gen_reg_rtx (QImode);
13860 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
13861 tem1 = gen_reg_rtx (QImode);
13862 emit_insn (gen_kandqi (tem1, operands[1], mask));
13863 if (TARGET_AVX512DQ)
13864 {
13865 tem2 = gen_reg_rtx (QImode);
13866 emit_insn (gen_kashiftqi (tem2, operands[2],
13867 GEN_INT (nunits / 2)));
13868 }
13869 else
13870 {
13871 tem2 = gen_reg_rtx (HImode);
13872 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
13873 QImode),
13874 GEN_INT (nunits / 2)));
13875 tem2 = lowpart_subreg (QImode, tem2, HImode);
13876 }
13877 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
13878 DONE;
13879 })
13880
13881 (define_insn "<sse2_avx2>_packsswb<mask_name>"
13882 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13883 (vec_concat:VI1_AVX512
13884 (ss_truncate:<ssehalfvecmode>
13885 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13886 (ss_truncate:<ssehalfvecmode>
13887 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13888 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13889 "@
13890 packsswb\t{%2, %0|%0, %2}
13891 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13892 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13893 [(set_attr "isa" "noavx,avx,avx512bw")
13894 (set_attr "type" "sselog")
13895 (set_attr "prefix_data16" "1,*,*")
13896 (set_attr "prefix" "orig,<mask_prefix>,evex")
13897 (set_attr "mode" "<sseinsnmode>")])
13898
13899 (define_insn "<sse2_avx2>_packssdw<mask_name>"
13900 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13901 (vec_concat:VI2_AVX2
13902 (ss_truncate:<ssehalfvecmode>
13903 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13904 (ss_truncate:<ssehalfvecmode>
13905 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13906 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13907 "@
13908 packssdw\t{%2, %0|%0, %2}
13909 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13910 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13911 [(set_attr "isa" "noavx,avx,avx512bw")
13912 (set_attr "type" "sselog")
13913 (set_attr "prefix_data16" "1,*,*")
13914 (set_attr "prefix" "orig,<mask_prefix>,evex")
13915 (set_attr "mode" "<sseinsnmode>")])
13916
13917 (define_insn "<sse2_avx2>_packuswb<mask_name>"
13918 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13919 (vec_concat:VI1_AVX512
13920 (us_truncate:<ssehalfvecmode>
13921 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13922 (us_truncate:<ssehalfvecmode>
13923 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13924 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13925 "@
13926 packuswb\t{%2, %0|%0, %2}
13927 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13928 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13929 [(set_attr "isa" "noavx,avx,avx512bw")
13930 (set_attr "type" "sselog")
13931 (set_attr "prefix_data16" "1,*,*")
13932 (set_attr "prefix" "orig,<mask_prefix>,evex")
13933 (set_attr "mode" "<sseinsnmode>")])
13934
13935 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
13936 [(set (match_operand:V64QI 0 "register_operand" "=v")
13937 (vec_select:V64QI
13938 (vec_concat:V128QI
13939 (match_operand:V64QI 1 "register_operand" "v")
13940 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13941 (parallel [(const_int 8) (const_int 72)
13942 (const_int 9) (const_int 73)
13943 (const_int 10) (const_int 74)
13944 (const_int 11) (const_int 75)
13945 (const_int 12) (const_int 76)
13946 (const_int 13) (const_int 77)
13947 (const_int 14) (const_int 78)
13948 (const_int 15) (const_int 79)
13949 (const_int 24) (const_int 88)
13950 (const_int 25) (const_int 89)
13951 (const_int 26) (const_int 90)
13952 (const_int 27) (const_int 91)
13953 (const_int 28) (const_int 92)
13954 (const_int 29) (const_int 93)
13955 (const_int 30) (const_int 94)
13956 (const_int 31) (const_int 95)
13957 (const_int 40) (const_int 104)
13958 (const_int 41) (const_int 105)
13959 (const_int 42) (const_int 106)
13960 (const_int 43) (const_int 107)
13961 (const_int 44) (const_int 108)
13962 (const_int 45) (const_int 109)
13963 (const_int 46) (const_int 110)
13964 (const_int 47) (const_int 111)
13965 (const_int 56) (const_int 120)
13966 (const_int 57) (const_int 121)
13967 (const_int 58) (const_int 122)
13968 (const_int 59) (const_int 123)
13969 (const_int 60) (const_int 124)
13970 (const_int 61) (const_int 125)
13971 (const_int 62) (const_int 126)
13972 (const_int 63) (const_int 127)])))]
13973 "TARGET_AVX512BW"
13974 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13975 [(set_attr "type" "sselog")
13976 (set_attr "prefix" "evex")
13977 (set_attr "mode" "XI")])
13978
13979 (define_insn "avx2_interleave_highv32qi<mask_name>"
13980 [(set (match_operand:V32QI 0 "register_operand" "=v")
13981 (vec_select:V32QI
13982 (vec_concat:V64QI
13983 (match_operand:V32QI 1 "register_operand" "v")
13984 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13985 (parallel [(const_int 8) (const_int 40)
13986 (const_int 9) (const_int 41)
13987 (const_int 10) (const_int 42)
13988 (const_int 11) (const_int 43)
13989 (const_int 12) (const_int 44)
13990 (const_int 13) (const_int 45)
13991 (const_int 14) (const_int 46)
13992 (const_int 15) (const_int 47)
13993 (const_int 24) (const_int 56)
13994 (const_int 25) (const_int 57)
13995 (const_int 26) (const_int 58)
13996 (const_int 27) (const_int 59)
13997 (const_int 28) (const_int 60)
13998 (const_int 29) (const_int 61)
13999 (const_int 30) (const_int 62)
14000 (const_int 31) (const_int 63)])))]
14001 "TARGET_AVX2 && <mask_avx512vl_condition>"
14002 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14003 [(set_attr "type" "sselog")
14004 (set_attr "prefix" "<mask_prefix>")
14005 (set_attr "mode" "OI")])
14006
14007 (define_insn "vec_interleave_highv16qi<mask_name>"
14008 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
14009 (vec_select:V16QI
14010 (vec_concat:V32QI
14011 (match_operand:V16QI 1 "register_operand" "0,v")
14012 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
14013 (parallel [(const_int 8) (const_int 24)
14014 (const_int 9) (const_int 25)
14015 (const_int 10) (const_int 26)
14016 (const_int 11) (const_int 27)
14017 (const_int 12) (const_int 28)
14018 (const_int 13) (const_int 29)
14019 (const_int 14) (const_int 30)
14020 (const_int 15) (const_int 31)])))]
14021 "TARGET_SSE2 && <mask_avx512vl_condition>"
14022 "@
14023 punpckhbw\t{%2, %0|%0, %2}
14024 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14025 [(set_attr "isa" "noavx,avx")
14026 (set_attr "type" "sselog")
14027 (set_attr "prefix_data16" "1,*")
14028 (set_attr "prefix" "orig,<mask_prefix>")
14029 (set_attr "mode" "TI")])
14030
14031 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
14032 [(set (match_operand:V64QI 0 "register_operand" "=v")
14033 (vec_select:V64QI
14034 (vec_concat:V128QI
14035 (match_operand:V64QI 1 "register_operand" "v")
14036 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
14037 (parallel [(const_int 0) (const_int 64)
14038 (const_int 1) (const_int 65)
14039 (const_int 2) (const_int 66)
14040 (const_int 3) (const_int 67)
14041 (const_int 4) (const_int 68)
14042 (const_int 5) (const_int 69)
14043 (const_int 6) (const_int 70)
14044 (const_int 7) (const_int 71)
14045 (const_int 16) (const_int 80)
14046 (const_int 17) (const_int 81)
14047 (const_int 18) (const_int 82)
14048 (const_int 19) (const_int 83)
14049 (const_int 20) (const_int 84)
14050 (const_int 21) (const_int 85)
14051 (const_int 22) (const_int 86)
14052 (const_int 23) (const_int 87)
14053 (const_int 32) (const_int 96)
14054 (const_int 33) (const_int 97)
14055 (const_int 34) (const_int 98)
14056 (const_int 35) (const_int 99)
14057 (const_int 36) (const_int 100)
14058 (const_int 37) (const_int 101)
14059 (const_int 38) (const_int 102)
14060 (const_int 39) (const_int 103)
14061 (const_int 48) (const_int 112)
14062 (const_int 49) (const_int 113)
14063 (const_int 50) (const_int 114)
14064 (const_int 51) (const_int 115)
14065 (const_int 52) (const_int 116)
14066 (const_int 53) (const_int 117)
14067 (const_int 54) (const_int 118)
14068 (const_int 55) (const_int 119)])))]
14069 "TARGET_AVX512BW"
14070 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14071 [(set_attr "type" "sselog")
14072 (set_attr "prefix" "evex")
14073 (set_attr "mode" "XI")])
14074
14075 (define_insn "avx2_interleave_lowv32qi<mask_name>"
14076 [(set (match_operand:V32QI 0 "register_operand" "=v")
14077 (vec_select:V32QI
14078 (vec_concat:V64QI
14079 (match_operand:V32QI 1 "register_operand" "v")
14080 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
14081 (parallel [(const_int 0) (const_int 32)
14082 (const_int 1) (const_int 33)
14083 (const_int 2) (const_int 34)
14084 (const_int 3) (const_int 35)
14085 (const_int 4) (const_int 36)
14086 (const_int 5) (const_int 37)
14087 (const_int 6) (const_int 38)
14088 (const_int 7) (const_int 39)
14089 (const_int 16) (const_int 48)
14090 (const_int 17) (const_int 49)
14091 (const_int 18) (const_int 50)
14092 (const_int 19) (const_int 51)
14093 (const_int 20) (const_int 52)
14094 (const_int 21) (const_int 53)
14095 (const_int 22) (const_int 54)
14096 (const_int 23) (const_int 55)])))]
14097 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14098 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14099 [(set_attr "type" "sselog")
14100 (set_attr "prefix" "maybe_vex")
14101 (set_attr "mode" "OI")])
14102
14103 (define_insn "vec_interleave_lowv16qi<mask_name>"
14104 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
14105 (vec_select:V16QI
14106 (vec_concat:V32QI
14107 (match_operand:V16QI 1 "register_operand" "0,v")
14108 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
14109 (parallel [(const_int 0) (const_int 16)
14110 (const_int 1) (const_int 17)
14111 (const_int 2) (const_int 18)
14112 (const_int 3) (const_int 19)
14113 (const_int 4) (const_int 20)
14114 (const_int 5) (const_int 21)
14115 (const_int 6) (const_int 22)
14116 (const_int 7) (const_int 23)])))]
14117 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14118 "@
14119 punpcklbw\t{%2, %0|%0, %2}
14120 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14121 [(set_attr "isa" "noavx,avx")
14122 (set_attr "type" "sselog")
14123 (set_attr "prefix_data16" "1,*")
14124 (set_attr "prefix" "orig,vex")
14125 (set_attr "mode" "TI")])
14126
14127 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
14128 [(set (match_operand:V32HI 0 "register_operand" "=v")
14129 (vec_select:V32HI
14130 (vec_concat:V64HI
14131 (match_operand:V32HI 1 "register_operand" "v")
14132 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
14133 (parallel [(const_int 4) (const_int 36)
14134 (const_int 5) (const_int 37)
14135 (const_int 6) (const_int 38)
14136 (const_int 7) (const_int 39)
14137 (const_int 12) (const_int 44)
14138 (const_int 13) (const_int 45)
14139 (const_int 14) (const_int 46)
14140 (const_int 15) (const_int 47)
14141 (const_int 20) (const_int 52)
14142 (const_int 21) (const_int 53)
14143 (const_int 22) (const_int 54)
14144 (const_int 23) (const_int 55)
14145 (const_int 28) (const_int 60)
14146 (const_int 29) (const_int 61)
14147 (const_int 30) (const_int 62)
14148 (const_int 31) (const_int 63)])))]
14149 "TARGET_AVX512BW"
14150 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14151 [(set_attr "type" "sselog")
14152 (set_attr "prefix" "evex")
14153 (set_attr "mode" "XI")])
14154
14155 (define_insn "avx2_interleave_highv16hi<mask_name>"
14156 [(set (match_operand:V16HI 0 "register_operand" "=v")
14157 (vec_select:V16HI
14158 (vec_concat:V32HI
14159 (match_operand:V16HI 1 "register_operand" "v")
14160 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
14161 (parallel [(const_int 4) (const_int 20)
14162 (const_int 5) (const_int 21)
14163 (const_int 6) (const_int 22)
14164 (const_int 7) (const_int 23)
14165 (const_int 12) (const_int 28)
14166 (const_int 13) (const_int 29)
14167 (const_int 14) (const_int 30)
14168 (const_int 15) (const_int 31)])))]
14169 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14170 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14171 [(set_attr "type" "sselog")
14172 (set_attr "prefix" "maybe_evex")
14173 (set_attr "mode" "OI")])
14174
14175 (define_insn "vec_interleave_highv8hi<mask_name>"
14176 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
14177 (vec_select:V8HI
14178 (vec_concat:V16HI
14179 (match_operand:V8HI 1 "register_operand" "0,v")
14180 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
14181 (parallel [(const_int 4) (const_int 12)
14182 (const_int 5) (const_int 13)
14183 (const_int 6) (const_int 14)
14184 (const_int 7) (const_int 15)])))]
14185 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14186 "@
14187 punpckhwd\t{%2, %0|%0, %2}
14188 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14189 [(set_attr "isa" "noavx,avx")
14190 (set_attr "type" "sselog")
14191 (set_attr "prefix_data16" "1,*")
14192 (set_attr "prefix" "orig,maybe_vex")
14193 (set_attr "mode" "TI")])
14194
14195 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
14196 [(set (match_operand:V32HI 0 "register_operand" "=v")
14197 (vec_select:V32HI
14198 (vec_concat:V64HI
14199 (match_operand:V32HI 1 "register_operand" "v")
14200 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
14201 (parallel [(const_int 0) (const_int 32)
14202 (const_int 1) (const_int 33)
14203 (const_int 2) (const_int 34)
14204 (const_int 3) (const_int 35)
14205 (const_int 8) (const_int 40)
14206 (const_int 9) (const_int 41)
14207 (const_int 10) (const_int 42)
14208 (const_int 11) (const_int 43)
14209 (const_int 16) (const_int 48)
14210 (const_int 17) (const_int 49)
14211 (const_int 18) (const_int 50)
14212 (const_int 19) (const_int 51)
14213 (const_int 24) (const_int 56)
14214 (const_int 25) (const_int 57)
14215 (const_int 26) (const_int 58)
14216 (const_int 27) (const_int 59)])))]
14217 "TARGET_AVX512BW"
14218 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14219 [(set_attr "type" "sselog")
14220 (set_attr "prefix" "evex")
14221 (set_attr "mode" "XI")])
14222
14223 (define_insn "avx2_interleave_lowv16hi<mask_name>"
14224 [(set (match_operand:V16HI 0 "register_operand" "=v")
14225 (vec_select:V16HI
14226 (vec_concat:V32HI
14227 (match_operand:V16HI 1 "register_operand" "v")
14228 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
14229 (parallel [(const_int 0) (const_int 16)
14230 (const_int 1) (const_int 17)
14231 (const_int 2) (const_int 18)
14232 (const_int 3) (const_int 19)
14233 (const_int 8) (const_int 24)
14234 (const_int 9) (const_int 25)
14235 (const_int 10) (const_int 26)
14236 (const_int 11) (const_int 27)])))]
14237 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14238 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14239 [(set_attr "type" "sselog")
14240 (set_attr "prefix" "maybe_evex")
14241 (set_attr "mode" "OI")])
14242
14243 (define_insn "vec_interleave_lowv8hi<mask_name>"
14244 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
14245 (vec_select:V8HI
14246 (vec_concat:V16HI
14247 (match_operand:V8HI 1 "register_operand" "0,v")
14248 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
14249 (parallel [(const_int 0) (const_int 8)
14250 (const_int 1) (const_int 9)
14251 (const_int 2) (const_int 10)
14252 (const_int 3) (const_int 11)])))]
14253 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14254 "@
14255 punpcklwd\t{%2, %0|%0, %2}
14256 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14257 [(set_attr "isa" "noavx,avx")
14258 (set_attr "type" "sselog")
14259 (set_attr "prefix_data16" "1,*")
14260 (set_attr "prefix" "orig,maybe_evex")
14261 (set_attr "mode" "TI")])
14262
14263 (define_insn "avx2_interleave_highv8si<mask_name>"
14264 [(set (match_operand:V8SI 0 "register_operand" "=v")
14265 (vec_select:V8SI
14266 (vec_concat:V16SI
14267 (match_operand:V8SI 1 "register_operand" "v")
14268 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
14269 (parallel [(const_int 2) (const_int 10)
14270 (const_int 3) (const_int 11)
14271 (const_int 6) (const_int 14)
14272 (const_int 7) (const_int 15)])))]
14273 "TARGET_AVX2 && <mask_avx512vl_condition>"
14274 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14275 [(set_attr "type" "sselog")
14276 (set_attr "prefix" "maybe_evex")
14277 (set_attr "mode" "OI")])
14278
14279 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
14280 [(set (match_operand:V16SI 0 "register_operand" "=v")
14281 (vec_select:V16SI
14282 (vec_concat:V32SI
14283 (match_operand:V16SI 1 "register_operand" "v")
14284 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
14285 (parallel [(const_int 2) (const_int 18)
14286 (const_int 3) (const_int 19)
14287 (const_int 6) (const_int 22)
14288 (const_int 7) (const_int 23)
14289 (const_int 10) (const_int 26)
14290 (const_int 11) (const_int 27)
14291 (const_int 14) (const_int 30)
14292 (const_int 15) (const_int 31)])))]
14293 "TARGET_AVX512F"
14294 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14295 [(set_attr "type" "sselog")
14296 (set_attr "prefix" "evex")
14297 (set_attr "mode" "XI")])
14298
14299
14300 (define_insn "vec_interleave_highv4si<mask_name>"
14301 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
14302 (vec_select:V4SI
14303 (vec_concat:V8SI
14304 (match_operand:V4SI 1 "register_operand" "0,v")
14305 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
14306 (parallel [(const_int 2) (const_int 6)
14307 (const_int 3) (const_int 7)])))]
14308 "TARGET_SSE2 && <mask_avx512vl_condition>"
14309 "@
14310 punpckhdq\t{%2, %0|%0, %2}
14311 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14312 [(set_attr "isa" "noavx,avx")
14313 (set_attr "type" "sselog")
14314 (set_attr "prefix_data16" "1,*")
14315 (set_attr "prefix" "orig,maybe_vex")
14316 (set_attr "mode" "TI")])
14317
14318 (define_insn "avx2_interleave_lowv8si<mask_name>"
14319 [(set (match_operand:V8SI 0 "register_operand" "=v")
14320 (vec_select:V8SI
14321 (vec_concat:V16SI
14322 (match_operand:V8SI 1 "register_operand" "v")
14323 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
14324 (parallel [(const_int 0) (const_int 8)
14325 (const_int 1) (const_int 9)
14326 (const_int 4) (const_int 12)
14327 (const_int 5) (const_int 13)])))]
14328 "TARGET_AVX2 && <mask_avx512vl_condition>"
14329 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14330 [(set_attr "type" "sselog")
14331 (set_attr "prefix" "maybe_evex")
14332 (set_attr "mode" "OI")])
14333
14334 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
14335 [(set (match_operand:V16SI 0 "register_operand" "=v")
14336 (vec_select:V16SI
14337 (vec_concat:V32SI
14338 (match_operand:V16SI 1 "register_operand" "v")
14339 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
14340 (parallel [(const_int 0) (const_int 16)
14341 (const_int 1) (const_int 17)
14342 (const_int 4) (const_int 20)
14343 (const_int 5) (const_int 21)
14344 (const_int 8) (const_int 24)
14345 (const_int 9) (const_int 25)
14346 (const_int 12) (const_int 28)
14347 (const_int 13) (const_int 29)])))]
14348 "TARGET_AVX512F"
14349 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14350 [(set_attr "type" "sselog")
14351 (set_attr "prefix" "evex")
14352 (set_attr "mode" "XI")])
14353
14354 (define_insn "vec_interleave_lowv4si<mask_name>"
14355 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
14356 (vec_select:V4SI
14357 (vec_concat:V8SI
14358 (match_operand:V4SI 1 "register_operand" "0,v")
14359 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
14360 (parallel [(const_int 0) (const_int 4)
14361 (const_int 1) (const_int 5)])))]
14362 "TARGET_SSE2 && <mask_avx512vl_condition>"
14363 "@
14364 punpckldq\t{%2, %0|%0, %2}
14365 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14366 [(set_attr "isa" "noavx,avx")
14367 (set_attr "type" "sselog")
14368 (set_attr "prefix_data16" "1,*")
14369 (set_attr "prefix" "orig,vex")
14370 (set_attr "mode" "TI")])
14371
14372 (define_expand "vec_interleave_high<mode>"
14373 [(match_operand:VI_256 0 "register_operand")
14374 (match_operand:VI_256 1 "register_operand")
14375 (match_operand:VI_256 2 "nonimmediate_operand")]
14376 "TARGET_AVX2"
14377 {
14378 rtx t1 = gen_reg_rtx (<MODE>mode);
14379 rtx t2 = gen_reg_rtx (<MODE>mode);
14380 rtx t3 = gen_reg_rtx (V4DImode);
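  /* AVX2 punpck instructions interleave within each 128-bit lane, so
     build the in-lane low and high interleaves and then use vperm2i128
     (immediate 0x31 = 1 + (3 << 4)) to concatenate the upper lanes of
     t1 and t2, which yields the full-width high interleave.  */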
14381 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
14382 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
14383 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
14384 gen_lowpart (V4DImode, t2),
14385 GEN_INT (1 + (3 << 4))));
14386 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
14387 DONE;
14388 })
14389
14390 (define_expand "vec_interleave_low<mode>"
14391 [(match_operand:VI_256 0 "register_operand")
14392 (match_operand:VI_256 1 "register_operand")
14393 (match_operand:VI_256 2 "nonimmediate_operand")]
14394 "TARGET_AVX2"
14395 {
14396 rtx t1 = gen_reg_rtx (<MODE>mode);
14397 rtx t2 = gen_reg_rtx (<MODE>mode);
14398 rtx t3 = gen_reg_rtx (V4DImode);
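  /* Same idea as vec_interleave_high above: immediate 0x20 = 0 + (2 << 4)
     concatenates the low 128-bit lanes of t1 and t2.  */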
14399 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
14400 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
14401 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
14402 gen_lowpart (V4DImode, t2),
14403 GEN_INT (0 + (2 << 4))));
14404 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
14405 DONE;
14406 })
14407
14408 ;; Modes handled by pinsr patterns.
14409 (define_mode_iterator PINSR_MODE
14410 [(V16QI "TARGET_SSE4_1") V8HI
14411 (V4SI "TARGET_SSE4_1")
14412 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
14413
14414 (define_mode_attr sse2p4_1
14415 [(V16QI "sse4_1") (V8HI "sse2")
14416 (V4SI "sse4_1") (V2DI "sse4_1")])
14417
14418 (define_mode_attr pinsr_evex_isa
14419 [(V16QI "avx512bw") (V8HI "avx512bw")
14420 (V4SI "avx512dq") (V2DI "avx512dq")])
14421
14422 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
14423 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
14424 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
14425 (vec_merge:PINSR_MODE
14426 (vec_duplicate:PINSR_MODE
14427 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
14428 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
14429 (match_operand:SI 3 "const_int_operand")))]
14430 "TARGET_SSE2
14431 && ((unsigned) exact_log2 (INTVAL (operands[3]))
14432 < GET_MODE_NUNITS (<MODE>mode))"
14433 {
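  /* Operand 3 is a one-bit vec_merge mask; convert it to the element
     index the pinsr immediate expects.  pinsrb/pinsrw read a 32-bit GPR,
     hence the %k2 (SImode register) forms for scalars narrower than
     SImode.  */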
14434 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
14435
14436 switch (which_alternative)
14437 {
14438 case 0:
14439 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
14440 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
14441 /* FALLTHRU */
14442 case 1:
14443 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
14444 case 2:
14445 case 4:
14446 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
14447 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
14448 /* FALLTHRU */
14449 case 3:
14450 case 5:
14451 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14452 default:
14453 gcc_unreachable ();
14454 }
14455 }
14456 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
14457 (set_attr "type" "sselog")
14458 (set (attr "prefix_rex")
14459 (if_then_else
14460 (and (not (match_test "TARGET_AVX"))
14461 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
14462 (const_string "1")
14463 (const_string "*")))
14464 (set (attr "prefix_data16")
14465 (if_then_else
14466 (and (not (match_test "TARGET_AVX"))
14467 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14468 (const_string "1")
14469 (const_string "*")))
14470 (set (attr "prefix_extra")
14471 (if_then_else
14472 (and (not (match_test "TARGET_AVX"))
14473 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14474 (const_string "*")
14475 (const_string "1")))
14476 (set_attr "length_immediate" "1")
14477 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
14478 (set_attr "mode" "TI")])
14479
14480 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
14481 [(match_operand:AVX512_VEC 0 "register_operand")
14482 (match_operand:AVX512_VEC 1 "register_operand")
14483 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
14484 (match_operand:SI 3 "const_0_to_3_operand")
14485 (match_operand:AVX512_VEC 4 "register_operand")
14486 (match_operand:<avx512fmaskmode> 5 "register_operand")]
14487 "TARGET_AVX512F"
14488 {
14489 int mask, selector;
14490 mask = INTVAL (operands[3]);
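  /* In the _1 pattern, set bits in the vec_merge selector keep elements
     of operand 1, so clear the four (or, for 8-byte elements, two) bits
     covering the selected 128-bit quarter so that those lanes are taken
     from the broadcast of operand 2.  */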
14491 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
14492 ? 0xFFFF ^ (0x000F << mask * 4)
14493 : 0xFF ^ (0x03 << mask * 2));
14494 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
14495 (operands[0], operands[1], operands[2], GEN_INT (selector),
14496 operands[4], operands[5]));
14497 DONE;
14498 })
14499
14500 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
14501 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
14502 (vec_merge:AVX512_VEC
14503 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
14504 (vec_duplicate:AVX512_VEC
14505 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
14506 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
14507 "TARGET_AVX512F
14508 && (INTVAL (operands[3])
14509 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
14510 {
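  /* Alternative 0 is a real vinsert into the low quarter.  When operand 1
     is a zero vector (alternatives 1 and 2) the result is simply operand 2
     in the low 128 bits with the rest cleared, so a 128-bit vmov suffices;
     alternative 2 may use %xmm16+, which requires the EVEX-only
     vmovdqa32/64 and vmovdqu32/64 spellings.  */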
14511 if (which_alternative == 0)
14512 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
14513 switch (<MODE>mode)
14514 {
14515 case E_V8DFmode:
14516 if (misaligned_operand (operands[2], <ssequartermode>mode))
14517 return "vmovupd\t{%2, %x0|%x0, %2}";
14518 else
14519 return "vmovapd\t{%2, %x0|%x0, %2}";
14520 case E_V16SFmode:
14521 if (misaligned_operand (operands[2], <ssequartermode>mode))
14522 return "vmovups\t{%2, %x0|%x0, %2}";
14523 else
14524 return "vmovaps\t{%2, %x0|%x0, %2}";
14525 case E_V8DImode:
14526 if (misaligned_operand (operands[2], <ssequartermode>mode))
14527 return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
14528 : "vmovdqu\t{%2, %x0|%x0, %2}";
14529 else
14530 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
14531 : "vmovdqa\t{%2, %x0|%x0, %2}";
14532 case E_V16SImode:
14533 if (misaligned_operand (operands[2], <ssequartermode>mode))
14534 return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
14535 : "vmovdqu\t{%2, %x0|%x0, %2}";
14536 else
14537 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
14538 : "vmovdqa\t{%2, %x0|%x0, %2}";
14539 default:
14540 gcc_unreachable ();
14541 }
14542 }
14543 [(set_attr "type" "sselog,ssemov,ssemov")
14544 (set_attr "length_immediate" "1,0,0")
14545 (set_attr "prefix" "evex,vex,evex")
14546 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
14547
14548 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
14549 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
14550 (vec_merge:AVX512_VEC
14551 (match_operand:AVX512_VEC 1 "register_operand" "v")
14552 (vec_duplicate:AVX512_VEC
14553 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
14554 (match_operand:SI 3 "const_int_operand" "n")))]
14555 "TARGET_AVX512F"
14556 {
14557 int mask;
14558 int selector = INTVAL (operands[3]);
14559
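  /* Map the vec_merge selector back to the 128-bit quarter number used
     as the vinsert immediate: the quarter whose selector bits are clear
     is the one replaced by operand 2.  */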
14560 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
14561 mask = 0;
14562 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
14563 mask = 1;
14564 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
14565 mask = 2;
14566 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
14567 mask = 3;
14568 else
14569 gcc_unreachable ();
14570
14571 operands[3] = GEN_INT (mask);
14572
14573 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
14574 }
14575 [(set_attr "type" "sselog")
14576 (set_attr "length_immediate" "1")
14577 (set_attr "prefix" "evex")
14578 (set_attr "mode" "<sseinsnmode>")])
14579
14580 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
14581 [(match_operand:AVX512_VEC_2 0 "register_operand")
14582 (match_operand:AVX512_VEC_2 1 "register_operand")
14583 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14584 (match_operand:SI 3 "const_0_to_1_operand")
14585 (match_operand:AVX512_VEC_2 4 "register_operand")
14586 (match_operand:<avx512fmaskmode> 5 "register_operand")]
14587 "TARGET_AVX512F"
14588 {
14589 int mask = INTVAL (operands[3]);
14590 if (mask == 0)
14591 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
14592 operands[2], operands[4],
14593 operands[5]));
14594 else
14595 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
14596 operands[2], operands[4],
14597 operands[5]));
14598 DONE;
14599 })
14600
14601 (define_insn "vec_set_lo_<mode><mask_name>"
14602 [(set (match_operand:V16FI 0 "register_operand" "=v")
14603 (vec_concat:V16FI
14604 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14605 (vec_select:<ssehalfvecmode>
14606 (match_operand:V16FI 1 "register_operand" "v")
14607 (parallel [(const_int 8) (const_int 9)
14608 (const_int 10) (const_int 11)
14609 (const_int 12) (const_int 13)
14610 (const_int 14) (const_int 15)]))))]
14611 "TARGET_AVX512DQ"
14612 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14613 [(set_attr "type" "sselog")
14614 (set_attr "length_immediate" "1")
14615 (set_attr "prefix" "evex")
14616 (set_attr "mode" "<sseinsnmode>")])
14617
14618 (define_insn "vec_set_hi_<mode><mask_name>"
14619 [(set (match_operand:V16FI 0 "register_operand" "=v")
14620 (vec_concat:V16FI
14621 (vec_select:<ssehalfvecmode>
14622 (match_operand:V16FI 1 "register_operand" "v")
14623 (parallel [(const_int 0) (const_int 1)
14624 (const_int 2) (const_int 3)
14625 (const_int 4) (const_int 5)
14626 (const_int 6) (const_int 7)]))
14627 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14628 "TARGET_AVX512DQ"
14629 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14630 [(set_attr "type" "sselog")
14631 (set_attr "length_immediate" "1")
14632 (set_attr "prefix" "evex")
14633 (set_attr "mode" "<sseinsnmode>")])
14634
14635 (define_insn "vec_set_lo_<mode><mask_name>"
14636 [(set (match_operand:V8FI 0 "register_operand" "=v")
14637 (vec_concat:V8FI
14638 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14639 (vec_select:<ssehalfvecmode>
14640 (match_operand:V8FI 1 "register_operand" "v")
14641 (parallel [(const_int 4) (const_int 5)
14642 (const_int 6) (const_int 7)]))))]
14643 "TARGET_AVX512F"
14644 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14645 [(set_attr "type" "sselog")
14646 (set_attr "length_immediate" "1")
14647 (set_attr "prefix" "evex")
14648 (set_attr "mode" "XI")])
14649
14650 (define_insn "vec_set_hi_<mode><mask_name>"
14651 [(set (match_operand:V8FI 0 "register_operand" "=v")
14652 (vec_concat:V8FI
14653 (vec_select:<ssehalfvecmode>
14654 (match_operand:V8FI 1 "register_operand" "v")
14655 (parallel [(const_int 0) (const_int 1)
14656 (const_int 2) (const_int 3)]))
14657 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14658 "TARGET_AVX512F"
14659 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14660 [(set_attr "type" "sselog")
14661 (set_attr "length_immediate" "1")
14662 (set_attr "prefix" "evex")
14663 (set_attr "mode" "XI")])
14664
14665 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
14666 [(match_operand:VI8F_256 0 "register_operand")
14667 (match_operand:VI8F_256 1 "register_operand")
14668 (match_operand:VI8F_256 2 "nonimmediate_operand")
14669 (match_operand:SI 3 "const_0_to_3_operand")
14670 (match_operand:VI8F_256 4 "register_operand")
14671 (match_operand:QI 5 "register_operand")]
14672 "TARGET_AVX512DQ"
14673 {
14674 int mask = INTVAL (operands[3]);
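  /* Immediate bit 0 selects the 128-bit half taken from operand 1 and
     bit 1 the half taken from operand 2; operand 2's elements sit at
     indices 4-7 of the double-width concatenation.  */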
14675 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
14676 (operands[0], operands[1], operands[2],
14677 GEN_INT (((mask >> 0) & 1) * 2 + 0),
14678 GEN_INT (((mask >> 0) & 1) * 2 + 1),
14679 GEN_INT (((mask >> 1) & 1) * 2 + 4),
14680 GEN_INT (((mask >> 1) & 1) * 2 + 5),
14681 operands[4], operands[5]));
14682 DONE;
14683 })
14684
14685 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
14686 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
14687 (vec_select:VI8F_256
14688 (vec_concat:<ssedoublemode>
14689 (match_operand:VI8F_256 1 "register_operand" "v")
14690 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
14691 (parallel [(match_operand 3 "const_0_to_3_operand")
14692 (match_operand 4 "const_0_to_3_operand")
14693 (match_operand 5 "const_4_to_7_operand")
14694 (match_operand 6 "const_4_to_7_operand")])))]
14695 "TARGET_AVX512VL
14696 && (INTVAL (operands[3]) & 1) == 0
14697 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14698 && (INTVAL (operands[5]) & 1) == 0
14699 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
14700 {
14701 int mask;
14702 mask = INTVAL (operands[3]) / 2;
14703 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
14704 operands[3] = GEN_INT (mask);
14705 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
14706 }
14707 [(set_attr "type" "sselog")
14708 (set_attr "length_immediate" "1")
14709 (set_attr "prefix" "evex")
14710 (set_attr "mode" "XI")])
14711
14712 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
14713 [(match_operand:V8FI 0 "register_operand")
14714 (match_operand:V8FI 1 "register_operand")
14715 (match_operand:V8FI 2 "nonimmediate_operand")
14716 (match_operand:SI 3 "const_0_to_255_operand")
14717 (match_operand:V8FI 4 "register_operand")
14718 (match_operand:QI 5 "register_operand")]
14719 "TARGET_AVX512F"
14720 {
14721 int mask = INTVAL (operands[3]);
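  /* Each 2-bit field of the immediate selects one 128-bit chunk: the low
     two fields index into operand 1 (elements 0-7) and the high two into
     operand 2 (elements 8-15 of the concatenation).  */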
14722 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
14723 (operands[0], operands[1], operands[2],
14724 GEN_INT (((mask >> 0) & 3) * 2),
14725 GEN_INT (((mask >> 0) & 3) * 2 + 1),
14726 GEN_INT (((mask >> 2) & 3) * 2),
14727 GEN_INT (((mask >> 2) & 3) * 2 + 1),
14728 GEN_INT (((mask >> 4) & 3) * 2 + 8),
14729 GEN_INT (((mask >> 4) & 3) * 2 + 9),
14730 GEN_INT (((mask >> 6) & 3) * 2 + 8),
14731 GEN_INT (((mask >> 6) & 3) * 2 + 9),
14732 operands[4], operands[5]));
14733 DONE;
14734 })
14735
14736 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
14737 [(set (match_operand:V8FI 0 "register_operand" "=v")
14738 (vec_select:V8FI
14739 (vec_concat:<ssedoublemode>
14740 (match_operand:V8FI 1 "register_operand" "v")
14741 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
14742 (parallel [(match_operand 3 "const_0_to_7_operand")
14743 (match_operand 4 "const_0_to_7_operand")
14744 (match_operand 5 "const_0_to_7_operand")
14745 (match_operand 6 "const_0_to_7_operand")
14746 (match_operand 7 "const_8_to_15_operand")
14747 (match_operand 8 "const_8_to_15_operand")
14748 (match_operand 9 "const_8_to_15_operand")
14749 (match_operand 10 "const_8_to_15_operand")])))]
14750 "TARGET_AVX512F
14751 && (INTVAL (operands[3]) & 1) == 0
14752 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14753 && (INTVAL (operands[5]) & 1) == 0
14754 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
14755 && (INTVAL (operands[7]) & 1) == 0
14756 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14757 && (INTVAL (operands[9]) & 1) == 0
14758 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
14759 {
14760 int mask;
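  /* Reconstruct the vshuf*64x2 immediate from the element indices the
     insn condition has already validated; indices from operand 2 are
     biased by 8.  */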
14761 mask = INTVAL (operands[3]) / 2;
14762 mask |= INTVAL (operands[5]) / 2 << 2;
14763 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
14764 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
14765 operands[3] = GEN_INT (mask);
14766
14767 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14768 }
14769 [(set_attr "type" "sselog")
14770 (set_attr "length_immediate" "1")
14771 (set_attr "prefix" "evex")
14772 (set_attr "mode" "<sseinsnmode>")])
14773
14774 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
14775 [(match_operand:VI4F_256 0 "register_operand")
14776 (match_operand:VI4F_256 1 "register_operand")
14777 (match_operand:VI4F_256 2 "nonimmediate_operand")
14778 (match_operand:SI 3 "const_0_to_3_operand")
14779 (match_operand:VI4F_256 4 "register_operand")
14780 (match_operand:QI 5 "register_operand")]
14781 "TARGET_AVX512VL"
14782 {
14783 int mask = INTVAL (operands[3]);
14784 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
14785 (operands[0], operands[1], operands[2],
14786 GEN_INT (((mask >> 0) & 1) * 4 + 0),
14787 GEN_INT (((mask >> 0) & 1) * 4 + 1),
14788 GEN_INT (((mask >> 0) & 1) * 4 + 2),
14789 GEN_INT (((mask >> 0) & 1) * 4 + 3),
14790 GEN_INT (((mask >> 1) & 1) * 4 + 8),
14791 GEN_INT (((mask >> 1) & 1) * 4 + 9),
14792 GEN_INT (((mask >> 1) & 1) * 4 + 10),
14793 GEN_INT (((mask >> 1) & 1) * 4 + 11),
14794 operands[4], operands[5]));
14795 DONE;
14796 })
14797
14798 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
14799 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
14800 (vec_select:VI4F_256
14801 (vec_concat:<ssedoublemode>
14802 (match_operand:VI4F_256 1 "register_operand" "v")
14803 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
14804 (parallel [(match_operand 3 "const_0_to_7_operand")
14805 (match_operand 4 "const_0_to_7_operand")
14806 (match_operand 5 "const_0_to_7_operand")
14807 (match_operand 6 "const_0_to_7_operand")
14808 (match_operand 7 "const_8_to_15_operand")
14809 (match_operand 8 "const_8_to_15_operand")
14810 (match_operand 9 "const_8_to_15_operand")
14811 (match_operand 10 "const_8_to_15_operand")])))]
14812 "TARGET_AVX512VL
14813 && (INTVAL (operands[3]) & 3) == 0
14814 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14815 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14816 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14817 && (INTVAL (operands[7]) & 3) == 0
14818 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14819 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14820 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
14821 {
14822 int mask;
14823 mask = INTVAL (operands[3]) / 4;
14824 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
14825 operands[3] = GEN_INT (mask);
14826
14827 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14828 }
14829 [(set_attr "type" "sselog")
14830 (set_attr "length_immediate" "1")
14831 (set_attr "prefix" "evex")
14832 (set_attr "mode" "<sseinsnmode>")])
14833
14834 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
14835 [(match_operand:V16FI 0 "register_operand")
14836 (match_operand:V16FI 1 "register_operand")
14837 (match_operand:V16FI 2 "nonimmediate_operand")
14838 (match_operand:SI 3 "const_0_to_255_operand")
14839 (match_operand:V16FI 4 "register_operand")
14840 (match_operand:HI 5 "register_operand")]
14841 "TARGET_AVX512F"
14842 {
14843 int mask = INTVAL (operands[3]);
14844 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
14845 (operands[0], operands[1], operands[2],
14846 GEN_INT (((mask >> 0) & 3) * 4),
14847 GEN_INT (((mask >> 0) & 3) * 4 + 1),
14848 GEN_INT (((mask >> 0) & 3) * 4 + 2),
14849 GEN_INT (((mask >> 0) & 3) * 4 + 3),
14850 GEN_INT (((mask >> 2) & 3) * 4),
14851 GEN_INT (((mask >> 2) & 3) * 4 + 1),
14852 GEN_INT (((mask >> 2) & 3) * 4 + 2),
14853 GEN_INT (((mask >> 2) & 3) * 4 + 3),
14854 GEN_INT (((mask >> 4) & 3) * 4 + 16),
14855 GEN_INT (((mask >> 4) & 3) * 4 + 17),
14856 GEN_INT (((mask >> 4) & 3) * 4 + 18),
14857 GEN_INT (((mask >> 4) & 3) * 4 + 19),
14858 GEN_INT (((mask >> 6) & 3) * 4 + 16),
14859 GEN_INT (((mask >> 6) & 3) * 4 + 17),
14860 GEN_INT (((mask >> 6) & 3) * 4 + 18),
14861 GEN_INT (((mask >> 6) & 3) * 4 + 19),
14862 operands[4], operands[5]));
14863 DONE;
14864 })
14865
14866 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
14867 [(set (match_operand:V16FI 0 "register_operand" "=v")
14868 (vec_select:V16FI
14869 (vec_concat:<ssedoublemode>
14870 (match_operand:V16FI 1 "register_operand" "v")
14871 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
14872 (parallel [(match_operand 3 "const_0_to_15_operand")
14873 (match_operand 4 "const_0_to_15_operand")
14874 (match_operand 5 "const_0_to_15_operand")
14875 (match_operand 6 "const_0_to_15_operand")
14876 (match_operand 7 "const_0_to_15_operand")
14877 (match_operand 8 "const_0_to_15_operand")
14878 (match_operand 9 "const_0_to_15_operand")
14879 (match_operand 10 "const_0_to_15_operand")
14880 (match_operand 11 "const_16_to_31_operand")
14881 (match_operand 12 "const_16_to_31_operand")
14882 (match_operand 13 "const_16_to_31_operand")
14883 (match_operand 14 "const_16_to_31_operand")
14884 (match_operand 15 "const_16_to_31_operand")
14885 (match_operand 16 "const_16_to_31_operand")
14886 (match_operand 17 "const_16_to_31_operand")
14887 (match_operand 18 "const_16_to_31_operand")])))]
14888 "TARGET_AVX512F
14889 && (INTVAL (operands[3]) & 3) == 0
14890 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14891 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14892 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14893 && (INTVAL (operands[7]) & 3) == 0
14894 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14895 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14896 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
14897 && (INTVAL (operands[11]) & 3) == 0
14898 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
14899 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
14900 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
14901 && (INTVAL (operands[15]) & 3) == 0
14902 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
14903 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
14904 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
14905 {
14906 int mask;
14907 mask = INTVAL (operands[3]) / 4;
14908 mask |= INTVAL (operands[7]) / 4 << 2;
14909 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
14910 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
14911 operands[3] = GEN_INT (mask);
14912
14913 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
14914 }
14915 [(set_attr "type" "sselog")
14916 (set_attr "length_immediate" "1")
14917 (set_attr "prefix" "evex")
14918 (set_attr "mode" "<sseinsnmode>")])
14919
14920 (define_expand "avx512f_pshufdv3_mask"
14921 [(match_operand:V16SI 0 "register_operand")
14922 (match_operand:V16SI 1 "nonimmediate_operand")
14923 (match_operand:SI 2 "const_0_to_255_operand")
14924 (match_operand:V16SI 3 "register_operand")
14925 (match_operand:HI 4 "register_operand")]
14926 "TARGET_AVX512F"
14927 {
14928 int mask = INTVAL (operands[2]);
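  /* vpshufd applies the same four 2-bit selectors to every 128-bit lane,
     so replicate the lane-0 indices at offsets 4, 8 and 12.  */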
14929 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
14930 GEN_INT ((mask >> 0) & 3),
14931 GEN_INT ((mask >> 2) & 3),
14932 GEN_INT ((mask >> 4) & 3),
14933 GEN_INT ((mask >> 6) & 3),
14934 GEN_INT (((mask >> 0) & 3) + 4),
14935 GEN_INT (((mask >> 2) & 3) + 4),
14936 GEN_INT (((mask >> 4) & 3) + 4),
14937 GEN_INT (((mask >> 6) & 3) + 4),
14938 GEN_INT (((mask >> 0) & 3) + 8),
14939 GEN_INT (((mask >> 2) & 3) + 8),
14940 GEN_INT (((mask >> 4) & 3) + 8),
14941 GEN_INT (((mask >> 6) & 3) + 8),
14942 GEN_INT (((mask >> 0) & 3) + 12),
14943 GEN_INT (((mask >> 2) & 3) + 12),
14944 GEN_INT (((mask >> 4) & 3) + 12),
14945 GEN_INT (((mask >> 6) & 3) + 12),
14946 operands[3], operands[4]));
14947 DONE;
14948 })
14949
14950 (define_insn "avx512f_pshufd_1<mask_name>"
14951 [(set (match_operand:V16SI 0 "register_operand" "=v")
14952 (vec_select:V16SI
14953 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
14954 (parallel [(match_operand 2 "const_0_to_3_operand")
14955 (match_operand 3 "const_0_to_3_operand")
14956 (match_operand 4 "const_0_to_3_operand")
14957 (match_operand 5 "const_0_to_3_operand")
14958 (match_operand 6 "const_4_to_7_operand")
14959 (match_operand 7 "const_4_to_7_operand")
14960 (match_operand 8 "const_4_to_7_operand")
14961 (match_operand 9 "const_4_to_7_operand")
14962 (match_operand 10 "const_8_to_11_operand")
14963 (match_operand 11 "const_8_to_11_operand")
14964 (match_operand 12 "const_8_to_11_operand")
14965 (match_operand 13 "const_8_to_11_operand")
14966 (match_operand 14 "const_12_to_15_operand")
14967 (match_operand 15 "const_12_to_15_operand")
14968 (match_operand 16 "const_12_to_15_operand")
14969 (match_operand 17 "const_12_to_15_operand")])))]
14970 "TARGET_AVX512F
14971 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14972 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14973 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14974 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
14975 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
14976 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
14977 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
14978 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
14979 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
14980 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
14981 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
14982 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
14983 {
14984 int mask = 0;
14985 mask |= INTVAL (operands[2]) << 0;
14986 mask |= INTVAL (operands[3]) << 2;
14987 mask |= INTVAL (operands[4]) << 4;
14988 mask |= INTVAL (operands[5]) << 6;
14989 operands[2] = GEN_INT (mask);
14990
14991 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
14992 }
14993 [(set_attr "type" "sselog1")
14994 (set_attr "prefix" "evex")
14995 (set_attr "length_immediate" "1")
14996 (set_attr "mode" "XI")])
14997
14998 (define_expand "avx512vl_pshufdv3_mask"
14999 [(match_operand:V8SI 0 "register_operand")
15000 (match_operand:V8SI 1 "nonimmediate_operand")
15001 (match_operand:SI 2 "const_0_to_255_operand")
15002 (match_operand:V8SI 3 "register_operand")
15003 (match_operand:QI 4 "register_operand")]
15004 "TARGET_AVX512VL"
15005 {
15006 int mask = INTVAL (operands[2]);
15007 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
15008 GEN_INT ((mask >> 0) & 3),
15009 GEN_INT ((mask >> 2) & 3),
15010 GEN_INT ((mask >> 4) & 3),
15011 GEN_INT ((mask >> 6) & 3),
15012 GEN_INT (((mask >> 0) & 3) + 4),
15013 GEN_INT (((mask >> 2) & 3) + 4),
15014 GEN_INT (((mask >> 4) & 3) + 4),
15015 GEN_INT (((mask >> 6) & 3) + 4),
15016 operands[3], operands[4]));
15017 DONE;
15018 })
15019
15020 (define_expand "avx2_pshufdv3"
15021 [(match_operand:V8SI 0 "register_operand")
15022 (match_operand:V8SI 1 "nonimmediate_operand")
15023 (match_operand:SI 2 "const_0_to_255_operand")]
15024 "TARGET_AVX2"
15025 {
15026 int mask = INTVAL (operands[2]);
15027 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
15028 GEN_INT ((mask >> 0) & 3),
15029 GEN_INT ((mask >> 2) & 3),
15030 GEN_INT ((mask >> 4) & 3),
15031 GEN_INT ((mask >> 6) & 3),
15032 GEN_INT (((mask >> 0) & 3) + 4),
15033 GEN_INT (((mask >> 2) & 3) + 4),
15034 GEN_INT (((mask >> 4) & 3) + 4),
15035 GEN_INT (((mask >> 6) & 3) + 4)));
15036 DONE;
15037 })
15038
15039 (define_insn "avx2_pshufd_1<mask_name>"
15040 [(set (match_operand:V8SI 0 "register_operand" "=v")
15041 (vec_select:V8SI
15042 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
15043 (parallel [(match_operand 2 "const_0_to_3_operand")
15044 (match_operand 3 "const_0_to_3_operand")
15045 (match_operand 4 "const_0_to_3_operand")
15046 (match_operand 5 "const_0_to_3_operand")
15047 (match_operand 6 "const_4_to_7_operand")
15048 (match_operand 7 "const_4_to_7_operand")
15049 (match_operand 8 "const_4_to_7_operand")
15050 (match_operand 9 "const_4_to_7_operand")])))]
15051 "TARGET_AVX2
15052 && <mask_avx512vl_condition>
15053 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
15054 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
15055 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
15056 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
15057 {
15058 int mask = 0;
15059 mask |= INTVAL (operands[2]) << 0;
15060 mask |= INTVAL (operands[3]) << 2;
15061 mask |= INTVAL (operands[4]) << 4;
15062 mask |= INTVAL (operands[5]) << 6;
15063 operands[2] = GEN_INT (mask);
15064
15065 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15066 }
15067 [(set_attr "type" "sselog1")
15068 (set_attr "prefix" "maybe_evex")
15069 (set_attr "length_immediate" "1")
15070 (set_attr "mode" "OI")])
15071
15072 (define_expand "avx512vl_pshufd_mask"
15073 [(match_operand:V4SI 0 "register_operand")
15074 (match_operand:V4SI 1 "nonimmediate_operand")
15075 (match_operand:SI 2 "const_0_to_255_operand")
15076 (match_operand:V4SI 3 "register_operand")
15077 (match_operand:QI 4 "register_operand")]
15078 "TARGET_AVX512VL"
15079 {
15080 int mask = INTVAL (operands[2]);
15081 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
15082 GEN_INT ((mask >> 0) & 3),
15083 GEN_INT ((mask >> 2) & 3),
15084 GEN_INT ((mask >> 4) & 3),
15085 GEN_INT ((mask >> 6) & 3),
15086 operands[3], operands[4]));
15087 DONE;
15088 })
15089
15090 (define_expand "sse2_pshufd"
15091 [(match_operand:V4SI 0 "register_operand")
15092 (match_operand:V4SI 1 "vector_operand")
15093 (match_operand:SI 2 "const_int_operand")]
15094 "TARGET_SSE2"
15095 {
15096 int mask = INTVAL (operands[2]);
15097 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
15098 GEN_INT ((mask >> 0) & 3),
15099 GEN_INT ((mask >> 2) & 3),
15100 GEN_INT ((mask >> 4) & 3),
15101 GEN_INT ((mask >> 6) & 3)));
15102 DONE;
15103 })
15104
15105 (define_insn "sse2_pshufd_1<mask_name>"
15106 [(set (match_operand:V4SI 0 "register_operand" "=v")
15107 (vec_select:V4SI
15108 (match_operand:V4SI 1 "vector_operand" "vBm")
15109 (parallel [(match_operand 2 "const_0_to_3_operand")
15110 (match_operand 3 "const_0_to_3_operand")
15111 (match_operand 4 "const_0_to_3_operand")
15112 (match_operand 5 "const_0_to_3_operand")])))]
15113 "TARGET_SSE2 && <mask_avx512vl_condition>"
15114 {
15115 int mask = 0;
15116 mask |= INTVAL (operands[2]) << 0;
15117 mask |= INTVAL (operands[3]) << 2;
15118 mask |= INTVAL (operands[4]) << 4;
15119 mask |= INTVAL (operands[5]) << 6;
15120 operands[2] = GEN_INT (mask);
15121
15122 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15123 }
15124 [(set_attr "type" "sselog1")
15125 (set_attr "prefix_data16" "1")
15126 (set_attr "prefix" "<mask_prefix2>")
15127 (set_attr "length_immediate" "1")
15128 (set_attr "mode" "TI")])
15129
15130 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
15131 [(set (match_operand:V32HI 0 "register_operand" "=v")
15132 (unspec:V32HI
15133 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
15134 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15135 UNSPEC_PSHUFLW))]
15136 "TARGET_AVX512BW"
15137 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15138 [(set_attr "type" "sselog")
15139 (set_attr "prefix" "evex")
15140 (set_attr "mode" "XI")])
15141
15142 (define_expand "avx512vl_pshuflwv3_mask"
15143 [(match_operand:V16HI 0 "register_operand")
15144 (match_operand:V16HI 1 "nonimmediate_operand")
15145 (match_operand:SI 2 "const_0_to_255_operand")
15146 (match_operand:V16HI 3 "register_operand")
15147 (match_operand:HI 4 "register_operand")]
15148 "TARGET_AVX512VL && TARGET_AVX512BW"
15149 {
15150 int mask = INTVAL (operands[2]);
15151 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
15152 GEN_INT ((mask >> 0) & 3),
15153 GEN_INT ((mask >> 2) & 3),
15154 GEN_INT ((mask >> 4) & 3),
15155 GEN_INT ((mask >> 6) & 3),
15156 GEN_INT (((mask >> 0) & 3) + 8),
15157 GEN_INT (((mask >> 2) & 3) + 8),
15158 GEN_INT (((mask >> 4) & 3) + 8),
15159 GEN_INT (((mask >> 6) & 3) + 8),
15160 operands[3], operands[4]));
15161 DONE;
15162 })
15163
15164 (define_expand "avx2_pshuflwv3"
15165 [(match_operand:V16HI 0 "register_operand")
15166 (match_operand:V16HI 1 "nonimmediate_operand")
15167 (match_operand:SI 2 "const_0_to_255_operand")]
15168 "TARGET_AVX2"
15169 {
15170 int mask = INTVAL (operands[2]);
15171 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
15172 GEN_INT ((mask >> 0) & 3),
15173 GEN_INT ((mask >> 2) & 3),
15174 GEN_INT ((mask >> 4) & 3),
15175 GEN_INT ((mask >> 6) & 3),
15176 GEN_INT (((mask >> 0) & 3) + 8),
15177 GEN_INT (((mask >> 2) & 3) + 8),
15178 GEN_INT (((mask >> 4) & 3) + 8),
15179 GEN_INT (((mask >> 6) & 3) + 8)));
15180 DONE;
15181 })
15182
15183 (define_insn "avx2_pshuflw_1<mask_name>"
15184 [(set (match_operand:V16HI 0 "register_operand" "=v")
15185 (vec_select:V16HI
15186 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
15187 (parallel [(match_operand 2 "const_0_to_3_operand")
15188 (match_operand 3 "const_0_to_3_operand")
15189 (match_operand 4 "const_0_to_3_operand")
15190 (match_operand 5 "const_0_to_3_operand")
15191 (const_int 4)
15192 (const_int 5)
15193 (const_int 6)
15194 (const_int 7)
15195 (match_operand 6 "const_8_to_11_operand")
15196 (match_operand 7 "const_8_to_11_operand")
15197 (match_operand 8 "const_8_to_11_operand")
15198 (match_operand 9 "const_8_to_11_operand")
15199 (const_int 12)
15200 (const_int 13)
15201 (const_int 14)
15202 (const_int 15)])))]
15203 "TARGET_AVX2
15204 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
15205 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
15206 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
15207 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
15208 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
15209 {
15210 int mask = 0;
15211 mask |= INTVAL (operands[2]) << 0;
15212 mask |= INTVAL (operands[3]) << 2;
15213 mask |= INTVAL (operands[4]) << 4;
15214 mask |= INTVAL (operands[5]) << 6;
15215 operands[2] = GEN_INT (mask);
15216
15217 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15218 }
15219 [(set_attr "type" "sselog")
15220 (set_attr "prefix" "maybe_evex")
15221 (set_attr "length_immediate" "1")
15222 (set_attr "mode" "OI")])
15223
15224 (define_expand "avx512vl_pshuflw_mask"
15225 [(match_operand:V8HI 0 "register_operand")
15226 (match_operand:V8HI 1 "nonimmediate_operand")
15227 (match_operand:SI 2 "const_0_to_255_operand")
15228 (match_operand:V8HI 3 "register_operand")
15229 (match_operand:QI 4 "register_operand")]
15230 "TARGET_AVX512VL && TARGET_AVX512BW"
15231 {
15232 int mask = INTVAL (operands[2]);
15233 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
15234 GEN_INT ((mask >> 0) & 3),
15235 GEN_INT ((mask >> 2) & 3),
15236 GEN_INT ((mask >> 4) & 3),
15237 GEN_INT ((mask >> 6) & 3),
15238 operands[3], operands[4]));
15239 DONE;
15240 })
15241
15242 (define_expand "sse2_pshuflw"
15243 [(match_operand:V8HI 0 "register_operand")
15244 (match_operand:V8HI 1 "vector_operand")
15245 (match_operand:SI 2 "const_int_operand")]
15246 "TARGET_SSE2"
15247 {
15248 int mask = INTVAL (operands[2]);
15249 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
15250 GEN_INT ((mask >> 0) & 3),
15251 GEN_INT ((mask >> 2) & 3),
15252 GEN_INT ((mask >> 4) & 3),
15253 GEN_INT ((mask >> 6) & 3)));
15254 DONE;
15255 })
15256
15257 (define_insn "sse2_pshuflw_1<mask_name>"
15258 [(set (match_operand:V8HI 0 "register_operand" "=v")
15259 (vec_select:V8HI
15260 (match_operand:V8HI 1 "vector_operand" "vBm")
15261 (parallel [(match_operand 2 "const_0_to_3_operand")
15262 (match_operand 3 "const_0_to_3_operand")
15263 (match_operand 4 "const_0_to_3_operand")
15264 (match_operand 5 "const_0_to_3_operand")
15265 (const_int 4)
15266 (const_int 5)
15267 (const_int 6)
15268 (const_int 7)])))]
15269 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15270 {
15271 int mask = 0;
15272 mask |= INTVAL (operands[2]) << 0;
15273 mask |= INTVAL (operands[3]) << 2;
15274 mask |= INTVAL (operands[4]) << 4;
15275 mask |= INTVAL (operands[5]) << 6;
15276 operands[2] = GEN_INT (mask);
15277
15278 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15279 }
15280 [(set_attr "type" "sselog")
15281 (set_attr "prefix_data16" "0")
15282 (set_attr "prefix_rep" "1")
15283 (set_attr "prefix" "maybe_vex")
15284 (set_attr "length_immediate" "1")
15285 (set_attr "mode" "TI")])
15286
15287 (define_expand "avx2_pshufhwv3"
15288 [(match_operand:V16HI 0 "register_operand")
15289 (match_operand:V16HI 1 "nonimmediate_operand")
15290 (match_operand:SI 2 "const_0_to_255_operand")]
15291 "TARGET_AVX2"
15292 {
15293 int mask = INTVAL (operands[2]);
15294 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
15295 GEN_INT (((mask >> 0) & 3) + 4),
15296 GEN_INT (((mask >> 2) & 3) + 4),
15297 GEN_INT (((mask >> 4) & 3) + 4),
15298 GEN_INT (((mask >> 6) & 3) + 4),
15299 GEN_INT (((mask >> 0) & 3) + 12),
15300 GEN_INT (((mask >> 2) & 3) + 12),
15301 GEN_INT (((mask >> 4) & 3) + 12),
15302 GEN_INT (((mask >> 6) & 3) + 12)));
15303 DONE;
15304 })
15305
15306 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
15307 [(set (match_operand:V32HI 0 "register_operand" "=v")
15308 (unspec:V32HI
15309 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
15310 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15311 UNSPEC_PSHUFHW))]
15312 "TARGET_AVX512BW"
15313 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15314 [(set_attr "type" "sselog")
15315 (set_attr "prefix" "evex")
15316 (set_attr "mode" "XI")])
15317
15318 (define_expand "avx512vl_pshufhwv3_mask"
15319 [(match_operand:V16HI 0 "register_operand")
15320 (match_operand:V16HI 1 "nonimmediate_operand")
15321 (match_operand:SI 2 "const_0_to_255_operand")
15322 (match_operand:V16HI 3 "register_operand")
15323 (match_operand:HI 4 "register_operand")]
15324 "TARGET_AVX512VL && TARGET_AVX512BW"
15325 {
15326 int mask = INTVAL (operands[2]);
15327 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
15328 GEN_INT (((mask >> 0) & 3) + 4),
15329 GEN_INT (((mask >> 2) & 3) + 4),
15330 GEN_INT (((mask >> 4) & 3) + 4),
15331 GEN_INT (((mask >> 6) & 3) + 4),
15332 GEN_INT (((mask >> 0) & 3) + 12),
15333 GEN_INT (((mask >> 2) & 3) + 12),
15334 GEN_INT (((mask >> 4) & 3) + 12),
15335 GEN_INT (((mask >> 6) & 3) + 12),
15336 operands[3], operands[4]));
15337 DONE;
15338 })
15339
15340 (define_insn "avx2_pshufhw_1<mask_name>"
15341 [(set (match_operand:V16HI 0 "register_operand" "=v")
15342 (vec_select:V16HI
15343 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
15344 (parallel [(const_int 0)
15345 (const_int 1)
15346 (const_int 2)
15347 (const_int 3)
15348 (match_operand 2 "const_4_to_7_operand")
15349 (match_operand 3 "const_4_to_7_operand")
15350 (match_operand 4 "const_4_to_7_operand")
15351 (match_operand 5 "const_4_to_7_operand")
15352 (const_int 8)
15353 (const_int 9)
15354 (const_int 10)
15355 (const_int 11)
15356 (match_operand 6 "const_12_to_15_operand")
15357 (match_operand 7 "const_12_to_15_operand")
15358 (match_operand 8 "const_12_to_15_operand")
15359 (match_operand 9 "const_12_to_15_operand")])))]
15360 "TARGET_AVX2
15361 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
15362 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
15363 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
15364 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
15365 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
15366 {
15367 int mask = 0;
15368 mask |= (INTVAL (operands[2]) - 4) << 0;
15369 mask |= (INTVAL (operands[3]) - 4) << 2;
15370 mask |= (INTVAL (operands[4]) - 4) << 4;
15371 mask |= (INTVAL (operands[5]) - 4) << 6;
15372 operands[2] = GEN_INT (mask);
15373
15374 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15375 }
15376 [(set_attr "type" "sselog")
15377 (set_attr "prefix" "maybe_evex")
15378 (set_attr "length_immediate" "1")
15379 (set_attr "mode" "OI")])
15380
15381 (define_expand "avx512vl_pshufhw_mask"
15382 [(match_operand:V8HI 0 "register_operand")
15383 (match_operand:V8HI 1 "nonimmediate_operand")
15384 (match_operand:SI 2 "const_0_to_255_operand")
15385 (match_operand:V8HI 3 "register_operand")
15386 (match_operand:QI 4 "register_operand")]
15387 "TARGET_AVX512VL && TARGET_AVX512BW"
15388 {
15389 int mask = INTVAL (operands[2]);
15390 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
15391 GEN_INT (((mask >> 0) & 3) + 4),
15392 GEN_INT (((mask >> 2) & 3) + 4),
15393 GEN_INT (((mask >> 4) & 3) + 4),
15394 GEN_INT (((mask >> 6) & 3) + 4),
15395 operands[3], operands[4]));
15396 DONE;
15397 })
15398
15399 (define_expand "sse2_pshufhw"
15400 [(match_operand:V8HI 0 "register_operand")
15401 (match_operand:V8HI 1 "vector_operand")
15402 (match_operand:SI 2 "const_int_operand")]
15403 "TARGET_SSE2"
15404 {
15405 int mask = INTVAL (operands[2]);
15406 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
15407 GEN_INT (((mask >> 0) & 3) + 4),
15408 GEN_INT (((mask >> 2) & 3) + 4),
15409 GEN_INT (((mask >> 4) & 3) + 4),
15410 GEN_INT (((mask >> 6) & 3) + 4)));
15411 DONE;
15412 })
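
;; Worked example for the high-word expander above: pshufhw permutes only
;; elements 4..7, so each 2-bit field of the immediate is biased by 4 to
;; form the vec_select indices.  An immediate of 0x93 (0b10010011) gives
;; the fields 3, 0, 1, 2, which become the indices 7, 4, 5, 6.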
15413
15414 (define_insn "sse2_pshufhw_1<mask_name>"
15415 [(set (match_operand:V8HI 0 "register_operand" "=v")
15416 (vec_select:V8HI
15417 (match_operand:V8HI 1 "vector_operand" "vBm")
15418 (parallel [(const_int 0)
15419 (const_int 1)
15420 (const_int 2)
15421 (const_int 3)
15422 (match_operand 2 "const_4_to_7_operand")
15423 (match_operand 3 "const_4_to_7_operand")
15424 (match_operand 4 "const_4_to_7_operand")
15425 (match_operand 5 "const_4_to_7_operand")])))]
15426 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15427 {
15428 int mask = 0;
15429 mask |= (INTVAL (operands[2]) - 4) << 0;
15430 mask |= (INTVAL (operands[3]) - 4) << 2;
15431 mask |= (INTVAL (operands[4]) - 4) << 4;
15432 mask |= (INTVAL (operands[5]) - 4) << 6;
15433 operands[2] = GEN_INT (mask);
15434
15435 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15436 }
15437 [(set_attr "type" "sselog")
15438 (set_attr "prefix_rep" "1")
15439 (set_attr "prefix_data16" "0")
15440 (set_attr "prefix" "maybe_vex")
15441 (set_attr "length_immediate" "1")
15442 (set_attr "mode" "TI")])
15443
15444 (define_expand "sse2_loadd"
15445 [(set (match_operand:V4SI 0 "register_operand")
15446 (vec_merge:V4SI
15447 (vec_duplicate:V4SI
15448 (match_operand:SI 1 "nonimmediate_operand"))
15449 (match_dup 2)
15450 (const_int 1)))]
15451 "TARGET_SSE"
15452 "operands[2] = CONST0_RTX (V4SImode);")
15453
15454 (define_insn "sse2_loadld"
15455 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
15456 (vec_merge:V4SI
15457 (vec_duplicate:V4SI
15458 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
15459 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
15460 (const_int 1)))]
15461 "TARGET_SSE"
15462 "@
15463 %vmovd\t{%2, %0|%0, %2}
15464 %vmovd\t{%2, %0|%0, %2}
15465 movss\t{%2, %0|%0, %2}
15466 movss\t{%2, %0|%0, %2}
15467 vmovss\t{%2, %1, %0|%0, %1, %2}"
15468 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
15469 (set_attr "type" "ssemov")
15470 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
15471 (set_attr "mode" "TI,TI,V4SF,SF,SF")
15472 (set (attr "preferred_for_speed")
15473 (cond [(eq_attr "alternative" "1")
15474 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15475 ]
15476 (symbol_ref "true")))])
15477
15478 ;; QI and HI modes handled by pextr patterns.
15479 (define_mode_iterator PEXTR_MODE12
15480 [(V16QI "TARGET_SSE4_1") V8HI])
15481
15482 (define_insn "*vec_extract<mode>"
15483 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
15484 (vec_select:<ssescalarmode>
15485 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
15486 (parallel
15487 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
15488 "TARGET_SSE2"
15489 "@
15490 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15491 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
15492 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15493 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15494 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
15495 (set_attr "type" "sselog1")
15496 (set_attr "prefix_data16" "1")
15497 (set (attr "prefix_extra")
15498 (if_then_else
15499 (and (eq_attr "alternative" "0,2")
15500 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
15501 (const_string "*")
15502 (const_string "1")))
15503 (set_attr "length_immediate" "1")
15504 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
15505 (set_attr "mode" "TI")])
15506
15507 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
15508 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
15509 (zero_extend:SWI48
15510 (vec_select:<PEXTR_MODE12:ssescalarmode>
15511 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
15512 (parallel
15513 [(match_operand:SI 2
15514 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
15515 "TARGET_SSE2"
15516 "@
15517 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15518 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
15519 [(set_attr "isa" "*,avx512bw")
15520 (set_attr "type" "sselog1")
15521 (set_attr "prefix_data16" "1")
15522 (set (attr "prefix_extra")
15523 (if_then_else
15524 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
15525 (const_string "*")
15526 (const_string "1")))
15527 (set_attr "length_immediate" "1")
15528 (set_attr "prefix" "maybe_vex")
15529 (set_attr "mode" "TI")])
15530
15531 (define_insn "*vec_extractv16qi_zext"
15532 [(set (match_operand:HI 0 "register_operand" "=r,r")
15533 (zero_extend:HI
15534 (vec_select:QI
15535 (match_operand:V16QI 1 "register_operand" "x,v")
15536 (parallel
15537 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
15538 "TARGET_SSE4_1"
15539 "@
15540 %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
15541 vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
15542 [(set_attr "isa" "*,avx512bw")
15543 (set_attr "type" "sselog1")
15544 (set_attr "prefix_data16" "1")
15545 (set_attr "prefix_extra" "1")
15546 (set_attr "length_immediate" "1")
15547 (set_attr "prefix" "maybe_vex")
15548 (set_attr "mode" "TI")])
15549
15550 (define_insn "*vec_extract<mode>_mem"
15551 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
15552 (vec_select:<ssescalarmode>
15553 (match_operand:VI12_128 1 "memory_operand" "o")
15554 (parallel
15555 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
15556 "TARGET_SSE"
15557 "#")
15558
15559 (define_insn "*vec_extract<ssevecmodelower>_0"
15560 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
15561 (vec_select:SWI48
15562 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
15563 (parallel [(const_int 0)])))]
15564 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15565 "#"
15566 [(set_attr "isa" "*,sse2,*,*")
15567 (set (attr "preferred_for_speed")
15568 (cond [(eq_attr "alternative" "1")
15569 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15570 ]
15571 (symbol_ref "true")))])
15572
15573 (define_insn "*vec_extractv2di_0_sse"
15574 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
15575 (vec_select:DI
15576 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
15577 (parallel [(const_int 0)])))]
15578 "TARGET_SSE && !TARGET_64BIT
15579 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15580 "#"
15581 [(set_attr "isa" "sse4,*,*")
15582 (set (attr "preferred_for_speed")
15583 (cond [(eq_attr "alternative" "0")
15584 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15585 ]
15586 (symbol_ref "true")))])
15587
15588 (define_split
15589 [(set (match_operand:DI 0 "general_reg_operand")
15590 (vec_select:DI
15591 (match_operand:V2DI 1 "register_operand")
15592 (parallel [(const_int 0)])))]
15593 "TARGET_SSE4_1 && !TARGET_64BIT
15594 && reload_completed"
15595 [(set (match_dup 2) (match_dup 4))
15596 (set (match_dup 3)
15597 (vec_select:SI
15598 (match_dup 5)
15599 (parallel [(const_int 1)])))]
15600 {
15601 operands[4] = gen_lowpart (SImode, operands[1]);
15602 operands[5] = gen_lowpart (V4SImode, operands[1]);
15603 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
15604 })
15605
15606 (define_split
15607 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15608 (vec_select:SWI48x
15609 (match_operand:<ssevecmode> 1 "register_operand")
15610 (parallel [(const_int 0)])))]
15611 "TARGET_SSE && reload_completed"
15612 [(set (match_dup 0) (match_dup 1))]
15613 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
15614
15615 (define_insn "*vec_extractv4si_0_zext_sse4"
15616 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
15617 (zero_extend:DI
15618 (vec_select:SI
15619 (match_operand:V4SI 1 "register_operand" "v,x,v")
15620 (parallel [(const_int 0)]))))]
15621 "TARGET_SSE4_1"
15622 "#"
15623 [(set_attr "isa" "x64,*,avx512f")
15624 (set (attr "preferred_for_speed")
15625 (cond [(eq_attr "alternative" "0")
15626 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15627 ]
15628 (symbol_ref "true")))])
15629
15630 (define_insn "*vec_extractv4si_0_zext"
15631 [(set (match_operand:DI 0 "register_operand" "=r")
15632 (zero_extend:DI
15633 (vec_select:SI
15634 (match_operand:V4SI 1 "register_operand" "x")
15635 (parallel [(const_int 0)]))))]
15636 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
15637 "#")
15638
15639 (define_split
15640 [(set (match_operand:DI 0 "register_operand")
15641 (zero_extend:DI
15642 (vec_select:SI
15643 (match_operand:V4SI 1 "register_operand")
15644 (parallel [(const_int 0)]))))]
15645 "TARGET_SSE2 && reload_completed"
15646 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15647 "operands[1] = gen_lowpart (SImode, operands[1]);")
15648
15649 (define_insn "*vec_extractv4si"
15650 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
15651 (vec_select:SI
15652 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
15653 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
15654 "TARGET_SSE4_1"
15655 {
15656 switch (which_alternative)
15657 {
15658 case 0:
15659 case 1:
15660 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
15661
15662 case 2:
15663 case 3:
15664 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15665 return "psrldq\t{%2, %0|%0, %2}";
15666
15667 case 4:
15668 case 5:
15669 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15670 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
15671
15672 default:
15673 gcc_unreachable ();
15674 }
15675 }
15676 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
15677 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
15678 (set (attr "prefix_extra")
15679 (if_then_else (eq_attr "alternative" "0,1")
15680 (const_string "1")
15681 (const_string "*")))
15682 (set_attr "length_immediate" "1")
15683 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
15684 (set_attr "mode" "TI")])
15685
15686 (define_insn "*vec_extractv4si_zext"
15687 [(set (match_operand:DI 0 "register_operand" "=r,r")
15688 (zero_extend:DI
15689 (vec_select:SI
15690 (match_operand:V4SI 1 "register_operand" "x,v")
15691 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15692 "TARGET_64BIT && TARGET_SSE4_1"
15693 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
15694 [(set_attr "isa" "*,avx512dq")
15695 (set_attr "type" "sselog1")
15696 (set_attr "prefix_extra" "1")
15697 (set_attr "length_immediate" "1")
15698 (set_attr "prefix" "maybe_vex")
15699 (set_attr "mode" "TI")])
15700
15701 (define_insn "*vec_extractv4si_mem"
15702 [(set (match_operand:SI 0 "register_operand" "=x,r")
15703 (vec_select:SI
15704 (match_operand:V4SI 1 "memory_operand" "o,o")
15705 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
15706 "TARGET_SSE"
15707 "#")
15708
15709 (define_insn_and_split "*vec_extractv4si_zext_mem"
15710 [(set (match_operand:DI 0 "register_operand" "=x,r")
15711 (zero_extend:DI
15712 (vec_select:SI
15713 (match_operand:V4SI 1 "memory_operand" "o,o")
15714 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15715 "TARGET_64BIT && TARGET_SSE"
15716 "#"
15717 "&& reload_completed"
15718 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15719 {
15720 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
15721 })
15722
15723 (define_insn "*vec_extractv2di_1"
15724 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
15725 (vec_select:DI
15726 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
15727 (parallel [(const_int 1)])))]
15728 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15729 "@
15730 %vpextrq\t{$1, %1, %0|%0, %1, 1}
15731 vpextrq\t{$1, %1, %0|%0, %1, 1}
15732 %vmovhps\t{%1, %0|%0, %1}
15733 psrldq\t{$8, %0|%0, 8}
15734 vpsrldq\t{$8, %1, %0|%0, %1, 8}
15735 vpsrldq\t{$8, %1, %0|%0, %1, 8}
15736 movhlps\t{%1, %0|%0, %1}
15737 #
15738 #"
15739 [(set (attr "isa")
15740 (cond [(eq_attr "alternative" "0")
15741 (const_string "x64_sse4")
15742 (eq_attr "alternative" "1")
15743 (const_string "x64_avx512dq")
15744 (eq_attr "alternative" "3")
15745 (const_string "sse2_noavx")
15746 (eq_attr "alternative" "4")
15747 (const_string "avx")
15748 (eq_attr "alternative" "5")
15749 (const_string "avx512bw")
15750 (eq_attr "alternative" "6")
15751 (const_string "noavx")
15752 (eq_attr "alternative" "8")
15753 (const_string "x64")
15754 ]
15755 (const_string "*")))
15756 (set (attr "type")
15757 (cond [(eq_attr "alternative" "2,6,7")
15758 (const_string "ssemov")
15759 (eq_attr "alternative" "3,4,5")
15760 (const_string "sseishft1")
15761 (eq_attr "alternative" "8")
15762 (const_string "imov")
15763 ]
15764 (const_string "sselog1")))
15765 (set (attr "length_immediate")
15766 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
15767 (const_string "1")
15768 (const_string "*")))
15769 (set (attr "prefix_rex")
15770 (if_then_else (eq_attr "alternative" "0,1")
15771 (const_string "1")
15772 (const_string "*")))
15773 (set (attr "prefix_extra")
15774 (if_then_else (eq_attr "alternative" "0,1")
15775 (const_string "1")
15776 (const_string "*")))
15777 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
15778 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
15779
15780 (define_split
15781 [(set (match_operand:<ssescalarmode> 0 "register_operand")
15782 (vec_select:<ssescalarmode>
15783 (match_operand:VI_128 1 "memory_operand")
15784 (parallel
15785 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
15786 "TARGET_SSE && reload_completed"
15787 [(set (match_dup 0) (match_dup 1))]
15788 {
15789 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
15790
15791 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
15792 })
15793
15794 (define_insn "*vec_extractv2ti"
15795 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
15796 (vec_select:TI
15797 (match_operand:V2TI 1 "register_operand" "x,v")
15798 (parallel
15799 [(match_operand:SI 2 "const_0_to_1_operand")])))]
15800 "TARGET_AVX"
15801 "@
15802 vextract%~128\t{%2, %1, %0|%0, %1, %2}
15803 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
15804 [(set_attr "type" "sselog")
15805 (set_attr "prefix_extra" "1")
15806 (set_attr "length_immediate" "1")
15807 (set_attr "prefix" "vex,evex")
15808 (set_attr "mode" "OI")])
15809
15810 (define_insn "*vec_extractv4ti"
15811 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
15812 (vec_select:TI
15813 (match_operand:V4TI 1 "register_operand" "v")
15814 (parallel
15815 [(match_operand:SI 2 "const_0_to_3_operand")])))]
15816 "TARGET_AVX512F"
15817 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
15818 [(set_attr "type" "sselog")
15819 (set_attr "prefix_extra" "1")
15820 (set_attr "length_immediate" "1")
15821 (set_attr "prefix" "evex")
15822 (set_attr "mode" "XI")])
15823
15824 (define_mode_iterator VEXTRACTI128_MODE
15825 [(V4TI "TARGET_AVX512F") V2TI])
15826
15827 (define_split
15828 [(set (match_operand:TI 0 "nonimmediate_operand")
15829 (vec_select:TI
15830 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
15831 (parallel [(const_int 0)])))]
15832 "TARGET_AVX
15833 && reload_completed
15834 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
15835 [(set (match_dup 0) (match_dup 1))]
15836 "operands[1] = gen_lowpart (TImode, operands[1]);")
15837
15838 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
15839 ;; vector modes into vec_extract*.
15840 (define_split
15841 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15842 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
15843 "can_create_pseudo_p ()
15844 && REG_P (operands[1])
15845 && VECTOR_MODE_P (GET_MODE (operands[1]))
15846 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
15847 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
15848 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
15849 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
15850 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
15851 (parallel [(const_int 0)])))]
15852 {
15853 rtx tmp;
15854
15855 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
15856 {
15857 case 64:
15858 if (<MODE>mode == SImode)
15859 {
15860 tmp = gen_reg_rtx (V8SImode);
15861 emit_insn (gen_vec_extract_lo_v16si (tmp,
15862 gen_lowpart (V16SImode,
15863 operands[1])));
15864 }
15865 else
15866 {
15867 tmp = gen_reg_rtx (V4DImode);
15868 emit_insn (gen_vec_extract_lo_v8di (tmp,
15869 gen_lowpart (V8DImode,
15870 operands[1])));
15871 }
15872 operands[1] = tmp;
15873 /* FALLTHRU */
15874 case 32:
15875 tmp = gen_reg_rtx (<ssevecmode>mode);
15876 if (<MODE>mode == SImode)
15877 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
15878 operands[1])));
15879 else
15880 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
15881 operands[1])));
15882 operands[1] = tmp;
15883 break;
15884 case 16:
15885 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
15886 break;
15887 }
15888 })
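
;; Illustrative note on the split above: for a 64-byte source the low half
;; is extracted first (V16SI -> V8SI or V8DI -> V4DI), control falls
;; through to the 32-byte case, which extracts the low 16 bytes, and the
;; final vec_select then pulls element 0 out of the 128-bit lowpart.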
15889
15890 (define_insn "*vec_concatv2si_sse4_1"
15891 [(set (match_operand:V2SI 0 "register_operand"
15892 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
15893 (vec_concat:V2SI
15894 (match_operand:SI 1 "nonimmediate_operand"
15895 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
15896 (match_operand:SI 2 "nonimm_or_0_operand"
15897 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
15898 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15899 "@
15900 pinsrd\t{$1, %2, %0|%0, %2, 1}
15901 pinsrd\t{$1, %2, %0|%0, %2, 1}
15902 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15903 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15904 punpckldq\t{%2, %0|%0, %2}
15905 punpckldq\t{%2, %0|%0, %2}
15906 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
15907 %vmovd\t{%1, %0|%0, %1}
15908 punpckldq\t{%2, %0|%0, %2}
15909 movd\t{%1, %0|%0, %1}"
15910 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
15911 (set (attr "mmx_isa")
15912 (if_then_else (eq_attr "alternative" "8,9")
15913 (const_string "native")
15914 (const_string "*")))
15915 (set (attr "type")
15916 (cond [(eq_attr "alternative" "7")
15917 (const_string "ssemov")
15918 (eq_attr "alternative" "8")
15919 (const_string "mmxcvt")
15920 (eq_attr "alternative" "9")
15921 (const_string "mmxmov")
15922 ]
15923 (const_string "sselog")))
15924 (set (attr "prefix_extra")
15925 (if_then_else (eq_attr "alternative" "0,1,2,3")
15926 (const_string "1")
15927 (const_string "*")))
15928 (set (attr "length_immediate")
15929 (if_then_else (eq_attr "alternative" "0,1,2,3")
15930 (const_string "1")
15931 (const_string "*")))
15932 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
15933 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15934
15935 ;; ??? In theory we can match memory for the MMX alternative, but allowing
15936 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
15937 ;; alternatives pretty much forces the MMX alternative to be chosen.
15938 (define_insn "*vec_concatv2si"
15939 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
15940 (vec_concat:V2SI
15941 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
15942 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
15943 "TARGET_SSE && !TARGET_SSE4_1"
15944 "@
15945 punpckldq\t{%2, %0|%0, %2}
15946 movd\t{%1, %0|%0, %1}
15947 unpcklps\t{%2, %0|%0, %2}
15948 movss\t{%1, %0|%0, %1}
15949 punpckldq\t{%2, %0|%0, %2}
15950 movd\t{%1, %0|%0, %1}"
15951 [(set_attr "isa" "sse2,sse2,*,*,*,*")
15952 (set_attr "mmx_isa" "*,*,*,*,native,native")
15953 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
15954 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
15955
15956 (define_insn "*vec_concatv4si"
15957 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
15958 (vec_concat:V4SI
15959 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
15960 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
15961 "TARGET_SSE"
15962 "@
15963 punpcklqdq\t{%2, %0|%0, %2}
15964 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15965 movlhps\t{%2, %0|%0, %2}
15966 movhps\t{%2, %0|%0, %q2}
15967 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
15968 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
15969 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
15970 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
15971 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
15972
15973 (define_insn "*vec_concatv4si_0"
15974 [(set (match_operand:V4SI 0 "register_operand" "=v,x")
15975 (vec_concat:V4SI
15976 (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
15977 (match_operand:V2SI 2 "const0_operand" " C,C")))]
15978 "TARGET_SSE2"
15979 "@
15980 %vmovq\t{%1, %0|%0, %1}
15981 movq2dq\t{%1, %0|%0, %1}"
15982 [(set_attr "mmx_isa" "*,native")
15983 (set_attr "type" "ssemov")
15984 (set_attr "prefix" "maybe_vex,orig")
15985 (set_attr "mode" "TI")])
15986
15987 (define_insn "vec_concatv2di"
15988 [(set (match_operand:V2DI 0 "register_operand"
15989 "=Yr,*x,x ,v ,x,v ,x,x,v")
15990 (vec_concat:V2DI
15991 (match_operand:DI 1 "register_operand"
15992 " 0, 0,x ,Yv,0,Yv,0,0,v")
15993 (match_operand:DI 2 "nonimmediate_operand"
15994 " rm,rm,rm,rm,x,Yv,x,m,m")))]
15995 "TARGET_SSE"
15996 "@
15997 pinsrq\t{$1, %2, %0|%0, %2, 1}
15998 pinsrq\t{$1, %2, %0|%0, %2, 1}
15999 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
16000 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
16001 punpcklqdq\t{%2, %0|%0, %2}
16002 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
16003 movlhps\t{%2, %0|%0, %2}
16004 movhps\t{%2, %0|%0, %2}
16005 vmovhps\t{%2, %1, %0|%0, %1, %2}"
16006 [(set (attr "isa")
16007 (cond [(eq_attr "alternative" "0,1")
16008 (const_string "x64_sse4_noavx")
16009 (eq_attr "alternative" "2")
16010 (const_string "x64_avx")
16011 (eq_attr "alternative" "3")
16012 (const_string "x64_avx512dq")
16013 (eq_attr "alternative" "4")
16014 (const_string "sse2_noavx")
16015 (eq_attr "alternative" "5,8")
16016 (const_string "avx")
16017 ]
16018 (const_string "noavx")))
16019 (set (attr "type")
16020 (if_then_else
16021 (eq_attr "alternative" "0,1,2,3,4,5")
16022 (const_string "sselog")
16023 (const_string "ssemov")))
16024 (set (attr "prefix_rex")
16025 (if_then_else (eq_attr "alternative" "0,1,2,3")
16026 (const_string "1")
16027 (const_string "*")))
16028 (set (attr "prefix_extra")
16029 (if_then_else (eq_attr "alternative" "0,1,2,3")
16030 (const_string "1")
16031 (const_string "*")))
16032 (set (attr "length_immediate")
16033 (if_then_else (eq_attr "alternative" "0,1,2,3")
16034 (const_string "1")
16035 (const_string "*")))
16036 (set (attr "prefix")
16037 (cond [(eq_attr "alternative" "2")
16038 (const_string "vex")
16039 (eq_attr "alternative" "3")
16040 (const_string "evex")
16041 (eq_attr "alternative" "5,8")
16042 (const_string "maybe_evex")
16043 ]
16044 (const_string "orig")))
16045 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
16046
16047 (define_insn "*vec_concatv2di_0"
16048 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
16049 (vec_concat:V2DI
16050 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
16051 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
16052 "TARGET_SSE2"
16053 "@
16054 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
16055 %vmovq\t{%1, %0|%0, %1}
16056 movq2dq\t{%1, %0|%0, %1}"
16057 [(set_attr "isa" "x64,*,*")
16058 (set_attr "mmx_isa" "*,*,native")
16059 (set_attr "type" "ssemov")
16060 (set_attr "prefix_rex" "1,*,*")
16061 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
16062 (set_attr "mode" "TI")
16063 (set (attr "preferred_for_speed")
16064 (cond [(eq_attr "alternative" "0")
16065 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
16066 ]
16067 (symbol_ref "true")))])
16068
16069 ;; vmovq also clears the upper bits of the destination.
16070 (define_insn "vec_set<mode>_0"
16071 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
16072 (vec_merge:VI8_AVX_AVX512F
16073 (vec_duplicate:VI8_AVX_AVX512F
16074 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
16075 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
16076 (const_int 1)))]
16077 "TARGET_AVX"
16078 "vmovq\t{%2, %x0|%x0, %2}"
16079 [(set_attr "isa" "x64,*")
16080 (set_attr "type" "ssemov")
16081 (set_attr "prefix_rex" "1,*")
16082 (set_attr "prefix" "maybe_evex")
16083 (set_attr "mode" "TI")
16084 (set (attr "preferred_for_speed")
16085 (cond [(eq_attr "alternative" "0")
16086 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
16087 ]
16088 (symbol_ref "true")))])
16089
16090 (define_expand "vec_unpacks_lo_<mode>"
16091 [(match_operand:<sseunpackmode> 0 "register_operand")
16092 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16093 "TARGET_SSE2"
16094 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
16095
16096 (define_expand "vec_unpacks_hi_<mode>"
16097 [(match_operand:<sseunpackmode> 0 "register_operand")
16098 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16099 "TARGET_SSE2"
16100 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
16101
16102 (define_expand "vec_unpacku_lo_<mode>"
16103 [(match_operand:<sseunpackmode> 0 "register_operand")
16104 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16105 "TARGET_SSE2"
16106 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
16107
16108 (define_expand "vec_unpacks_sbool_lo_qi"
16109 [(match_operand:QI 0 "register_operand")
16110 (match_operand:QI 1 "register_operand")
16111 (match_operand:QI 2 "const_int_operand")]
16112 "TARGET_AVX512F"
16113 {
16114 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
16115 FAIL;
16116 emit_move_insn (operands[0], operands[1]);
16117 DONE;
16118 })
16119
16120 (define_expand "vec_unpacks_lo_hi"
16121 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
16122 (match_operand:HI 1 "register_operand"))]
16123 "TARGET_AVX512F")
16124
16125 (define_expand "vec_unpacks_lo_si"
16126 [(set (match_operand:HI 0 "register_operand")
16127 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
16128 "TARGET_AVX512F")
16129
16130 (define_expand "vec_unpacks_lo_di"
16131 [(set (match_operand:SI 0 "register_operand")
16132 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
16133 "TARGET_AVX512BW")
16134
16135 (define_expand "vec_unpacku_hi_<mode>"
16136 [(match_operand:<sseunpackmode> 0 "register_operand")
16137 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16138 "TARGET_SSE2"
16139 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
16140
16141 (define_expand "vec_unpacks_sbool_hi_qi"
16142 [(match_operand:QI 0 "register_operand")
16143 (match_operand:QI 1 "register_operand")
16144 (match_operand:QI 2 "const_int_operand")]
16145 "TARGET_AVX512F"
16146 {
16147 HOST_WIDE_INT nunits = INTVAL (operands[2]);
16148 if (nunits != 8 && nunits != 4)
16149 FAIL;
16150 if (TARGET_AVX512DQ)
16151 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
16152 GEN_INT (nunits / 2)));
16153 else
16154 {
16155 rtx tem = gen_reg_rtx (HImode);
16156 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
16157 QImode),
16158 GEN_INT (nunits / 2)));
16159 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
16160 }
16161 DONE;
16162 })
16163
16164 (define_expand "vec_unpacks_hi_hi"
16165 [(parallel
16166 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
16167 (lshiftrt:HI (match_operand:HI 1 "register_operand")
16168 (const_int 8)))
16169 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
16170 "TARGET_AVX512F")
16171
16172 (define_expand "vec_unpacks_hi_<mode>"
16173 [(parallel
16174 [(set (subreg:SWI48x
16175 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
16176 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
16177 (match_dup 2)))
16178 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
16179 "TARGET_AVX512BW"
16180 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
16181
16182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16183 ;;
16184 ;; Miscellaneous
16185 ;;
16186 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16187
16188 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
16189 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
16190 (truncate:VI12_AVX2_AVX512BW
16191 (lshiftrt:<ssedoublemode>
16192 (plus:<ssedoublemode>
16193 (plus:<ssedoublemode>
16194 (zero_extend:<ssedoublemode>
16195 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
16196 (zero_extend:<ssedoublemode>
16197 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
16198 (match_dup <mask_expand_op3>))
16199 (const_int 1))))]
16200 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16201 {
16202 operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
16203 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
16204 })
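
;; Worked example of the averaging pattern above: the RTL computes
;; (a + b + 1) >> 1 in the double-width mode and truncates, matching the
;; round-up behaviour of pavgb/pavgw; e.g. for byte inputs 250 and 253
;; the result is (250 + 253 + 1) >> 1 = 252.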
16205
16206 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
16207 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
16208 (truncate:VI12_AVX2_AVX512BW
16209 (lshiftrt:<ssedoublemode>
16210 (plus:<ssedoublemode>
16211 (plus:<ssedoublemode>
16212 (zero_extend:<ssedoublemode>
16213 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
16214 (zero_extend:<ssedoublemode>
16215 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
16216 (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
16217 (const_int 1))))]
16218 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16219 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16220 "@
16221 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
16222 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16223 [(set_attr "isa" "noavx,avx")
16224 (set_attr "type" "sseiadd")
16225 (set_attr "prefix_data16" "1,*")
16226 (set_attr "prefix" "orig,<mask_prefix>")
16227 (set_attr "mode" "<sseinsnmode>")])
16228
16229 ;; The correct representation for this is absolutely enormous, and
16230 ;; surely not generally useful.
16231 (define_insn "<sse2_avx2>_psadbw"
16232 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
16233 (unspec:VI8_AVX2_AVX512BW
16234 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
16235 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
16236 UNSPEC_PSADBW))]
16237 "TARGET_SSE2"
16238 "@
16239 psadbw\t{%2, %0|%0, %2}
16240 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
16241 [(set_attr "isa" "noavx,avx")
16242 (set_attr "type" "sseiadd")
16243 (set_attr "atom_unit" "simul")
16244 (set_attr "prefix_data16" "1,*")
16245 (set_attr "prefix" "orig,maybe_evex")
16246 (set_attr "mode" "<sseinsnmode>")])
16247
16248 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
16249 [(set (match_operand:SI 0 "register_operand" "=r")
16250 (unspec:SI
16251 [(match_operand:VF_128_256 1 "register_operand" "x")]
16252 UNSPEC_MOVMSK))]
16253 "TARGET_SSE"
16254 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
16255 [(set_attr "type" "ssemov")
16256 (set_attr "prefix" "maybe_vex")
16257 (set_attr "mode" "<MODE>")])
16258
16259 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
16260 [(set (match_operand:DI 0 "register_operand" "=r")
16261 (any_extend:DI
16262 (unspec:SI
16263 [(match_operand:VF_128_256 1 "register_operand" "x")]
16264 UNSPEC_MOVMSK)))]
16265 "TARGET_64BIT && TARGET_SSE"
16266 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
16267 [(set_attr "type" "ssemov")
16268 (set_attr "prefix" "maybe_vex")
16269 (set_attr "mode" "<MODE>")])
16270
16271 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
16272 [(set (match_operand:SI 0 "register_operand" "=r")
16273 (unspec:SI
16274 [(lt:VF_128_256
16275 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16276 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
16277 UNSPEC_MOVMSK))]
16278 "TARGET_SSE"
16279 "#"
16280 "&& reload_completed"
16281 [(set (match_dup 0)
16282 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16283 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16284 [(set_attr "type" "ssemov")
16285 (set_attr "prefix" "maybe_vex")
16286 (set_attr "mode" "<MODE>")])
16287
16288 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
16289 [(set (match_operand:DI 0 "register_operand" "=r")
16290 (any_extend:DI
16291 (unspec:SI
16292 [(lt:VF_128_256
16293 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16294 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
16295 UNSPEC_MOVMSK)))]
16296 "TARGET_64BIT && TARGET_SSE"
16297 "#"
16298 "&& reload_completed"
16299 [(set (match_dup 0)
16300 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16301 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16302 [(set_attr "type" "ssemov")
16303 (set_attr "prefix" "maybe_vex")
16304 (set_attr "mode" "<MODE>")])
16305
16306 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
16307 [(set (match_operand:SI 0 "register_operand" "=r")
16308 (unspec:SI
16309 [(subreg:VF_128_256
16310 (ashiftrt:<sseintvecmode>
16311 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16312 (match_operand:QI 2 "const_int_operand" "n")) 0)]
16313 UNSPEC_MOVMSK))]
16314 "TARGET_SSE"
16315 "#"
16316 "&& reload_completed"
16317 [(set (match_dup 0)
16318 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16319 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16320 [(set_attr "type" "ssemov")
16321 (set_attr "prefix" "maybe_vex")
16322 (set_attr "mode" "<MODE>")])
16323
16324 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
16325 [(set (match_operand:DI 0 "register_operand" "=r")
16326 (any_extend:DI
16327 (unspec:SI
16328 [(subreg:VF_128_256
16329 (ashiftrt:<sseintvecmode>
16330 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16331 (match_operand:QI 2 "const_int_operand" "n")) 0)]
16332 UNSPEC_MOVMSK)))]
16333 "TARGET_64BIT && TARGET_SSE"
16334 "#"
16335 "&& reload_completed"
16336 [(set (match_dup 0)
16337 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16338 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16339 [(set_attr "type" "ssemov")
16340 (set_attr "prefix" "maybe_vex")
16341 (set_attr "mode" "<MODE>")])
16342
16343 (define_insn "<sse2_avx2>_pmovmskb"
16344 [(set (match_operand:SI 0 "register_operand" "=r")
16345 (unspec:SI
16346 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
16347 UNSPEC_MOVMSK))]
16348 "TARGET_SSE2"
16349 "%vpmovmskb\t{%1, %0|%0, %1}"
16350 [(set_attr "type" "ssemov")
16351 (set (attr "prefix_data16")
16352 (if_then_else
16353 (match_test "TARGET_AVX")
16354 (const_string "*")
16355 (const_string "1")))
16356 (set_attr "prefix" "maybe_vex")
16357 (set_attr "mode" "SI")])
16358
16359 (define_insn "*<sse2_avx2>_pmovmskb_zext"
16360 [(set (match_operand:DI 0 "register_operand" "=r")
16361 (zero_extend:DI
16362 (unspec:SI
16363 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
16364 UNSPEC_MOVMSK)))]
16365 "TARGET_64BIT && TARGET_SSE2"
16366 "%vpmovmskb\t{%1, %k0|%k0, %1}"
16367 [(set_attr "type" "ssemov")
16368 (set (attr "prefix_data16")
16369 (if_then_else
16370 (match_test "TARGET_AVX")
16371 (const_string "*")
16372 (const_string "1")))
16373 (set_attr "prefix" "maybe_vex")
16374 (set_attr "mode" "SI")])
16375
16376 (define_insn "*sse2_pmovmskb_ext"
16377 [(set (match_operand:DI 0 "register_operand" "=r")
16378 (sign_extend:DI
16379 (unspec:SI
16380 [(match_operand:V16QI 1 "register_operand" "x")]
16381 UNSPEC_MOVMSK)))]
16382 "TARGET_64BIT && TARGET_SSE2"
16383 "%vpmovmskb\t{%1, %k0|%k0, %1}"
16384 [(set_attr "type" "ssemov")
16385 (set (attr "prefix_data16")
16386 (if_then_else
16387 (match_test "TARGET_AVX")
16388 (const_string "*")
16389 (const_string "1")))
16390 (set_attr "prefix" "maybe_vex")
16391 (set_attr "mode" "SI")])
16392
16393 (define_insn_and_split "*sse2_pmovskb_zexthisi"
16394 [(set (match_operand:SI 0 "register_operand")
16395 (zero_extend:SI
16396 (subreg:HI
16397 (unspec:SI
16398 [(match_operand:V16QI 1 "register_operand")]
16399 UNSPEC_MOVMSK) 0)))]
16400 "TARGET_SSE2 && ix86_pre_reload_split ()"
16401 "#"
16402 "&& 1"
16403 [(set (match_dup 0)
16404 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))])
16405
16406 (define_split
16407 [(set (match_operand:SI 0 "register_operand")
16408 (zero_extend:SI
16409 (not:HI
16410 (subreg:HI
16411 (unspec:SI
16412 [(match_operand:V16QI 1 "register_operand")]
16413 UNSPEC_MOVMSK) 0))))]
16414 "TARGET_SSE2"
16415 [(set (match_dup 2)
16416 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16417 (set (match_dup 0)
16418 (xor:SI (match_dup 2) (const_int 65535)))]
16419 "operands[2] = gen_reg_rtx (SImode);")
16420
16421 (define_split
16422 [(set (match_operand:SI 0 "register_operand")
16423 (unspec:SI
16424 [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
16425 UNSPEC_MOVMSK))]
16426 "TARGET_SSE2"
16427 [(set (match_dup 2)
16428 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16429 (set (match_dup 0) (match_dup 3))]
16430 {
16431 operands[2] = gen_reg_rtx (SImode);
16432 if (GET_MODE_NUNITS (<MODE>mode) == 32)
16433 operands[3] = gen_rtx_NOT (SImode, operands[2]);
16434 else
16435 {
16436 operands[3]
16437 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
16438 SImode);
16439 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
16440 }
16441 })
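
;; Illustrative note for the split above: movmsk of a negated vector
;; becomes movmsk of the original vector followed by flipping the low
;; GET_MODE_NUNITS bits, e.g. an XOR with 0xffff for V16QI, while a
;; 32-lane mode can simply use a full SImode NOT.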
16442
16443 (define_split
16444 [(set (match_operand:SI 0 "register_operand")
16445 (unspec:SI
16446 [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
16447 UNSPEC_MOVMSK))]
16448 "TARGET_SSE2
16449 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
16450 && GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
16451 [(set (match_dup 2)
16452 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16453 (set (match_dup 0) (match_dup 3))]
16454 {
16455 operands[2] = gen_reg_rtx (SImode);
16456 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
16457 if (GET_MODE_NUNITS (<MODE>mode) == 32)
16458 operands[3] = gen_rtx_NOT (SImode, operands[2]);
16459 else
16460 {
16461 operands[3]
16462 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
16463 SImode);
16464 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
16465 }
16466 })
16467
16468 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
16469 [(set (match_operand:SI 0 "register_operand" "=r")
16470 (unspec:SI
16471 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
16472 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
16473 UNSPEC_MOVMSK))]
16474 "TARGET_SSE2"
16475 "#"
16476 ""
16477 [(set (match_dup 0)
16478 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16479 ""
16480 [(set_attr "type" "ssemov")
16481 (set (attr "prefix_data16")
16482 (if_then_else
16483 (match_test "TARGET_AVX")
16484 (const_string "*")
16485 (const_string "1")))
16486 (set_attr "prefix" "maybe_vex")
16487 (set_attr "mode" "SI")])
16488
16489 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
16490 [(set (match_operand:DI 0 "register_operand" "=r")
16491 (zero_extend:DI
16492 (unspec:SI
16493 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
16494 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
16495 UNSPEC_MOVMSK)))]
16496 "TARGET_64BIT && TARGET_SSE2"
16497 "#"
16498 ""
16499 [(set (match_dup 0)
16500 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16501 ""
16502 [(set_attr "type" "ssemov")
16503 (set (attr "prefix_data16")
16504 (if_then_else
16505 (match_test "TARGET_AVX")
16506 (const_string "*")
16507 (const_string "1")))
16508 (set_attr "prefix" "maybe_vex")
16509 (set_attr "mode" "SI")])
16510
16511 (define_insn_and_split "*sse2_pmovmskb_ext_lt"
16512 [(set (match_operand:DI 0 "register_operand" "=r")
16513 (sign_extend:DI
16514 (unspec:SI
16515 [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
16516 (match_operand:V16QI 2 "const0_operand" "C"))]
16517 UNSPEC_MOVMSK)))]
16518 "TARGET_64BIT && TARGET_SSE2"
16519 "#"
16520 ""
16521 [(set (match_dup 0)
16522 (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16523 ""
16524 [(set_attr "type" "ssemov")
16525 (set (attr "prefix_data16")
16526 (if_then_else
16527 (match_test "TARGET_AVX")
16528 (const_string "*")
16529 (const_string "1")))
16530 (set_attr "prefix" "maybe_vex")
16531 (set_attr "mode" "SI")])
16532
16533 (define_expand "sse2_maskmovdqu"
16534 [(set (match_operand:V16QI 0 "memory_operand")
16535 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
16536 (match_operand:V16QI 2 "register_operand")
16537 (match_dup 0)]
16538 UNSPEC_MASKMOV))]
16539 "TARGET_SSE2")
16540
16541 (define_insn "*sse2_maskmovdqu"
16542 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
16543 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
16544 (match_operand:V16QI 2 "register_operand" "x")
16545 (mem:V16QI (match_dup 0))]
16546 UNSPEC_MASKMOV))]
16547 "TARGET_SSE2"
16548 {
16549 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
16550 that requires %v to be at the beginning of the opcode name. */
16551 if (Pmode != word_mode)
16552 fputs ("\taddr32", asm_out_file);
16553 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
16554 }
16555 [(set_attr "type" "ssemov")
16556 (set_attr "prefix_data16" "1")
16557 (set (attr "length_address")
16558 (symbol_ref ("Pmode != word_mode")))
16559 ;; The implicit %rdi operand confuses default length_vex computation.
16560 (set (attr "length_vex")
16561 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
16562 (set_attr "prefix" "maybe_vex")
16563 (set_attr "znver1_decode" "vector")
16564 (set_attr "mode" "TI")])
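
;; Illustrative note: when Pmode is narrower than word_mode (the x32 ABI),
;; the pattern above prints an explicit addr32 prefix so the implicit
;; destination address is taken from %edi rather than %rdi.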
16565
16566 (define_insn "sse_ldmxcsr"
16567 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
16568 UNSPECV_LDMXCSR)]
16569 "TARGET_SSE"
16570 "%vldmxcsr\t%0"
16571 [(set_attr "type" "sse")
16572 (set_attr "atom_sse_attr" "mxcsr")
16573 (set_attr "prefix" "maybe_vex")
16574 (set_attr "memory" "load")])
16575
16576 (define_insn "sse_stmxcsr"
16577 [(set (match_operand:SI 0 "memory_operand" "=m")
16578 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
16579 "TARGET_SSE"
16580 "%vstmxcsr\t%0"
16581 [(set_attr "type" "sse")
16582 (set_attr "atom_sse_attr" "mxcsr")
16583 (set_attr "prefix" "maybe_vex")
16584 (set_attr "memory" "store")])
16585
16586 (define_insn "sse2_clflush"
16587 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
16588 UNSPECV_CLFLUSH)]
16589 "TARGET_SSE2"
16590 "clflush\t%a0"
16591 [(set_attr "type" "sse")
16592 (set_attr "atom_sse_attr" "fence")
16593 (set_attr "memory" "unknown")])
16594
16595 ;; As per the AMD and Intel ISA manuals, the first operand holds the
16596 ;; extensions and goes in %ecx, while the second operand holds the hints
16597 ;; and goes in %eax.
16598 (define_insn "sse3_mwait"
16599 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
16600 (match_operand:SI 1 "register_operand" "a")]
16601 UNSPECV_MWAIT)]
16602 "TARGET_SSE3"
16603 ;; The 64-bit version is "mwait %rax,%rcx", but only the lower 32 bits
16604 ;; are used.  Since 32-bit register operands are implicitly zero-extended
16605 ;; to 64 bits, we only need to set up the 32-bit registers.
16606 "mwait"
16607 [(set_attr "length" "3")])
16608
16609 (define_insn "@sse3_monitor_<mode>"
16610 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
16611 (match_operand:SI 1 "register_operand" "c")
16612 (match_operand:SI 2 "register_operand" "d")]
16613 UNSPECV_MONITOR)]
16614 "TARGET_SSE3"
16615 ;; The 64-bit version is "monitor %rax,%rcx,%rdx", but only the lower
16616 ;; 32 bits of RCX and RDX are used.  Since 32-bit register operands
16617 ;; are implicitly zero-extended to 64 bits, 32-bit setup is enough.
16618 "%^monitor"
16619 [(set (attr "length")
16620 (symbol_ref ("(Pmode != word_mode) + 3")))])
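
;; A minimal usage sketch of the two patterns above, assuming the standard
;; SSE3 intrinsics from pmmintrin.h; the argument order mirrors the
;; register assignments described before sse3_mwait:
;;
;;   #include <pmmintrin.h>
;;
;;   void wait_on (void const *line)
;;   {
;;     _mm_monitor (line, 0, 0);  /* address in %eax/%rax, ext in %ecx,
;;                                   hints in %edx */
;;     _mm_mwait (0, 0);          /* extensions in %ecx, hints in %eax */
;;   }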
16621
16622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16623 ;;
16624 ;; SSSE3 instructions
16625 ;;
16626 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16627
16628 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
16629
16630 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
16631 [(set (match_operand:V16HI 0 "register_operand" "=x")
16632 (ssse3_plusminus:V16HI
16633 (vec_select:V16HI
16634 (vec_concat:V32HI
16635 (match_operand:V16HI 1 "register_operand" "x")
16636 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
16637 (parallel
16638 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
16639 (const_int 16) (const_int 18) (const_int 20) (const_int 22)
16640 (const_int 8) (const_int 10) (const_int 12) (const_int 14)
16641 (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
16642 (vec_select:V16HI
16643 (vec_concat:V32HI (match_dup 1) (match_dup 2))
16644 (parallel
16645 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
16646 (const_int 17) (const_int 19) (const_int 21) (const_int 23)
16647 (const_int 9) (const_int 11) (const_int 13) (const_int 15)
16648 (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
16649 "TARGET_AVX2"
16650 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16651 [(set_attr "type" "sseiadd")
16652 (set_attr "prefix_extra" "1")
16653 (set_attr "prefix" "vex")
16654 (set_attr "mode" "OI")])
16655
16656 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
16657 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16658 (ssse3_plusminus:V8HI
16659 (vec_select:V8HI
16660 (vec_concat:V16HI
16661 (match_operand:V8HI 1 "register_operand" "0,x")
16662 (match_operand:V8HI 2 "vector_operand" "xBm,xm"))
16663 (parallel
16664 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
16665 (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))
16666 (vec_select:V8HI
16667 (vec_concat:V16HI (match_dup 1) (match_dup 2))
16668 (parallel
16669 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
16670 (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))]
16671 "TARGET_SSSE3"
16672 "@
16673 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16674 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16675 [(set_attr "isa" "noavx,avx")
16676 (set_attr "type" "sseiadd")
16677 (set_attr "atom_unit" "complex")
16678 (set_attr "prefix_data16" "1,*")
16679 (set_attr "prefix_extra" "1")
16680 (set_attr "prefix" "orig,vex")
16681 (set_attr "mode" "TI")])
16682
16683 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
16684 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16685 (ssse3_plusminus:V4HI
16686 (vec_select:V4HI
16687 (vec_concat:V8HI
16688 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
16689 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
16690 (parallel
16691 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
16692 (vec_select:V4HI
16693 (vec_concat:V8HI (match_dup 1) (match_dup 2))
16694 (parallel
16695 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
16696 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16697 "@
16698 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16699 #
16700 #"
16701 "TARGET_SSSE3 && reload_completed
16702 && SSE_REGNO_P (REGNO (operands[0]))"
16703 [(const_int 0)]
16704 {
16705 /* Generate SSE version of the operation. */
16706 rtx op0 = lowpart_subreg (V8HImode, operands[0],
16707 GET_MODE (operands[0]));
16708 rtx op1 = lowpart_subreg (V8HImode, operands[1],
16709 GET_MODE (operands[1]));
16710 rtx op2 = lowpart_subreg (V8HImode, operands[2],
16711 GET_MODE (operands[2]));
16712 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
16713 ix86_move_vector_high_sse_to_mmx (op0);
16714 DONE;
16715 }
16716 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16717 (set_attr "type" "sseiadd")
16718 (set_attr "atom_unit" "complex")
16719 (set_attr "prefix_extra" "1")
16720 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16721 (set_attr "mode" "DI,TI,TI")])
16722
16723 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
16724 [(set (match_operand:V8SI 0 "register_operand" "=x")
16725 (plusminus:V8SI
16726 (vec_select:V8SI
16727 (vec_concat:V16SI
16728 (match_operand:V8SI 1 "register_operand" "x")
16729 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
16730 (parallel
16731 [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
16732 (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
16733 (vec_select:V8SI
16734 (vec_concat:V16SI (match_dup 1) (match_dup 2))
16735 (parallel
16736 [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
16737 (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
16738 "TARGET_AVX2"
16739 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16740 [(set_attr "type" "sseiadd")
16741 (set_attr "prefix_extra" "1")
16742 (set_attr "prefix" "vex")
16743 (set_attr "mode" "OI")])
16744
16745 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
16746 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16747 (plusminus:V4SI
16748 (vec_select:V4SI
16749 (vec_concat:V8SI
16750 (match_operand:V4SI 1 "register_operand" "0,x")
16751 (match_operand:V4SI 2 "vector_operand" "xBm,xm"))
16752 (parallel
16753 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
16754 (vec_select:V4SI
16755 (vec_concat:V8SI (match_dup 1) (match_dup 2))
16756 (parallel
16757 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
16758 "TARGET_SSSE3"
16759 "@
16760 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16761 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16762 [(set_attr "isa" "noavx,avx")
16763 (set_attr "type" "sseiadd")
16764 (set_attr "atom_unit" "complex")
16765 (set_attr "prefix_data16" "1,*")
16766 (set_attr "prefix_extra" "1")
16767 (set_attr "prefix" "orig,vex")
16768 (set_attr "mode" "TI")])
16769
16770 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
16771 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
16772 (plusminus:V2SI
16773 (vec_select:V2SI
16774 (vec_concat:V4SI
16775 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
16776 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
16777 (parallel [(const_int 0) (const_int 2)]))
16778 (vec_select:V2SI
16779 (vec_concat:V4SI (match_dup 1) (match_dup 2))
16780 (parallel [(const_int 1) (const_int 3)]))))]
16781 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16782 "@
16783 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16784 #
16785 #"
16786 "TARGET_SSSE3 && reload_completed
16787 && SSE_REGNO_P (REGNO (operands[0]))"
16788 [(const_int 0)]
16789 {
16790 /* Generate SSE version of the operation. */
16791 rtx op0 = lowpart_subreg (V4SImode, operands[0],
16792 GET_MODE (operands[0]));
16793 rtx op1 = lowpart_subreg (V4SImode, operands[1],
16794 GET_MODE (operands[1]));
16795 rtx op2 = lowpart_subreg (V4SImode, operands[2],
16796 GET_MODE (operands[2]));
16797 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
16798 ix86_move_vector_high_sse_to_mmx (op0);
16799 DONE;
16800 }
16801 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16802 (set_attr "type" "sseiadd")
16803 (set_attr "atom_unit" "complex")
16804 (set_attr "prefix_extra" "1")
16805 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16806 (set_attr "mode" "DI,TI,TI")])
16807
16808 (define_insn "avx2_pmaddubsw256"
16809 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
16810 (ss_plus:V16HI
16811 (mult:V16HI
16812 (zero_extend:V16HI
16813 (vec_select:V16QI
16814 (match_operand:V32QI 1 "register_operand" "x,v")
16815 (parallel [(const_int 0) (const_int 2)
16816 (const_int 4) (const_int 6)
16817 (const_int 8) (const_int 10)
16818 (const_int 12) (const_int 14)
16819 (const_int 16) (const_int 18)
16820 (const_int 20) (const_int 22)
16821 (const_int 24) (const_int 26)
16822 (const_int 28) (const_int 30)])))
16823 (sign_extend:V16HI
16824 (vec_select:V16QI
16825 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
16826 (parallel [(const_int 0) (const_int 2)
16827 (const_int 4) (const_int 6)
16828 (const_int 8) (const_int 10)
16829 (const_int 12) (const_int 14)
16830 (const_int 16) (const_int 18)
16831 (const_int 20) (const_int 22)
16832 (const_int 24) (const_int 26)
16833 (const_int 28) (const_int 30)]))))
16834 (mult:V16HI
16835 (zero_extend:V16HI
16836 (vec_select:V16QI (match_dup 1)
16837 (parallel [(const_int 1) (const_int 3)
16838 (const_int 5) (const_int 7)
16839 (const_int 9) (const_int 11)
16840 (const_int 13) (const_int 15)
16841 (const_int 17) (const_int 19)
16842 (const_int 21) (const_int 23)
16843 (const_int 25) (const_int 27)
16844 (const_int 29) (const_int 31)])))
16845 (sign_extend:V16HI
16846 (vec_select:V16QI (match_dup 2)
16847 (parallel [(const_int 1) (const_int 3)
16848 (const_int 5) (const_int 7)
16849 (const_int 9) (const_int 11)
16850 (const_int 13) (const_int 15)
16851 (const_int 17) (const_int 19)
16852 (const_int 21) (const_int 23)
16853 (const_int 25) (const_int 27)
16854 (const_int 29) (const_int 31)]))))))]
16855 "TARGET_AVX2"
16856 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16857 [(set_attr "isa" "*,avx512bw")
16858 (set_attr "type" "sseiadd")
16859 (set_attr "prefix_extra" "1")
16860 (set_attr "prefix" "vex,evex")
16861 (set_attr "mode" "OI")])
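
;; For reference, a minimal scalar C sketch of the per-lane operation the
;; avx2_pmaddubsw256 RTL above spells out: multiply unsigned bytes of the
;; first operand by the corresponding signed bytes of the second, add each
;; adjacent pair of products and saturate to 16 bits.  The function and
;; variable names below are illustrative only, not part of GCC.
;;
;;   #include <stdint.h>
;;
;;   static int16_t
;;   pmaddubsw_lane (uint8_t a0, uint8_t a1, int8_t b0, int8_t b1)
;;   {
;;     int32_t sum = (int32_t) a0 * b0 + (int32_t) a1 * b1;
;;     if (sum > 32767) sum = 32767;      /* ss_plus saturation */
;;     if (sum < -32768) sum = -32768;
;;     return (int16_t) sum;
;;   }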
16862
16863 ;; The correct representation for this is absolutely enormous, and
16864 ;; surely not generally useful.
16865 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
16866 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16867 (unspec:VI2_AVX512VL
16868 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
16869 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
16870 UNSPEC_PMADDUBSW512))]
16871 "TARGET_AVX512BW"
16872 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
16873 [(set_attr "type" "sseiadd")
16874 (set_attr "prefix" "evex")
16875 (set_attr "mode" "XI")])
16876
16877 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
16878 [(set (match_operand:V32HI 0 "register_operand" "=v")
16879 (truncate:V32HI
16880 (lshiftrt:V32SI
16881 (plus:V32SI
16882 (lshiftrt:V32SI
16883 (mult:V32SI
16884 (sign_extend:V32SI
16885 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
16886 (sign_extend:V32SI
16887 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
16888 (const_int 14))
16889 (const_vector:V32HI [(const_int 1) (const_int 1)
16890 (const_int 1) (const_int 1)
16891 (const_int 1) (const_int 1)
16892 (const_int 1) (const_int 1)
16893 (const_int 1) (const_int 1)
16894 (const_int 1) (const_int 1)
16895 (const_int 1) (const_int 1)
16896 (const_int 1) (const_int 1)
16897 (const_int 1) (const_int 1)
16898 (const_int 1) (const_int 1)
16899 (const_int 1) (const_int 1)
16900 (const_int 1) (const_int 1)
16901 (const_int 1) (const_int 1)
16902 (const_int 1) (const_int 1)
16903 (const_int 1) (const_int 1)
16904 (const_int 1) (const_int 1)]))
16905 (const_int 1))))]
16906 "TARGET_AVX512BW"
16907 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16908 [(set_attr "type" "sseimul")
16909 (set_attr "prefix" "evex")
16910 (set_attr "mode" "XI")])
16911
16912 (define_insn "ssse3_pmaddubsw128"
16913 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
16914 (ss_plus:V8HI
16915 (mult:V8HI
16916 (zero_extend:V8HI
16917 (vec_select:V8QI
16918 (match_operand:V16QI 1 "register_operand" "0,x,v")
16919 (parallel [(const_int 0) (const_int 2)
16920 (const_int 4) (const_int 6)
16921 (const_int 8) (const_int 10)
16922 (const_int 12) (const_int 14)])))
16923 (sign_extend:V8HI
16924 (vec_select:V8QI
16925 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
16926 (parallel [(const_int 0) (const_int 2)
16927 (const_int 4) (const_int 6)
16928 (const_int 8) (const_int 10)
16929 (const_int 12) (const_int 14)]))))
16930 (mult:V8HI
16931 (zero_extend:V8HI
16932 (vec_select:V8QI (match_dup 1)
16933 (parallel [(const_int 1) (const_int 3)
16934 (const_int 5) (const_int 7)
16935 (const_int 9) (const_int 11)
16936 (const_int 13) (const_int 15)])))
16937 (sign_extend:V8HI
16938 (vec_select:V8QI (match_dup 2)
16939 (parallel [(const_int 1) (const_int 3)
16940 (const_int 5) (const_int 7)
16941 (const_int 9) (const_int 11)
16942 (const_int 13) (const_int 15)]))))))]
16943 "TARGET_SSSE3"
16944 "@
16945 pmaddubsw\t{%2, %0|%0, %2}
16946 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
16947 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16948 [(set_attr "isa" "noavx,avx,avx512bw")
16949 (set_attr "type" "sseiadd")
16950 (set_attr "atom_unit" "simul")
16951 (set_attr "prefix_data16" "1,*,*")
16952 (set_attr "prefix_extra" "1")
16953 (set_attr "prefix" "orig,vex,evex")
16954 (set_attr "mode" "TI")])
16955
16956 (define_insn "ssse3_pmaddubsw"
16957 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16958 (ss_plus:V4HI
16959 (mult:V4HI
16960 (zero_extend:V4HI
16961 (vec_select:V4QI
16962 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
16963 (parallel [(const_int 0) (const_int 2)
16964 (const_int 4) (const_int 6)])))
16965 (sign_extend:V4HI
16966 (vec_select:V4QI
16967 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
16968 (parallel [(const_int 0) (const_int 2)
16969 (const_int 4) (const_int 6)]))))
16970 (mult:V4HI
16971 (zero_extend:V4HI
16972 (vec_select:V4QI (match_dup 1)
16973 (parallel [(const_int 1) (const_int 3)
16974 (const_int 5) (const_int 7)])))
16975 (sign_extend:V4HI
16976 (vec_select:V4QI (match_dup 2)
16977 (parallel [(const_int 1) (const_int 3)
16978 (const_int 5) (const_int 7)]))))))]
16979 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16980 "@
16981 pmaddubsw\t{%2, %0|%0, %2}
16982 pmaddubsw\t{%2, %0|%0, %2}
16983 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16984 [(set_attr "isa" "*,noavx,avx")
16985 (set_attr "mmx_isa" "native,*,*")
16986 (set_attr "type" "sseiadd")
16987 (set_attr "atom_unit" "simul")
16988 (set_attr "prefix_extra" "1")
16989 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16990 (set_attr "mode" "DI,TI,TI")])
16991
16992 (define_mode_iterator PMULHRSW
16993 [V8HI (V16HI "TARGET_AVX2")])
16994
16995 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
16996 [(set (match_operand:PMULHRSW 0 "register_operand")
16997 (vec_merge:PMULHRSW
16998 (truncate:PMULHRSW
16999 (lshiftrt:<ssedoublemode>
17000 (plus:<ssedoublemode>
17001 (lshiftrt:<ssedoublemode>
17002 (mult:<ssedoublemode>
17003 (sign_extend:<ssedoublemode>
17004 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
17005 (sign_extend:<ssedoublemode>
17006 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
17007 (const_int 14))
17008 (match_dup 5))
17009 (const_int 1)))
17010 (match_operand:PMULHRSW 3 "register_operand")
17011 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
17012 "TARGET_AVX512BW && TARGET_AVX512VL"
17013 {
17014 operands[5] = CONST1_RTX(<MODE>mode);
17015 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
17016 })
17017
17018 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
17019 [(set (match_operand:PMULHRSW 0 "register_operand")
17020 (truncate:PMULHRSW
17021 (lshiftrt:<ssedoublemode>
17022 (plus:<ssedoublemode>
17023 (lshiftrt:<ssedoublemode>
17024 (mult:<ssedoublemode>
17025 (sign_extend:<ssedoublemode>
17026 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
17027 (sign_extend:<ssedoublemode>
17028 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
17029 (const_int 14))
17030 (match_dup 3))
17031 (const_int 1))))]
17032 "TARGET_SSSE3"
17033 {
17034 operands[3] = CONST1_RTX(<MODE>mode);
17035 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
17036 })
17037
17038 (define_expand "smulhrs<mode>3"
17039 [(set (match_operand:VI2_AVX2 0 "register_operand")
17040 (truncate:VI2_AVX2
17041 (lshiftrt:<ssedoublemode>
17042 (plus:<ssedoublemode>
17043 (lshiftrt:<ssedoublemode>
17044 (mult:<ssedoublemode>
17045 (sign_extend:<ssedoublemode>
17046 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
17047 (sign_extend:<ssedoublemode>
17048 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
17049 (const_int 14))
17050 (match_dup 3))
17051 (const_int 1))))]
17052 "TARGET_SSSE3"
17053 {
17054 operands[3] = CONST1_RTX(<MODE>mode);
17055 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
17056 })
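
;; For reference, the shift/add/shift RTL used by the pmulhrsw patterns
;; above and below computes ((a * b >> 14) + 1) >> 1 per 16-bit lane,
;; i.e. bits 30:15 of the product rounded to nearest.  A scalar C sketch
;; (identifiers are illustrative only):
;;
;;   #include <stdint.h>
;;
;;   static int16_t
;;   mulhrs_lane (int16_t a, int16_t b)
;;   {
;;     int32_t p = (int32_t) a * (int32_t) b;    /* exact 32-bit product */
;;     return (int16_t) (((p >> 14) + 1) >> 1);  /* round, keep bits 30:15 */
;;   }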
17057
17058 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
17059 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
17060 (truncate:VI2_AVX2
17061 (lshiftrt:<ssedoublemode>
17062 (plus:<ssedoublemode>
17063 (lshiftrt:<ssedoublemode>
17064 (mult:<ssedoublemode>
17065 (sign_extend:<ssedoublemode>
17066 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
17067 (sign_extend:<ssedoublemode>
17068 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
17069 (const_int 14))
17070 (match_operand:VI2_AVX2 3 "const1_operand"))
17071 (const_int 1))))]
17072 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
17073 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17074 "@
17075 pmulhrsw\t{%2, %0|%0, %2}
17076 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
17077 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17078 [(set_attr "isa" "noavx,avx,avx512bw")
17079 (set_attr "type" "sseimul")
17080 (set_attr "prefix_data16" "1,*,*")
17081 (set_attr "prefix_extra" "1")
17082 (set_attr "prefix" "orig,maybe_evex,evex")
17083 (set_attr "mode" "<sseinsnmode>")])
17084
17085 (define_expand "smulhrsv4hi3"
17086 [(set (match_operand:V4HI 0 "register_operand")
17087 (truncate:V4HI
17088 (lshiftrt:V4SI
17089 (plus:V4SI
17090 (lshiftrt:V4SI
17091 (mult:V4SI
17092 (sign_extend:V4SI
17093 (match_operand:V4HI 1 "register_operand"))
17094 (sign_extend:V4SI
17095 (match_operand:V4HI 2 "register_operand")))
17096 (const_int 14))
17097 (match_dup 3))
17098 (const_int 1))))]
17099 "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
17100 {
17101 operands[3] = CONST1_RTX(V4HImode);
17102 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
17103 })
17104
17105 (define_expand "ssse3_pmulhrswv4hi3"
17106 [(set (match_operand:V4HI 0 "register_operand")
17107 (truncate:V4HI
17108 (lshiftrt:V4SI
17109 (plus:V4SI
17110 (lshiftrt:V4SI
17111 (mult:V4SI
17112 (sign_extend:V4SI
17113 (match_operand:V4HI 1 "register_mmxmem_operand"))
17114 (sign_extend:V4SI
17115 (match_operand:V4HI 2 "register_mmxmem_operand")))
17116 (const_int 14))
17117 (match_dup 3))
17118 (const_int 1))))]
17119 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17120 {
17121 operands[3] = CONST1_RTX(V4HImode);
17122 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
17123 })
17124
17125 (define_insn "*ssse3_pmulhrswv4hi3"
17126 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
17127 (truncate:V4HI
17128 (lshiftrt:V4SI
17129 (plus:V4SI
17130 (lshiftrt:V4SI
17131 (mult:V4SI
17132 (sign_extend:V4SI
17133 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
17134 (sign_extend:V4SI
17135 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
17136 (const_int 14))
17137 (match_operand:V4HI 3 "const1_operand"))
17138 (const_int 1))))]
17139 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
17140 && TARGET_SSSE3
17141 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17142 "@
17143 pmulhrsw\t{%2, %0|%0, %2}
17144 pmulhrsw\t{%2, %0|%0, %2}
17145 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
17146 [(set_attr "isa" "*,noavx,avx")
17147 (set_attr "mmx_isa" "native,*,*")
17148 (set_attr "type" "sseimul")
17149 (set_attr "prefix_extra" "1")
17150 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17151 (set_attr "mode" "DI,TI,TI")])
17152
17153 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
17154 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
17155 (unspec:VI1_AVX512
17156 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
17157 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
17158 UNSPEC_PSHUFB))]
17159 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17160 "@
17161 pshufb\t{%2, %0|%0, %2}
17162 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
17163 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17164 [(set_attr "isa" "noavx,avx,avx512bw")
17165 (set_attr "type" "sselog1")
17166 (set_attr "prefix_data16" "1,*,*")
17167 (set_attr "prefix_extra" "1")
17168 (set_attr "prefix" "orig,maybe_evex,evex")
17169 (set_attr "btver2_decode" "vector")
17170 (set_attr "mode" "<sseinsnmode>")])
17171
17172 (define_insn_and_split "ssse3_pshufbv8qi3"
17173 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
17174 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
17175 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
17176 UNSPEC_PSHUFB))
17177 (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
17178 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17179 "@
17180 pshufb\t{%2, %0|%0, %2}
17181 #
17182 #"
17183 "TARGET_SSSE3 && reload_completed
17184 && SSE_REGNO_P (REGNO (operands[0]))"
17185 [(set (match_dup 3) (match_dup 5))
17186 (set (match_dup 3)
17187 (and:V4SI (match_dup 3) (match_dup 2)))
17188 (set (match_dup 0)
17189 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
17190 {
17191 /* Emulate MMX version of pshufb with SSE version by masking out the
17192 bit 3 of the shuffle control byte. */
17193 operands[0] = lowpart_subreg (V16QImode, operands[0],
17194 GET_MODE (operands[0]));
17195 operands[1] = lowpart_subreg (V16QImode, operands[1],
17196 GET_MODE (operands[1]));
17197 operands[2] = lowpart_subreg (V4SImode, operands[2],
17198 GET_MODE (operands[2]));
17199 operands[4] = lowpart_subreg (V16QImode, operands[3],
17200 GET_MODE (operands[3]));
17201 rtx vec_const = ix86_build_const_vector (V4SImode, true,
17202 gen_int_mode (0xf7f7f7f7, SImode));
17203 operands[5] = force_const_mem (V4SImode, vec_const);
17204 }
17205 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17206 (set_attr "prefix_extra" "1")
17207 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17208 (set_attr "mode" "DI,TI,TI")])
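
;; A short note on the 0xf7f7f7f7 mask used by the ssse3_pshufbv8qi3 split
;; above: SSE pshufb indexes 16 source bytes with bits 0-3 of each control
;; byte and zeroes the lane when bit 7 is set.  Clearing bit 3 restricts the
;; index to bytes 0-7, i.e. the low quadword that holds the MMX operand,
;; while preserving the zeroing bit.  A one-line C illustration (the helper
;; name is hypothetical):
;;
;;   static unsigned char
;;   mmx_to_sse_ctrl (unsigned char c)
;;   {
;;     return c & 0xf7;   /* one byte of the vector mask above */
;;   }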
17209
17210 (define_insn "<ssse3_avx2>_psign<mode>3"
17211 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
17212 (unspec:VI124_AVX2
17213 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
17214 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
17215 UNSPEC_PSIGN))]
17216 "TARGET_SSSE3"
17217 "@
17218 psign<ssemodesuffix>\t{%2, %0|%0, %2}
17219 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17220 [(set_attr "isa" "noavx,avx")
17221 (set_attr "type" "sselog1")
17222 (set_attr "prefix_data16" "1,*")
17223 (set_attr "prefix_extra" "1")
17224 (set_attr "prefix" "orig,vex")
17225 (set_attr "mode" "<sseinsnmode>")])
17226
17227 (define_insn "ssse3_psign<mode>3"
17228 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
17229 (unspec:MMXMODEI
17230 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
17231 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
17232 UNSPEC_PSIGN))]
17233 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17234 "@
17235 psign<mmxvecsize>\t{%2, %0|%0, %2}
17236 psign<mmxvecsize>\t{%2, %0|%0, %2}
17237 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
17238 [(set_attr "isa" "*,noavx,avx")
17239 (set_attr "mmx_isa" "native,*,*")
17240 (set_attr "type" "sselog1")
17241 (set_attr "prefix_extra" "1")
17242 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17243 (set_attr "mode" "DI,TI,TI")])
17244
17245 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
17246 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
17247 (vec_merge:VI1_AVX512
17248 (unspec:VI1_AVX512
17249 [(match_operand:VI1_AVX512 1 "register_operand" "v")
17250 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
17251 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
17252 UNSPEC_PALIGNR)
17253 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
17254 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
17255 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
17256 {
17257 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17258 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
17259 }
17260 [(set_attr "type" "sseishft")
17261 (set_attr "atom_unit" "sishuf")
17262 (set_attr "prefix_extra" "1")
17263 (set_attr "length_immediate" "1")
17264 (set_attr "prefix" "evex")
17265 (set_attr "mode" "<sseinsnmode>")])
17266
17267 (define_insn "<ssse3_avx2>_palignr<mode>"
17268 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
17269 (unspec:SSESCALARMODE
17270 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
17271 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
17272 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
17273 UNSPEC_PALIGNR))]
17274 "TARGET_SSSE3"
17275 {
17276 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17277
17278 switch (which_alternative)
17279 {
17280 case 0:
17281 return "palignr\t{%3, %2, %0|%0, %2, %3}";
17282 case 1:
17283 case 2:
17284 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17285 default:
17286 gcc_unreachable ();
17287 }
17288 }
17289 [(set_attr "isa" "noavx,avx,avx512bw")
17290 (set_attr "type" "sseishft")
17291 (set_attr "atom_unit" "sishuf")
17292 (set_attr "prefix_data16" "1,*,*")
17293 (set_attr "prefix_extra" "1")
17294 (set_attr "length_immediate" "1")
17295 (set_attr "prefix" "orig,vex,evex")
17296 (set_attr "mode" "<sseinsnmode>")])
17297
17298 (define_insn_and_split "ssse3_palignrdi"
17299 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
17300 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
17301 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
17302 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
17303 UNSPEC_PALIGNR))]
17304 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17305 {
17306 switch (which_alternative)
17307 {
17308 case 0:
17309 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17310 return "palignr\t{%3, %2, %0|%0, %2, %3}";
17311 case 1:
17312 case 2:
17313 return "#";
17314 default:
17315 gcc_unreachable ();
17316 }
17317 }
17318 "TARGET_SSSE3 && reload_completed
17319 && SSE_REGNO_P (REGNO (operands[0]))"
17320 [(set (match_dup 0)
17321 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
17322 {
17323 /* Emulate MMX palignrdi with SSE psrldq. */
17324 rtx op0 = lowpart_subreg (V2DImode, operands[0],
17325 GET_MODE (operands[0]));
17326 if (TARGET_AVX)
17327 emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
17328 else
17329 {
17330 /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
17331 emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
17332 /* Swap bits 0:63 with bits 64:127. */
17333 rtx mask = gen_rtx_PARALLEL (VOIDmode,
17334 gen_rtvec (4, GEN_INT (2),
17335 GEN_INT (3),
17336 GEN_INT (0),
17337 GEN_INT (1)));
17338 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
17339 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
17340 emit_insn (gen_rtx_SET (op1, op2));
17341 }
17342 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
17343 }
17344 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17345 (set_attr "type" "sseishft")
17346 (set_attr "atom_unit" "sishuf")
17347 (set_attr "prefix_extra" "1")
17348 (set_attr "length_immediate" "1")
17349 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17350 (set_attr "mode" "DI,TI,TI")])
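
;; For reference, the 64-bit palignr handled by the ssse3_palignrdi split
;; above is the low quadword of the concatenation op1:op2 (destination
;; high, source low) shifted right by the immediate byte count, which is
;; why it can be emulated with vec_concat followed by an lshiftrt in V1TI.
;; A scalar C sketch for byte counts 0-15, using GCC's __int128 extension
;; (names are illustrative only):
;;
;;   #include <stdint.h>
;;
;;   static uint64_t
;;   palignr_di (uint64_t dst, uint64_t src, unsigned nbytes)
;;   {
;;     unsigned __int128 cat = ((unsigned __int128) dst << 64) | src;
;;     return (uint64_t) (cat >> (nbytes * 8));
;;   }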
17351
17352 ;; Mode iterator to handle the absence of V2DI and V4DI modes for the
17353 ;; abs instruction on pre-AVX512 targets.
17354 (define_mode_iterator VI1248_AVX512VL_AVX512BW
17355 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
17356 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
17357 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
17358 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
17359
17360 (define_insn "*abs<mode>2"
17361 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
17362 (abs:VI1248_AVX512VL_AVX512BW
17363 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
17364 "TARGET_SSSE3"
17365 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
17366 [(set_attr "type" "sselog1")
17367 (set_attr "prefix_data16" "1")
17368 (set_attr "prefix_extra" "1")
17369 (set_attr "prefix" "maybe_vex")
17370 (set_attr "mode" "<sseinsnmode>")])
17371
17372 (define_insn "abs<mode>2_mask"
17373 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17374 (vec_merge:VI48_AVX512VL
17375 (abs:VI48_AVX512VL
17376 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
17377 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
17378 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
17379 "TARGET_AVX512F"
17380 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17381 [(set_attr "type" "sselog1")
17382 (set_attr "prefix" "evex")
17383 (set_attr "mode" "<sseinsnmode>")])
17384
17385 (define_insn "abs<mode>2_mask"
17386 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17387 (vec_merge:VI12_AVX512VL
17388 (abs:VI12_AVX512VL
17389 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
17390 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
17391 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
17392 "TARGET_AVX512BW"
17393 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17394 [(set_attr "type" "sselog1")
17395 (set_attr "prefix" "evex")
17396 (set_attr "mode" "<sseinsnmode>")])
17397
17398 (define_expand "abs<mode>2"
17399 [(set (match_operand:VI_AVX2 0 "register_operand")
17400 (abs:VI_AVX2
17401 (match_operand:VI_AVX2 1 "vector_operand")))]
17402 "TARGET_SSE2"
17403 {
17404 if (!TARGET_SSSE3
17405 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
17406 && !TARGET_AVX512VL))
17407 {
17408 ix86_expand_sse2_abs (operands[0], operands[1]);
17409 DONE;
17410 }
17411 })
17412
17413 (define_insn "ssse3_abs<mode>2"
17414 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
17415 (abs:MMXMODEI
17416 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
17417 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17418 "@
17419 pabs<mmxvecsize>\t{%1, %0|%0, %1}
17420 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
17421 [(set_attr "mmx_isa" "native,*")
17422 (set_attr "type" "sselog1")
17423 (set_attr "prefix_rep" "0")
17424 (set_attr "prefix_extra" "1")
17425 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17426 (set_attr "mode" "DI,TI")])
17427
17428 (define_expand "abs<mode>2"
17429 [(set (match_operand:MMXMODEI 0 "register_operand")
17430 (abs:MMXMODEI
17431 (match_operand:MMXMODEI 1 "register_operand")))]
17432 "TARGET_MMX_WITH_SSE && TARGET_SSSE3")
17433
17434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17435 ;;
17436 ;; AMD SSE4A instructions
17437 ;;
17438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17439
17440 (define_insn "sse4a_movnt<mode>"
17441 [(set (match_operand:MODEF 0 "memory_operand" "=m")
17442 (unspec:MODEF
17443 [(match_operand:MODEF 1 "register_operand" "x")]
17444 UNSPEC_MOVNT))]
17445 "TARGET_SSE4A"
17446 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
17447 [(set_attr "type" "ssemov")
17448 (set_attr "mode" "<MODE>")])
17449
17450 (define_insn "sse4a_vmmovnt<mode>"
17451 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
17452 (unspec:<ssescalarmode>
17453 [(vec_select:<ssescalarmode>
17454 (match_operand:VF_128 1 "register_operand" "x")
17455 (parallel [(const_int 0)]))]
17456 UNSPEC_MOVNT))]
17457 "TARGET_SSE4A"
17458 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
17459 [(set_attr "type" "ssemov")
17460 (set_attr "mode" "<ssescalarmode>")])
17461
17462 (define_insn "sse4a_extrqi"
17463 [(set (match_operand:V2DI 0 "register_operand" "=x")
17464 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17465 (match_operand 2 "const_0_to_255_operand")
17466 (match_operand 3 "const_0_to_255_operand")]
17467 UNSPEC_EXTRQI))]
17468 "TARGET_SSE4A"
17469 "extrq\t{%3, %2, %0|%0, %2, %3}"
17470 [(set_attr "type" "sse")
17471 (set_attr "prefix_data16" "1")
17472 (set_attr "length_immediate" "2")
17473 (set_attr "mode" "TI")])
17474
17475 (define_insn "sse4a_extrq"
17476 [(set (match_operand:V2DI 0 "register_operand" "=x")
17477 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17478 (match_operand:V16QI 2 "register_operand" "x")]
17479 UNSPEC_EXTRQ))]
17480 "TARGET_SSE4A"
17481 "extrq\t{%2, %0|%0, %2}"
17482 [(set_attr "type" "sse")
17483 (set_attr "prefix_data16" "1")
17484 (set_attr "mode" "TI")])
17485
17486 (define_insn "sse4a_insertqi"
17487 [(set (match_operand:V2DI 0 "register_operand" "=x")
17488 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17489 (match_operand:V2DI 2 "register_operand" "x")
17490 (match_operand 3 "const_0_to_255_operand")
17491 (match_operand 4 "const_0_to_255_operand")]
17492 UNSPEC_INSERTQI))]
17493 "TARGET_SSE4A"
17494 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
17495 [(set_attr "type" "sseins")
17496 (set_attr "prefix_data16" "0")
17497 (set_attr "prefix_rep" "1")
17498 (set_attr "length_immediate" "2")
17499 (set_attr "mode" "TI")])
17500
17501 (define_insn "sse4a_insertq"
17502 [(set (match_operand:V2DI 0 "register_operand" "=x")
17503 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17504 (match_operand:V2DI 2 "register_operand" "x")]
17505 UNSPEC_INSERTQ))]
17506 "TARGET_SSE4A"
17507 "insertq\t{%2, %0|%0, %2}"
17508 [(set_attr "type" "sseins")
17509 (set_attr "prefix_data16" "0")
17510 (set_attr "prefix_rep" "1")
17511 (set_attr "mode" "TI")])
17512
17513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17514 ;;
17515 ;; Intel SSE4.1 instructions
17516 ;;
17517 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17518
17519 ;; Mapping of immediate bits for blend instructions
17520 (define_mode_attr blendbits
17521 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
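
;; Each immediate bit selects the corresponding element from operand 2
;; (bit set) or operand 1 (bit clear), which is why the maximum immediate
;; above is 2^nelts - 1 (e.g. 15 for the four-element modes).  A scalar C
;; sketch of the selection (names are illustrative only):
;;
;;   static void
;;   blend (float *dst, const float *op1, const float *op2,
;;          unsigned imm, int nelts)
;;   {
;;     for (int i = 0; i < nelts; i++)
;;       dst[i] = (imm >> i) & 1 ? op2[i] : op1[i];
;;   }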
17522
17523 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
17524 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17525 (vec_merge:VF_128_256
17526 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17527 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
17528 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
17529 "TARGET_SSE4_1"
17530 "@
17531 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17532 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17533 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17534 [(set_attr "isa" "noavx,noavx,avx")
17535 (set_attr "type" "ssemov")
17536 (set_attr "length_immediate" "1")
17537 (set_attr "prefix_data16" "1,1,*")
17538 (set_attr "prefix_extra" "1")
17539 (set_attr "prefix" "orig,orig,vex")
17540 (set_attr "mode" "<MODE>")])
17541
17542 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
17543 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17544 (unspec:VF_128_256
17545 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17546 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17547 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
17548 UNSPEC_BLENDV))]
17549 "TARGET_SSE4_1"
17550 "@
17551 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17552 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17553 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17554 [(set_attr "isa" "noavx,noavx,avx")
17555 (set_attr "type" "ssemov")
17556 (set_attr "length_immediate" "1")
17557 (set_attr "prefix_data16" "1,1,*")
17558 (set_attr "prefix_extra" "1")
17559 (set_attr "prefix" "orig,orig,vex")
17560 (set_attr "btver2_decode" "vector,vector,vector")
17561 (set_attr "mode" "<MODE>")])
17562
17563 ;; Also define scalar versions. These are used for conditional move.
17564 ;; Using subregs into vector modes causes register allocation lossage.
17565 ;; These patterns do not allow memory operands because the native
17566 ;; instructions read the full 128-bits.
17567
17568 (define_insn "sse4_1_blendv<ssemodesuffix>"
17569 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
17570 (unspec:MODEF
17571 [(match_operand:MODEF 1 "register_operand" "0,0,x")
17572 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
17573 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
17574 UNSPEC_BLENDV))]
17575 "TARGET_SSE4_1"
17576 {
17577 if (get_attr_mode (insn) == MODE_V4SF)
17578 return (which_alternative == 2
17579 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17580 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
17581 else
17582 return (which_alternative == 2
17583 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17584 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
17585 }
17586 [(set_attr "isa" "noavx,noavx,avx")
17587 (set_attr "type" "ssemov")
17588 (set_attr "length_immediate" "1")
17589 (set_attr "prefix_data16" "1,1,*")
17590 (set_attr "prefix_extra" "1")
17591 (set_attr "prefix" "orig,orig,vex")
17592 (set_attr "btver2_decode" "vector,vector,vector")
17593 (set (attr "mode")
17594 (cond [(match_test "TARGET_AVX")
17595 (const_string "<ssevecmode>")
17596 (match_test "optimize_function_for_size_p (cfun)")
17597 (const_string "V4SF")
17598 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
17599 (const_string "V4SF")
17600 ]
17601 (const_string "<ssevecmode>")))])
17602
17603 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
17604 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17605 (unspec:VF_128_256
17606 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17607 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17608 (lt:VF_128_256
17609 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
17610 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
17611 UNSPEC_BLENDV))]
17612 "TARGET_SSE4_1"
17613 "#"
17614 "&& reload_completed"
17615 [(set (match_dup 0)
17616 (unspec:VF_128_256
17617 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17618 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
17619 [(set_attr "isa" "noavx,noavx,avx")
17620 (set_attr "type" "ssemov")
17621 (set_attr "length_immediate" "1")
17622 (set_attr "prefix_data16" "1,1,*")
17623 (set_attr "prefix_extra" "1")
17624 (set_attr "prefix" "orig,orig,vex")
17625 (set_attr "btver2_decode" "vector,vector,vector")
17626 (set_attr "mode" "<MODE>")])
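
;; The _lt split above is valid because blendv selects on the sign bit of
;; each mask element only, and an element-wise "x < 0" mask has its sign
;; bit set exactly when x does, so the comparison can be dropped and x used
;; as the mask directly.  A scalar C sketch of the per-element selection
;; (names are illustrative only):
;;
;;   #include <stdint.h>
;;
;;   static float
;;   blendv_lane (float op1, float op2, int32_t mask)
;;   {
;;     return mask < 0 ? op2 : op1;   /* i.e. select on the sign bit */
;;   }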
17627
17628 (define_mode_attr ssefltmodesuffix
17629 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
17630
17631 (define_mode_attr ssefltvecmode
17632 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
17633
17634 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
17635 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
17636 (unspec:<ssebytemode>
17637 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
17638 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
17639 (subreg:<ssebytemode>
17640 (lt:VI48_AVX
17641 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
17642 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
17643 UNSPEC_BLENDV))]
17644 "TARGET_SSE4_1"
17645 "#"
17646 "&& reload_completed"
17647 [(set (match_dup 0)
17648 (unspec:<ssefltvecmode>
17649 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17650 {
17651 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
17652 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
17653 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
17654 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
17655 }
17656 [(set_attr "isa" "noavx,noavx,avx")
17657 (set_attr "type" "ssemov")
17658 (set_attr "length_immediate" "1")
17659 (set_attr "prefix_data16" "1,1,*")
17660 (set_attr "prefix_extra" "1")
17661 (set_attr "prefix" "orig,orig,vex")
17662 (set_attr "btver2_decode" "vector,vector,vector")
17663 (set_attr "mode" "<ssefltvecmode>")])
17664
17665 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
17666 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17667 (unspec:VF_128_256
17668 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
17669 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17670 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17671 UNSPEC_DP))]
17672 "TARGET_SSE4_1"
17673 "@
17674 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17675 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17676 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17677 [(set_attr "isa" "noavx,noavx,avx")
17678 (set_attr "type" "ssemul")
17679 (set_attr "length_immediate" "1")
17680 (set_attr "prefix_data16" "1,1,*")
17681 (set_attr "prefix_extra" "1")
17682 (set_attr "prefix" "orig,orig,vex")
17683 (set_attr "btver2_decode" "vector,vector,vector")
17684 (set_attr "znver1_decode" "vector,vector,vector")
17685 (set_attr "mode" "<MODE>")])
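
;; For reference, the dpps/dppd immediate is split into two nibbles: the
;; high nibble selects which element products enter the sum, the low nibble
;; selects which result elements receive that sum (the rest are zeroed).
;; A scalar C sketch for the V4SF case (names are illustrative only):
;;
;;   static void
;;   dpps (float *dst, const float *a, const float *b, unsigned imm)
;;   {
;;     float sum = 0.0f;
;;     for (int i = 0; i < 4; i++)
;;       if ((imm >> (4 + i)) & 1)
;;         sum += a[i] * b[i];
;;     for (int i = 0; i < 4; i++)
;;       dst[i] = ((imm >> i) & 1) ? sum : 0.0f;
;;   }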
17686
17687 ;; Mode attribute used by `vmovntdqa' pattern
17688 (define_mode_attr vi8_sse4_1_avx2_avx512
17689 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
17690
17691 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
17692 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
17693 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
17694 UNSPEC_MOVNTDQA))]
17695 "TARGET_SSE4_1"
17696 "%vmovntdqa\t{%1, %0|%0, %1}"
17697 [(set_attr "isa" "noavx,noavx,avx")
17698 (set_attr "type" "ssemov")
17699 (set_attr "prefix_extra" "1,1,*")
17700 (set_attr "prefix" "orig,orig,maybe_evex")
17701 (set_attr "mode" "<sseinsnmode>")])
17702
17703 (define_insn "<sse4_1_avx2>_mpsadbw"
17704 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17705 (unspec:VI1_AVX2
17706 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17707 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17708 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17709 UNSPEC_MPSADBW))]
17710 "TARGET_SSE4_1"
17711 "@
17712 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17713 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17714 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17715 [(set_attr "isa" "noavx,noavx,avx")
17716 (set_attr "type" "sselog1")
17717 (set_attr "length_immediate" "1")
17718 (set_attr "prefix_extra" "1")
17719 (set_attr "prefix" "orig,orig,vex")
17720 (set_attr "btver2_decode" "vector,vector,vector")
17721 (set_attr "znver1_decode" "vector,vector,vector")
17722 (set_attr "mode" "<sseinsnmode>")])
17723
17724 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
17725 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
17726 (vec_concat:VI2_AVX2
17727 (us_truncate:<ssehalfvecmode>
17728 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
17729 (us_truncate:<ssehalfvecmode>
17730 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
17731 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17732 "@
17733 packusdw\t{%2, %0|%0, %2}
17734 packusdw\t{%2, %0|%0, %2}
17735 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
17736 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17737 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
17738 (set_attr "type" "sselog")
17739 (set_attr "prefix_extra" "1")
17740 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
17741 (set_attr "mode" "<sseinsnmode>")])
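
;; For reference, packusdw narrows each signed 32-bit element to 16 bits
;; with unsigned saturation, i.e. clamping to [0, 65535], before
;; concatenating the two narrowed operands.  A scalar C sketch of one
;; element (names are illustrative only):
;;
;;   #include <stdint.h>
;;
;;   static uint16_t
;;   pack_us_dw (int32_t x)
;;   {
;;     if (x < 0) return 0;
;;     if (x > 65535) return 65535;
;;     return (uint16_t) x;
;;   }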
17742
17743 (define_insn "<sse4_1_avx2>_pblendvb"
17744 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17745 (unspec:VI1_AVX2
17746 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17747 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17748 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
17749 UNSPEC_BLENDV))]
17750 "TARGET_SSE4_1"
17751 "@
17752 pblendvb\t{%3, %2, %0|%0, %2, %3}
17753 pblendvb\t{%3, %2, %0|%0, %2, %3}
17754 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17755 [(set_attr "isa" "noavx,noavx,avx")
17756 (set_attr "type" "ssemov")
17757 (set_attr "prefix_extra" "1")
17758 (set_attr "length_immediate" "*,*,1")
17759 (set_attr "prefix" "orig,orig,vex")
17760 (set_attr "btver2_decode" "vector,vector,vector")
17761 (set_attr "mode" "<sseinsnmode>")])
17762
17763 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
17764 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17765 (unspec:VI1_AVX2
17766 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17767 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17768 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
17769 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
17770 UNSPEC_BLENDV))]
17771 "TARGET_SSE4_1"
17772 "#"
17773 ""
17774 [(set (match_dup 0)
17775 (unspec:VI1_AVX2
17776 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17777 ""
17778 [(set_attr "isa" "noavx,noavx,avx")
17779 (set_attr "type" "ssemov")
17780 (set_attr "prefix_extra" "1")
17781 (set_attr "length_immediate" "*,*,1")
17782 (set_attr "prefix" "orig,orig,vex")
17783 (set_attr "btver2_decode" "vector,vector,vector")
17784 (set_attr "mode" "<sseinsnmode>")])
17785
17786 (define_insn "sse4_1_pblendw"
17787 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17788 (vec_merge:V8HI
17789 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
17790 (match_operand:V8HI 1 "register_operand" "0,0,x")
17791 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
17792 "TARGET_SSE4_1"
17793 "@
17794 pblendw\t{%3, %2, %0|%0, %2, %3}
17795 pblendw\t{%3, %2, %0|%0, %2, %3}
17796 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17797 [(set_attr "isa" "noavx,noavx,avx")
17798 (set_attr "type" "ssemov")
17799 (set_attr "prefix_extra" "1")
17800 (set_attr "length_immediate" "1")
17801 (set_attr "prefix" "orig,orig,vex")
17802 (set_attr "mode" "TI")])
17803
17804 ;; The builtin uses an 8-bit immediate. Expand that.
17805 (define_expand "avx2_pblendw"
17806 [(set (match_operand:V16HI 0 "register_operand")
17807 (vec_merge:V16HI
17808 (match_operand:V16HI 2 "nonimmediate_operand")
17809 (match_operand:V16HI 1 "register_operand")
17810 (match_operand:SI 3 "const_0_to_255_operand")))]
17811 "TARGET_AVX2"
17812 {
17813 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
17814 operands[3] = GEN_INT (val << 8 | val);
17815 })
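
;; The expander above duplicates the builtin's 8-bit selector into both
;; halves of the 16-bit vec_merge mask because VPBLENDW applies the same
;; eight immediate bits to each 128-bit lane; e.g. 0xa5 becomes 0xa5a5.
;; The *avx2_pblendw insn below masks the value back down to the 8-bit
;; immediate actually emitted.  A one-line C illustration of the expansion
;; (hypothetical helper name):
;;
;;   static unsigned
;;   dup_pblendw_imm (unsigned val)
;;   {
;;     return ((val & 0xff) << 8) | (val & 0xff);
;;   }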
17816
17817 (define_insn "*avx2_pblendw"
17818 [(set (match_operand:V16HI 0 "register_operand" "=x")
17819 (vec_merge:V16HI
17820 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
17821 (match_operand:V16HI 1 "register_operand" "x")
17822 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
17823 "TARGET_AVX2"
17824 {
17825 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
17826 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17827 }
17828 [(set_attr "type" "ssemov")
17829 (set_attr "prefix_extra" "1")
17830 (set_attr "length_immediate" "1")
17831 (set_attr "prefix" "vex")
17832 (set_attr "mode" "OI")])
17833
17834 (define_insn "avx2_pblendd<mode>"
17835 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
17836 (vec_merge:VI4_AVX2
17837 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
17838 (match_operand:VI4_AVX2 1 "register_operand" "x")
17839 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
17840 "TARGET_AVX2"
17841 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17842 [(set_attr "type" "ssemov")
17843 (set_attr "prefix_extra" "1")
17844 (set_attr "length_immediate" "1")
17845 (set_attr "prefix" "vex")
17846 (set_attr "mode" "<sseinsnmode>")])
17847
17848 (define_insn "sse4_1_phminposuw"
17849 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17850 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
17851 UNSPEC_PHMINPOSUW))]
17852 "TARGET_SSE4_1"
17853 "%vphminposuw\t{%1, %0|%0, %1}"
17854 [(set_attr "isa" "noavx,noavx,avx")
17855 (set_attr "type" "sselog1")
17856 (set_attr "prefix_extra" "1")
17857 (set_attr "prefix" "orig,orig,vex")
17858 (set_attr "mode" "TI")])
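
;; For reference, phminposuw places the minimum of the eight unsigned
;; source words in result word 0, its index in result word 1, and zeroes
;; the remaining words.  A scalar C sketch (names are illustrative only):
;;
;;   #include <stdint.h>
;;
;;   static void
;;   phminposuw (uint16_t dst[8], const uint16_t src[8])
;;   {
;;     unsigned best = 0;
;;     for (unsigned i = 1; i < 8; i++)
;;       if (src[i] < src[best])
;;         best = i;               /* ties keep the lowest index */
;;     dst[0] = src[best];
;;     dst[1] = (uint16_t) best;
;;     for (unsigned i = 2; i < 8; i++)
;;       dst[i] = 0;
;;   }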
17859
17860 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
17861 [(set (match_operand:V16HI 0 "register_operand" "=v")
17862 (any_extend:V16HI
17863 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17864 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17865 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17866 [(set_attr "type" "ssemov")
17867 (set_attr "prefix_extra" "1")
17868 (set_attr "prefix" "maybe_evex")
17869 (set_attr "mode" "OI")])
17870
17871 (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1"
17872 [(set (match_operand:V32QI 0 "register_operand" "=v")
17873 (vec_select:V32QI
17874 (vec_concat:V64QI
17875 (match_operand:V32QI 1 "nonimmediate_operand" "vm")
17876 (match_operand:V32QI 2 "const0_operand" "C"))
17877 (match_parallel 3 "pmovzx_parallel"
17878 [(match_operand 4 "const_int_operand" "n")])))]
17879 "TARGET_AVX2"
17880 "#"
17881 "&& reload_completed"
17882 [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
17883 {
17884 operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
17885 operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode);
17886 })
17887
17888 (define_expand "<insn>v16qiv16hi2"
17889 [(set (match_operand:V16HI 0 "register_operand")
17890 (any_extend:V16HI
17891 (match_operand:V16QI 1 "nonimmediate_operand")))]
17892 "TARGET_AVX2")
17893
17894 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
17895 [(set (match_operand:V32HI 0 "register_operand" "=v")
17896 (any_extend:V32HI
17897 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
17898 "TARGET_AVX512BW"
17899 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17900 [(set_attr "type" "ssemov")
17901 (set_attr "prefix_extra" "1")
17902 (set_attr "prefix" "evex")
17903 (set_attr "mode" "XI")])
17904
17905 (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_1"
17906 [(set (match_operand:V64QI 0 "register_operand" "=v")
17907 (vec_select:V64QI
17908 (vec_concat:V128QI
17909 (match_operand:V64QI 1 "nonimmediate_operand" "vm")
17910 (match_operand:V64QI 2 "const0_operand" "C"))
17911 (match_parallel 3 "pmovzx_parallel"
17912 [(match_operand 4 "const_int_operand" "n")])))]
17913 "TARGET_AVX512BW"
17914 "#"
17915 "&& reload_completed"
17916 [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
17917 {
17918 operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
17919 operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode);
17920 })
17921
17922 (define_expand "<insn>v32qiv32hi2"
17923 [(set (match_operand:V32HI 0 "register_operand")
17924 (any_extend:V32HI
17925 (match_operand:V32QI 1 "nonimmediate_operand")))]
17926 "TARGET_AVX512BW")
17927
17928 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
17929 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17930 (any_extend:V8HI
17931 (vec_select:V8QI
17932 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17933 (parallel [(const_int 0) (const_int 1)
17934 (const_int 2) (const_int 3)
17935 (const_int 4) (const_int 5)
17936 (const_int 6) (const_int 7)]))))]
17937 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17938 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17939 [(set_attr "isa" "noavx,noavx,avx")
17940 (set_attr "type" "ssemov")
17941 (set_attr "prefix_extra" "1")
17942 (set_attr "prefix" "orig,orig,maybe_evex")
17943 (set_attr "mode" "TI")])
17944
17945 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
17946 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17947 (any_extend:V8HI
17948 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
17949 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17950 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17951 [(set_attr "isa" "noavx,noavx,avx")
17952 (set_attr "type" "ssemov")
17953 (set_attr "prefix_extra" "1")
17954 (set_attr "prefix" "orig,orig,maybe_evex")
17955 (set_attr "mode" "TI")])
17956
17957 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
17958 [(set (match_operand:V8HI 0 "register_operand")
17959 (any_extend:V8HI
17960 (vec_select:V8QI
17961 (subreg:V16QI
17962 (vec_concat:V2DI
17963 (match_operand:DI 1 "memory_operand")
17964 (const_int 0)) 0)
17965 (parallel [(const_int 0) (const_int 1)
17966 (const_int 2) (const_int 3)
17967 (const_int 4) (const_int 5)
17968 (const_int 6) (const_int 7)]))))]
17969 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
17970 && ix86_pre_reload_split ()"
17971 "#"
17972 "&& 1"
17973 [(set (match_dup 0)
17974 (any_extend:V8HI (match_dup 1)))]
17975 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17976
17977 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
17978 [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,v")
17979 (vec_select:V16QI
17980 (vec_concat:V32QI
17981 (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,vm")
17982 (match_operand:V16QI 2 "const0_operand" "C,C,C"))
17983 (match_parallel 3 "pmovzx_parallel"
17984 [(match_operand 4 "const_int_operand" "n,n,n")])))]
17985 "TARGET_SSE4_1"
17986 "#"
17987 "&& reload_completed"
17988 [(set (match_dup 0)
17989 (zero_extend:V8HI
17990 (vec_select:V8QI
17991 (match_dup 1)
17992 (parallel [(const_int 0) (const_int 1)
17993 (const_int 2) (const_int 3)
17994 (const_int 4) (const_int 5)
17995 (const_int 6) (const_int 7)]))))]
17996 {
17997 operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
17998 if (MEM_P (operands[1]))
17999 {
18000 operands[1] = lowpart_subreg (V8QImode, operands[1], V16QImode);
18001 operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
18002 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18003 DONE;
18004 }
18005 }
18006 [(set_attr "isa" "noavx,noavx,avx")])
18007
18008 (define_expand "<insn>v8qiv8hi2"
18009 [(set (match_operand:V8HI 0 "register_operand")
18010 (any_extend:V8HI
18011 (match_operand:V8QI 1 "nonimmediate_operand")))]
18012 "TARGET_SSE4_1"
18013 {
18014 if (!MEM_P (operands[1]))
18015 {
18016 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18017 emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], operands[1]));
18018 DONE;
18019 }
18020 })
18021
18022 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
18023 [(set (match_operand:V16SI 0 "register_operand" "=v")
18024 (any_extend:V16SI
18025 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
18026 "TARGET_AVX512F"
18027 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18028 [(set_attr "type" "ssemov")
18029 (set_attr "prefix" "evex")
18030 (set_attr "mode" "XI")])
18031
18032 (define_expand "<insn>v16qiv16si2"
18033 [(set (match_operand:V16SI 0 "register_operand")
18034 (any_extend:V16SI
18035 (match_operand:V16QI 1 "nonimmediate_operand")))]
18036 "TARGET_AVX512F")
18037
18038 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
18039 [(set (match_operand:V8SI 0 "register_operand" "=v")
18040 (any_extend:V8SI
18041 (vec_select:V8QI
18042 (match_operand:V16QI 1 "register_operand" "v")
18043 (parallel [(const_int 0) (const_int 1)
18044 (const_int 2) (const_int 3)
18045 (const_int 4) (const_int 5)
18046 (const_int 6) (const_int 7)]))))]
18047 "TARGET_AVX2 && <mask_avx512vl_condition>"
18048 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18049 [(set_attr "type" "ssemov")
18050 (set_attr "prefix_extra" "1")
18051 (set_attr "prefix" "maybe_evex")
18052 (set_attr "mode" "OI")])
18053
18054 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
18055 [(set (match_operand:V8SI 0 "register_operand" "=v")
18056 (any_extend:V8SI
18057 (match_operand:V8QI 1 "memory_operand" "m")))]
18058 "TARGET_AVX2 && <mask_avx512vl_condition>"
18059 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18060 [(set_attr "type" "ssemov")
18061 (set_attr "prefix_extra" "1")
18062 (set_attr "prefix" "maybe_evex")
18063 (set_attr "mode" "OI")])
18064
18065 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
18066 [(set (match_operand:V8SI 0 "register_operand")
18067 (any_extend:V8SI
18068 (vec_select:V8QI
18069 (subreg:V16QI
18070 (vec_concat:V2DI
18071 (match_operand:DI 1 "memory_operand")
18072 (const_int 0)) 0)
18073 (parallel [(const_int 0) (const_int 1)
18074 (const_int 2) (const_int 3)
18075 (const_int 4) (const_int 5)
18076 (const_int 6) (const_int 7)]))))]
18077 "TARGET_AVX2 && <mask_avx512vl_condition>
18078 && ix86_pre_reload_split ()"
18079 "#"
18080 "&& 1"
18081 [(set (match_dup 0)
18082 (any_extend:V8SI (match_dup 1)))]
18083 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
18084
18085 (define_expand "<insn>v8qiv8si2"
18086 [(set (match_operand:V8SI 0 "register_operand")
18087 (any_extend:V8SI
18088 (match_operand:V8QI 1 "nonimmediate_operand")))]
18089 "TARGET_AVX2"
18090 {
18091 if (!MEM_P (operands[1]))
18092 {
18093 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18094 emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], operands[1]));
18095 DONE;
18096 }
18097 })
18098
18099 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
18100 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18101 (any_extend:V4SI
18102 (vec_select:V4QI
18103 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
18104 (parallel [(const_int 0) (const_int 1)
18105 (const_int 2) (const_int 3)]))))]
18106 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18107 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18108 [(set_attr "isa" "noavx,noavx,avx")
18109 (set_attr "type" "ssemov")
18110 (set_attr "prefix_extra" "1")
18111 (set_attr "prefix" "orig,orig,maybe_evex")
18112 (set_attr "mode" "TI")])
18113
18114 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
18115 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18116 (any_extend:V4SI
18117 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
18118 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18119 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18120 [(set_attr "isa" "noavx,noavx,avx")
18121 (set_attr "type" "ssemov")
18122 (set_attr "prefix_extra" "1")
18123 (set_attr "prefix" "orig,orig,maybe_evex")
18124 (set_attr "mode" "TI")])
18125
18126 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
18127 [(set (match_operand:V4SI 0 "register_operand")
18128 (any_extend:V4SI
18129 (vec_select:V4QI
18130 (subreg:V16QI
18131 (vec_merge:V4SI
18132 (vec_duplicate:V4SI
18133 (match_operand:SI 1 "memory_operand"))
18134 (const_vector:V4SI
18135 [(const_int 0) (const_int 0)
18136 (const_int 0) (const_int 0)])
18137 (const_int 1)) 0)
18138 (parallel [(const_int 0) (const_int 1)
18139 (const_int 2) (const_int 3)]))))]
18140 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18141 && ix86_pre_reload_split ()"
18142 "#"
18143 "&& 1"
18144 [(set (match_dup 0)
18145 (any_extend:V4SI (match_dup 1)))]
18146 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
18147
18148 (define_expand "<insn>v4qiv4si2"
18149 [(set (match_operand:V4SI 0 "register_operand")
18150 (any_extend:V4SI
18151 (match_operand:V4QI 1 "nonimmediate_operand")))]
18152 "TARGET_SSE4_1"
18153 {
18154 if (!MEM_P (operands[1]))
18155 {
18156 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
18157 emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], operands[1]));
18158 DONE;
18159 }
18160 })
18161
18162 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
18163 [(set (match_operand:V16SI 0 "register_operand" "=v")
18164 (any_extend:V16SI
18165 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
18166 "TARGET_AVX512F"
18167 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18168 [(set_attr "type" "ssemov")
18169 (set_attr "prefix" "evex")
18170 (set_attr "mode" "XI")])
18171
18172 (define_expand "<insn>v16hiv16si2"
18173 [(set (match_operand:V16SI 0 "register_operand")
18174 (any_extend:V16SI
18175 (match_operand:V16HI 1 "nonimmediate_operand")))]
18176 "TARGET_AVX512F")
18177
18178 (define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
18179 [(set (match_operand:V32HI 0 "register_operand" "=v")
18180 (vec_select:V32HI
18181 (vec_concat:V64HI
18182 (match_operand:V32HI 1 "nonimmediate_operand" "vm")
18183 (match_operand:V32HI 2 "const0_operand" "C"))
18184 (match_parallel 3 "pmovzx_parallel"
18185 [(match_operand 4 "const_int_operand" "n")])))]
18186 "TARGET_AVX512F"
18187 "#"
18188 "&& reload_completed"
18189 [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
18190 {
18191 operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
18192 operands[1] = lowpart_subreg (V16HImode, operands[1], V32HImode);
18193 })
18194
18195 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
18196 [(set (match_operand:V8SI 0 "register_operand" "=v")
18197 (any_extend:V8SI
18198 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
18199 "TARGET_AVX2 && <mask_avx512vl_condition>"
18200 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18201 [(set_attr "type" "ssemov")
18202 (set_attr "prefix_extra" "1")
18203 (set_attr "prefix" "maybe_evex")
18204 (set_attr "mode" "OI")])
18205
18206 (define_expand "<insn>v8hiv8si2"
18207 [(set (match_operand:V8SI 0 "register_operand")
18208 (any_extend:V8SI
18209 (match_operand:V8HI 1 "nonimmediate_operand")))]
18210 "TARGET_AVX2")
18211
18212 (define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
18213 [(set (match_operand:V16HI 0 "register_operand" "=v")
18214 (vec_select:V16HI
18215 (vec_concat:V32HI
18216 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
18217 (match_operand:V16HI 2 "const0_operand" "C"))
18218 (match_parallel 3 "pmovzx_parallel"
18219 [(match_operand 4 "const_int_operand" "n")])))]
18220 "TARGET_AVX2"
18221 "#"
18222 "&& reload_completed"
18223 [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
18224 {
18225 operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
18226 operands[1] = lowpart_subreg (V8HImode, operands[1], V16HImode);
18227 })
18228
18229 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
18230 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18231 (any_extend:V4SI
18232 (vec_select:V4HI
18233 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
18234 (parallel [(const_int 0) (const_int 1)
18235 (const_int 2) (const_int 3)]))))]
18236 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18237 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18238 [(set_attr "isa" "noavx,noavx,avx")
18239 (set_attr "type" "ssemov")
18240 (set_attr "prefix_extra" "1")
18241 (set_attr "prefix" "orig,orig,maybe_evex")
18242 (set_attr "mode" "TI")])
18243
18244 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
18245 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18246 (any_extend:V4SI
18247 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
18248 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18249 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18250 [(set_attr "isa" "noavx,noavx,avx")
18251 (set_attr "type" "ssemov")
18252 (set_attr "prefix_extra" "1")
18253 (set_attr "prefix" "orig,orig,maybe_evex")
18254 (set_attr "mode" "TI")])
18255
18256 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
18257 [(set (match_operand:V4SI 0 "register_operand")
18258 (any_extend:V4SI
18259 (vec_select:V4HI
18260 (subreg:V8HI
18261 (vec_concat:V2DI
18262 (match_operand:DI 1 "memory_operand")
18263 (const_int 0)) 0)
18264 (parallel [(const_int 0) (const_int 1)
18265 (const_int 2) (const_int 3)]))))]
18266 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18267 && ix86_pre_reload_split ()"
18268 "#"
18269 "&& 1"
18270 [(set (match_dup 0)
18271 (any_extend:V4SI (match_dup 1)))]
18272 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
18273
18274 (define_expand "<insn>v4hiv4si2"
18275 [(set (match_operand:V4SI 0 "register_operand")
18276 (any_extend:V4SI
18277 (match_operand:V4HI 1 "nonimmediate_operand")))]
18278 "TARGET_SSE4_1"
18279 {
18280 if (!MEM_P (operands[1]))
18281 {
18282 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
18283 emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], operands[1]));
18284 DONE;
18285 }
18286 })
18287
18288 (define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_3"
18289 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
18290 (vec_select:V8HI
18291 (vec_concat:V16HI
18292 (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
18293 (match_operand:V8HI 2 "const0_operand" "C,C,C"))
18294 (match_parallel 3 "pmovzx_parallel"
18295 [(match_operand 4 "const_int_operand" "n,n,n")])))]
18296 "TARGET_SSE4_1"
18297 "#"
18298 "&& reload_completed"
18299 [(set (match_dup 0)
18300 (zero_extend:V4SI
18301 (vec_select:V4HI
18302 (match_dup 1)
18303 (parallel [(const_int 0) (const_int 1)
18304 (const_int 2) (const_int 3)]))))]
18305 {
18306 operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
18307 if (MEM_P (operands[1]))
18308 {
18309 operands[1] = lowpart_subreg (V4HImode, operands[1], V8HImode);
18310 operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
18311 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18312 DONE;
18313 }
18314 }
18315 [(set_attr "isa" "noavx,noavx,avx")])
18316
18317 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
18318 [(set (match_operand:V8DI 0 "register_operand" "=v")
18319 (any_extend:V8DI
18320 (vec_select:V8QI
18321 (match_operand:V16QI 1 "register_operand" "v")
18322 (parallel [(const_int 0) (const_int 1)
18323 (const_int 2) (const_int 3)
18324 (const_int 4) (const_int 5)
18325 (const_int 6) (const_int 7)]))))]
18326 "TARGET_AVX512F"
18327 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18328 [(set_attr "type" "ssemov")
18329 (set_attr "prefix" "evex")
18330 (set_attr "mode" "XI")])
18331
18332 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
18333 [(set (match_operand:V8DI 0 "register_operand" "=v")
18334 (any_extend:V8DI
18335 (match_operand:V8QI 1 "memory_operand" "m")))]
18336 "TARGET_AVX512F"
18337 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18338 [(set_attr "type" "ssemov")
18339 (set_attr "prefix" "evex")
18340 (set_attr "mode" "XI")])
18341
18342 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
18343 [(set (match_operand:V8DI 0 "register_operand")
18344 (any_extend:V8DI
18345 (vec_select:V8QI
18346 (subreg:V16QI
18347 (vec_concat:V2DI
18348 (match_operand:DI 1 "memory_operand")
18349 (const_int 0)) 0)
18350 (parallel [(const_int 0) (const_int 1)
18351 (const_int 2) (const_int 3)
18352 (const_int 4) (const_int 5)
18353 (const_int 6) (const_int 7)]))))]
18354 "TARGET_AVX512F && ix86_pre_reload_split ()"
18355 "#"
18356 "&& 1"
18357 [(set (match_dup 0)
18358 (any_extend:V8DI (match_dup 1)))]
18359 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
18360
18361 (define_expand "<insn>v8qiv8di2"
18362 [(set (match_operand:V8DI 0 "register_operand")
18363 (any_extend:V8DI
18364 (match_operand:V8QI 1 "nonimmediate_operand")))]
18365 "TARGET_AVX512F"
18366 {
18367 if (!MEM_P (operands[1]))
18368 {
18369 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18370 emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], operands[1]));
18371 DONE;
18372 }
18373 })
18374
18375 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
18376 [(set (match_operand:V4DI 0 "register_operand" "=v")
18377 (any_extend:V4DI
18378 (vec_select:V4QI
18379 (match_operand:V16QI 1 "register_operand" "v")
18380 (parallel [(const_int 0) (const_int 1)
18381 (const_int 2) (const_int 3)]))))]
18382 "TARGET_AVX2 && <mask_avx512vl_condition>"
18383 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18384 [(set_attr "type" "ssemov")
18385 (set_attr "prefix_extra" "1")
18386 (set_attr "prefix" "maybe_evex")
18387 (set_attr "mode" "OI")])
18388
18389 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
18390 [(set (match_operand:V4DI 0 "register_operand" "=v")
18391 (any_extend:V4DI
18392 (match_operand:V4QI 1 "memory_operand" "m")))]
18393 "TARGET_AVX2 && <mask_avx512vl_condition>"
18394 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18395 [(set_attr "type" "ssemov")
18396 (set_attr "prefix_extra" "1")
18397 (set_attr "prefix" "maybe_evex")
18398 (set_attr "mode" "OI")])
18399
18400 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
18401 [(set (match_operand:V4DI 0 "register_operand")
18402 (any_extend:V4DI
18403 (vec_select:V4QI
18404 (subreg:V16QI
18405 (vec_merge:V4SI
18406 (vec_duplicate:V4SI
18407 (match_operand:SI 1 "memory_operand"))
18408 (const_vector:V4SI
18409 [(const_int 0) (const_int 0)
18410 (const_int 0) (const_int 0)])
18411 (const_int 1)) 0)
18412 (parallel [(const_int 0) (const_int 1)
18413 (const_int 2) (const_int 3)]))))]
18414 "TARGET_AVX2 && <mask_avx512vl_condition>
18415 && ix86_pre_reload_split ()"
18416 "#"
18417 "&& 1"
18418 [(set (match_dup 0)
18419 (any_extend:V4DI (match_dup 1)))]
18420 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
18421
18422 (define_expand "<insn>v4qiv4di2"
18423 [(set (match_operand:V4DI 0 "register_operand")
18424 (any_extend:V4DI
18425 (match_operand:V4QI 1 "nonimmediate_operand")))]
18426 "TARGET_AVX2"
18427 {
18428 if (!MEM_P (operands[1]))
18429 {
18430 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
18431 emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], operands[1]));
18432 DONE;
18433 }
18434 })
18435
18436 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
18437 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18438 (any_extend:V2DI
18439 (vec_select:V2QI
18440 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
18441 (parallel [(const_int 0) (const_int 1)]))))]
18442 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18443 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18444 [(set_attr "isa" "noavx,noavx,avx")
18445 (set_attr "type" "ssemov")
18446 (set_attr "prefix_extra" "1")
18447 (set_attr "prefix" "orig,orig,maybe_evex")
18448 (set_attr "mode" "TI")])
18449
18450 (define_expand "<insn>v2qiv2di2"
18451 [(set (match_operand:V2DI 0 "register_operand")
18452 (any_extend:V2DI
18453 (match_operand:V2QI 1 "register_operand")))]
18454 "TARGET_SSE4_1"
18455 {
18456 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V2QImode, 0);
18457 emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], operands[1]));
18458 DONE;
18459 })
18460
18461 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
18462 [(set (match_operand:V8DI 0 "register_operand" "=v")
18463 (any_extend:V8DI
18464 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
18465 "TARGET_AVX512F"
18466 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18467 [(set_attr "type" "ssemov")
18468 (set_attr "prefix" "evex")
18469 (set_attr "mode" "XI")])
18470
18471 (define_expand "<insn>v8hiv8di2"
18472 [(set (match_operand:V8DI 0 "register_operand")
18473 (any_extend:V8DI
18474 (match_operand:V8HI 1 "nonimmediate_operand")))]
18475 "TARGET_AVX512F")
18476
18477 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
18478 [(set (match_operand:V4DI 0 "register_operand" "=v")
18479 (any_extend:V4DI
18480 (vec_select:V4HI
18481 (match_operand:V8HI 1 "register_operand" "v")
18482 (parallel [(const_int 0) (const_int 1)
18483 (const_int 2) (const_int 3)]))))]
18484 "TARGET_AVX2 && <mask_avx512vl_condition>"
18485 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18486 [(set_attr "type" "ssemov")
18487 (set_attr "prefix_extra" "1")
18488 (set_attr "prefix" "maybe_evex")
18489 (set_attr "mode" "OI")])
18490
18491 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
18492 [(set (match_operand:V4DI 0 "register_operand" "=v")
18493 (any_extend:V4DI
18494 (match_operand:V4HI 1 "memory_operand" "m")))]
18495 "TARGET_AVX2 && <mask_avx512vl_condition>"
18496 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18497 [(set_attr "type" "ssemov")
18498 (set_attr "prefix_extra" "1")
18499 (set_attr "prefix" "maybe_evex")
18500 (set_attr "mode" "OI")])
18501
18502 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
18503 [(set (match_operand:V4DI 0 "register_operand")
18504 (any_extend:V4DI
18505 (vec_select:V4HI
18506 (subreg:V8HI
18507 (vec_concat:V2DI
18508 (match_operand:DI 1 "memory_operand")
18509 (const_int 0)) 0)
18510 (parallel [(const_int 0) (const_int 1)
18511 (const_int 2) (const_int 3)]))))]
18512 "TARGET_AVX2 && <mask_avx512vl_condition>
18513 && ix86_pre_reload_split ()"
18514 "#"
18515 "&& 1"
18516 [(set (match_dup 0)
18517 (any_extend:V4DI (match_dup 1)))]
18518 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
18519
18520 (define_expand "<insn>v4hiv4di2"
18521 [(set (match_operand:V4DI 0 "register_operand")
18522 (any_extend:V4DI
18523 (match_operand:V4HI 1 "nonimmediate_operand")))]
18524 "TARGET_AVX2"
18525 {
18526 if (!MEM_P (operands[1]))
18527 {
18528 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
18529 emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], operands[1]));
18530 DONE;
18531 }
18532 })
18533
18534 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
18535 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18536 (any_extend:V2DI
18537 (vec_select:V2HI
18538 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
18539 (parallel [(const_int 0) (const_int 1)]))))]
18540 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18541 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18542 [(set_attr "isa" "noavx,noavx,avx")
18543 (set_attr "type" "ssemov")
18544 (set_attr "prefix_extra" "1")
18545 (set_attr "prefix" "orig,orig,maybe_evex")
18546 (set_attr "mode" "TI")])
18547
18548 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
18549 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18550 (any_extend:V2DI
18551 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
18552 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18553 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18554 [(set_attr "isa" "noavx,noavx,avx")
18555 (set_attr "type" "ssemov")
18556 (set_attr "prefix_extra" "1")
18557 (set_attr "prefix" "orig,orig,maybe_evex")
18558 (set_attr "mode" "TI")])
18559
18560 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
18561 [(set (match_operand:V2DI 0 "register_operand")
18562 (any_extend:V2DI
18563 (vec_select:V2HI
18564 (subreg:V8HI
18565 (vec_merge:V4SI
18566 (vec_duplicate:V4SI
18567 (match_operand:SI 1 "memory_operand"))
18568 (const_vector:V4SI
18569 [(const_int 0) (const_int 0)
18570 (const_int 0) (const_int 0)])
18571 (const_int 1)) 0)
18572 (parallel [(const_int 0) (const_int 1)]))))]
18573 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18574 && ix86_pre_reload_split ()"
18575 "#"
18576 "&& 1"
18577 [(set (match_dup 0)
18578 (any_extend:V2DI (match_dup 1)))]
18579 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
18580
18581 (define_expand "<insn>v2hiv2di2"
18582 [(set (match_operand:V2DI 0 "register_operand")
18583 (any_extend:V2DI
18584 (match_operand:V2HI 1 "nonimmediate_operand")))]
18585 "TARGET_SSE4_1"
18586 {
18587 if (!MEM_P (operands[1]))
18588 {
18589 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V2HImode, 0);
18590 emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], operands[1]));
18591 DONE;
18592 }
18593 })
18594
18595 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
18596 [(set (match_operand:V8DI 0 "register_operand" "=v")
18597 (any_extend:V8DI
18598 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
18599 "TARGET_AVX512F"
18600 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18601 [(set_attr "type" "ssemov")
18602 (set_attr "prefix" "evex")
18603 (set_attr "mode" "XI")])
18604
18605 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_1"
18606 [(set (match_operand:V16SI 0 "register_operand" "=v")
18607 (vec_select:V16SI
18608 (vec_concat:V32SI
18609 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
18610 (match_operand:V16SI 2 "const0_operand" "C"))
18611 (match_parallel 3 "pmovzx_parallel"
18612 [(match_operand 4 "const_int_operand" "n")])))]
18613 "TARGET_AVX512F"
18614 "#"
18615 "&& reload_completed"
18616 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
18617 {
18618 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
18619 operands[1] = lowpart_subreg (V8SImode, operands[1], V16SImode);
18620 })
18621
18622 (define_expand "<insn>v8siv8di2"
18623 [(set (match_operand:V8DI 0 "register_operand" "=v")
18624 (any_extend:V8DI
18625 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
18626 "TARGET_AVX512F")
18627
18628 (define_insn "avx2_<code>v4siv4di2<mask_name>"
18629 [(set (match_operand:V4DI 0 "register_operand" "=v")
18630 (any_extend:V4DI
18631 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
18632 "TARGET_AVX2 && <mask_avx512vl_condition>"
18633 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18634 [(set_attr "type" "ssemov")
18635 (set_attr "prefix" "maybe_evex")
18636 (set_attr "prefix_extra" "1")
18637 (set_attr "mode" "OI")])
18638
18639 (define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
18640 [(set (match_operand:V8SI 0 "register_operand" "=v")
18641 (vec_select:V8SI
18642 (vec_concat:V16SI
18643 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
18644 (match_operand:V8SI 2 "const0_operand" "C"))
18645 (match_parallel 3 "pmovzx_parallel"
18646 [(match_operand 4 "const_int_operand" "n")])))]
18647 "TARGET_AVX2"
18648 "#"
18649 "&& reload_completed"
18650 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
18651 {
18652 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
18653 operands[1] = lowpart_subreg (V4SImode, operands[1], V8SImode);
18654 })
18655
18656 (define_expand "<insn>v4siv4di2"
18657 [(set (match_operand:V4DI 0 "register_operand" "=v")
18658 (any_extend:V4DI
18659 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
18660 "TARGET_AVX2")
18661
18662 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
18663 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18664 (any_extend:V2DI
18665 (vec_select:V2SI
18666 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
18667 (parallel [(const_int 0) (const_int 1)]))))]
18668 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18669 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18670 [(set_attr "isa" "noavx,noavx,avx")
18671 (set_attr "type" "ssemov")
18672 (set_attr "prefix_extra" "1")
18673 (set_attr "prefix" "orig,orig,maybe_evex")
18674 (set_attr "mode" "TI")])
18675
18676 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
18677 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18678 (any_extend:V2DI
18679 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
18680 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18681 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18682 [(set_attr "isa" "noavx,noavx,avx")
18683 (set_attr "type" "ssemov")
18684 (set_attr "prefix_extra" "1")
18685 (set_attr "prefix" "orig,orig,maybe_evex")
18686 (set_attr "mode" "TI")])
18687
18688 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
18689 [(set (match_operand:V2DI 0 "register_operand")
18690 (any_extend:V2DI
18691 (vec_select:V2SI
18692 (subreg:V4SI
18693 (vec_concat:V2DI
18694 (match_operand:DI 1 "memory_operand")
18695 (const_int 0)) 0)
18696 (parallel [(const_int 0) (const_int 1)]))))]
18697 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18698 && ix86_pre_reload_split ()"
18699 "#"
18700 "&& 1"
18701 [(set (match_dup 0)
18702 (any_extend:V2DI (match_dup 1)))]
18703 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
18704
18705 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_3"
18706 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18707 (vec_select:V4SI
18708 (vec_concat:V8SI
18709 (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
18710 (match_operand:V4SI 2 "const0_operand" "C,C,C"))
18711 (match_parallel 3 "pmovzx_parallel"
18712 [(match_operand 4 "const_int_operand" "n,n,n")])))]
18713 "TARGET_SSE4_1"
18714 "#"
18715 "&& reload_completed"
18716 [(set (match_dup 0)
18717 (zero_extend:V2DI
18718 (vec_select:V2SI (match_dup 1)
18719 (parallel [(const_int 0) (const_int 1)]))))]
18720 {
18721 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
18722 if (MEM_P (operands[1]))
18723 {
18724 operands[1] = lowpart_subreg (V2SImode, operands[1], V4SImode);
18725 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
18726 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18727 DONE;
18728 }
18729 }
18730 [(set_attr "isa" "noavx,noavx,avx")])
18731
18732 (define_expand "<insn>v2siv2di2"
18733 [(set (match_operand:V2DI 0 "register_operand")
18734 (any_extend:V2DI
18735 (match_operand:V2SI 1 "nonimmediate_operand")))]
18736 "TARGET_SSE4_1"
18737 {
18738 if (!MEM_P (operands[1]))
18739 {
18740 operands[1] = simplify_gen_subreg (V4SImode, operands[1], V2SImode, 0);
18741 emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], operands[1]));
18742 DONE;
18743 }
18744 })
18745
18746 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
18747 ;; setting FLAGS_REG. But they are not really compare instructions.
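;; Informally (a rough sketch, not the precise manual wording): vtestps and
;; vtestpd look only at the per-element sign bits, setting ZF when
;; (op0 & op1) has no sign bit set and CF when (~op0 & op1) has none, which
;; is what the _mm_testz_ps/_mm_testc_ps family of intrinsics expects.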
18748 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
18749 [(set (reg:CC FLAGS_REG)
18750 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
18751 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
18752 UNSPEC_VTESTP))]
18753 "TARGET_AVX"
18754 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
18755 [(set_attr "type" "ssecomi")
18756 (set_attr "prefix_extra" "1")
18757 (set_attr "prefix" "vex")
18758 (set_attr "mode" "<MODE>")])
18759
18760 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
18761 ;; But it is not really a compare instruction.
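;; Informally: ZF is set when (op0 & op1) == 0 and CF when
;; (~op0 & op1) == 0, matching the _mm_testz_si128/_mm_testc_si128
;; intrinsics; the FLAGS_REG consumer then tests whichever bit it needs.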
18762 (define_insn "<sse4_1>_ptest<mode>"
18763 [(set (reg:CC FLAGS_REG)
18764 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
18765 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
18766 UNSPEC_PTEST))]
18767 "TARGET_SSE4_1"
18768 "%vptest\t{%1, %0|%0, %1}"
18769 [(set_attr "isa" "noavx,noavx,avx")
18770 (set_attr "type" "ssecomi")
18771 (set_attr "prefix_extra" "1")
18772 (set_attr "prefix" "orig,orig,vex")
18773 (set (attr "btver2_decode")
18774 (if_then_else
18775 (match_test "<sseinsnmode>mode == OImode")
18776 (const_string "vector")
18777 (const_string "*")))
18778 (set_attr "mode" "<sseinsnmode>")])
18779
18780 (define_insn "ptesttf2"
18781 [(set (reg:CC FLAGS_REG)
18782 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
18783 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
18784 UNSPEC_PTEST))]
18785 "TARGET_SSE4_1"
18786 "%vptest\t{%1, %0|%0, %1}"
18787 [(set_attr "isa" "noavx,noavx,avx")
18788 (set_attr "type" "ssecomi")
18789 (set_attr "prefix_extra" "1")
18790 (set_attr "prefix" "orig,orig,vex")
18791 (set_attr "mode" "TI")])
18792
18793 (define_expand "nearbyint<mode>2"
18794 [(set (match_operand:VF 0 "register_operand")
18795 (unspec:VF
18796 [(match_operand:VF 1 "vector_operand")
18797 (match_dup 2)]
18798 UNSPEC_ROUND))]
18799 "TARGET_SSE4_1"
18800 "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
18801
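;; rint differs from nearbyint above only in not passing ROUND_NO_EXC:
;; both round using the current MXCSR rounding mode, but rint may raise
;; the inexact exception where nearbyint suppresses it.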
18802 (define_expand "rint<mode>2"
18803 [(set (match_operand:VF 0 "register_operand")
18804 (unspec:VF
18805 [(match_operand:VF 1 "vector_operand")
18806 (match_dup 2)]
18807 UNSPEC_ROUND))]
18808 "TARGET_SSE4_1"
18809 "operands[2] = GEN_INT (ROUND_MXCSR);")
18810
18811 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
18812 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18813 (unspec:VF_128_256
18814 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
18815 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
18816 UNSPEC_ROUND))]
18817 "TARGET_SSE4_1"
18818 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18819 [(set_attr "isa" "noavx,noavx,avx")
18820 (set_attr "type" "ssecvt")
18821 (set_attr "prefix_data16" "1,1,*")
18822 (set_attr "prefix_extra" "1")
18823 (set_attr "length_immediate" "1")
18824 (set_attr "prefix" "orig,orig,vex")
18825 (set_attr "mode" "<MODE>")])
18826
18827 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
18828 [(match_operand:<sseintvecmode> 0 "register_operand")
18829 (match_operand:VF1_128_256 1 "vector_operand")
18830 (match_operand:SI 2 "const_0_to_15_operand")]
18831 "TARGET_SSE4_1"
18832 {
18833 rtx tmp = gen_reg_rtx (<MODE>mode);
18834
18835 emit_insn
18836 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
18837 operands[2]));
18838 emit_insn
18839 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
18840 DONE;
18841 })
18842
18843 (define_expand "avx512f_round<castmode>512"
18844 [(match_operand:VF_512 0 "register_operand")
18845 (match_operand:VF_512 1 "nonimmediate_operand")
18846 (match_operand:SI 2 "const_0_to_15_operand")]
18847 "TARGET_AVX512F"
18848 {
18849 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
18850 DONE;
18851 })
18852
18853 (define_expand "avx512f_roundps512_sfix"
18854 [(match_operand:V16SI 0 "register_operand")
18855 (match_operand:V16SF 1 "nonimmediate_operand")
18856 (match_operand:SI 2 "const_0_to_15_operand")]
18857 "TARGET_AVX512F"
18858 {
18859 rtx tmp = gen_reg_rtx (V16SFmode);
18860 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
18861 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
18862 DONE;
18863 })
18864
18865 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
18866 [(match_operand:<ssepackfltmode> 0 "register_operand")
18867 (match_operand:VF2 1 "vector_operand")
18868 (match_operand:VF2 2 "vector_operand")
18869 (match_operand:SI 3 "const_0_to_15_operand")]
18870 "TARGET_SSE4_1"
18871 {
18872 rtx tmp0, tmp1;
18873
18874 if (<MODE>mode == V2DFmode
18875 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
18876 {
18877 rtx tmp2 = gen_reg_rtx (V4DFmode);
18878
18879 tmp0 = gen_reg_rtx (V4DFmode);
18880 tmp1 = force_reg (V2DFmode, operands[1]);
18881
18882 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
18883 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
18884 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
18885 }
18886 else
18887 {
18888 tmp0 = gen_reg_rtx (<MODE>mode);
18889 tmp1 = gen_reg_rtx (<MODE>mode);
18890
18891 emit_insn
18892 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
18893 operands[3]));
18894 emit_insn
18895 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
18896 operands[3]));
18897 emit_insn
18898 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
18899 }
18900 DONE;
18901 })
18902
18903 (define_insn "sse4_1_round<ssescalarmodesuffix>"
18904 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18905 (vec_merge:VF_128
18906 (unspec:VF_128
18907 [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18908 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18909 UNSPEC_ROUND)
18910 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18911 (const_int 1)))]
18912 "TARGET_SSE4_1"
18913 "@
18914 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18915 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18916 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
18917 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
18918 [(set_attr "isa" "noavx,noavx,avx,avx512f")
18919 (set_attr "type" "ssecvt")
18920 (set_attr "length_immediate" "1")
18921 (set_attr "prefix_data16" "1,1,*,*")
18922 (set_attr "prefix_extra" "1")
18923 (set_attr "prefix" "orig,orig,vex,evex")
18924 (set_attr "mode" "<MODE>")])
18925
18926 (define_insn "*sse4_1_round<ssescalarmodesuffix>"
18927 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18928 (vec_merge:VF_128
18929 (vec_duplicate:VF_128
18930 (unspec:<ssescalarmode>
18931 [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18932 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18933 UNSPEC_ROUND))
18934 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18935 (const_int 1)))]
18936 "TARGET_SSE4_1"
18937 "@
18938 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18939 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18940 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
18941 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18942 [(set_attr "isa" "noavx,noavx,avx,avx512f")
18943 (set_attr "type" "ssecvt")
18944 (set_attr "length_immediate" "1")
18945 (set_attr "prefix_data16" "1,1,*,*")
18946 (set_attr "prefix_extra" "1")
18947 (set_attr "prefix" "orig,orig,vex,evex")
18948 (set_attr "mode" "<MODE>")])
18949
18950 (define_expand "round<mode>2"
18951 [(set (match_dup 3)
18952 (plus:VF
18953 (match_operand:VF 1 "register_operand")
18954 (match_dup 2)))
18955 (set (match_operand:VF 0 "register_operand")
18956 (unspec:VF
18957 [(match_dup 3) (match_dup 4)]
18958 UNSPEC_ROUND))]
18959 "TARGET_SSE4_1 && !flag_trapping_math"
18960 {
18961 machine_mode scalar_mode;
18962 const struct real_format *fmt;
18963 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
18964 rtx half, vec_half;
18965
18966 scalar_mode = GET_MODE_INNER (<MODE>mode);
18967
18968 /* Load nextafter (0.5, 0.0).  */
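/* A sketch of the expansion, matching the code below: round (x) is
   computed as trunc (x + copysign (pred_half, x)), where pred_half
   = 0.5 - 2**(-p-1) is the largest representable value below 0.5.
   Using exactly 0.5 instead could make x + 0.5 round up to the next
   integer even when the fractional part of x is just below 0.5.  */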
18969 fmt = REAL_MODE_FORMAT (scalar_mode);
18970 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
18971 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
18972 half = const_double_from_real_value (pred_half, scalar_mode);
18973
18974 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
18975 vec_half = force_reg (<MODE>mode, vec_half);
18976
18977 operands[2] = gen_reg_rtx (<MODE>mode);
18978 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
18979
18980 operands[3] = gen_reg_rtx (<MODE>mode);
18981 operands[4] = GEN_INT (ROUND_TRUNC);
18982 })
18983
18984 (define_expand "round<mode>2_sfix"
18985 [(match_operand:<sseintvecmode> 0 "register_operand")
18986 (match_operand:VF1 1 "register_operand")]
18987 "TARGET_SSE4_1 && !flag_trapping_math"
18988 {
18989 rtx tmp = gen_reg_rtx (<MODE>mode);
18990
18991 emit_insn (gen_round<mode>2 (tmp, operands[1]));
18992
18993 emit_insn
18994 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
18995 DONE;
18996 })
18997
18998 (define_expand "round<mode>2_vec_pack_sfix"
18999 [(match_operand:<ssepackfltmode> 0 "register_operand")
19000 (match_operand:VF2 1 "register_operand")
19001 (match_operand:VF2 2 "register_operand")]
19002 "TARGET_SSE4_1 && !flag_trapping_math"
19003 {
19004 rtx tmp0, tmp1;
19005
19006 if (<MODE>mode == V2DFmode
19007 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
19008 {
19009 rtx tmp2 = gen_reg_rtx (V4DFmode);
19010
19011 tmp0 = gen_reg_rtx (V4DFmode);
19012 tmp1 = force_reg (V2DFmode, operands[1]);
19013
19014 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
19015 emit_insn (gen_roundv4df2 (tmp2, tmp0));
19016 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
19017 }
19018 else
19019 {
19020 tmp0 = gen_reg_rtx (<MODE>mode);
19021 tmp1 = gen_reg_rtx (<MODE>mode);
19022
19023 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
19024 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
19025
19026 emit_insn
19027 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
19028 }
19029 DONE;
19030 })
19031
19032 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19033 ;;
19034 ;; Intel SSE4.2 string/text processing instructions
19035 ;;
19036 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19037
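;; pcmpestri/pcmpestrm take explicit string lengths in %eax and %edx
;; (operands 3 and 5 of the compound pattern below); the pcmpistr*
;; patterns further down instead operate on implicit, NUL-terminated
;; strings.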
19038 (define_insn_and_split "sse4_2_pcmpestr"
19039 [(set (match_operand:SI 0 "register_operand" "=c,c")
19040 (unspec:SI
19041 [(match_operand:V16QI 2 "register_operand" "x,x")
19042 (match_operand:SI 3 "register_operand" "a,a")
19043 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
19044 (match_operand:SI 5 "register_operand" "d,d")
19045 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
19046 UNSPEC_PCMPESTR))
19047 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
19048 (unspec:V16QI
19049 [(match_dup 2)
19050 (match_dup 3)
19051 (match_dup 4)
19052 (match_dup 5)
19053 (match_dup 6)]
19054 UNSPEC_PCMPESTR))
19055 (set (reg:CC FLAGS_REG)
19056 (unspec:CC
19057 [(match_dup 2)
19058 (match_dup 3)
19059 (match_dup 4)
19060 (match_dup 5)
19061 (match_dup 6)]
19062 UNSPEC_PCMPESTR))]
19063 "TARGET_SSE4_2
19064 && ix86_pre_reload_split ()"
19065 "#"
19066 "&& 1"
19067 [(const_int 0)]
19068 {
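/* Each of the three results (%ecx, %xmm0 and the flags) carries a
   REG_UNUSED note when it is dead; emit only the single-output
   variants that are actually needed, or nothing at all when every
   result is unused.  */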
19069 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
19070 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
19071 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
19072
19073 if (ecx)
19074 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
19075 operands[3], operands[4],
19076 operands[5], operands[6]));
19077 if (xmm0)
19078 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
19079 operands[3], operands[4],
19080 operands[5], operands[6]));
19081 if (flags && !(ecx || xmm0))
19082 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
19083 operands[2], operands[3],
19084 operands[4], operands[5],
19085 operands[6]));
19086 if (!(flags || ecx || xmm0))
19087 emit_note (NOTE_INSN_DELETED);
19088
19089 DONE;
19090 }
19091 [(set_attr "type" "sselog")
19092 (set_attr "prefix_data16" "1")
19093 (set_attr "prefix_extra" "1")
19094 (set_attr "length_immediate" "1")
19095 (set_attr "memory" "none,load")
19096 (set_attr "mode" "TI")])
19097
19098 (define_insn "sse4_2_pcmpestri"
19099 [(set (match_operand:SI 0 "register_operand" "=c,c")
19100 (unspec:SI
19101 [(match_operand:V16QI 1 "register_operand" "x,x")
19102 (match_operand:SI 2 "register_operand" "a,a")
19103 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
19104 (match_operand:SI 4 "register_operand" "d,d")
19105 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
19106 UNSPEC_PCMPESTR))
19107 (set (reg:CC FLAGS_REG)
19108 (unspec:CC
19109 [(match_dup 1)
19110 (match_dup 2)
19111 (match_dup 3)
19112 (match_dup 4)
19113 (match_dup 5)]
19114 UNSPEC_PCMPESTR))]
19115 "TARGET_SSE4_2"
19116 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
19117 [(set_attr "type" "sselog")
19118 (set_attr "prefix_data16" "1")
19119 (set_attr "prefix_extra" "1")
19120 (set_attr "prefix" "maybe_vex")
19121 (set_attr "length_immediate" "1")
19122 (set_attr "btver2_decode" "vector")
19123 (set_attr "memory" "none,load")
19124 (set_attr "mode" "TI")])
19125
19126 (define_insn "sse4_2_pcmpestrm"
19127 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
19128 (unspec:V16QI
19129 [(match_operand:V16QI 1 "register_operand" "x,x")
19130 (match_operand:SI 2 "register_operand" "a,a")
19131 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
19132 (match_operand:SI 4 "register_operand" "d,d")
19133 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
19134 UNSPEC_PCMPESTR))
19135 (set (reg:CC FLAGS_REG)
19136 (unspec:CC
19137 [(match_dup 1)
19138 (match_dup 2)
19139 (match_dup 3)
19140 (match_dup 4)
19141 (match_dup 5)]
19142 UNSPEC_PCMPESTR))]
19143 "TARGET_SSE4_2"
19144 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
19145 [(set_attr "type" "sselog")
19146 (set_attr "prefix_data16" "1")
19147 (set_attr "prefix_extra" "1")
19148 (set_attr "length_immediate" "1")
19149 (set_attr "prefix" "maybe_vex")
19150 (set_attr "btver2_decode" "vector")
19151 (set_attr "memory" "none,load")
19152 (set_attr "mode" "TI")])
19153
19154 (define_insn "sse4_2_pcmpestr_cconly"
19155 [(set (reg:CC FLAGS_REG)
19156 (unspec:CC
19157 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
19158 (match_operand:SI 3 "register_operand" "a,a,a,a")
19159 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
19160 (match_operand:SI 5 "register_operand" "d,d,d,d")
19161 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
19162 UNSPEC_PCMPESTR))
19163 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
19164 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
19165 "TARGET_SSE4_2"
19166 "@
19167 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
19168 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
19169 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
19170 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
19171 [(set_attr "type" "sselog")
19172 (set_attr "prefix_data16" "1")
19173 (set_attr "prefix_extra" "1")
19174 (set_attr "length_immediate" "1")
19175 (set_attr "memory" "none,load,none,load")
19176 (set_attr "btver2_decode" "vector,vector,vector,vector")
19177 (set_attr "prefix" "maybe_vex")
19178 (set_attr "mode" "TI")])
19179
19180 (define_insn_and_split "sse4_2_pcmpistr"
19181 [(set (match_operand:SI 0 "register_operand" "=c,c")
19182 (unspec:SI
19183 [(match_operand:V16QI 2 "register_operand" "x,x")
19184 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
19185 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
19186 UNSPEC_PCMPISTR))
19187 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
19188 (unspec:V16QI
19189 [(match_dup 2)
19190 (match_dup 3)
19191 (match_dup 4)]
19192 UNSPEC_PCMPISTR))
19193 (set (reg:CC FLAGS_REG)
19194 (unspec:CC
19195 [(match_dup 2)
19196 (match_dup 3)
19197 (match_dup 4)]
19198 UNSPEC_PCMPISTR))]
19199 "TARGET_SSE4_2
19200 && ix86_pre_reload_split ()"
19201 "#"
19202 "&& 1"
19203 [(const_int 0)]
19204 {
19205 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
19206 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
19207 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
19208
19209 if (ecx)
19210 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
19211 operands[3], operands[4]));
19212 if (xmm0)
19213 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
19214 operands[3], operands[4]));
19215 if (flags && !(ecx || xmm0))
19216 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
19217 operands[2], operands[3],
19218 operands[4]));
19219 if (!(flags || ecx || xmm0))
19220 emit_note (NOTE_INSN_DELETED);
19221
19222 DONE;
19223 }
19224 [(set_attr "type" "sselog")
19225 (set_attr "prefix_data16" "1")
19226 (set_attr "prefix_extra" "1")
19227 (set_attr "length_immediate" "1")
19228 (set_attr "memory" "none,load")
19229 (set_attr "mode" "TI")])
19230
19231 (define_insn "sse4_2_pcmpistri"
19232 [(set (match_operand:SI 0 "register_operand" "=c,c")
19233 (unspec:SI
19234 [(match_operand:V16QI 1 "register_operand" "x,x")
19235 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19236 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19237 UNSPEC_PCMPISTR))
19238 (set (reg:CC FLAGS_REG)
19239 (unspec:CC
19240 [(match_dup 1)
19241 (match_dup 2)
19242 (match_dup 3)]
19243 UNSPEC_PCMPISTR))]
19244 "TARGET_SSE4_2"
19245 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
19246 [(set_attr "type" "sselog")
19247 (set_attr "prefix_data16" "1")
19248 (set_attr "prefix_extra" "1")
19249 (set_attr "length_immediate" "1")
19250 (set_attr "prefix" "maybe_vex")
19251 (set_attr "memory" "none,load")
19252 (set_attr "btver2_decode" "vector")
19253 (set_attr "mode" "TI")])
19254
19255 (define_insn "sse4_2_pcmpistrm"
19256 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
19257 (unspec:V16QI
19258 [(match_operand:V16QI 1 "register_operand" "x,x")
19259 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19260 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19261 UNSPEC_PCMPISTR))
19262 (set (reg:CC FLAGS_REG)
19263 (unspec:CC
19264 [(match_dup 1)
19265 (match_dup 2)
19266 (match_dup 3)]
19267 UNSPEC_PCMPISTR))]
19268 "TARGET_SSE4_2"
19269 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
19270 [(set_attr "type" "sselog")
19271 (set_attr "prefix_data16" "1")
19272 (set_attr "prefix_extra" "1")
19273 (set_attr "length_immediate" "1")
19274 (set_attr "prefix" "maybe_vex")
19275 (set_attr "memory" "none,load")
19276 (set_attr "btver2_decode" "vector")
19277 (set_attr "mode" "TI")])
19278
19279 (define_insn "sse4_2_pcmpistr_cconly"
19280 [(set (reg:CC FLAGS_REG)
19281 (unspec:CC
19282 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
19283 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
19284 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
19285 UNSPEC_PCMPISTR))
19286 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
19287 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
19288 "TARGET_SSE4_2"
19289 "@
19290 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
19291 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
19292 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
19293 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
19294 [(set_attr "type" "sselog")
19295 (set_attr "prefix_data16" "1")
19296 (set_attr "prefix_extra" "1")
19297 (set_attr "length_immediate" "1")
19298 (set_attr "memory" "none,load,none,load")
19299 (set_attr "prefix" "maybe_vex")
19300 (set_attr "btver2_decode" "vector,vector,vector,vector")
19301 (set_attr "mode" "TI")])
19302
19303 ;; Packed float variants
19304 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
19305 [(V8DI "V8SF") (V16SI "V16SF")])
19306
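;; Operand 4 is the prefetch locality hint: 3 (_MM_HINT_T0) selects
;; vgatherpf0*, 2 (_MM_HINT_T1) selects vgatherpf1*, as dispatched by
;; the switch in the *_mask insns below.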
19307 (define_expand "avx512pf_gatherpf<mode>sf"
19308 [(unspec
19309 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19310 (mem:<GATHER_SCATTER_SF_MEM_MODE>
19311 (match_par_dup 5
19312 [(match_operand 2 "vsib_address_operand")
19313 (match_operand:VI48_512 1 "register_operand")
19314 (match_operand:SI 3 "const1248_operand")]))
19315 (match_operand:SI 4 "const_2_to_3_operand")]
19316 UNSPEC_GATHER_PREFETCH)]
19317 "TARGET_AVX512PF"
19318 {
19319 operands[5]
19320 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19321 operands[3]), UNSPEC_VSIBADDR);
19322 })
19323
19324 (define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
19325 [(unspec
19326 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19327 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
19328 [(unspec:P
19329 [(match_operand:P 2 "vsib_address_operand" "Tv")
19330 (match_operand:VI48_512 1 "register_operand" "v")
19331 (match_operand:SI 3 "const1248_operand" "n")]
19332 UNSPEC_VSIBADDR)])
19333 (match_operand:SI 4 "const_2_to_3_operand" "n")]
19334 UNSPEC_GATHER_PREFETCH)]
19335 "TARGET_AVX512PF"
19336 {
19337 switch (INTVAL (operands[4]))
19338 {
19339 case 3:
19340 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19341 gas changed what it requires incompatibly. */
19342 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19343 case 2:
19344 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19345 default:
19346 gcc_unreachable ();
19347 }
19348 }
19349 [(set_attr "type" "sse")
19350 (set_attr "prefix" "evex")
19351 (set_attr "mode" "XI")])
19352
19353 ;; Packed double variants
19354 (define_expand "avx512pf_gatherpf<mode>df"
19355 [(unspec
19356 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19357 (mem:V8DF
19358 (match_par_dup 5
19359 [(match_operand 2 "vsib_address_operand")
19360 (match_operand:VI4_256_8_512 1 "register_operand")
19361 (match_operand:SI 3 "const1248_operand")]))
19362 (match_operand:SI 4 "const_2_to_3_operand")]
19363 UNSPEC_GATHER_PREFETCH)]
19364 "TARGET_AVX512PF"
19365 {
19366 operands[5]
19367 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19368 operands[3]), UNSPEC_VSIBADDR);
19369 })
19370
19371 (define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
19372 [(unspec
19373 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19374 (match_operator:V8DF 5 "vsib_mem_operator"
19375 [(unspec:P
19376 [(match_operand:P 2 "vsib_address_operand" "Tv")
19377 (match_operand:VI4_256_8_512 1 "register_operand" "v")
19378 (match_operand:SI 3 "const1248_operand" "n")]
19379 UNSPEC_VSIBADDR)])
19380 (match_operand:SI 4 "const_2_to_3_operand" "n")]
19381 UNSPEC_GATHER_PREFETCH)]
19382 "TARGET_AVX512PF"
19383 {
19384 switch (INTVAL (operands[4]))
19385 {
19386 case 3:
19387 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19388 gas changed what it requires incompatibly. */
19389 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19390 case 2:
19391 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19392 default:
19393 gcc_unreachable ();
19394 }
19395 }
19396 [(set_attr "type" "sse")
19397 (set_attr "prefix" "evex")
19398 (set_attr "mode" "XI")])
19399
19400 ;; Packed float variants
19401 (define_expand "avx512pf_scatterpf<mode>sf"
19402 [(unspec
19403 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19404 (mem:<GATHER_SCATTER_SF_MEM_MODE>
19405 (match_par_dup 5
19406 [(match_operand 2 "vsib_address_operand")
19407 (match_operand:VI48_512 1 "register_operand")
19408 (match_operand:SI 3 "const1248_operand")]))
19409 (match_operand:SI 4 "const2367_operand")]
19410 UNSPEC_SCATTER_PREFETCH)]
19411 "TARGET_AVX512PF"
19412 {
19413 operands[5]
19414 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19415 operands[3]), UNSPEC_VSIBADDR);
19416 })
19417
19418 (define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
19419 [(unspec
19420 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19421 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
19422 [(unspec:P
19423 [(match_operand:P 2 "vsib_address_operand" "Tv")
19424 (match_operand:VI48_512 1 "register_operand" "v")
19425 (match_operand:SI 3 "const1248_operand" "n")]
19426 UNSPEC_VSIBADDR)])
19427 (match_operand:SI 4 "const2367_operand" "n")]
19428 UNSPEC_SCATTER_PREFETCH)]
19429 "TARGET_AVX512PF"
19430 {
19431 switch (INTVAL (operands[4]))
19432 {
19433 case 3:
19434 case 7:
19435 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19436 gas changed what it requires incompatibly. */
19437 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19438 case 2:
19439 case 6:
19440 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19441 default:
19442 gcc_unreachable ();
19443 }
19444 }
19445 [(set_attr "type" "sse")
19446 (set_attr "prefix" "evex")
19447 (set_attr "mode" "XI")])
19448
19449 ;; Packed double variants
19450 (define_expand "avx512pf_scatterpf<mode>df"
19451 [(unspec
19452 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19453 (mem:V8DF
19454 (match_par_dup 5
19455 [(match_operand 2 "vsib_address_operand")
19456 (match_operand:VI4_256_8_512 1 "register_operand")
19457 (match_operand:SI 3 "const1248_operand")]))
19458 (match_operand:SI 4 "const2367_operand")]
19459 UNSPEC_SCATTER_PREFETCH)]
19460 "TARGET_AVX512PF"
19461 {
19462 operands[5]
19463 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19464 operands[3]), UNSPEC_VSIBADDR);
19465 })
19466
19467 (define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
19468 [(unspec
19469 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19470 (match_operator:V8DF 5 "vsib_mem_operator"
19471 [(unspec:P
19472 [(match_operand:P 2 "vsib_address_operand" "Tv")
19473 (match_operand:VI4_256_8_512 1 "register_operand" "v")
19474 (match_operand:SI 3 "const1248_operand" "n")]
19475 UNSPEC_VSIBADDR)])
19476 (match_operand:SI 4 "const2367_operand" "n")]
19477 UNSPEC_SCATTER_PREFETCH)]
19478 "TARGET_AVX512PF"
19479 {
19480 switch (INTVAL (operands[4]))
19481 {
19482 case 3:
19483 case 7:
19484 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19485 gas changed what it requires incompatibly. */
19486 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19487 case 2:
19488 case 6:
19489 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19490 default:
19491 gcc_unreachable ();
19492 }
19493 }
19494 [(set_attr "type" "sse")
19495 (set_attr "prefix" "evex")
19496 (set_attr "mode" "XI")])
19497
19498 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
19499 [(set (match_operand:VF_512 0 "register_operand" "=v")
19500 (unspec:VF_512
19501 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19502 UNSPEC_EXP2))]
19503 "TARGET_AVX512ER"
19504 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19505 [(set_attr "prefix" "evex")
19506 (set_attr "type" "sse")
19507 (set_attr "mode" "<MODE>")])
19508
19509 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
19510 [(set (match_operand:VF_512 0 "register_operand" "=v")
19511 (unspec:VF_512
19512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19513 UNSPEC_RCP28))]
19514 "TARGET_AVX512ER"
19515 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19516 [(set_attr "prefix" "evex")
19517 (set_attr "type" "sse")
19518 (set_attr "mode" "<MODE>")])
19519
19520 (define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
19521 [(set (match_operand:VF_128 0 "register_operand" "=v")
19522 (vec_merge:VF_128
19523 (unspec:VF_128
19524 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
19525 UNSPEC_RCP28)
19526 (match_operand:VF_128 2 "register_operand" "v")
19527 (const_int 1)))]
19528 "TARGET_AVX512ER"
19529 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_opernad3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
19530 [(set_attr "length_immediate" "1")
19531 (set_attr "prefix" "evex")
19532 (set_attr "type" "sse")
19533 (set_attr "mode" "<MODE>")])
19534
19535 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
19536 [(set (match_operand:VF_512 0 "register_operand" "=v")
19537 (unspec:VF_512
19538 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19539 UNSPEC_RSQRT28))]
19540 "TARGET_AVX512ER"
19541 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19542 [(set_attr "prefix" "evex")
19543 (set_attr "type" "sse")
19544 (set_attr "mode" "<MODE>")])
19545
19546 (define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
19547 [(set (match_operand:VF_128 0 "register_operand" "=v")
19548 (vec_merge:VF_128
19549 (unspec:VF_128
19550 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
19551 UNSPEC_RSQRT28)
19552 (match_operand:VF_128 2 "register_operand" "v")
19553 (const_int 1)))]
19554 "TARGET_AVX512ER"
19555 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
19556 [(set_attr "length_immediate" "1")
19557 (set_attr "type" "sse")
19558 (set_attr "prefix" "evex")
19559 (set_attr "mode" "<MODE>")])
19560
19561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19562 ;;
19563 ;; XOP instructions
19564 ;;
19565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19566
19567 (define_code_iterator xop_plus [plus ss_plus])
19568
19569 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
19570 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
19571
19572 ;; XOP parallel integer multiply/add instructions.
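;; Informally, each of these computes dst[i] = src1[i] * src2[i] + src3[i]
;; per element; the ss_plus variants of the xop_plus iterator model the
;; saturating vpmacss* forms.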
19573
19574 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
19575 [(set (match_operand:VI24_128 0 "register_operand" "=x")
19576 (xop_plus:VI24_128
19577 (mult:VI24_128
19578 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
19579 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
19580 (match_operand:VI24_128 3 "register_operand" "x")))]
19581 "TARGET_XOP"
19582 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19583 [(set_attr "type" "ssemuladd")
19584 (set_attr "mode" "TI")])
19585
19586 (define_insn "xop_p<macs>dql"
19587 [(set (match_operand:V2DI 0 "register_operand" "=x")
19588 (xop_plus:V2DI
19589 (mult:V2DI
19590 (sign_extend:V2DI
19591 (vec_select:V2SI
19592 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
19593 (parallel [(const_int 0) (const_int 2)])))
19594 (sign_extend:V2DI
19595 (vec_select:V2SI
19596 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19597 (parallel [(const_int 0) (const_int 2)]))))
19598 (match_operand:V2DI 3 "register_operand" "x")))]
19599 "TARGET_XOP"
19600 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19601 [(set_attr "type" "ssemuladd")
19602 (set_attr "mode" "TI")])
19603
19604 (define_insn "xop_p<macs>dqh"
19605 [(set (match_operand:V2DI 0 "register_operand" "=x")
19606 (xop_plus:V2DI
19607 (mult:V2DI
19608 (sign_extend:V2DI
19609 (vec_select:V2SI
19610 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
19611 (parallel [(const_int 1) (const_int 3)])))
19612 (sign_extend:V2DI
19613 (vec_select:V2SI
19614 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19615 (parallel [(const_int 1) (const_int 3)]))))
19616 (match_operand:V2DI 3 "register_operand" "x")))]
19617 "TARGET_XOP"
19618 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19619 [(set_attr "type" "ssemuladd")
19620 (set_attr "mode" "TI")])
19621
19622 ;; XOP parallel integer multiply/add instructions for the intrinsics.
19623 (define_insn "xop_p<macs>wd"
19624 [(set (match_operand:V4SI 0 "register_operand" "=x")
19625 (xop_plus:V4SI
19626 (mult:V4SI
19627 (sign_extend:V4SI
19628 (vec_select:V4HI
19629 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
19630 (parallel [(const_int 1) (const_int 3)
19631 (const_int 5) (const_int 7)])))
19632 (sign_extend:V4SI
19633 (vec_select:V4HI
19634 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
19635 (parallel [(const_int 1) (const_int 3)
19636 (const_int 5) (const_int 7)]))))
19637 (match_operand:V4SI 3 "register_operand" "x")))]
19638 "TARGET_XOP"
19639 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19640 [(set_attr "type" "ssemuladd")
19641 (set_attr "mode" "TI")])
19642
19643 (define_insn "xop_p<madcs>wd"
19644 [(set (match_operand:V4SI 0 "register_operand" "=x")
19645 (xop_plus:V4SI
19646 (plus:V4SI
19647 (mult:V4SI
19648 (sign_extend:V4SI
19649 (vec_select:V4HI
19650 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
19651 (parallel [(const_int 0) (const_int 2)
19652 (const_int 4) (const_int 6)])))
19653 (sign_extend:V4SI
19654 (vec_select:V4HI
19655 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
19656 (parallel [(const_int 0) (const_int 2)
19657 (const_int 4) (const_int 6)]))))
19658 (mult:V4SI
19659 (sign_extend:V4SI
19660 (vec_select:V4HI
19661 (match_dup 1)
19662 (parallel [(const_int 1) (const_int 3)
19663 (const_int 5) (const_int 7)])))
19664 (sign_extend:V4SI
19665 (vec_select:V4HI
19666 (match_dup 2)
19667 (parallel [(const_int 1) (const_int 3)
19668 (const_int 5) (const_int 7)])))))
19669 (match_operand:V4SI 3 "register_operand" "x")))]
19670 "TARGET_XOP"
19671 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19672 [(set_attr "type" "ssemuladd")
19673 (set_attr "mode" "TI")])
19674
19675 ;; XOP parallel XMM conditional moves
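;; vpcmov is a bit-wise select: each result bit is taken from operand 1
;; where the corresponding bit of operand 3 is set and from operand 2
;; where it is clear, i.e. dst = (op1 & op3) | (op2 & ~op3).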
19676 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
19677 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
19678 (if_then_else:V_128_256
19679 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
19680 (match_operand:V_128_256 1 "register_operand" "x,x")
19681 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
19682 "TARGET_XOP"
19683 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19684 [(set_attr "type" "sse4arg")])
19685
19686 ;; XOP horizontal add/subtract instructions
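;; These widen and then add adjacent elements of a single source.  A rough
;; scalar model of vphadd<u>bw (illustration only, with ext () standing for
;; the sign or zero extension selected by <u>):
;;   for (i = 0; i < 8; i++)
;;     dst[i] = ext (src[2 * i]) + ext (src[2 * i + 1]);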
19687 (define_insn "xop_phadd<u>bw"
19688 [(set (match_operand:V8HI 0 "register_operand" "=x")
19689 (plus:V8HI
19690 (any_extend:V8HI
19691 (vec_select:V8QI
19692 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19693 (parallel [(const_int 0) (const_int 2)
19694 (const_int 4) (const_int 6)
19695 (const_int 8) (const_int 10)
19696 (const_int 12) (const_int 14)])))
19697 (any_extend:V8HI
19698 (vec_select:V8QI
19699 (match_dup 1)
19700 (parallel [(const_int 1) (const_int 3)
19701 (const_int 5) (const_int 7)
19702 (const_int 9) (const_int 11)
19703 (const_int 13) (const_int 15)])))))]
19704 "TARGET_XOP"
19705 "vphadd<u>bw\t{%1, %0|%0, %1}"
19706 [(set_attr "type" "sseiadd1")])
19707
19708 (define_insn "xop_phadd<u>bd"
19709 [(set (match_operand:V4SI 0 "register_operand" "=x")
19710 (plus:V4SI
19711 (plus:V4SI
19712 (any_extend:V4SI
19713 (vec_select:V4QI
19714 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19715 (parallel [(const_int 0) (const_int 4)
19716 (const_int 8) (const_int 12)])))
19717 (any_extend:V4SI
19718 (vec_select:V4QI
19719 (match_dup 1)
19720 (parallel [(const_int 1) (const_int 5)
19721 (const_int 9) (const_int 13)]))))
19722 (plus:V4SI
19723 (any_extend:V4SI
19724 (vec_select:V4QI
19725 (match_dup 1)
19726 (parallel [(const_int 2) (const_int 6)
19727 (const_int 10) (const_int 14)])))
19728 (any_extend:V4SI
19729 (vec_select:V4QI
19730 (match_dup 1)
19731 (parallel [(const_int 3) (const_int 7)
19732 (const_int 11) (const_int 15)]))))))]
19733 "TARGET_XOP"
19734 "vphadd<u>bd\t{%1, %0|%0, %1}"
19735 [(set_attr "type" "sseiadd1")])
19736
19737 (define_insn "xop_phadd<u>bq"
19738 [(set (match_operand:V2DI 0 "register_operand" "=x")
19739 (plus:V2DI
19740 (plus:V2DI
19741 (plus:V2DI
19742 (any_extend:V2DI
19743 (vec_select:V2QI
19744 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19745 (parallel [(const_int 0) (const_int 8)])))
19746 (any_extend:V2DI
19747 (vec_select:V2QI
19748 (match_dup 1)
19749 (parallel [(const_int 1) (const_int 9)]))))
19750 (plus:V2DI
19751 (any_extend:V2DI
19752 (vec_select:V2QI
19753 (match_dup 1)
19754 (parallel [(const_int 2) (const_int 10)])))
19755 (any_extend:V2DI
19756 (vec_select:V2QI
19757 (match_dup 1)
19758 (parallel [(const_int 3) (const_int 11)])))))
19759 (plus:V2DI
19760 (plus:V2DI
19761 (any_extend:V2DI
19762 (vec_select:V2QI
19763 (match_dup 1)
19764 (parallel [(const_int 4) (const_int 12)])))
19765 (any_extend:V2DI
19766 (vec_select:V2QI
19767 (match_dup 1)
19768 (parallel [(const_int 5) (const_int 13)]))))
19769 (plus:V2DI
19770 (any_extend:V2DI
19771 (vec_select:V2QI
19772 (match_dup 1)
19773 (parallel [(const_int 6) (const_int 14)])))
19774 (any_extend:V2DI
19775 (vec_select:V2QI
19776 (match_dup 1)
19777 (parallel [(const_int 7) (const_int 15)])))))))]
19778 "TARGET_XOP"
19779 "vphadd<u>bq\t{%1, %0|%0, %1}"
19780 [(set_attr "type" "sseiadd1")])
19781
19782 (define_insn "xop_phadd<u>wd"
19783 [(set (match_operand:V4SI 0 "register_operand" "=x")
19784 (plus:V4SI
19785 (any_extend:V4SI
19786 (vec_select:V4HI
19787 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19788 (parallel [(const_int 0) (const_int 2)
19789 (const_int 4) (const_int 6)])))
19790 (any_extend:V4SI
19791 (vec_select:V4HI
19792 (match_dup 1)
19793 (parallel [(const_int 1) (const_int 3)
19794 (const_int 5) (const_int 7)])))))]
19795 "TARGET_XOP"
19796 "vphadd<u>wd\t{%1, %0|%0, %1}"
19797 [(set_attr "type" "sseiadd1")])
19798
19799 (define_insn "xop_phadd<u>wq"
19800 [(set (match_operand:V2DI 0 "register_operand" "=x")
19801 (plus:V2DI
19802 (plus:V2DI
19803 (any_extend:V2DI
19804 (vec_select:V2HI
19805 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19806 (parallel [(const_int 0) (const_int 4)])))
19807 (any_extend:V2DI
19808 (vec_select:V2HI
19809 (match_dup 1)
19810 (parallel [(const_int 1) (const_int 5)]))))
19811 (plus:V2DI
19812 (any_extend:V2DI
19813 (vec_select:V2HI
19814 (match_dup 1)
19815 (parallel [(const_int 2) (const_int 6)])))
19816 (any_extend:V2DI
19817 (vec_select:V2HI
19818 (match_dup 1)
19819 (parallel [(const_int 3) (const_int 7)]))))))]
19820 "TARGET_XOP"
19821 "vphadd<u>wq\t{%1, %0|%0, %1}"
19822 [(set_attr "type" "sseiadd1")])
19823
19824 (define_insn "xop_phadd<u>dq"
19825 [(set (match_operand:V2DI 0 "register_operand" "=x")
19826 (plus:V2DI
19827 (any_extend:V2DI
19828 (vec_select:V2SI
19829 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19830 (parallel [(const_int 0) (const_int 2)])))
19831 (any_extend:V2DI
19832 (vec_select:V2SI
19833 (match_dup 1)
19834 (parallel [(const_int 1) (const_int 3)])))))]
19835 "TARGET_XOP"
19836 "vphadd<u>dq\t{%1, %0|%0, %1}"
19837 [(set_attr "type" "sseiadd1")])
19838
19839 (define_insn "xop_phsubbw"
19840 [(set (match_operand:V8HI 0 "register_operand" "=x")
19841 (minus:V8HI
19842 (sign_extend:V8HI
19843 (vec_select:V8QI
19844 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19845 (parallel [(const_int 0) (const_int 2)
19846 (const_int 4) (const_int 6)
19847 (const_int 8) (const_int 10)
19848 (const_int 12) (const_int 14)])))
19849 (sign_extend:V8HI
19850 (vec_select:V8QI
19851 (match_dup 1)
19852 (parallel [(const_int 1) (const_int 3)
19853 (const_int 5) (const_int 7)
19854 (const_int 9) (const_int 11)
19855 (const_int 13) (const_int 15)])))))]
19856 "TARGET_XOP"
19857 "vphsubbw\t{%1, %0|%0, %1}"
19858 [(set_attr "type" "sseiadd1")])
19859
19860 (define_insn "xop_phsubwd"
19861 [(set (match_operand:V4SI 0 "register_operand" "=x")
19862 (minus:V4SI
19863 (sign_extend:V4SI
19864 (vec_select:V4HI
19865 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19866 (parallel [(const_int 0) (const_int 2)
19867 (const_int 4) (const_int 6)])))
19868 (sign_extend:V4SI
19869 (vec_select:V4HI
19870 (match_dup 1)
19871 (parallel [(const_int 1) (const_int 3)
19872 (const_int 5) (const_int 7)])))))]
19873 "TARGET_XOP"
19874 "vphsubwd\t{%1, %0|%0, %1}"
19875 [(set_attr "type" "sseiadd1")])
19876
19877 (define_insn "xop_phsubdq"
19878 [(set (match_operand:V2DI 0 "register_operand" "=x")
19879 (minus:V2DI
19880 (sign_extend:V2DI
19881 (vec_select:V2SI
19882 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19883 (parallel [(const_int 0) (const_int 2)])))
19884 (sign_extend:V2DI
19885 (vec_select:V2SI
19886 (match_dup 1)
19887 (parallel [(const_int 1) (const_int 3)])))))]
19888 "TARGET_XOP"
19889 "vphsubdq\t{%1, %0|%0, %1}"
19890 [(set_attr "type" "sseiadd1")])
19891
19892 ;; XOP permute instructions
19893 (define_insn "xop_pperm"
19894 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19895 (unspec:V16QI
19896 [(match_operand:V16QI 1 "register_operand" "x,x")
19897 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19898 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
19899 UNSPEC_XOP_PERMUTE))]
19900 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19901 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19902 [(set_attr "type" "sse4arg")
19903 (set_attr "mode" "TI")])
19904
19905 ;; XOP pack instructions that truncate two vectors into one vector of narrower elements
19906 (define_insn "xop_pperm_pack_v2di_v4si"
19907 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
19908 (vec_concat:V4SI
19909 (truncate:V2SI
19910 (match_operand:V2DI 1 "register_operand" "x,x"))
19911 (truncate:V2SI
19912 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
19913 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19914 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19915 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19916 [(set_attr "type" "sse4arg")
19917 (set_attr "mode" "TI")])
19918
19919 (define_insn "xop_pperm_pack_v4si_v8hi"
19920 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
19921 (vec_concat:V8HI
19922 (truncate:V4HI
19923 (match_operand:V4SI 1 "register_operand" "x,x"))
19924 (truncate:V4HI
19925 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
19926 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19927 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19928 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19929 [(set_attr "type" "sse4arg")
19930 (set_attr "mode" "TI")])
19931
19932 (define_insn "xop_pperm_pack_v8hi_v16qi"
19933 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19934 (vec_concat:V16QI
19935 (truncate:V8QI
19936 (match_operand:V8HI 1 "register_operand" "x,x"))
19937 (truncate:V8QI
19938 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
19939 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19940 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19941 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19942 [(set_attr "type" "sse4arg")
19943 (set_attr "mode" "TI")])
19944
19945 ;; XOP packed rotate instructions
19946 (define_expand "rotl<mode>3"
19947 [(set (match_operand:VI_128 0 "register_operand")
19948 (rotate:VI_128
19949 (match_operand:VI_128 1 "nonimmediate_operand")
19950 (match_operand:SI 2 "general_operand")))]
19951 "TARGET_XOP"
19952 {
19953 /* If we were given a scalar, convert it to a parallel.  */
19954 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19955 {
19956 rtvec vs = rtvec_alloc (<ssescalarnum>);
19957 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19958 rtx reg = gen_reg_rtx (<MODE>mode);
19959 rtx op2 = operands[2];
19960 int i;
19961
19962 if (GET_MODE (op2) != <ssescalarmode>mode)
19963 {
19964 op2 = gen_reg_rtx (<ssescalarmode>mode);
19965 convert_move (op2, operands[2], false);
19966 }
19967
19968 for (i = 0; i < <ssescalarnum>; i++)
19969 RTVEC_ELT (vs, i) = op2;
19970
19971 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
19972 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
19973 DONE;
19974 }
19975 })
19976
19977 (define_expand "rotr<mode>3"
19978 [(set (match_operand:VI_128 0 "register_operand")
19979 (rotatert:VI_128
19980 (match_operand:VI_128 1 "nonimmediate_operand")
19981 (match_operand:SI 2 "general_operand")))]
19982 "TARGET_XOP"
19983 {
19984 /* If we were given a scalar, convert it to a parallel.  */
19985 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19986 {
19987 rtvec vs = rtvec_alloc (<ssescalarnum>);
19988 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19989 rtx neg = gen_reg_rtx (<MODE>mode);
19990 rtx reg = gen_reg_rtx (<MODE>mode);
19991 rtx op2 = operands[2];
19992 int i;
19993
19994 if (GET_MODE (op2) != <ssescalarmode>mode)
19995 {
19996 op2 = gen_reg_rtx (<ssescalarmode>mode);
19997 convert_move (op2, operands[2], false);
19998 }
19999
20000 for (i = 0; i < <ssescalarnum>; i++)
20001 RTVEC_ELT (vs, i) = op2;
20002
20003 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
20004 emit_insn (gen_neg<mode>2 (neg, reg));
20005 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
20006 DONE;
20007 }
20008 })
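;; A sketch of the non-constant case above, assuming V4SI with the count in a
;; GPR: the count is broadcast to all four elements, negated, and fed to
;; xop_vrotlv4si3, since a left rotate by -N is a right rotate by N.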
20009
20010 (define_insn "xop_rotl<mode>3"
20011 [(set (match_operand:VI_128 0 "register_operand" "=x")
20012 (rotate:VI_128
20013 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
20014 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
20015 "TARGET_XOP"
20016 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20017 [(set_attr "type" "sseishft")
20018 (set_attr "length_immediate" "1")
20019 (set_attr "mode" "TI")])
20020
20021 (define_insn "xop_rotr<mode>3"
20022 [(set (match_operand:VI_128 0 "register_operand" "=x")
20023 (rotatert:VI_128
20024 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
20025 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
20026 "TARGET_XOP"
20027 {
20028 operands[3]
20029 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
20030 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
20031 }
20032 [(set_attr "type" "sseishft")
20033 (set_attr "length_immediate" "1")
20034 (set_attr "mode" "TI")])
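;; For example, a constant right rotate of V4SI elements by 5 is emitted as
;; vprotd with the immediate 32 - 5 = 27.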
20035
20036 (define_expand "vrotr<mode>3"
20037 [(match_operand:VI_128 0 "register_operand")
20038 (match_operand:VI_128 1 "register_operand")
20039 (match_operand:VI_128 2 "register_operand")]
20040 "TARGET_XOP"
20041 {
20042 rtx reg = gen_reg_rtx (<MODE>mode);
20043 emit_insn (gen_neg<mode>2 (reg, operands[2]));
20044 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
20045 DONE;
20046 })
20047
20048 (define_expand "vrotl<mode>3"
20049 [(match_operand:VI_128 0 "register_operand")
20050 (match_operand:VI_128 1 "register_operand")
20051 (match_operand:VI_128 2 "register_operand")]
20052 "TARGET_XOP"
20053 {
20054 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
20055 DONE;
20056 })
20057
20058 (define_insn "xop_vrotl<mode>3"
20059 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
20060 (if_then_else:VI_128
20061 (ge:VI_128
20062 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
20063 (const_int 0))
20064 (rotate:VI_128
20065 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
20066 (match_dup 2))
20067 (rotatert:VI_128
20068 (match_dup 1)
20069 (neg:VI_128 (match_dup 2)))))]
20070 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20071 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20072 [(set_attr "type" "sseishft")
20073 (set_attr "prefix_data16" "0")
20074 (set_attr "prefix_extra" "2")
20075 (set_attr "mode" "TI")])
20076
20077 ;; XOP packed shift instructions.
20078 (define_expand "vlshr<mode>3"
20079 [(set (match_operand:VI12_128 0 "register_operand")
20080 (lshiftrt:VI12_128
20081 (match_operand:VI12_128 1 "register_operand")
20082 (match_operand:VI12_128 2 "nonimmediate_operand")))]
20083 "TARGET_XOP"
20084 {
20085 rtx neg = gen_reg_rtx (<MODE>mode);
20086 emit_insn (gen_neg<mode>2 (neg, operands[2]));
20087 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
20088 DONE;
20089 })
20090
20091 (define_expand "vlshr<mode>3"
20092 [(set (match_operand:VI48_128 0 "register_operand")
20093 (lshiftrt:VI48_128
20094 (match_operand:VI48_128 1 "register_operand")
20095 (match_operand:VI48_128 2 "nonimmediate_operand")))]
20096 "TARGET_AVX2 || TARGET_XOP"
20097 {
20098 if (!TARGET_AVX2)
20099 {
20100 rtx neg = gen_reg_rtx (<MODE>mode);
20101 emit_insn (gen_neg<mode>2 (neg, operands[2]));
20102 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
20103 DONE;
20104 }
20105 })
20106
20107 (define_expand "vlshr<mode>3"
20108 [(set (match_operand:VI48_512 0 "register_operand")
20109 (lshiftrt:VI48_512
20110 (match_operand:VI48_512 1 "register_operand")
20111 (match_operand:VI48_512 2 "nonimmediate_operand")))]
20112 "TARGET_AVX512F")
20113
20114 (define_expand "vlshr<mode>3"
20115 [(set (match_operand:VI48_256 0 "register_operand")
20116 (lshiftrt:VI48_256
20117 (match_operand:VI48_256 1 "register_operand")
20118 (match_operand:VI48_256 2 "nonimmediate_operand")))]
20119 "TARGET_AVX2")
20120
20121 (define_expand "vashrv8hi3<mask_name>"
20122 [(set (match_operand:V8HI 0 "register_operand")
20123 (ashiftrt:V8HI
20124 (match_operand:V8HI 1 "register_operand")
20125 (match_operand:V8HI 2 "nonimmediate_operand")))]
20126 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
20127 {
20128 if (TARGET_XOP)
20129 {
20130 rtx neg = gen_reg_rtx (V8HImode);
20131 emit_insn (gen_negv8hi2 (neg, operands[2]));
20132 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
20133 DONE;
20134 }
20135 })
20136
20137 (define_expand "vashrv16qi3"
20138 [(set (match_operand:V16QI 0 "register_operand")
20139 (ashiftrt:V16QI
20140 (match_operand:V16QI 1 "register_operand")
20141 (match_operand:V16QI 2 "nonimmediate_operand")))]
20142 "TARGET_XOP"
20143 {
20144 rtx neg = gen_reg_rtx (V16QImode);
20145 emit_insn (gen_negv16qi2 (neg, operands[2]));
20146 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
20147 DONE;
20148 })
20149
20150 (define_expand "vashrv2di3<mask_name>"
20151 [(set (match_operand:V2DI 0 "register_operand")
20152 (ashiftrt:V2DI
20153 (match_operand:V2DI 1 "register_operand")
20154 (match_operand:V2DI 2 "nonimmediate_operand")))]
20155 "TARGET_XOP || TARGET_AVX512VL"
20156 {
20157 if (TARGET_XOP)
20158 {
20159 rtx neg = gen_reg_rtx (V2DImode);
20160 emit_insn (gen_negv2di2 (neg, operands[2]));
20161 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
20162 DONE;
20163 }
20164 })
20165
20166 (define_expand "vashrv4si3"
20167 [(set (match_operand:V4SI 0 "register_operand")
20168 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
20169 (match_operand:V4SI 2 "nonimmediate_operand")))]
20170 "TARGET_AVX2 || TARGET_XOP"
20171 {
20172 if (!TARGET_AVX2)
20173 {
20174 rtx neg = gen_reg_rtx (V4SImode);
20175 emit_insn (gen_negv4si2 (neg, operands[2]));
20176 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
20177 DONE;
20178 }
20179 })
20180
20181 (define_expand "vashrv16si3"
20182 [(set (match_operand:V16SI 0 "register_operand")
20183 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
20184 (match_operand:V16SI 2 "nonimmediate_operand")))]
20185 "TARGET_AVX512F")
20186
20187 (define_expand "vashrv8si3"
20188 [(set (match_operand:V8SI 0 "register_operand")
20189 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
20190 (match_operand:V8SI 2 "nonimmediate_operand")))]
20191 "TARGET_AVX2")
20192
20193 (define_expand "vashl<mode>3"
20194 [(set (match_operand:VI12_128 0 "register_operand")
20195 (ashift:VI12_128
20196 (match_operand:VI12_128 1 "register_operand")
20197 (match_operand:VI12_128 2 "nonimmediate_operand")))]
20198 "TARGET_XOP"
20199 {
20200 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
20201 DONE;
20202 })
20203
20204 (define_expand "vashl<mode>3"
20205 [(set (match_operand:VI48_128 0 "register_operand")
20206 (ashift:VI48_128
20207 (match_operand:VI48_128 1 "register_operand")
20208 (match_operand:VI48_128 2 "nonimmediate_operand")))]
20209 "TARGET_AVX2 || TARGET_XOP"
20210 {
20211 if (!TARGET_AVX2)
20212 {
20213 operands[2] = force_reg (<MODE>mode, operands[2]);
20214 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
20215 DONE;
20216 }
20217 })
20218
20219 (define_expand "vashl<mode>3"
20220 [(set (match_operand:VI48_512 0 "register_operand")
20221 (ashift:VI48_512
20222 (match_operand:VI48_512 1 "register_operand")
20223 (match_operand:VI48_512 2 "nonimmediate_operand")))]
20224 "TARGET_AVX512F")
20225
20226 (define_expand "vashl<mode>3"
20227 [(set (match_operand:VI48_256 0 "register_operand")
20228 (ashift:VI48_256
20229 (match_operand:VI48_256 1 "register_operand")
20230 (match_operand:VI48_256 2 "nonimmediate_operand")))]
20231 "TARGET_AVX2")
20232
20233 (define_insn "xop_sha<mode>3"
20234 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
20235 (if_then_else:VI_128
20236 (ge:VI_128
20237 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
20238 (const_int 0))
20239 (ashift:VI_128
20240 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
20241 (match_dup 2))
20242 (ashiftrt:VI_128
20243 (match_dup 1)
20244 (neg:VI_128 (match_dup 2)))))]
20245 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20246 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20247 [(set_attr "type" "sseishft")
20248 (set_attr "prefix_data16" "0")
20249 (set_attr "prefix_extra" "2")
20250 (set_attr "mode" "TI")])
20251
20252 (define_insn "xop_shl<mode>3"
20253 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
20254 (if_then_else:VI_128
20255 (ge:VI_128
20256 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
20257 (const_int 0))
20258 (ashift:VI_128
20259 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
20260 (match_dup 2))
20261 (lshiftrt:VI_128
20262 (match_dup 1)
20263 (neg:VI_128 (match_dup 2)))))]
20264 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20265 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20266 [(set_attr "type" "sseishft")
20267 (set_attr "prefix_data16" "0")
20268 (set_attr "prefix_extra" "2")
20269 (set_attr "mode" "TI")])
20270
20271 (define_expand "<insn><mode>3"
20272 [(set (match_operand:VI1_AVX512 0 "register_operand")
20273 (any_shift:VI1_AVX512
20274 (match_operand:VI1_AVX512 1 "register_operand")
20275 (match_operand:SI 2 "nonmemory_operand")))]
20276 "TARGET_SSE2"
20277 {
20278 if (TARGET_XOP && <MODE>mode == V16QImode)
20279 {
20280 bool negate = false;
20281 rtx (*gen) (rtx, rtx, rtx);
20282 rtx tmp, par;
20283 int i;
20284
20285 if (<CODE> != ASHIFT)
20286 {
20287 if (CONST_INT_P (operands[2]))
20288 operands[2] = GEN_INT (-INTVAL (operands[2]));
20289 else
20290 negate = true;
20291 }
20292 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
20293 for (i = 0; i < 16; i++)
20294 XVECEXP (par, 0, i) = operands[2];
20295
20296 tmp = gen_reg_rtx (V16QImode);
20297 emit_insn (gen_vec_initv16qiqi (tmp, par));
20298
20299 if (negate)
20300 emit_insn (gen_negv16qi2 (tmp, tmp));
20301
20302 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
20303 emit_insn (gen (operands[0], operands[1], tmp));
20304 }
20305 else if (!ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0],
20306 operands[1], operands[2]))
20307 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
20308 DONE;
20309 })
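;; A sketch of the XOP V16QI path above: a logical right shift by the
;; constant 3 broadcasts -3 into a V16QI register and emits vpshlb, whose
;; negative per-element counts shift right; arithmetic right shifts use
;; vpshab instead.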
20310
20311 (define_expand "ashrv2di3"
20312 [(set (match_operand:V2DI 0 "register_operand")
20313 (ashiftrt:V2DI
20314 (match_operand:V2DI 1 "register_operand")
20315 (match_operand:DI 2 "nonmemory_operand")))]
20316 "TARGET_XOP || TARGET_AVX512VL"
20317 {
20318 if (!TARGET_AVX512VL)
20319 {
20320 rtx reg = gen_reg_rtx (V2DImode);
20321 rtx par;
20322 bool negate = false;
20323 int i;
20324
20325 if (CONST_INT_P (operands[2]))
20326 operands[2] = GEN_INT (-INTVAL (operands[2]));
20327 else
20328 negate = true;
20329
20330 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
20331 for (i = 0; i < 2; i++)
20332 XVECEXP (par, 0, i) = operands[2];
20333
20334 emit_insn (gen_vec_initv2didi (reg, par));
20335
20336 if (negate)
20337 emit_insn (gen_negv2di2 (reg, reg));
20338
20339 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
20340 DONE;
20341 }
20342 })
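;; For instance, on XOP an arithmetic right shift of V2DI by the constant 3
;; builds the vector {-3, -3} and emits vpshaq, whose negative per-element
;; counts shift right.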
20343
20344 ;; XOP FRCZ support
20345 (define_insn "xop_frcz<mode>2"
20346 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
20347 (unspec:FMAMODE
20348 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
20349 UNSPEC_FRCZ))]
20350 "TARGET_XOP"
20351 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
20352 [(set_attr "type" "ssecvt1")
20353 (set_attr "mode" "<MODE>")])
20354
20355 (define_expand "xop_vmfrcz<mode>2"
20356 [(set (match_operand:VF_128 0 "register_operand")
20357 (vec_merge:VF_128
20358 (unspec:VF_128
20359 [(match_operand:VF_128 1 "nonimmediate_operand")]
20360 UNSPEC_FRCZ)
20361 (match_dup 2)
20362 (const_int 1)))]
20363 "TARGET_XOP"
20364 "operands[2] = CONST0_RTX (<MODE>mode);")
20365
20366 (define_insn "*xop_vmfrcz<mode>2"
20367 [(set (match_operand:VF_128 0 "register_operand" "=x")
20368 (vec_merge:VF_128
20369 (unspec:VF_128
20370 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
20371 UNSPEC_FRCZ)
20372 (match_operand:VF_128 2 "const0_operand")
20373 (const_int 1)))]
20374 "TARGET_XOP"
20375 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
20376 [(set_attr "type" "ssecvt1")
20377 (set_attr "mode" "<MODE>")])
20378
20379 (define_insn "xop_maskcmp<mode>3"
20380 [(set (match_operand:VI_128 0 "register_operand" "=x")
20381 (match_operator:VI_128 1 "ix86_comparison_int_operator"
20382 [(match_operand:VI_128 2 "register_operand" "x")
20383 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
20384 "TARGET_XOP"
20385 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20386 [(set_attr "type" "sse4arg")
20387 (set_attr "prefix_data16" "0")
20388 (set_attr "prefix_rep" "0")
20389 (set_attr "prefix_extra" "2")
20390 (set_attr "length_immediate" "1")
20391 (set_attr "mode" "TI")])
20392
20393 (define_insn "xop_maskcmp_uns<mode>3"
20394 [(set (match_operand:VI_128 0 "register_operand" "=x")
20395 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
20396 [(match_operand:VI_128 2 "register_operand" "x")
20397 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
20398 "TARGET_XOP"
20399 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20400 [(set_attr "type" "ssecmp")
20401 (set_attr "prefix_data16" "0")
20402 (set_attr "prefix_rep" "0")
20403 (set_attr "prefix_extra" "2")
20404 (set_attr "length_immediate" "1")
20405 (set_attr "mode" "TI")])
20406
20407 ;; Version of pcom*u* called from the intrinsics that keeps pcomequ* and
20408 ;; pcomneu* from being converted to the signed forms, in case somebody needs
20409 ;; the exact instruction generated for the intrinsic.
20410 (define_insn "xop_maskcmp_uns2<mode>3"
20411 [(set (match_operand:VI_128 0 "register_operand" "=x")
20412 (unspec:VI_128
20413 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
20414 [(match_operand:VI_128 2 "register_operand" "x")
20415 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
20416 UNSPEC_XOP_UNSIGNED_CMP))]
20417 "TARGET_XOP"
20418 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20419 [(set_attr "type" "ssecmp")
20420 (set_attr "prefix_data16" "0")
20421 (set_attr "prefix_extra" "2")
20422 (set_attr "length_immediate" "1")
20423 (set_attr "mode" "TI")])
20424
20425 ;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
20426 ;; included here for completeness.
20427 (define_insn "xop_pcom_tf<mode>3"
20428 [(set (match_operand:VI_128 0 "register_operand" "=x")
20429 (unspec:VI_128
20430 [(match_operand:VI_128 1 "register_operand" "x")
20431 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
20432 (match_operand:SI 3 "const_int_operand" "n")]
20433 UNSPEC_XOP_TRUEFALSE))]
20434 "TARGET_XOP"
20435 {
20436 return ((INTVAL (operands[3]) != 0)
20437 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20438 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
20439 }
20440 [(set_attr "type" "ssecmp")
20441 (set_attr "prefix_data16" "0")
20442 (set_attr "prefix_extra" "2")
20443 (set_attr "length_immediate" "1")
20444 (set_attr "mode" "TI")])
20445
20446 (define_insn "xop_vpermil2<mode>3"
20447 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
20448 (unspec:VF_128_256
20449 [(match_operand:VF_128_256 1 "register_operand" "x,x")
20450 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
20451 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
20452 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
20453 UNSPEC_VPERMIL2))]
20454 "TARGET_XOP"
20455 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
20456 [(set_attr "type" "sse4arg")
20457 (set_attr "length_immediate" "1")
20458 (set_attr "mode" "<MODE>")])
20459
20460 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20461
20462 (define_insn "aesenc"
20463 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20464 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20465 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20466 UNSPEC_AESENC))]
20467 "TARGET_AES"
20468 "@
20469 aesenc\t{%2, %0|%0, %2}
20470 vaesenc\t{%2, %1, %0|%0, %1, %2}"
20471 [(set_attr "isa" "noavx,avx")
20472 (set_attr "type" "sselog1")
20473 (set_attr "prefix_extra" "1")
20474 (set_attr "prefix" "orig,vex")
20475 (set_attr "btver2_decode" "double,double")
20476 (set_attr "mode" "TI")])
20477
20478 (define_insn "aesenclast"
20479 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20480 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20481 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20482 UNSPEC_AESENCLAST))]
20483 "TARGET_AES"
20484 "@
20485 aesenclast\t{%2, %0|%0, %2}
20486 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
20487 [(set_attr "isa" "noavx,avx")
20488 (set_attr "type" "sselog1")
20489 (set_attr "prefix_extra" "1")
20490 (set_attr "prefix" "orig,vex")
20491 (set_attr "btver2_decode" "double,double")
20492 (set_attr "mode" "TI")])
20493
20494 (define_insn "aesdec"
20495 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20496 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20497 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20498 UNSPEC_AESDEC))]
20499 "TARGET_AES"
20500 "@
20501 aesdec\t{%2, %0|%0, %2}
20502 vaesdec\t{%2, %1, %0|%0, %1, %2}"
20503 [(set_attr "isa" "noavx,avx")
20504 (set_attr "type" "sselog1")
20505 (set_attr "prefix_extra" "1")
20506 (set_attr "prefix" "orig,vex")
20507 (set_attr "btver2_decode" "double,double")
20508 (set_attr "mode" "TI")])
20509
20510 (define_insn "aesdeclast"
20511 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20512 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20513 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20514 UNSPEC_AESDECLAST))]
20515 "TARGET_AES"
20516 "@
20517 aesdeclast\t{%2, %0|%0, %2}
20518 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
20519 [(set_attr "isa" "noavx,avx")
20520 (set_attr "type" "sselog1")
20521 (set_attr "prefix_extra" "1")
20522 (set_attr "prefix" "orig,vex")
20523 (set_attr "btver2_decode" "double,double")
20524 (set_attr "mode" "TI")])
20525
20526 (define_insn "aesimc"
20527 [(set (match_operand:V2DI 0 "register_operand" "=x")
20528 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
20529 UNSPEC_AESIMC))]
20530 "TARGET_AES"
20531 "%vaesimc\t{%1, %0|%0, %1}"
20532 [(set_attr "type" "sselog1")
20533 (set_attr "prefix_extra" "1")
20534 (set_attr "prefix" "maybe_vex")
20535 (set_attr "mode" "TI")])
20536
20537 (define_insn "aeskeygenassist"
20538 [(set (match_operand:V2DI 0 "register_operand" "=x")
20539 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
20540 (match_operand:SI 2 "const_0_to_255_operand" "n")]
20541 UNSPEC_AESKEYGENASSIST))]
20542 "TARGET_AES"
20543 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
20544 [(set_attr "type" "sselog1")
20545 (set_attr "prefix_extra" "1")
20546 (set_attr "length_immediate" "1")
20547 (set_attr "prefix" "maybe_vex")
20548 (set_attr "mode" "TI")])
20549
20550 (define_insn "pclmulqdq"
20551 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20552 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20553 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
20554 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
20555 UNSPEC_PCLMUL))]
20556 "TARGET_PCLMUL"
20557 "@
20558 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
20559 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20560 [(set_attr "isa" "noavx,avx")
20561 (set_attr "type" "sselog1")
20562 (set_attr "prefix_extra" "1")
20563 (set_attr "length_immediate" "1")
20564 (set_attr "prefix" "orig,vex")
20565 (set_attr "mode" "TI")])
20566
20567 (define_expand "avx_vzeroall"
20568 [(match_par_dup 0 [(const_int 0)])]
20569 "TARGET_AVX"
20570 {
20571 int nregs = TARGET_64BIT ? 16 : 8;
20572 int regno;
20573
20574 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
20575
20576 XVECEXP (operands[0], 0, 0)
20577 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
20578 UNSPECV_VZEROALL);
20579
20580 for (regno = 0; regno < nregs; regno++)
20581 XVECEXP (operands[0], 0, regno + 1)
20582 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
20583 CONST0_RTX (V8SImode));
20584 })
20585
20586 (define_insn "*avx_vzeroall"
20587 [(match_parallel 0 "vzeroall_operation"
20588 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
20589 "TARGET_AVX"
20590 "vzeroall"
20591 [(set_attr "type" "sse")
20592 (set_attr "modrm" "0")
20593 (set_attr "memory" "none")
20594 (set_attr "prefix" "vex")
20595 (set_attr "btver2_decode" "vector")
20596 (set_attr "mode" "OI")])
20597
20598 ;; Clear the upper 128 bits of AVX registers; this is equivalent to a NOP
20599 ;; if the upper 128 bits are unused.  Initially we expand the instructions
20600 ;; as though they had no effect on the SSE registers, but later add SETs and
20601 ;; CLOBBERs to the PARALLEL to model the real effect.
20602 (define_expand "avx_vzeroupper"
20603 [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
20604 "TARGET_AVX")
20605
20606 (define_insn "*avx_vzeroupper"
20607 [(match_parallel 0 "vzeroupper_pattern"
20608 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
20609 "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
20610 "vzeroupper"
20611 [(set_attr "type" "sse")
20612 (set_attr "modrm" "0")
20613 (set_attr "memory" "none")
20614 (set_attr "prefix" "vex")
20615 (set_attr "btver2_decode" "vector")
20616 (set_attr "mode" "OI")])
20617
20618 (define_insn_and_split "*avx_vzeroupper_1"
20619 [(match_parallel 0 "vzeroupper_pattern"
20620 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
20621 "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
20622 "#"
20623 "&& epilogue_completed"
20624 [(match_dup 0)]
20625 {
20626 /* For IPA-RA purposes, make it clear the instruction clobbers
20627 even XMM registers not mentioned explicitly in the pattern. */
20628 unsigned int nregs = TARGET_64BIT ? 16 : 8;
20629 unsigned int npats = XVECLEN (operands[0], 0);
20630 rtvec vec = rtvec_alloc (nregs + 1);
20631 RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
20632 for (unsigned int i = 0, j = 1; i < nregs; ++i)
20633 {
20634 unsigned int regno = GET_SSE_REGNO (i);
20635 if (j < npats
20636 && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
20637 {
20638 RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
20639 j++;
20640 }
20641 else
20642 {
20643 rtx reg = gen_rtx_REG (V2DImode, regno);
20644 RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
20645 }
20646 }
20647 operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
20648 }
20649 [(set_attr "type" "sse")
20650 (set_attr "modrm" "0")
20651 (set_attr "memory" "none")
20652 (set_attr "prefix" "vex")
20653 (set_attr "btver2_decode" "vector")
20654 (set_attr "mode" "OI")])
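;; After the split above the PARALLEL roughly becomes (sketch):
;;   [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
;;    <the SETs carried over for XMM registers live across the insn>
;;    (clobber (reg:V2DI xmmN))   ;; for every other XMM register
;;    ...]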
20655
20656 (define_mode_attr pbroadcast_evex_isa
20657 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
20658 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
20659 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
20660 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
20661
20662 (define_insn "avx2_pbroadcast<mode>"
20663 [(set (match_operand:VI 0 "register_operand" "=x,v")
20664 (vec_duplicate:VI
20665 (vec_select:<ssescalarmode>
20666 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
20667 (parallel [(const_int 0)]))))]
20668 "TARGET_AVX2"
20669 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
20670 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
20671 (set_attr "type" "ssemov")
20672 (set_attr "prefix_extra" "1")
20673 (set_attr "prefix" "vex,evex")
20674 (set_attr "mode" "<sseinsnmode>")])
20675
20676 (define_insn "avx2_pbroadcast<mode>_1"
20677 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
20678 (vec_duplicate:VI_256
20679 (vec_select:<ssescalarmode>
20680 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
20681 (parallel [(const_int 0)]))))]
20682 "TARGET_AVX2"
20683 "@
20684 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
20685 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
20686 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
20687 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
20688 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
20689 (set_attr "type" "ssemov")
20690 (set_attr "prefix_extra" "1")
20691 (set_attr "prefix" "vex")
20692 (set_attr "mode" "<sseinsnmode>")])
20693
20694 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
20695 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
20696 (unspec:VI48F_256_512
20697 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
20698 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20699 UNSPEC_VPERMVAR))]
20700 "TARGET_AVX2 && <mask_mode512bit_condition>"
20701 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20702 [(set_attr "type" "sselog")
20703 (set_attr "prefix" "<mask_prefix2>")
20704 (set_attr "mode" "<sseinsnmode>")])
20705
20706 (define_insn "<avx512>_permvar<mode><mask_name>"
20707 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
20708 (unspec:VI1_AVX512VL
20709 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
20710 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20711 UNSPEC_VPERMVAR))]
20712 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
20713 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20714 [(set_attr "type" "sselog")
20715 (set_attr "prefix" "<mask_prefix2>")
20716 (set_attr "mode" "<sseinsnmode>")])
20717
20718 (define_insn "<avx512>_permvar<mode><mask_name>"
20719 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20720 (unspec:VI2_AVX512VL
20721 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
20722 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20723 UNSPEC_VPERMVAR))]
20724 "TARGET_AVX512BW && <mask_mode512bit_condition>"
20725 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20726 [(set_attr "type" "sselog")
20727 (set_attr "prefix" "<mask_prefix2>")
20728 (set_attr "mode" "<sseinsnmode>")])
20729
20730 ;; Recognize a broadcast expressed as a vec_select, as produced by
20731 ;; builtin_vec_perm.  If the input happens to be in memory, use vbroadcast;
20732 ;; otherwise use vpermilp (and, in the case of 256-bit modes, vperm2f128).
20733 (define_insn "*avx_vperm_broadcast_v4sf"
20734 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
20735 (vec_select:V4SF
20736 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
20737 (match_parallel 2 "avx_vbroadcast_operand"
20738 [(match_operand 3 "const_int_operand" "C,n,n")])))]
20739 "TARGET_AVX"
20740 {
20741 int elt = INTVAL (operands[3]);
20742 switch (which_alternative)
20743 {
20744 case 0:
20745 case 1:
20746 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
20747 return "vbroadcastss\t{%1, %0|%0, %k1}";
20748 case 2:
20749 operands[2] = GEN_INT (elt * 0x55);
20750 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
20751 default:
20752 gcc_unreachable ();
20753 }
20754 }
20755 [(set_attr "type" "ssemov,ssemov,sselog1")
20756 (set_attr "prefix_extra" "1")
20757 (set_attr "length_immediate" "0,0,1")
20758 (set_attr "prefix" "maybe_evex")
20759 (set_attr "mode" "SF,SF,V4SF")])
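;; For example, broadcasting element 2: the memory alternatives offset the
;; address by 2 * 4 = 8 bytes and use vbroadcastss on that scalar slot, while
;; the register alternative uses vpermilps with the immediate 2 * 0x55 = 0xaa.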
20760
20761 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
20762 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
20763 (vec_select:VF_256
20764 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
20765 (match_parallel 2 "avx_vbroadcast_operand"
20766 [(match_operand 3 "const_int_operand" "C,n,n")])))]
20767 "TARGET_AVX
20768 && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
20769 "#"
20770 "&& reload_completed"
20771 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
20772 {
20773 rtx op0 = operands[0], op1 = operands[1];
20774 int elt = INTVAL (operands[3]);
20775
20776 if (REG_P (op1))
20777 {
20778 int mask;
20779
20780 if (TARGET_AVX2 && elt == 0)
20781 {
20782 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
20783 op1)));
20784 DONE;
20785 }
20786
20787 /* Shuffle the element we care about into all elements of its 128-bit lane.
20788 The other lane gets shuffled too, but we don't care.  */
20789 if (<MODE>mode == V4DFmode)
20790 mask = (elt & 1 ? 15 : 0);
20791 else
20792 mask = (elt & 3) * 0x55;
20793 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
20794
20795 /* Shuffle the lane we care about into both lanes of the dest. */
20796 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
20797 if (EXT_REX_SSE_REG_P (op0))
20798 {
20799 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
20800 or VSHUFF32X4.  */
20801 gcc_assert (<MODE>mode == V8SFmode);
20802 if ((mask & 1) == 0)
20803 emit_insn (gen_avx2_vec_dupv8sf (op0,
20804 gen_lowpart (V4SFmode, op0)));
20805 else
20806 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
20807 GEN_INT (4), GEN_INT (5),
20808 GEN_INT (6), GEN_INT (7),
20809 GEN_INT (12), GEN_INT (13),
20810 GEN_INT (14), GEN_INT (15)));
20811 DONE;
20812 }
20813
20814 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
20815 DONE;
20816 }
20817
20818 operands[1] = adjust_address (op1, <ssescalarmode>mode,
20819 elt * GET_MODE_SIZE (<ssescalarmode>mode));
20820 })
20821
20822 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20823 [(set (match_operand:VF2 0 "register_operand")
20824 (vec_select:VF2
20825 (match_operand:VF2 1 "nonimmediate_operand")
20826 (match_operand:SI 2 "const_0_to_255_operand")))]
20827 "TARGET_AVX && <mask_mode512bit_condition>"
20828 {
20829 int mask = INTVAL (operands[2]);
20830 rtx perm[<ssescalarnum>];
20831
20832 int i;
20833 for (i = 0; i < <ssescalarnum>; i = i + 2)
20834 {
20835 perm[i] = GEN_INT (((mask >> i) & 1) + i);
20836 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
20837 }
20838
20839 operands[2]
20840 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
20841 })
20842
20843 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20844 [(set (match_operand:VF1 0 "register_operand")
20845 (vec_select:VF1
20846 (match_operand:VF1 1 "nonimmediate_operand")
20847 (match_operand:SI 2 "const_0_to_255_operand")))]
20848 "TARGET_AVX && <mask_mode512bit_condition>"
20849 {
20850 int mask = INTVAL (operands[2]);
20851 rtx perm[<ssescalarnum>];
20852
20853 int i;
20854 for (i = 0; i < <ssescalarnum>; i = i + 4)
20855 {
20856 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
20857 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
20858 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
20859 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
20860 }
20861
20862 operands[2]
20863 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
20864 })
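;; As a worked example, for V8SF with selector 0xb1 (fields 10 11 00 01) the
;; expander above builds the selection parallel {1, 0, 3, 2, 5, 4, 7, 6}.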
20865
20866 ;; This pattern needs to come before the avx2_perm*/avx512f_perm*
20867 ;; patterns, as they have the same RTL representation (vpermilp*
20868 ;; being a subset of what vpermp* can do), but vpermilp* has lower
20869 ;; latency since it never crosses lanes.
20870 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
20871 [(set (match_operand:VF 0 "register_operand" "=v")
20872 (vec_select:VF
20873 (match_operand:VF 1 "nonimmediate_operand" "vm")
20874 (match_parallel 2 ""
20875 [(match_operand 3 "const_int_operand")])))]
20876 "TARGET_AVX && <mask_mode512bit_condition>
20877 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
20878 {
20879 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
20880 operands[2] = GEN_INT (mask);
20881 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
20882 }
20883 [(set_attr "type" "sselog")
20884 (set_attr "prefix_extra" "1")
20885 (set_attr "length_immediate" "1")
20886 (set_attr "prefix" "<mask_prefix>")
20887 (set_attr "mode" "<sseinsnmode>")])
20888
20889 (define_expand "avx2_perm<mode>"
20890 [(match_operand:VI8F_256 0 "register_operand")
20891 (match_operand:VI8F_256 1 "nonimmediate_operand")
20892 (match_operand:SI 2 "const_0_to_255_operand")]
20893 "TARGET_AVX2"
20894 {
20895 int mask = INTVAL (operands[2]);
20896 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
20897 GEN_INT ((mask >> 0) & 3),
20898 GEN_INT ((mask >> 2) & 3),
20899 GEN_INT ((mask >> 4) & 3),
20900 GEN_INT ((mask >> 6) & 3)));
20901 DONE;
20902 })
20903
20904 (define_expand "avx512vl_perm<mode>_mask"
20905 [(match_operand:VI8F_256 0 "register_operand")
20906 (match_operand:VI8F_256 1 "nonimmediate_operand")
20907 (match_operand:SI 2 "const_0_to_255_operand")
20908 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
20909 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20910 "TARGET_AVX512VL"
20911 {
20912 int mask = INTVAL (operands[2]);
20913 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
20914 GEN_INT ((mask >> 0) & 3),
20915 GEN_INT ((mask >> 2) & 3),
20916 GEN_INT ((mask >> 4) & 3),
20917 GEN_INT ((mask >> 6) & 3),
20918 operands[3], operands[4]));
20919 DONE;
20920 })
20921
20922 (define_insn "avx2_perm<mode>_1<mask_name>"
20923 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20924 (vec_select:VI8F_256
20925 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
20926 (parallel [(match_operand 2 "const_0_to_3_operand")
20927 (match_operand 3 "const_0_to_3_operand")
20928 (match_operand 4 "const_0_to_3_operand")
20929 (match_operand 5 "const_0_to_3_operand")])))]
20930 "TARGET_AVX2 && <mask_mode512bit_condition>"
20931 {
20932 int mask = 0;
20933 mask |= INTVAL (operands[2]) << 0;
20934 mask |= INTVAL (operands[3]) << 2;
20935 mask |= INTVAL (operands[4]) << 4;
20936 mask |= INTVAL (operands[5]) << 6;
20937 operands[2] = GEN_INT (mask);
20938 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
20939 }
20940 [(set_attr "type" "sselog")
20941 (set_attr "prefix" "<mask_prefix2>")
20942 (set_attr "mode" "<sseinsnmode>")])
20943
20944 (define_expand "avx512f_perm<mode>"
20945 [(match_operand:V8FI 0 "register_operand")
20946 (match_operand:V8FI 1 "nonimmediate_operand")
20947 (match_operand:SI 2 "const_0_to_255_operand")]
20948 "TARGET_AVX512F"
20949 {
20950 int mask = INTVAL (operands[2]);
20951 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
20952 GEN_INT ((mask >> 0) & 3),
20953 GEN_INT ((mask >> 2) & 3),
20954 GEN_INT ((mask >> 4) & 3),
20955 GEN_INT ((mask >> 6) & 3),
20956 GEN_INT (((mask >> 0) & 3) + 4),
20957 GEN_INT (((mask >> 2) & 3) + 4),
20958 GEN_INT (((mask >> 4) & 3) + 4),
20959 GEN_INT (((mask >> 6) & 3) + 4)));
20960 DONE;
20961 })
20962
20963 (define_expand "avx512f_perm<mode>_mask"
20964 [(match_operand:V8FI 0 "register_operand")
20965 (match_operand:V8FI 1 "nonimmediate_operand")
20966 (match_operand:SI 2 "const_0_to_255_operand")
20967 (match_operand:V8FI 3 "nonimm_or_0_operand")
20968 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20969 "TARGET_AVX512F"
20970 {
20971 int mask = INTVAL (operands[2]);
20972 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
20973 GEN_INT ((mask >> 0) & 3),
20974 GEN_INT ((mask >> 2) & 3),
20975 GEN_INT ((mask >> 4) & 3),
20976 GEN_INT ((mask >> 6) & 3),
20977 GEN_INT (((mask >> 0) & 3) + 4),
20978 GEN_INT (((mask >> 2) & 3) + 4),
20979 GEN_INT (((mask >> 4) & 3) + 4),
20980 GEN_INT (((mask >> 6) & 3) + 4),
20981 operands[3], operands[4]));
20982 DONE;
20983 })
20984
20985 (define_insn "avx512f_perm<mode>_1<mask_name>"
20986 [(set (match_operand:V8FI 0 "register_operand" "=v")
20987 (vec_select:V8FI
20988 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
20989 (parallel [(match_operand 2 "const_0_to_3_operand")
20990 (match_operand 3 "const_0_to_3_operand")
20991 (match_operand 4 "const_0_to_3_operand")
20992 (match_operand 5 "const_0_to_3_operand")
20993 (match_operand 6 "const_4_to_7_operand")
20994 (match_operand 7 "const_4_to_7_operand")
20995 (match_operand 8 "const_4_to_7_operand")
20996 (match_operand 9 "const_4_to_7_operand")])))]
20997 "TARGET_AVX512F && <mask_mode512bit_condition>
20998 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
20999 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
21000 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
21001 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
21002 {
21003 int mask = 0;
21004 mask |= INTVAL (operands[2]) << 0;
21005 mask |= INTVAL (operands[3]) << 2;
21006 mask |= INTVAL (operands[4]) << 4;
21007 mask |= INTVAL (operands[5]) << 6;
21008 operands[2] = GEN_INT (mask);
21009 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
21010 }
21011 [(set_attr "type" "sselog")
21012 (set_attr "prefix" "<mask_prefix2>")
21013 (set_attr "mode" "<sseinsnmode>")])
21014
21015 (define_insn "avx2_permv2ti"
21016 [(set (match_operand:V4DI 0 "register_operand" "=x")
21017 (unspec:V4DI
21018 [(match_operand:V4DI 1 "register_operand" "x")
21019 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
21020 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21021 UNSPEC_VPERMTI))]
21022 "TARGET_AVX2"
21023 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21024 [(set_attr "type" "sselog")
21025 (set_attr "prefix" "vex")
21026 (set_attr "mode" "OI")])
21027
21028 (define_insn "avx2_vec_dupv4df"
21029 [(set (match_operand:V4DF 0 "register_operand" "=v")
21030 (vec_duplicate:V4DF
21031 (vec_select:DF
21032 (match_operand:V2DF 1 "register_operand" "v")
21033 (parallel [(const_int 0)]))))]
21034 "TARGET_AVX2"
21035 "vbroadcastsd\t{%1, %0|%0, %1}"
21036 [(set_attr "type" "sselog1")
21037 (set_attr "prefix" "maybe_evex")
21038 (set_attr "mode" "V4DF")])
21039
21040 (define_insn "<avx512>_vec_dup<mode>_1"
21041 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
21042 (vec_duplicate:VI_AVX512BW
21043 (vec_select:<ssescalarmode>
21044 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
21045 (parallel [(const_int 0)]))))]
21046 "TARGET_AVX512F"
21047 "@
21048 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
21049 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
21050 [(set_attr "type" "ssemov")
21051 (set_attr "prefix" "evex")
21052 (set_attr "mode" "<sseinsnmode>")])
21053
21054 (define_insn "<avx512>_vec_dup<mode><mask_name>"
21055 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
21056 (vec_duplicate:V48_AVX512VL
21057 (vec_select:<ssescalarmode>
21058 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
21059 (parallel [(const_int 0)]))))]
21060 "TARGET_AVX512F"
21061 {
21062 /* There is no DF broadcast (in AVX-512*) to a 128-bit register.
21063 Mimic it with the integer variant.  */
21064 if (<MODE>mode == V2DFmode)
21065 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
21066
21067 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
21068 }
21069 [(set_attr "type" "ssemov")
21070 (set_attr "prefix" "evex")
21071 (set_attr "mode" "<sseinsnmode>")])
21072
21073 (define_insn "<avx512>_vec_dup<mode><mask_name>"
21074 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
21075 (vec_duplicate:VI12_AVX512VL
21076 (vec_select:<ssescalarmode>
21077 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
21078 (parallel [(const_int 0)]))))]
21079 "TARGET_AVX512BW"
21080 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
21081 [(set_attr "type" "ssemov")
21082 (set_attr "prefix" "evex")
21083 (set_attr "mode" "<sseinsnmode>")])
21084
21085 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
21086 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
21087 (vec_duplicate:V16FI
21088 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
21089 "TARGET_AVX512F"
21090 "@
21091 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
21092 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21093 [(set_attr "type" "ssemov")
21094 (set_attr "prefix" "evex")
21095 (set_attr "mode" "<sseinsnmode>")])
21096
21097 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
21098 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
21099 (vec_duplicate:V8FI
21100 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
21101 "TARGET_AVX512F"
21102 "@
21103 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
21104 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21105 [(set_attr "type" "ssemov")
21106 (set_attr "prefix" "evex")
21107 (set_attr "mode" "<sseinsnmode>")])
21108
21109 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
21110 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
21111 (vec_duplicate:VI12_AVX512VL
21112 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
21113 "TARGET_AVX512BW"
21114 "@
21115 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
21116 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
21117 [(set_attr "type" "ssemov")
21118 (set_attr "prefix" "evex")
21119 (set_attr "mode" "<sseinsnmode>")])
21120
21121 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
21122 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
21123 (vec_duplicate:V48_AVX512VL
21124 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
21125 "TARGET_AVX512F"
21126 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21127 [(set_attr "type" "ssemov")
21128 (set_attr "prefix" "evex")
21129 (set_attr "mode" "<sseinsnmode>")
21130 (set (attr "enabled")
21131 (if_then_else (eq_attr "alternative" "1")
21132 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
21133 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
21134 (const_int 1)))])
21135
21136 (define_insn "vec_dupv4sf"
21137 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
21138 (vec_duplicate:V4SF
21139 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
21140 "TARGET_SSE"
21141 "@
21142 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
21143 vbroadcastss\t{%1, %0|%0, %1}
21144 shufps\t{$0, %0, %0|%0, %0, 0}"
21145 [(set_attr "isa" "avx,avx,noavx")
21146 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
21147 (set_attr "length_immediate" "1,0,1")
21148 (set_attr "prefix_extra" "0,1,*")
21149 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
21150 (set_attr "mode" "V4SF")])
21151
21152 (define_insn "*vec_dupv4si"
21153 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
21154 (vec_duplicate:V4SI
21155 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
21156 "TARGET_SSE"
21157 "@
21158 %vpshufd\t{$0, %1, %0|%0, %1, 0}
21159 vbroadcastss\t{%1, %0|%0, %1}
21160 shufps\t{$0, %0, %0|%0, %0, 0}"
21161 [(set_attr "isa" "sse2,avx,noavx")
21162 (set_attr "type" "sselog1,ssemov,sselog1")
21163 (set_attr "length_immediate" "1,0,1")
21164 (set_attr "prefix_extra" "0,1,*")
21165 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
21166 (set_attr "mode" "TI,V4SF,V4SF")])
21167
21168 (define_insn "*vec_dupv2di"
21169 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
21170 (vec_duplicate:V2DI
21171 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
21172 "TARGET_SSE"
21173 "@
21174 punpcklqdq\t%0, %0
21175 vpunpcklqdq\t{%d1, %0|%0, %d1}
21176 %vmovddup\t{%1, %0|%0, %1}
21177 movlhps\t%0, %0"
21178 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
21179 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
21180 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
21181 (set_attr "mode" "TI,TI,DF,V4SF")])
21182
21183 (define_insn "avx2_vbroadcasti128_<mode>"
21184 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
21185 (vec_concat:VI_256
21186 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
21187 (match_dup 1)))]
21188 "TARGET_AVX2"
21189 "@
21190 vbroadcasti128\t{%1, %0|%0, %1}
21191 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
21192 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
21193 [(set_attr "isa" "*,avx512dq,avx512vl")
21194 (set_attr "type" "ssemov")
21195 (set_attr "prefix_extra" "1")
21196 (set_attr "prefix" "vex,evex,evex")
21197 (set_attr "mode" "OI")])
21198
21199 ;; Modes handled by AVX vec_dup patterns.
21200 (define_mode_iterator AVX_VEC_DUP_MODE
21201 [V8SI V8SF V4DI V4DF])
21202 (define_mode_attr vecdupssescalarmodesuffix
21203 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
21204 ;; Modes handled by AVX2 vec_dup patterns.
21205 (define_mode_iterator AVX2_VEC_DUP_MODE
21206 [V32QI V16QI V16HI V8HI V8SI V4SI])
21207
21208 (define_insn "*vec_dup<mode>"
21209 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
21210 (vec_duplicate:AVX2_VEC_DUP_MODE
21211 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
21212 "TARGET_AVX2"
21213 "@
21214 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
21215 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
21216 #"
21217 [(set_attr "isa" "*,*,noavx512vl")
21218 (set_attr "type" "ssemov")
21219 (set_attr "prefix_extra" "1")
21220 (set_attr "prefix" "maybe_evex")
21221 (set_attr "mode" "<sseinsnmode>")
21222 (set (attr "preferred_for_speed")
21223 (cond [(eq_attr "alternative" "2")
21224 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
21225 ]
21226 (symbol_ref "true")))])
21227
21228 (define_insn "vec_dup<mode>"
21229 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
21230 (vec_duplicate:AVX_VEC_DUP_MODE
21231 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
21232 "TARGET_AVX"
21233 "@
21234 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
21235 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
21236 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
21237 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
21238 #"
21239 [(set_attr "type" "ssemov")
21240 (set_attr "prefix_extra" "1")
21241 (set_attr "prefix" "maybe_evex")
21242 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
21243 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
21244
21245 (define_split
21246 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
21247 (vec_duplicate:AVX2_VEC_DUP_MODE
21248 (match_operand:<ssescalarmode> 1 "register_operand")))]
21249 "TARGET_AVX2
21250 /* Disable this splitter if the avx512vl_vec_dup_gprv*[qhs]i insn is
21251 available, because then we can broadcast from GPRs directly.
21252 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw;
21253 for the V*SI mode it requires just -mavx512vl.  */
21254 && !(TARGET_AVX512VL
21255 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
21256 && reload_completed && GENERAL_REG_P (operands[1])"
21257 [(const_int 0)]
21258 {
21259 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
21260 CONST0_RTX (V4SImode),
21261 gen_lowpart (SImode, operands[1])));
21262 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
21263 gen_lowpart (<ssexmmmode>mode,
21264 operands[0])));
21265 DONE;
21266 })
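;; A sketch of the split above for V8SI with the scalar in a GPR: the value is
;; first moved into the low element of a zeroed XMM register (vmovd) and then
;; broadcast with vpbroadcastd to the full YMM destination.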
21267
21268 (define_split
21269 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
21270 (vec_duplicate:AVX_VEC_DUP_MODE
21271 (match_operand:<ssescalarmode> 1 "register_operand")))]
21272 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
21273 [(set (match_dup 2)
21274 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
21275 (set (match_dup 0)
21276 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
21277 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
21278
21279 (define_insn "avx_vbroadcastf128_<mode>"
21280 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
21281 (vec_concat:V_256
21282 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
21283 (match_dup 1)))]
21284 "TARGET_AVX"
21285 "@
21286 vbroadcast<i128>\t{%1, %0|%0, %1}
21287 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
21288 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
21289 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
21290 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
21291 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
21292 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
21293 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
21294 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
21295 (set_attr "prefix_extra" "1")
21296 (set_attr "length_immediate" "0,1,1,0,1,0,1")
21297 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
21298 (set_attr "mode" "<sseinsnmode>")])
21299
21300 ;; For broadcast[i|f]32x2.  Note that there is no v4sf variant here, only v4si (vbroadcastf32x2 has no 128-bit form).
21301 (define_mode_iterator VI4F_BRCST32x2
21302 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
21303 V16SF (V8SF "TARGET_AVX512VL")])
21304
21305 (define_mode_attr 64x2mode
21306 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
21307
21308 (define_mode_attr 32x2mode
21309 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
21310 (V8SF "V2SF") (V4SI "V2SI")])
21311
21312 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
21313 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
21314 (vec_duplicate:VI4F_BRCST32x2
21315 (vec_select:<32x2mode>
21316 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
21317 (parallel [(const_int 0) (const_int 1)]))))]
21318 "TARGET_AVX512DQ"
21319 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
21320 [(set_attr "type" "ssemov")
21321 (set_attr "prefix_extra" "1")
21322 (set_attr "prefix" "evex")
21323 (set_attr "mode" "<sseinsnmode>")])
21324
21325 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
21326 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
21327 (vec_duplicate:VI4F_256
21328 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
21329 "TARGET_AVX512VL"
21330 "@
21331 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
21332 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21333 [(set_attr "type" "ssemov")
21334 (set_attr "prefix_extra" "1")
21335 (set_attr "prefix" "evex")
21336 (set_attr "mode" "<sseinsnmode>")])
21337
21338 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
21339 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
21340 (vec_duplicate:V16FI
21341 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
21342 "TARGET_AVX512DQ"
21343 "@
21344 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
21345 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21346 [(set_attr "type" "ssemov")
21347 (set_attr "prefix_extra" "1")
21348 (set_attr "prefix" "evex")
21349 (set_attr "mode" "<sseinsnmode>")])
21350
21351 ;; For broadcast[i|f]64x2
21352 (define_mode_iterator VI8F_BRCST64x2
21353 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
21354
21355 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
21356 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
21357 (vec_duplicate:VI8F_BRCST64x2
21358 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
21359 "TARGET_AVX512DQ"
21360 "@
21361 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
21362 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21363 [(set_attr "type" "ssemov")
21364 (set_attr "prefix_extra" "1")
21365 (set_attr "prefix" "evex")
21366 (set_attr "mode" "<sseinsnmode>")])
21367
21368 (define_insn "avx512cd_maskb_vec_dup<mode>"
21369 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21370 (vec_duplicate:VI8_AVX512VL
21371 (zero_extend:DI
21372 (match_operand:QI 1 "register_operand" "k"))))]
21373 "TARGET_AVX512CD"
21374 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
21375 [(set_attr "type" "mskmov")
21376 (set_attr "prefix" "evex")
21377 (set_attr "mode" "XI")])
21378
21379 (define_insn "avx512cd_maskw_vec_dup<mode>"
21380 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
21381 (vec_duplicate:VI4_AVX512VL
21382 (zero_extend:SI
21383 (match_operand:HI 1 "register_operand" "k"))))]
21384 "TARGET_AVX512CD"
21385 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
21386 [(set_attr "type" "mskmov")
21387 (set_attr "prefix" "evex")
21388 (set_attr "mode" "XI")])
21389
21390 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
21391 [(set (match_operand:VF 0 "register_operand" "=v")
21392 (unspec:VF
21393 [(match_operand:VF 1 "register_operand" "v")
21394 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
21395 UNSPEC_VPERMIL))]
21396 "TARGET_AVX && <mask_mode512bit_condition>"
21397 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21398 [(set_attr "type" "sselog")
21399 (set_attr "prefix_extra" "1")
21400 (set_attr "btver2_decode" "vector")
21401 (set_attr "prefix" "<mask_prefix>")
21402 (set_attr "mode" "<sseinsnmode>")])
21403
21404 (define_mode_iterator VPERMI2
21405 [V16SI V16SF V8DI V8DF
21406 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
21407 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
21408 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
21409 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
21410 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
21411 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
21412 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
21413 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
21414
21415 (define_mode_iterator VPERMI2I
21416 [V16SI V8DI
21417 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
21418 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
21419 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
21420 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
21421 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
21422 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
21423
21424 (define_expand "<avx512>_vpermi2var<mode>3_mask"
21425 [(set (match_operand:VPERMI2 0 "register_operand")
21426 (vec_merge:VPERMI2
21427 (unspec:VPERMI2
21428 [(match_operand:<sseintvecmode> 2 "register_operand")
21429 (match_operand:VPERMI2 1 "register_operand")
21430 (match_operand:VPERMI2 3 "nonimmediate_operand")]
21431 UNSPEC_VPERMT2)
21432 (match_dup 5)
21433 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
21434 "TARGET_AVX512F"
21435 {
21436 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
21437 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
21438 })
21439
21440 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
21441 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
21442 (vec_merge:VPERMI2I
21443 (unspec:VPERMI2I
21444 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
21445 (match_operand:VPERMI2I 1 "register_operand" "v")
21446 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
21447 UNSPEC_VPERMT2)
21448 (match_dup 2)
21449 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21450 "TARGET_AVX512F"
21451 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21452 [(set_attr "type" "sselog")
21453 (set_attr "prefix" "evex")
21454 (set_attr "mode" "<sseinsnmode>")])
21455
21456 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
21457 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21458 (vec_merge:VF_AVX512VL
21459 (unspec:VF_AVX512VL
21460 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
21461 (match_operand:VF_AVX512VL 1 "register_operand" "v")
21462 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
21463 UNSPEC_VPERMT2)
21464 (subreg:VF_AVX512VL (match_dup 2) 0)
21465 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21466 "TARGET_AVX512F"
21467 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21468 [(set_attr "type" "sselog")
21469 (set_attr "prefix" "evex")
21470 (set_attr "mode" "<sseinsnmode>")])
21471
21472 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
21473 [(match_operand:VPERMI2 0 "register_operand")
21474 (match_operand:<sseintvecmode> 1 "register_operand")
21475 (match_operand:VPERMI2 2 "register_operand")
21476 (match_operand:VPERMI2 3 "nonimmediate_operand")
21477 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21478 "TARGET_AVX512F"
21479 {
21480 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
21481 operands[0], operands[1], operands[2], operands[3],
21482 CONST0_RTX (<MODE>mode), operands[4]));
21483 DONE;
21484 })
21485
21486 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
21487 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
21488 (unspec:VPERMI2
21489 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
21490 (match_operand:VPERMI2 2 "register_operand" "0,v")
21491 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
21492 UNSPEC_VPERMT2))]
21493 "TARGET_AVX512F"
21494 "@
21495 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
21496 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
21497 [(set_attr "type" "sselog")
21498 (set_attr "prefix" "evex")
21499 (set_attr "mode" "<sseinsnmode>")])
21500
21501 (define_insn "<avx512>_vpermt2var<mode>3_mask"
21502 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
21503 (vec_merge:VPERMI2
21504 (unspec:VPERMI2
21505 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
21506 (match_operand:VPERMI2 2 "register_operand" "0")
21507 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
21508 UNSPEC_VPERMT2)
21509 (match_dup 2)
21510 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21511 "TARGET_AVX512F"
21512 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21513 [(set_attr "type" "sselog")
21514 (set_attr "prefix" "evex")
21515 (set_attr "mode" "<sseinsnmode>")])
21516
21517 (define_expand "avx_vperm2f128<mode>3"
21518 [(set (match_operand:AVX256MODE2P 0 "register_operand")
21519 (unspec:AVX256MODE2P
21520 [(match_operand:AVX256MODE2P 1 "register_operand")
21521 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
21522 (match_operand:SI 3 "const_0_to_255_operand")]
21523 UNSPEC_VPERMIL2F128))]
21524 "TARGET_AVX"
21525 {
21526 int mask = INTVAL (operands[3]);
21527 if ((mask & 0x88) == 0)
21528 {
21529 rtx perm[<ssescalarnum>], t1, t2;
21530 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
21531
21532 base = (mask & 3) * nelt2;
21533 for (i = 0; i < nelt2; ++i)
21534 perm[i] = GEN_INT (base + i);
21535
21536 base = ((mask >> 4) & 3) * nelt2;
21537 for (i = 0; i < nelt2; ++i)
21538 perm[i + nelt2] = GEN_INT (base + i);
21539
21540 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
21541 operands[1], operands[2]);
21542 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
21543 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
21544 t2 = gen_rtx_SET (operands[0], t2);
21545 emit_insn (t2);
21546 DONE;
21547 }
21548 })
21549
21550 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
21551 ;; means that in order to represent this properly in rtl we'd have to
21552 ;; nest *another* vec_concat with a zero operand and do the select from
21553 ;; a 4x wide vector. That doesn't seem very nice.
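;; For reference (an informal summary of the vperm2f128 imm8 encoding): bits
;; [1:0] pick which 128-bit lane of the concatenated sources supplies the low
;; half of the destination, bits [5:4] do the same for the high half, and
;; setting bit 3 or bit 7 zeroes the corresponding half instead.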
21554 (define_insn "*avx_vperm2f128<mode>_full"
21555 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
21556 (unspec:AVX256MODE2P
21557 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
21558 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
21559 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21560 UNSPEC_VPERMIL2F128))]
21561 "TARGET_AVX"
21562 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21563 [(set_attr "type" "sselog")
21564 (set_attr "prefix_extra" "1")
21565 (set_attr "length_immediate" "1")
21566 (set_attr "prefix" "vex")
21567 (set_attr "mode" "<sseinsnmode>")])
21568
21569 (define_insn "*avx_vperm2f128<mode>_nozero"
21570 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
21571 (vec_select:AVX256MODE2P
21572 (vec_concat:<ssedoublevecmode>
21573 (match_operand:AVX256MODE2P 1 "register_operand" "x")
21574 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
21575 (match_parallel 3 ""
21576 [(match_operand 4 "const_int_operand")])))]
21577 "TARGET_AVX
21578 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
21579 {
21580 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
21581 if (mask == 0x12)
21582 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
21583 if (mask == 0x20)
21584 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
21585 operands[3] = GEN_INT (mask);
21586 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
21587 }
21588 [(set_attr "type" "sselog")
21589 (set_attr "prefix_extra" "1")
21590 (set_attr "length_immediate" "1")
21591 (set_attr "prefix" "vex")
21592 (set_attr "mode" "<sseinsnmode>")])
21593
21594 (define_insn "*ssse3_palignr<mode>_perm"
21595 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
21596 (vec_select:V_128
21597 (match_operand:V_128 1 "register_operand" "0,x,v")
21598 (match_parallel 2 "palignr_operand"
21599 [(match_operand 3 "const_int_operand" "n,n,n")])))]
21600 "TARGET_SSSE3"
21601 {
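  /* operands[3] is the first selected element of the permutation; palignr
     takes a byte count, so scale it by the element size.  */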
21602 operands[2] = (GEN_INT (INTVAL (operands[3])
21603 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
21604
21605 switch (which_alternative)
21606 {
21607 case 0:
21608 return "palignr\t{%2, %1, %0|%0, %1, %2}";
21609 case 1:
21610 case 2:
21611 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
21612 default:
21613 gcc_unreachable ();
21614 }
21615 }
21616 [(set_attr "isa" "noavx,avx,avx512bw")
21617 (set_attr "type" "sseishft")
21618 (set_attr "atom_unit" "sishuf")
21619 (set_attr "prefix_data16" "1,*,*")
21620 (set_attr "prefix_extra" "1")
21621 (set_attr "length_immediate" "1")
21622 (set_attr "prefix" "orig,vex,evex")])
21623
21624 (define_expand "avx512vl_vinsert<mode>"
21625 [(match_operand:VI48F_256 0 "register_operand")
21626 (match_operand:VI48F_256 1 "register_operand")
21627 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
21628 (match_operand:SI 3 "const_0_to_1_operand")
21629 (match_operand:VI48F_256 4 "register_operand")
21630 (match_operand:<avx512fmaskmode> 5 "register_operand")]
21631 "TARGET_AVX512VL"
21632 {
21633 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21634
21635 switch (INTVAL (operands[3]))
21636 {
21637 case 0:
21638 insn = gen_vec_set_lo_<mode>_mask;
21639 break;
21640 case 1:
21641 insn = gen_vec_set_hi_<mode>_mask;
21642 break;
21643 default:
21644 gcc_unreachable ();
21645 }
21646
21647 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
21648 operands[5]));
21649 DONE;
21650 })
21651
21652 (define_expand "avx_vinsertf128<mode>"
21653 [(match_operand:V_256 0 "register_operand")
21654 (match_operand:V_256 1 "register_operand")
21655 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
21656 (match_operand:SI 3 "const_0_to_1_operand")]
21657 "TARGET_AVX"
21658 {
21659 rtx (*insn)(rtx, rtx, rtx);
21660
21661 switch (INTVAL (operands[3]))
21662 {
21663 case 0:
21664 insn = gen_vec_set_lo_<mode>;
21665 break;
21666 case 1:
21667 insn = gen_vec_set_hi_<mode>;
21668 break;
21669 default:
21670 gcc_unreachable ();
21671 }
21672
21673 emit_insn (insn (operands[0], operands[1], operands[2]));
21674 DONE;
21675 })
21676
21677 (define_insn "vec_set_lo_<mode><mask_name>"
21678 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
21679 (vec_concat:VI8F_256
21680 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
21681 (vec_select:<ssehalfvecmode>
21682 (match_operand:VI8F_256 1 "register_operand" "v")
21683 (parallel [(const_int 2) (const_int 3)]))))]
21684 "TARGET_AVX && <mask_avx512dq_condition>"
21685 {
21686 if (TARGET_AVX512DQ)
21687 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21688 else if (TARGET_AVX512VL)
21689 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21690 else
21691 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21692 }
21693 [(set_attr "type" "sselog")
21694 (set_attr "prefix_extra" "1")
21695 (set_attr "length_immediate" "1")
21696 (set_attr "prefix" "vex")
21697 (set_attr "mode" "<sseinsnmode>")])
21698
21699 (define_insn "vec_set_hi_<mode><mask_name>"
21700 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
21701 (vec_concat:VI8F_256
21702 (vec_select:<ssehalfvecmode>
21703 (match_operand:VI8F_256 1 "register_operand" "v")
21704 (parallel [(const_int 0) (const_int 1)]))
21705 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21706 "TARGET_AVX && <mask_avx512dq_condition>"
21707 {
21708 if (TARGET_AVX512DQ)
21709 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21710 else if (TARGET_AVX512VL)
21711 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21712 else
21713 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21714 }
21715 [(set_attr "type" "sselog")
21716 (set_attr "prefix_extra" "1")
21717 (set_attr "length_immediate" "1")
21718 (set_attr "prefix" "vex")
21719 (set_attr "mode" "<sseinsnmode>")])
21720
21721 (define_insn "vec_set_lo_<mode><mask_name>"
21722 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21723 (vec_concat:VI4F_256
21724 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
21725 (vec_select:<ssehalfvecmode>
21726 (match_operand:VI4F_256 1 "register_operand" "v")
21727 (parallel [(const_int 4) (const_int 5)
21728 (const_int 6) (const_int 7)]))))]
21729 "TARGET_AVX"
21730 {
21731 if (TARGET_AVX512VL)
21732 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21733 else
21734 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21735 }
21736 [(set_attr "type" "sselog")
21737 (set_attr "prefix_extra" "1")
21738 (set_attr "length_immediate" "1")
21739 (set_attr "prefix" "vex")
21740 (set_attr "mode" "<sseinsnmode>")])
21741
21742 (define_insn "vec_set_hi_<mode><mask_name>"
21743 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21744 (vec_concat:VI4F_256
21745 (vec_select:<ssehalfvecmode>
21746 (match_operand:VI4F_256 1 "register_operand" "v")
21747 (parallel [(const_int 0) (const_int 1)
21748 (const_int 2) (const_int 3)]))
21749 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21750 "TARGET_AVX"
21751 {
21752 if (TARGET_AVX512VL)
21753 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21754 else
21755 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21756 }
21757 [(set_attr "type" "sselog")
21758 (set_attr "prefix_extra" "1")
21759 (set_attr "length_immediate" "1")
21760 (set_attr "prefix" "vex")
21761 (set_attr "mode" "<sseinsnmode>")])
21762
21763 (define_insn "vec_set_lo_v16hi"
21764 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21765 (vec_concat:V16HI
21766 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
21767 (vec_select:V8HI
21768 (match_operand:V16HI 1 "register_operand" "x,v")
21769 (parallel [(const_int 8) (const_int 9)
21770 (const_int 10) (const_int 11)
21771 (const_int 12) (const_int 13)
21772 (const_int 14) (const_int 15)]))))]
21773 "TARGET_AVX"
21774 "@
21775 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21776 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21777 [(set_attr "type" "sselog")
21778 (set_attr "prefix_extra" "1")
21779 (set_attr "length_immediate" "1")
21780 (set_attr "prefix" "vex,evex")
21781 (set_attr "mode" "OI")])
21782
21783 (define_insn "vec_set_hi_v16hi"
21784 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21785 (vec_concat:V16HI
21786 (vec_select:V8HI
21787 (match_operand:V16HI 1 "register_operand" "x,v")
21788 (parallel [(const_int 0) (const_int 1)
21789 (const_int 2) (const_int 3)
21790 (const_int 4) (const_int 5)
21791 (const_int 6) (const_int 7)]))
21792 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
21793 "TARGET_AVX"
21794 "@
21795 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21796 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21797 [(set_attr "type" "sselog")
21798 (set_attr "prefix_extra" "1")
21799 (set_attr "length_immediate" "1")
21800 (set_attr "prefix" "vex,evex")
21801 (set_attr "mode" "OI")])
21802
21803 (define_insn "vec_set_lo_v32qi"
21804 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21805 (vec_concat:V32QI
21806 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
21807 (vec_select:V16QI
21808 (match_operand:V32QI 1 "register_operand" "x,v")
21809 (parallel [(const_int 16) (const_int 17)
21810 (const_int 18) (const_int 19)
21811 (const_int 20) (const_int 21)
21812 (const_int 22) (const_int 23)
21813 (const_int 24) (const_int 25)
21814 (const_int 26) (const_int 27)
21815 (const_int 28) (const_int 29)
21816 (const_int 30) (const_int 31)]))))]
21817 "TARGET_AVX"
21818 "@
21819 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21820 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21821 [(set_attr "type" "sselog")
21822 (set_attr "prefix_extra" "1")
21823 (set_attr "length_immediate" "1")
21824 (set_attr "prefix" "vex,evex")
21825 (set_attr "mode" "OI")])
21826
21827 (define_insn "vec_set_hi_v32qi"
21828 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21829 (vec_concat:V32QI
21830 (vec_select:V16QI
21831 (match_operand:V32QI 1 "register_operand" "x,v")
21832 (parallel [(const_int 0) (const_int 1)
21833 (const_int 2) (const_int 3)
21834 (const_int 4) (const_int 5)
21835 (const_int 6) (const_int 7)
21836 (const_int 8) (const_int 9)
21837 (const_int 10) (const_int 11)
21838 (const_int 12) (const_int 13)
21839 (const_int 14) (const_int 15)]))
21840 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
21841 "TARGET_AVX"
21842 "@
21843 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21844 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21845 [(set_attr "type" "sselog")
21846 (set_attr "prefix_extra" "1")
21847 (set_attr "length_immediate" "1")
21848 (set_attr "prefix" "vex,evex")
21849 (set_attr "mode" "OI")])
21850
21851 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
21852 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
21853 (unspec:V48_AVX2
21854 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
21855 (match_operand:V48_AVX2 1 "memory_operand" "m")]
21856 UNSPEC_MASKMOV))]
21857 "TARGET_AVX"
21858 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
21859 [(set_attr "type" "sselog1")
21860 (set_attr "prefix_extra" "1")
21861 (set_attr "prefix" "vex")
21862 (set_attr "btver2_decode" "vector")
21863 (set_attr "mode" "<sseinsnmode>")])
21864
21865 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
21866 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
21867 (unspec:V48_AVX2
21868 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
21869 (match_operand:V48_AVX2 2 "register_operand" "x")
21870 (match_dup 0)]
21871 UNSPEC_MASKMOV))]
21872 "TARGET_AVX"
21873 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21874 [(set_attr "type" "sselog1")
21875 (set_attr "prefix_extra" "1")
21876 (set_attr "prefix" "vex")
21877 (set_attr "btver2_decode" "vector")
21878 (set_attr "mode" "<sseinsnmode>")])
21879
21880 (define_expand "maskload<mode><sseintvecmodelower>"
21881 [(set (match_operand:V48_AVX2 0 "register_operand")
21882 (unspec:V48_AVX2
21883 [(match_operand:<sseintvecmode> 2 "register_operand")
21884 (match_operand:V48_AVX2 1 "memory_operand")]
21885 UNSPEC_MASKMOV))]
21886 "TARGET_AVX")
21887
21888 (define_expand "maskload<mode><avx512fmaskmodelower>"
21889 [(set (match_operand:V48_AVX512VL 0 "register_operand")
21890 (vec_merge:V48_AVX512VL
21891 (match_operand:V48_AVX512VL 1 "memory_operand")
21892 (match_dup 0)
21893 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21894 "TARGET_AVX512F")
21895
21896 (define_expand "maskload<mode><avx512fmaskmodelower>"
21897 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
21898 (vec_merge:VI12_AVX512VL
21899 (match_operand:VI12_AVX512VL 1 "memory_operand")
21900 (match_dup 0)
21901 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21902 "TARGET_AVX512BW")
21903
21904 (define_expand "maskstore<mode><sseintvecmodelower>"
21905 [(set (match_operand:V48_AVX2 0 "memory_operand")
21906 (unspec:V48_AVX2
21907 [(match_operand:<sseintvecmode> 2 "register_operand")
21908 (match_operand:V48_AVX2 1 "register_operand")
21909 (match_dup 0)]
21910 UNSPEC_MASKMOV))]
21911 "TARGET_AVX")
21912
21913 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21914 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
21915 (vec_merge:V48_AVX512VL
21916 (match_operand:V48_AVX512VL 1 "register_operand")
21917 (match_dup 0)
21918 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21919 "TARGET_AVX512F")
21920
21921 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21922 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
21923 (vec_merge:VI12_AVX512VL
21924 (match_operand:VI12_AVX512VL 1 "register_operand")
21925 (match_dup 0)
21926 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21927 "TARGET_AVX512BW")
21928
21929 (define_expand "cbranch<mode>4"
21930 [(set (reg:CC FLAGS_REG)
21931 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
21932 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
21933 (set (pc) (if_then_else
21934 (match_operator 0 "bt_comparison_operator"
21935 [(reg:CC FLAGS_REG) (const_int 0)])
21936 (label_ref (match_operand 3))
21937 (pc)))]
21938 "TARGET_SSE4_1"
21939 {
21940 ix86_expand_branch (GET_CODE (operands[0]),
21941 operands[1], operands[2], operands[3]);
21942 DONE;
21943 })
21944
21945
21946 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
21947 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
21948 (vec_concat:AVX256MODE2P
21949 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
21950 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
21951 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21952 "#"
21953 "&& reload_completed"
21954 [(set (match_dup 0) (match_dup 1))]
21955 {
21956 if (REG_P (operands[0]))
21957 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
21958 else
21959 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21960 <ssehalfvecmode>mode);
21961 })
21962
21963 ;; Modes handled by vec_init expanders.
21964 (define_mode_iterator VEC_INIT_MODE
21965 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21966 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21967 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21968 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
21969 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21970 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
21971 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
21972
21973 ;; Likewise, but for initialization from half-sized vectors.
21974 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
21975 (define_mode_iterator VEC_INIT_HALF_MODE
21976 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21977 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21978 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21979 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
21980 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21981 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
21982 (V4TI "TARGET_AVX512F")])
21983
21984 (define_expand "vec_init<mode><ssescalarmodelower>"
21985 [(match_operand:VEC_INIT_MODE 0 "register_operand")
21986 (match_operand 1)]
21987 "TARGET_SSE"
21988 {
21989 ix86_expand_vector_init (false, operands[0], operands[1]);
21990 DONE;
21991 })
21992
21993 (define_expand "vec_init<mode><ssehalfvecmodelower>"
21994 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
21995 (match_operand 1)]
21996 "TARGET_SSE"
21997 {
21998 ix86_expand_vector_init (false, operands[0], operands[1]);
21999 DONE;
22000 })
22001
22002 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
22003 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
22004 (ashiftrt:VI48_AVX512F_AVX512VL
22005 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
22006 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
22007 "TARGET_AVX2 && <mask_mode512bit_condition>"
22008 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22009 [(set_attr "type" "sseishft")
22010 (set_attr "prefix" "maybe_evex")
22011 (set_attr "mode" "<sseinsnmode>")])
22012
22013 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
22014 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22015 (ashiftrt:VI2_AVX512VL
22016 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
22017 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
22018 "TARGET_AVX512BW"
22019 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22020 [(set_attr "type" "sseishft")
22021 (set_attr "prefix" "maybe_evex")
22022 (set_attr "mode" "<sseinsnmode>")])
22023
22024 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
22025 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
22026 (any_lshift:VI48_AVX512F
22027 (match_operand:VI48_AVX512F 1 "register_operand" "v")
22028 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
22029 "TARGET_AVX2 && <mask_mode512bit_condition>"
22030 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22031 [(set_attr "type" "sseishft")
22032 (set_attr "prefix" "maybe_evex")
22033 (set_attr "mode" "<sseinsnmode>")])
22034
22035 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
22036 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22037 (any_lshift:VI2_AVX512VL
22038 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
22039 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
22040 "TARGET_AVX512BW"
22041 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22042 [(set_attr "type" "sseishft")
22043 (set_attr "prefix" "maybe_evex")
22044 (set_attr "mode" "<sseinsnmode>")])
22045
22046 (define_insn "avx_vec_concat<mode>"
22047 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
22048 (vec_concat:V_256_512
22049 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
22050 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
22051 "TARGET_AVX
22052 && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
22053 || !MEM_P (operands[1]))"
22054 {
22055 switch (which_alternative)
22056 {
22057 case 0:
22058 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
22059 case 1:
22060 if (<MODE_SIZE> == 64)
22061 {
22062 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
22063 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
22064 else
22065 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
22066 }
22067 else
22068 {
22069 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22070 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
22071 else
22072 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
22073 }
22074 case 2:
22075 case 3:
22076 switch (get_attr_mode (insn))
22077 {
22078 case MODE_V16SF:
22079 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22080 return "vmovups\t{%1, %t0|%t0, %1}";
22081 else
22082 return "vmovaps\t{%1, %t0|%t0, %1}";
22083 case MODE_V8DF:
22084 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22085 return "vmovupd\t{%1, %t0|%t0, %1}";
22086 else
22087 return "vmovapd\t{%1, %t0|%t0, %1}";
22088 case MODE_V8SF:
22089 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22090 return "vmovups\t{%1, %x0|%x0, %1}";
22091 else
22092 return "vmovaps\t{%1, %x0|%x0, %1}";
22093 case MODE_V4DF:
22094 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22095 return "vmovupd\t{%1, %x0|%x0, %1}";
22096 else
22097 return "vmovapd\t{%1, %x0|%x0, %1}";
22098 case MODE_XI:
22099 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22100 {
22101 if (which_alternative == 2)
22102 return "vmovdqu\t{%1, %t0|%t0, %1}";
22103 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22104 return "vmovdqu64\t{%1, %t0|%t0, %1}";
22105 else
22106 return "vmovdqu32\t{%1, %t0|%t0, %1}";
22107 }
22108 else
22109 {
22110 if (which_alternative == 2)
22111 return "vmovdqa\t{%1, %t0|%t0, %1}";
22112 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22113 return "vmovdqa64\t{%1, %t0|%t0, %1}";
22114 else
22115 return "vmovdqa32\t{%1, %t0|%t0, %1}";
22116 }
22117 case MODE_OI:
22118 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22119 {
22120 if (which_alternative == 2)
22121 return "vmovdqu\t{%1, %x0|%x0, %1}";
22122 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22123 return "vmovdqu64\t{%1, %x0|%x0, %1}";
22124 else
22125 return "vmovdqu32\t{%1, %x0|%x0, %1}";
22126 }
22127 else
22128 {
22129 if (which_alternative == 2)
22130 return "vmovdqa\t{%1, %x0|%x0, %1}";
22131 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22132 return "vmovdqa64\t{%1, %x0|%x0, %1}";
22133 else
22134 return "vmovdqa32\t{%1, %x0|%x0, %1}";
22135 }
22136 default:
22137 gcc_unreachable ();
22138 }
22139 default:
22140 gcc_unreachable ();
22141 }
22142 }
22143 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
22144 (set_attr "prefix_extra" "1,1,*,*")
22145 (set_attr "length_immediate" "1,1,*,*")
22146 (set_attr "prefix" "maybe_evex")
22147 (set_attr "mode" "<sseinsnmode>")])
22148
22149 (define_insn "vcvtph2ps<mask_name>"
22150 [(set (match_operand:V4SF 0 "register_operand" "=v")
22151 (vec_select:V4SF
22152 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
22153 UNSPEC_VCVTPH2PS)
22154 (parallel [(const_int 0) (const_int 1)
22155 (const_int 2) (const_int 3)])))]
22156 "TARGET_F16C || TARGET_AVX512VL"
22157 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22158 [(set_attr "type" "ssecvt")
22159 (set_attr "prefix" "maybe_evex")
22160 (set_attr "mode" "V4SF")])
22161
22162 (define_insn "*vcvtph2ps_load<mask_name>"
22163 [(set (match_operand:V4SF 0 "register_operand" "=v")
22164 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
22165 UNSPEC_VCVTPH2PS))]
22166 "TARGET_F16C || TARGET_AVX512VL"
22167 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22168 [(set_attr "type" "ssecvt")
22169 (set_attr "prefix" "vex")
22170 (set_attr "mode" "V8SF")])
22171
22172 (define_insn "vcvtph2ps256<mask_name>"
22173 [(set (match_operand:V8SF 0 "register_operand" "=v")
22174 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
22175 UNSPEC_VCVTPH2PS))]
22176 "TARGET_F16C || TARGET_AVX512VL"
22177 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22178 [(set_attr "type" "ssecvt")
22179 (set_attr "prefix" "vex")
22180 (set_attr "btver2_decode" "double")
22181 (set_attr "mode" "V8SF")])
22182
22183 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
22184 [(set (match_operand:V16SF 0 "register_operand" "=v")
22185 (unspec:V16SF
22186 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
22187 UNSPEC_VCVTPH2PS))]
22188 "TARGET_AVX512F"
22189 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
22190 [(set_attr "type" "ssecvt")
22191 (set_attr "prefix" "evex")
22192 (set_attr "mode" "V16SF")])
22193
22194 (define_expand "vcvtps2ph_mask"
22195 [(set (match_operand:V8HI 0 "register_operand")
22196 (vec_merge:V8HI
22197 (vec_concat:V8HI
22198 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
22199 (match_operand:SI 2 "const_0_to_255_operand")]
22200 UNSPEC_VCVTPS2PH)
22201 (match_dup 5))
22202 (match_operand:V8HI 3 "nonimm_or_0_operand")
22203 (match_operand:QI 4 "register_operand")))]
22204 "TARGET_AVX512VL"
22205 "operands[5] = CONST0_RTX (V4HImode);")
22206
22207 (define_expand "vcvtps2ph"
22208 [(set (match_operand:V8HI 0 "register_operand")
22209 (vec_concat:V8HI
22210 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
22211 (match_operand:SI 2 "const_0_to_255_operand")]
22212 UNSPEC_VCVTPS2PH)
22213 (match_dup 3)))]
22214 "TARGET_F16C"
22215 "operands[3] = CONST0_RTX (V4HImode);")
22216
22217 (define_insn "*vcvtps2ph<mask_name>"
22218 [(set (match_operand:V8HI 0 "register_operand" "=v")
22219 (vec_concat:V8HI
22220 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
22221 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22222 UNSPEC_VCVTPS2PH)
22223 (match_operand:V4HI 3 "const0_operand")))]
22224 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
22225 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
22226 [(set_attr "type" "ssecvt")
22227 (set_attr "prefix" "maybe_evex")
22228 (set_attr "mode" "V4SF")])
22229
22230 (define_insn "*vcvtps2ph_store<merge_mask_name>"
22231 [(set (match_operand:V4HI 0 "memory_operand" "=m")
22232 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
22233 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22234 UNSPEC_VCVTPS2PH))]
22235 "TARGET_F16C || TARGET_AVX512VL"
22236 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22237 [(set_attr "type" "ssecvt")
22238 (set_attr "prefix" "maybe_evex")
22239 (set_attr "mode" "V4SF")])
22240
22241 (define_insn "vcvtps2ph256<mask_name>"
22242 [(set (match_operand:V8HI 0 "register_operand" "=v")
22243 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
22244 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22245 UNSPEC_VCVTPS2PH))]
22246 "TARGET_F16C || TARGET_AVX512VL"
22247 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22248 [(set_attr "type" "ssecvt")
22249 (set_attr "prefix" "maybe_evex")
22250 (set_attr "btver2_decode" "vector")
22251 (set_attr "mode" "V8SF")])
22252
22253 (define_insn "*vcvtps2ph256<merge_mask_name>"
22254 [(set (match_operand:V8HI 0 "memory_operand" "=m")
22255 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
22256 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22257 UNSPEC_VCVTPS2PH))]
22258 "TARGET_F16C || TARGET_AVX512VL"
22259 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22260 [(set_attr "type" "ssecvt")
22261 (set_attr "prefix" "maybe_evex")
22262 (set_attr "btver2_decode" "vector")
22263 (set_attr "mode" "V8SF")])
22264
22265 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
22266 [(set (match_operand:V16HI 0 "register_operand" "=v")
22267 (unspec:V16HI
22268 [(match_operand:V16SF 1 "register_operand" "v")
22269 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22270 UNSPEC_VCVTPS2PH))]
22271 "TARGET_AVX512F"
22272 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22273 [(set_attr "type" "ssecvt")
22274 (set_attr "prefix" "evex")
22275 (set_attr "mode" "V16SF")])
22276
22277 (define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
22278 [(set (match_operand:V16HI 0 "memory_operand" "=m")
22279 (unspec:V16HI
22280 [(match_operand:V16SF 1 "register_operand" "v")
22281 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22282 UNSPEC_VCVTPS2PH))]
22283 "TARGET_AVX512F"
22284 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22285 [(set_attr "type" "ssecvt")
22286 (set_attr "prefix" "evex")
22287 (set_attr "mode" "V16SF")])
22288
22289 ;; For gather* insn patterns
22290 (define_mode_iterator VEC_GATHER_MODE
22291 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
22292 (define_mode_attr VEC_GATHER_IDXSI
22293 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
22294 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
22295 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
22296 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
22297
22298 (define_mode_attr VEC_GATHER_IDXDI
22299 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
22300 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
22301 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
22302 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
22303
22304 (define_mode_attr VEC_GATHER_SRCDI
22305 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
22306 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
22307 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
22308 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
22309
22310 (define_expand "avx2_gathersi<mode>"
22311 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
22312 (unspec:VEC_GATHER_MODE
22313 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
22314 (mem:<ssescalarmode>
22315 (match_par_dup 6
22316 [(match_operand 2 "vsib_address_operand")
22317 (match_operand:<VEC_GATHER_IDXSI>
22318 3 "register_operand")
22319 (match_operand:SI 5 "const1248_operand")]))
22320 (mem:BLK (scratch))
22321 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
22322 UNSPEC_GATHER))
22323 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
22324 "TARGET_AVX2"
22325 {
22326 operands[6]
22327 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22328 operands[5]), UNSPEC_VSIBADDR);
22329 })
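;; Purely illustrative (AT&T syntax, assuming a V8SF gather with the base in
;; %rax, indices in %ymm1 and the mask in %ymm2): the insns below end up as
;; something like
;;   vgatherdps %ymm2, (%rax,%ymm1,4), %ymm0
;; where the mask operand is clobbered by the instruction itself.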
22330
22331 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
22332 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22333 (unspec:VEC_GATHER_MODE
22334 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
22335 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22336 [(unspec:P
22337 [(match_operand:P 3 "vsib_address_operand" "Tv")
22338 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
22339 (match_operand:SI 6 "const1248_operand" "n")]
22340 UNSPEC_VSIBADDR)])
22341 (mem:BLK (scratch))
22342 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
22343 UNSPEC_GATHER))
22344 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22345 "TARGET_AVX2"
22346 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
22347 [(set_attr "type" "ssemov")
22348 (set_attr "prefix" "vex")
22349 (set_attr "mode" "<sseinsnmode>")])
22350
22351 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
22352 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22353 (unspec:VEC_GATHER_MODE
22354 [(pc)
22355 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22356 [(unspec:P
22357 [(match_operand:P 2 "vsib_address_operand" "Tv")
22358 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
22359 (match_operand:SI 5 "const1248_operand" "n")]
22360 UNSPEC_VSIBADDR)])
22361 (mem:BLK (scratch))
22362 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
22363 UNSPEC_GATHER))
22364 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22365 "TARGET_AVX2"
22366 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
22367 [(set_attr "type" "ssemov")
22368 (set_attr "prefix" "vex")
22369 (set_attr "mode" "<sseinsnmode>")])
22370
22371 (define_expand "avx2_gatherdi<mode>"
22372 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
22373 (unspec:VEC_GATHER_MODE
22374 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
22375 (mem:<ssescalarmode>
22376 (match_par_dup 6
22377 [(match_operand 2 "vsib_address_operand")
22378 (match_operand:<VEC_GATHER_IDXDI>
22379 3 "register_operand")
22380 (match_operand:SI 5 "const1248_operand")]))
22381 (mem:BLK (scratch))
22382 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
22383 UNSPEC_GATHER))
22384 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
22385 "TARGET_AVX2"
22386 {
22387 operands[6]
22388 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22389 operands[5]), UNSPEC_VSIBADDR);
22390 })
22391
22392 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
22393 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22394 (unspec:VEC_GATHER_MODE
22395 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
22396 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22397 [(unspec:P
22398 [(match_operand:P 3 "vsib_address_operand" "Tv")
22399 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
22400 (match_operand:SI 6 "const1248_operand" "n")]
22401 UNSPEC_VSIBADDR)])
22402 (mem:BLK (scratch))
22403 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
22404 UNSPEC_GATHER))
22405 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22406 "TARGET_AVX2"
22407 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
22408 [(set_attr "type" "ssemov")
22409 (set_attr "prefix" "vex")
22410 (set_attr "mode" "<sseinsnmode>")])
22411
22412 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
22413 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22414 (unspec:VEC_GATHER_MODE
22415 [(pc)
22416 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22417 [(unspec:P
22418 [(match_operand:P 2 "vsib_address_operand" "Tv")
22419 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
22420 (match_operand:SI 5 "const1248_operand" "n")]
22421 UNSPEC_VSIBADDR)])
22422 (mem:BLK (scratch))
22423 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
22424 UNSPEC_GATHER))
22425 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22426 "TARGET_AVX2"
22427 {
22428 if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
22429 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
22430 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
22431 }
22432 [(set_attr "type" "ssemov")
22433 (set_attr "prefix" "vex")
22434 (set_attr "mode" "<sseinsnmode>")])
22435
22436 (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
22437 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
22438 (vec_select:<VEC_GATHER_SRCDI>
22439 (unspec:VI4F_256
22440 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
22441 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22442 [(unspec:P
22443 [(match_operand:P 3 "vsib_address_operand" "Tv")
22444 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
22445 (match_operand:SI 6 "const1248_operand" "n")]
22446 UNSPEC_VSIBADDR)])
22447 (mem:BLK (scratch))
22448 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
22449 UNSPEC_GATHER)
22450 (parallel [(const_int 0) (const_int 1)
22451 (const_int 2) (const_int 3)])))
22452 (clobber (match_scratch:VI4F_256 1 "=&x"))]
22453 "TARGET_AVX2"
22454 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
22455 [(set_attr "type" "ssemov")
22456 (set_attr "prefix" "vex")
22457 (set_attr "mode" "<sseinsnmode>")])
22458
22459 (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
22460 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
22461 (vec_select:<VEC_GATHER_SRCDI>
22462 (unspec:VI4F_256
22463 [(pc)
22464 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22465 [(unspec:P
22466 [(match_operand:P 2 "vsib_address_operand" "Tv")
22467 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
22468 (match_operand:SI 5 "const1248_operand" "n")]
22469 UNSPEC_VSIBADDR)])
22470 (mem:BLK (scratch))
22471 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
22472 UNSPEC_GATHER)
22473 (parallel [(const_int 0) (const_int 1)
22474 (const_int 2) (const_int 3)])))
22475 (clobber (match_scratch:VI4F_256 1 "=&x"))]
22476 "TARGET_AVX2"
22477 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
22478 [(set_attr "type" "ssemov")
22479 (set_attr "prefix" "vex")
22480 (set_attr "mode" "<sseinsnmode>")])
22481
22482 (define_expand "<avx512>_gathersi<mode>"
22483 [(parallel [(set (match_operand:VI48F 0 "register_operand")
22484 (unspec:VI48F
22485 [(match_operand:VI48F 1 "register_operand")
22486 (match_operand:<avx512fmaskmode> 4 "register_operand")
22487 (mem:<ssescalarmode>
22488 (match_par_dup 6
22489 [(match_operand 2 "vsib_address_operand")
22490 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
22491 (match_operand:SI 5 "const1248_operand")]))]
22492 UNSPEC_GATHER))
22493 (clobber (match_scratch:<avx512fmaskmode> 7))])]
22494 "TARGET_AVX512F"
22495 {
22496 operands[6]
22497 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22498 operands[5]), UNSPEC_VSIBADDR);
22499 })
22500
22501 (define_insn "*avx512f_gathersi<VI48F:mode>"
22502 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22503 (unspec:VI48F
22504 [(match_operand:VI48F 1 "register_operand" "0")
22505 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
22506 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22507 [(unspec:P
22508 [(match_operand:P 4 "vsib_address_operand" "Tv")
22509 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
22510 (match_operand:SI 5 "const1248_operand" "n")]
22511 UNSPEC_VSIBADDR)])]
22512 UNSPEC_GATHER))
22513 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
22514 "TARGET_AVX512F"
22515 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
22516 ;; gas changed what it requires incompatibly.
22517 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
22518 [(set_attr "type" "ssemov")
22519 (set_attr "prefix" "evex")
22520 (set_attr "mode" "<sseinsnmode>")])
22521
22522 (define_insn "*avx512f_gathersi<VI48F:mode>_2"
22523 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22524 (unspec:VI48F
22525 [(pc)
22526 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
22527 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
22528 [(unspec:P
22529 [(match_operand:P 3 "vsib_address_operand" "Tv")
22530 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
22531 (match_operand:SI 4 "const1248_operand" "n")]
22532 UNSPEC_VSIBADDR)])]
22533 UNSPEC_GATHER))
22534 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
22535 "TARGET_AVX512F"
22536 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22537 ;; gas changed what it requires incompatibly.
22538 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
22539 [(set_attr "type" "ssemov")
22540 (set_attr "prefix" "evex")
22541 (set_attr "mode" "<sseinsnmode>")])
22542
22543
22544 (define_expand "<avx512>_gatherdi<mode>"
22545 [(parallel [(set (match_operand:VI48F 0 "register_operand")
22546 (unspec:VI48F
22547 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
22548 (match_operand:QI 4 "register_operand")
22549 (mem:<ssescalarmode>
22550 (match_par_dup 6
22551 [(match_operand 2 "vsib_address_operand")
22552 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
22553 (match_operand:SI 5 "const1248_operand")]))]
22554 UNSPEC_GATHER))
22555 (clobber (match_scratch:QI 7))])]
22556 "TARGET_AVX512F"
22557 {
22558 operands[6]
22559 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22560 operands[5]), UNSPEC_VSIBADDR);
22561 })
22562
22563 (define_insn "*avx512f_gatherdi<VI48F:mode>"
22564 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22565 (unspec:VI48F
22566 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
22567 (match_operand:QI 7 "register_operand" "2")
22568 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22569 [(unspec:P
22570 [(match_operand:P 4 "vsib_address_operand" "Tv")
22571 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
22572 (match_operand:SI 5 "const1248_operand" "n")]
22573 UNSPEC_VSIBADDR)])]
22574 UNSPEC_GATHER))
22575 (clobber (match_scratch:QI 2 "=&Yk"))]
22576 "TARGET_AVX512F"
22577 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
22578 ;; gas changed what it requires incompatibly.
22579 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
22580 [(set_attr "type" "ssemov")
22581 (set_attr "prefix" "evex")
22582 (set_attr "mode" "<sseinsnmode>")])
22583
22584 (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
22585 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22586 (unspec:VI48F
22587 [(pc)
22588 (match_operand:QI 6 "register_operand" "1")
22589 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
22590 [(unspec:P
22591 [(match_operand:P 3 "vsib_address_operand" "Tv")
22592 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
22593 (match_operand:SI 4 "const1248_operand" "n")]
22594 UNSPEC_VSIBADDR)])]
22595 UNSPEC_GATHER))
22596 (clobber (match_scratch:QI 1 "=&Yk"))]
22597 "TARGET_AVX512F"
22598 {
22599 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22600 gas changed what it requires incompatibly. */
22601 if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
22602 {
22603 if (<VI48F:MODE_SIZE> != 64)
22604 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
22605 else
22606 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
22607 }
22608 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
22609 }
22610 [(set_attr "type" "ssemov")
22611 (set_attr "prefix" "evex")
22612 (set_attr "mode" "<sseinsnmode>")])
22613
22614 (define_expand "<avx512>_scattersi<mode>"
22615 [(parallel [(set (mem:VI48F
22616 (match_par_dup 5
22617 [(match_operand 0 "vsib_address_operand")
22618 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
22619 (match_operand:SI 4 "const1248_operand")]))
22620 (unspec:VI48F
22621 [(match_operand:<avx512fmaskmode> 1 "register_operand")
22622 (match_operand:VI48F 3 "register_operand")]
22623 UNSPEC_SCATTER))
22624 (clobber (match_scratch:<avx512fmaskmode> 6))])]
22625 "TARGET_AVX512F"
22626 {
22627 operands[5]
22628 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
22629 operands[4]), UNSPEC_VSIBADDR);
22630 })
22631
22632 (define_insn "*avx512f_scattersi<VI48F:mode>"
22633 [(set (match_operator:VI48F 5 "vsib_mem_operator"
22634 [(unspec:P
22635 [(match_operand:P 0 "vsib_address_operand" "Tv")
22636 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
22637 (match_operand:SI 4 "const1248_operand" "n")]
22638 UNSPEC_VSIBADDR)])
22639 (unspec:VI48F
22640 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
22641 (match_operand:VI48F 3 "register_operand" "v")]
22642 UNSPEC_SCATTER))
22643 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
22644 "TARGET_AVX512F"
22645 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22646 ;; gas changed what it requires incompatibly.
22647 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
22648 [(set_attr "type" "ssemov")
22649 (set_attr "prefix" "evex")
22650 (set_attr "mode" "<sseinsnmode>")])
22651
22652 (define_expand "<avx512>_scatterdi<mode>"
22653 [(parallel [(set (mem:VI48F
22654 (match_par_dup 5
22655 [(match_operand 0 "vsib_address_operand")
22656 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
22657 (match_operand:SI 4 "const1248_operand")]))
22658 (unspec:VI48F
22659 [(match_operand:QI 1 "register_operand")
22660 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
22661 UNSPEC_SCATTER))
22662 (clobber (match_scratch:QI 6))])]
22663 "TARGET_AVX512F"
22664 {
22665 operands[5]
22666 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
22667 operands[4]), UNSPEC_VSIBADDR);
22668 })
22669
22670 (define_insn "*avx512f_scatterdi<VI48F:mode>"
22671 [(set (match_operator:VI48F 5 "vsib_mem_operator"
22672 [(unspec:P
22673 [(match_operand:P 0 "vsib_address_operand" "Tv")
22674 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
22675 (match_operand:SI 4 "const1248_operand" "n")]
22676 UNSPEC_VSIBADDR)])
22677 (unspec:VI48F
22678 [(match_operand:QI 6 "register_operand" "1")
22679 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
22680 UNSPEC_SCATTER))
22681 (clobber (match_scratch:QI 1 "=&Yk"))]
22682 "TARGET_AVX512F"
22683 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22684 ;; gas changed what it requires incompatibly.
22685 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
22686 [(set_attr "type" "ssemov")
22687 (set_attr "prefix" "evex")
22688 (set_attr "mode" "<sseinsnmode>")])
22689
22690 (define_insn "<avx512>_compress<mode>_mask"
22691 [(set (match_operand:VI48F 0 "register_operand" "=v")
22692 (unspec:VI48F
22693 [(match_operand:VI48F 1 "register_operand" "v")
22694 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
22695 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
22696 UNSPEC_COMPRESS))]
22697 "TARGET_AVX512F"
22698 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22699 [(set_attr "type" "ssemov")
22700 (set_attr "prefix" "evex")
22701 (set_attr "mode" "<sseinsnmode>")])
22702
22703 (define_insn "compress<mode>_mask"
22704 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
22705 (unspec:VI12_AVX512VLBW
22706 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
22707 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
22708 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
22709 UNSPEC_COMPRESS))]
22710 "TARGET_AVX512VBMI2"
22711 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22712 [(set_attr "type" "ssemov")
22713 (set_attr "prefix" "evex")
22714 (set_attr "mode" "<sseinsnmode>")])
22715
22716 (define_insn "<avx512>_compressstore<mode>_mask"
22717 [(set (match_operand:VI48F 0 "memory_operand" "=m")
22718 (unspec:VI48F
22719 [(match_operand:VI48F 1 "register_operand" "x")
22720 (match_dup 0)
22721 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
22722 UNSPEC_COMPRESS_STORE))]
22723 "TARGET_AVX512F"
22724 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
22725 [(set_attr "type" "ssemov")
22726 (set_attr "prefix" "evex")
22727 (set_attr "memory" "store")
22728 (set_attr "mode" "<sseinsnmode>")])
22729
22730 (define_insn "compressstore<mode>_mask"
22731 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
22732 (unspec:VI12_AVX512VLBW
22733 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
22734 (match_dup 0)
22735 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
22736 UNSPEC_COMPRESS_STORE))]
22737 "TARGET_AVX512VBMI2"
22738 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
22739 [(set_attr "type" "ssemov")
22740 (set_attr "prefix" "evex")
22741 (set_attr "memory" "store")
22742 (set_attr "mode" "<sseinsnmode>")])
22743
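;; Expansion: the inverse of compression.  Consecutive elements of the source
;; (register or memory) are placed into the destination lanes selected by the
;; mask register; the other lanes come from the merge operand or are zeroed.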
22744 (define_expand "<avx512>_expand<mode>_maskz"
22745 [(set (match_operand:VI48F 0 "register_operand")
22746 (unspec:VI48F
22747 [(match_operand:VI48F 1 "nonimmediate_operand")
22748 (match_operand:VI48F 2 "nonimm_or_0_operand")
22749 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22750 UNSPEC_EXPAND))]
22751 "TARGET_AVX512F"
22752 "operands[2] = CONST0_RTX (<MODE>mode);")
22753
22754 (define_insn "<avx512>_expand<mode>_mask"
22755 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
22756 (unspec:VI48F
22757 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
22758 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
22759 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22760 UNSPEC_EXPAND))]
22761 "TARGET_AVX512F"
22762 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22763 [(set_attr "type" "ssemov")
22764 (set_attr "prefix" "evex")
22765 (set_attr "memory" "none,load")
22766 (set_attr "mode" "<sseinsnmode>")])
22767
22768 (define_insn "expand<mode>_mask"
22769 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
22770 (unspec:VI12_AVX512VLBW
22771 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
22772 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
22773 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22774 UNSPEC_EXPAND))]
22775 "TARGET_AVX512VBMI2"
22776 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22777 [(set_attr "type" "ssemov")
22778 (set_attr "prefix" "evex")
22779 (set_attr "memory" "none,load")
22780 (set_attr "mode" "<sseinsnmode>")])
22781
22782 (define_expand "expand<mode>_maskz"
22783 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
22784 (unspec:VI12_AVX512VLBW
22785 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
22786 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
22787 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22788 UNSPEC_EXPAND))]
22789 "TARGET_AVX512VBMI2"
22790 "operands[2] = CONST0_RTX (<MODE>mode);")
22791
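;; vrangep[sd]/vranges[sd] select, per element pair, the minimum, maximum or
;; magnitude minimum/maximum, with sign handling controlled by the immediate.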
22792 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
22793 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22794 (unspec:VF_AVX512VL
22795 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
22796 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
22797 (match_operand:SI 3 "const_0_to_15_operand")]
22798 UNSPEC_RANGE))]
22799 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
22800 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
22801 [(set_attr "type" "sse")
22802 (set_attr "prefix" "evex")
22803 (set_attr "mode" "<MODE>")])
22804
22805 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
22806 [(set (match_operand:VF_128 0 "register_operand" "=v")
22807 (vec_merge:VF_128
22808 (unspec:VF_128
22809 [(match_operand:VF_128 1 "register_operand" "v")
22810 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22811 (match_operand:SI 3 "const_0_to_15_operand")]
22812 UNSPEC_RANGE)
22813 (match_dup 1)
22814 (const_int 1)))]
22815 "TARGET_AVX512DQ"
22816 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
22817 [(set_attr "type" "sse")
22818 (set_attr "prefix" "evex")
22819 (set_attr "mode" "<MODE>")])
22820
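;; vfpclass tests each element for the floating-point classes selected by the
;; immediate (NaN, +/-0, +/-infinity, denormal, negative) and sets the
;; corresponding bit of the destination mask register.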
22821 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
22822 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22823 (unspec:<avx512fmaskmode>
22824 [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
22825 (match_operand 2 "const_0_to_255_operand" "n")]
22826 UNSPEC_FPCLASS))]
22827 "TARGET_AVX512DQ"
22828 "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
22829 [(set_attr "type" "sse")
22830 (set_attr "length_immediate" "1")
22831 (set_attr "prefix" "evex")
22832 (set_attr "mode" "<MODE>")])
22833
22834 (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
22835 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22836 (and:<avx512fmaskmode>
22837 (unspec:<avx512fmaskmode>
22838 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
22839 (match_operand 2 "const_0_to_255_operand" "n")]
22840 UNSPEC_FPCLASS)
22841 (const_int 1)))]
22842 "TARGET_AVX512DQ"
22843 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
22844 [(set_attr "type" "sse")
22845 (set_attr "length_immediate" "1")
22846 (set_attr "prefix" "evex")
22847 (set_attr "mode" "<MODE>")])
22848
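;; vgetmant extracts the mantissa of each element, normalized to the interval
;; and with the sign treatment selected by the immediate.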
22849 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
22850 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22851 (unspec:VF_AVX512VL
22852 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
22853 (match_operand:SI 2 "const_0_to_15_operand")]
22854 UNSPEC_GETMANT))]
22855 "TARGET_AVX512F"
22856 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
22857 [(set_attr "prefix" "evex")
22858 (set_attr "mode" "<MODE>")])
22859
22860 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
22861 [(set (match_operand:VF_128 0 "register_operand" "=v")
22862 (vec_merge:VF_128
22863 (unspec:VF_128
22864 [(match_operand:VF_128 1 "register_operand" "v")
22865 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22866 (match_operand:SI 3 "const_0_to_15_operand")]
22867 UNSPEC_GETMANT)
22868 (match_dup 1)
22869 (const_int 1)))]
22870 "TARGET_AVX512F"
22871 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
22872 [(set_attr "prefix" "evex")
22873 (set_attr "mode" "<ssescalarmode>")])
22874
22875 ;; The correct RTL representation of the vdbpsadbw operation is absolutely
22876 ;; enormous, and surely not generally useful, so an unspec is used instead.
22877 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
22878 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22879 (unspec:VI2_AVX512VL
22880 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
22881 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
22882 (match_operand:SI 3 "const_0_to_255_operand")]
22883 UNSPEC_DBPSADBW))]
22884 "TARGET_AVX512BW"
22885 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
22886 [(set_attr "type" "sselog1")
22887 (set_attr "length_immediate" "1")
22888 (set_attr "prefix" "evex")
22889 (set_attr "mode" "<sseinsnmode>")])
22890
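;; AVX512CD: vplzcnt{d,q} counts the leading zero bits of each element.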
22891 (define_insn "clz<mode>2<mask_name>"
22892 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22893 (clz:VI48_AVX512VL
22894 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
22895 "TARGET_AVX512CD"
22896 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22897 [(set_attr "type" "sse")
22898 (set_attr "prefix" "evex")
22899 (set_attr "mode" "<sseinsnmode>")])
22900
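;; vpconflict{d,q} compares each element with all preceding elements of the
;; source and records the matches as a bit mask in the destination element.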
22901 (define_insn "<mask_codefor>conflict<mode><mask_name>"
22902 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22903 (unspec:VI48_AVX512VL
22904 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
22905 UNSPEC_CONFLICT))]
22906 "TARGET_AVX512CD"
22907 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22908 [(set_attr "type" "sse")
22909 (set_attr "prefix" "evex")
22910 (set_attr "mode" "<sseinsnmode>")])
22911
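;; SHA-NI: the msg1/msg2 patterns compute intermediate message-schedule words,
;; sha1nexte computes the updated SHA-1 e state value, and sha1rnds4 and
;; sha256rnds2 perform four (respectively two) hash rounds.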
22912 (define_insn "sha1msg1"
22913 [(set (match_operand:V4SI 0 "register_operand" "=x")
22914 (unspec:V4SI
22915 [(match_operand:V4SI 1 "register_operand" "0")
22916 (match_operand:V4SI 2 "vector_operand" "xBm")]
22917 UNSPEC_SHA1MSG1))]
22918 "TARGET_SHA"
22919 "sha1msg1\t{%2, %0|%0, %2}"
22920 [(set_attr "type" "sselog1")
22921 (set_attr "mode" "TI")])
22922
22923 (define_insn "sha1msg2"
22924 [(set (match_operand:V4SI 0 "register_operand" "=x")
22925 (unspec:V4SI
22926 [(match_operand:V4SI 1 "register_operand" "0")
22927 (match_operand:V4SI 2 "vector_operand" "xBm")]
22928 UNSPEC_SHA1MSG2))]
22929 "TARGET_SHA"
22930 "sha1msg2\t{%2, %0|%0, %2}"
22931 [(set_attr "type" "sselog1")
22932 (set_attr "mode" "TI")])
22933
22934 (define_insn "sha1nexte"
22935 [(set (match_operand:V4SI 0 "register_operand" "=x")
22936 (unspec:V4SI
22937 [(match_operand:V4SI 1 "register_operand" "0")
22938 (match_operand:V4SI 2 "vector_operand" "xBm")]
22939 UNSPEC_SHA1NEXTE))]
22940 "TARGET_SHA"
22941 "sha1nexte\t{%2, %0|%0, %2}"
22942 [(set_attr "type" "sselog1")
22943 (set_attr "mode" "TI")])
22944
22945 (define_insn "sha1rnds4"
22946 [(set (match_operand:V4SI 0 "register_operand" "=x")
22947 (unspec:V4SI
22948 [(match_operand:V4SI 1 "register_operand" "0")
22949 (match_operand:V4SI 2 "vector_operand" "xBm")
22950 (match_operand:SI 3 "const_0_to_3_operand" "n")]
22951 UNSPEC_SHA1RNDS4))]
22952 "TARGET_SHA"
22953 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
22954 [(set_attr "type" "sselog1")
22955 (set_attr "length_immediate" "1")
22956 (set_attr "mode" "TI")])
22957
22958 (define_insn "sha256msg1"
22959 [(set (match_operand:V4SI 0 "register_operand" "=x")
22960 (unspec:V4SI
22961 [(match_operand:V4SI 1 "register_operand" "0")
22962 (match_operand:V4SI 2 "vector_operand" "xBm")]
22963 UNSPEC_SHA256MSG1))]
22964 "TARGET_SHA"
22965 "sha256msg1\t{%2, %0|%0, %2}"
22966 [(set_attr "type" "sselog1")
22967 (set_attr "mode" "TI")])
22968
22969 (define_insn "sha256msg2"
22970 [(set (match_operand:V4SI 0 "register_operand" "=x")
22971 (unspec:V4SI
22972 [(match_operand:V4SI 1 "register_operand" "0")
22973 (match_operand:V4SI 2 "vector_operand" "xBm")]
22974 UNSPEC_SHA256MSG2))]
22975 "TARGET_SHA"
22976 "sha256msg2\t{%2, %0|%0, %2}"
22977 [(set_attr "type" "sselog1")
22978 (set_attr "mode" "TI")])
22979
22980 (define_insn "sha256rnds2"
22981 [(set (match_operand:V4SI 0 "register_operand" "=x")
22982 (unspec:V4SI
22983 [(match_operand:V4SI 1 "register_operand" "0")
22984 (match_operand:V4SI 2 "vector_operand" "xBm")
22985 (match_operand:V4SI 3 "register_operand" "Yz")]
22986 UNSPEC_SHA256RNDS2))]
22987 "TARGET_SHA"
22988 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
22989 [(set_attr "type" "sselog1")
22990 (set_attr "length_immediate" "1")
22991 (set_attr "mode" "TI")])
22992
22993 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
22994 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22995 (vec_concat:AVX512MODE2P
22996 (vec_concat:<ssehalfvecmode>
22997 (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
22998 (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
22999 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
23000 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23001 "#"
23002 "&& reload_completed"
23003 [(set (match_dup 0) (match_dup 1))]
23004 {
23005 if (REG_P (operands[0]))
23006 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
23007 else
23008 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
23009 <ssequartermode>mode);
23010 })
23011
23012 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
23013 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
23014 (vec_concat:AVX512MODE2P
23015 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
23016 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
23017 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23018 "#"
23019 "&& reload_completed"
23020 [(set (match_dup 0) (match_dup 1))]
23021 {
23022 if (REG_P (operands[0]))
23023 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
23024 else
23025 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
23026 <ssehalfvecmode>mode);
23027 })
23028
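;; AVX512IFMA: vpmadd52luq/vpmadd52huq multiply the unsigned low 52 bits of
;; the 64-bit elements of the two source operands and accumulate the low
;; (respectively high) 52 bits of the 104-bit product into the destination.
;; Note that the pattern names below spell the operation "vpamdd52" while the
;; emitted mnemonic is vpmadd52<type>.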
23029 (define_int_iterator VPMADD52
23030 [UNSPEC_VPMADD52LUQ
23031 UNSPEC_VPMADD52HUQ])
23032
23033 (define_int_attr vpmadd52type
23034 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
23035
23036 (define_expand "vpamdd52huq<mode>_maskz"
23037 [(match_operand:VI8_AVX512VL 0 "register_operand")
23038 (match_operand:VI8_AVX512VL 1 "register_operand")
23039 (match_operand:VI8_AVX512VL 2 "register_operand")
23040 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
23041 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23042 "TARGET_AVX512IFMA"
23043 {
23044 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
23045 operands[0], operands[1], operands[2], operands[3],
23046 CONST0_RTX (<MODE>mode), operands[4]));
23047 DONE;
23048 })
23049
23050 (define_expand "vpamdd52luq<mode>_maskz"
23051 [(match_operand:VI8_AVX512VL 0 "register_operand")
23052 (match_operand:VI8_AVX512VL 1 "register_operand")
23053 (match_operand:VI8_AVX512VL 2 "register_operand")
23054 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
23055 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23056 "TARGET_AVX512IFMA"
23057 {
23058 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
23059 operands[0], operands[1], operands[2], operands[3],
23060 CONST0_RTX (<MODE>mode), operands[4]));
23061 DONE;
23062 })
23063
23064 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
23065 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
23066 (unspec:VI8_AVX512VL
23067 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
23068 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
23069 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
23070 VPMADD52))]
23071 "TARGET_AVX512IFMA"
23072 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
23073 [(set_attr "type" "ssemuladd")
23074 (set_attr "prefix" "evex")
23075 (set_attr "mode" "<sseinsnmode>")])
23076
23077 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
23078 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
23079 (vec_merge:VI8_AVX512VL
23080 (unspec:VI8_AVX512VL
23081 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
23082 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
23083 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
23084 VPMADD52)
23085 (match_dup 1)
23086 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23087 "TARGET_AVX512IFMA"
23088 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
23089 [(set_attr "type" "ssemuladd")
23090 (set_attr "prefix" "evex")
23091 (set_attr "mode" "<sseinsnmode>")])
23092
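;; vpmultishiftqb: each destination byte is an unaligned 8-bit field extracted
;; from the corresponding source qword at the bit offset given by the matching
;; control byte.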
23093 (define_insn "vpmultishiftqb<mode><mask_name>"
23094 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
23095 (unspec:VI1_AVX512VL
23096 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
23097 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
23098 UNSPEC_VPMULTISHIFT))]
23099 "TARGET_AVX512VBMI"
23100 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23101 [(set_attr "type" "sselog")
23102 (set_attr "prefix" "evex")
23103 (set_attr "mode" "<sseinsnmode>")])
23104
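;; AVX5124FMAPS/AVX5124VNNIW instructions read a group of four consecutive
;; vector registers.  The V64SF/V64SI modes below model that register group;
;; *mov<mode>_internal splits moves of the group into four 512-bit moves.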
23105 (define_mode_iterator IMOD4
23106 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
23107
23108 (define_mode_attr imod4_narrow
23109 [(V64SF "V16SF") (V64SI "V16SI")])
23110
23111 (define_expand "mov<mode>"
23112 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
23113 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
23114 "TARGET_AVX512F"
23115 {
23116 ix86_expand_vector_move (<MODE>mode, operands);
23117 DONE;
23118 })
23119
23120 (define_insn_and_split "*mov<mode>_internal"
23121 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
23122 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
23123 "TARGET_AVX512F
23124 && (register_operand (operands[0], <MODE>mode)
23125 || register_operand (operands[1], <MODE>mode))"
23126 "#"
23127 "&& reload_completed"
23128 [(const_int 0)]
23129 {
23130 rtx op0, op1;
23131 int i;
23132
23133 for (i = 0; i < 4; i++)
23134 {
23135 op0 = simplify_subreg
23136 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
23137 op1 = simplify_subreg
23138 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
23139 emit_move_insn (op0, op1);
23140 }
23141 DONE;
23142 })
23143
23144 (define_insn "avx5124fmaddps_4fmaddps"
23145 [(set (match_operand:V16SF 0 "register_operand" "=v")
23146 (unspec:V16SF
23147 [(match_operand:V16SF 1 "register_operand" "0")
23148 (match_operand:V64SF 2 "register_operand" "v")
23149 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
23150 "TARGET_AVX5124FMAPS"
23151 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
23152 [(set_attr ("type") ("ssemuladd"))
23153 (set_attr ("prefix") ("evex"))
23154 (set_attr ("mode") ("V16SF"))])
23155
23156 (define_insn "avx5124fmaddps_4fmaddps_mask"
23157 [(set (match_operand:V16SF 0 "register_operand" "=v")
23158 (vec_merge:V16SF
23159 (unspec:V16SF
23160 [(match_operand:V64SF 1 "register_operand" "v")
23161 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
23162 (match_operand:V16SF 3 "register_operand" "0")
23163 (match_operand:HI 4 "register_operand" "Yk")))]
23164 "TARGET_AVX5124FMAPS"
23165 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23166 [(set_attr ("type") ("ssemuladd"))
23167 (set_attr ("prefix") ("evex"))
23168 (set_attr ("mode") ("V16SF"))])
23169
23170 (define_insn "avx5124fmaddps_4fmaddps_maskz"
23171 [(set (match_operand:V16SF 0 "register_operand" "=v")
23172 (vec_merge:V16SF
23173 (unspec:V16SF
23174 [(match_operand:V16SF 1 "register_operand" "0")
23175 (match_operand:V64SF 2 "register_operand" "v")
23176 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
23177 (match_operand:V16SF 4 "const0_operand" "C")
23178 (match_operand:HI 5 "register_operand" "Yk")))]
23179 "TARGET_AVX5124FMAPS"
23180 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23181 [(set_attr ("type") ("ssemuladd"))
23182 (set_attr ("prefix") ("evex"))
23183 (set_attr ("mode") ("V16SF"))])
23184
23185 (define_insn "avx5124fmaddps_4fmaddss"
23186 [(set (match_operand:V4SF 0 "register_operand" "=v")
23187 (unspec:V4SF
23188 [(match_operand:V4SF 1 "register_operand" "0")
23189 (match_operand:V64SF 2 "register_operand" "v")
23190 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
23191 "TARGET_AVX5124FMAPS"
23192 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
23193 [(set_attr ("type") ("ssemuladd"))
23194 (set_attr ("prefix") ("evex"))
23195 (set_attr ("mode") ("SF"))])
23196
23197 (define_insn "avx5124fmaddps_4fmaddss_mask"
23198 [(set (match_operand:V4SF 0 "register_operand" "=v")
23199 (vec_merge:V4SF
23200 (unspec:V4SF
23201 [(match_operand:V64SF 1 "register_operand" "v")
23202 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
23203 (match_operand:V4SF 3 "register_operand" "0")
23204 (match_operand:QI 4 "register_operand" "Yk")))]
23205 "TARGET_AVX5124FMAPS"
23206 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
23207 [(set_attr ("type") ("ssemuladd"))
23208 (set_attr ("prefix") ("evex"))
23209 (set_attr ("mode") ("SF"))])
23210
23211 (define_insn "avx5124fmaddps_4fmaddss_maskz"
23212 [(set (match_operand:V4SF 0 "register_operand" "=v")
23213 (vec_merge:V4SF
23214 (unspec:V4SF
23215 [(match_operand:V4SF 1 "register_operand" "0")
23216 (match_operand:V64SF 2 "register_operand" "v")
23217 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
23218 (match_operand:V4SF 4 "const0_operand" "C")
23219 (match_operand:QI 5 "register_operand" "Yk")))]
23220 "TARGET_AVX5124FMAPS"
23221 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
23222 [(set_attr ("type") ("ssemuladd"))
23223 (set_attr ("prefix") ("evex"))
23224 (set_attr ("mode") ("SF"))])
23225
23226 (define_insn "avx5124fmaddps_4fnmaddps"
23227 [(set (match_operand:V16SF 0 "register_operand" "=v")
23228 (unspec:V16SF
23229 [(match_operand:V16SF 1 "register_operand" "0")
23230 (match_operand:V64SF 2 "register_operand" "v")
23231 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
23232 "TARGET_AVX5124FMAPS"
23233 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
23234 [(set_attr ("type") ("ssemuladd"))
23235 (set_attr ("prefix") ("evex"))
23236 (set_attr ("mode") ("V16SF"))])
23237
23238 (define_insn "avx5124fmaddps_4fnmaddps_mask"
23239 [(set (match_operand:V16SF 0 "register_operand" "=v")
23240 (vec_merge:V16SF
23241 (unspec:V16SF
23242 [(match_operand:V64SF 1 "register_operand" "v")
23243 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23244 (match_operand:V16SF 3 "register_operand" "0")
23245 (match_operand:HI 4 "register_operand" "Yk")))]
23246 "TARGET_AVX5124FMAPS"
23247 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23248 [(set_attr ("type") ("ssemuladd"))
23249 (set_attr ("prefix") ("evex"))
23250 (set_attr ("mode") ("V16SF"))])
23251
23252 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
23253 [(set (match_operand:V16SF 0 "register_operand" "=v")
23254 (vec_merge:V16SF
23255 (unspec:V16SF
23256 [(match_operand:V16SF 1 "register_operand" "0")
23257 (match_operand:V64SF 2 "register_operand" "v")
23258 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23259 (match_operand:V16SF 4 "const0_operand" "C")
23260 (match_operand:HI 5 "register_operand" "Yk")))]
23261 "TARGET_AVX5124FMAPS"
23262 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23263 [(set_attr ("type") ("ssemuladd"))
23264 (set_attr ("prefix") ("evex"))
23265 (set_attr ("mode") ("V16SF"))])
23266
23267 (define_insn "avx5124fmaddps_4fnmaddss"
23268 [(set (match_operand:V4SF 0 "register_operand" "=v")
23269 (unspec:V4SF
23270 [(match_operand:V4SF 1 "register_operand" "0")
23271 (match_operand:V64SF 2 "register_operand" "v")
23272 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
23273 "TARGET_AVX5124FMAPS"
23274 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
23275 [(set_attr ("type") ("ssemuladd"))
23276 (set_attr ("prefix") ("evex"))
23277 (set_attr ("mode") ("SF"))])
23278
23279 (define_insn "avx5124fmaddps_4fnmaddss_mask"
23280 [(set (match_operand:V4SF 0 "register_operand" "=v")
23281 (vec_merge:V4SF
23282 (unspec:V4SF
23283 [(match_operand:V64SF 1 "register_operand" "v")
23284 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23285 (match_operand:V4SF 3 "register_operand" "0")
23286 (match_operand:QI 4 "register_operand" "Yk")))]
23287 "TARGET_AVX5124FMAPS"
23288 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
23289 [(set_attr ("type") ("ssemuladd"))
23290 (set_attr ("prefix") ("evex"))
23291 (set_attr ("mode") ("SF"))])
23292
23293 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
23294 [(set (match_operand:V4SF 0 "register_operand" "=v")
23295 (vec_merge:V4SF
23296 (unspec:V4SF
23297 [(match_operand:V4SF 1 "register_operand" "0")
23298 (match_operand:V64SF 2 "register_operand" "v")
23299 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23300 (match_operand:V4SF 4 "const0_operand" "C")
23301 (match_operand:QI 5 "register_operand" "Yk")))]
23302 "TARGET_AVX5124FMAPS"
23303 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
23304 [(set_attr ("type") ("ssemuladd"))
23305 (set_attr ("prefix") ("evex"))
23306 (set_attr ("mode") ("SF"))])
23307
23308 (define_insn "avx5124vnniw_vp4dpwssd"
23309 [(set (match_operand:V16SI 0 "register_operand" "=v")
23310 (unspec:V16SI
23311 [(match_operand:V16SI 1 "register_operand" "0")
23312 (match_operand:V64SI 2 "register_operand" "v")
23313 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
23314 "TARGET_AVX5124VNNIW"
23315 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
23316 [(set_attr ("type") ("ssemuladd"))
23317 (set_attr ("prefix") ("evex"))
23318 (set_attr ("mode") ("TI"))])
23319
23320 (define_insn "avx5124vnniw_vp4dpwssd_mask"
23321 [(set (match_operand:V16SI 0 "register_operand" "=v")
23322 (vec_merge:V16SI
23323 (unspec:V16SI
23324 [(match_operand:V64SI 1 "register_operand" "v")
23325 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
23326 (match_operand:V16SI 3 "register_operand" "0")
23327 (match_operand:HI 4 "register_operand" "Yk")))]
23328 "TARGET_AVX5124VNNIW"
23329 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23330 [(set_attr ("type") ("ssemuladd"))
23331 (set_attr ("prefix") ("evex"))
23332 (set_attr ("mode") ("TI"))])
23333
23334 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
23335 [(set (match_operand:V16SI 0 "register_operand" "=v")
23336 (vec_merge:V16SI
23337 (unspec:V16SI
23338 [(match_operand:V16SI 1 "register_operand" "0")
23339 (match_operand:V64SI 2 "register_operand" "v")
23340 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
23341 (match_operand:V16SI 4 "const0_operand" "C")
23342 (match_operand:HI 5 "register_operand" "Yk")))]
23343 "TARGET_AVX5124VNNIW"
23344 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23345 [(set_attr ("type") ("ssemuladd"))
23346 (set_attr ("prefix") ("evex"))
23347 (set_attr ("mode") ("TI"))])
23348
23349 (define_insn "avx5124vnniw_vp4dpwssds"
23350 [(set (match_operand:V16SI 0 "register_operand" "=v")
23351 (unspec:V16SI
23352 [(match_operand:V16SI 1 "register_operand" "0")
23353 (match_operand:V64SI 2 "register_operand" "v")
23354 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
23355 "TARGET_AVX5124VNNIW"
23356 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
23357 [(set_attr ("type") ("ssemuladd"))
23358 (set_attr ("prefix") ("evex"))
23359 (set_attr ("mode") ("TI"))])
23360
23361 (define_insn "avx5124vnniw_vp4dpwssds_mask"
23362 [(set (match_operand:V16SI 0 "register_operand" "=v")
23363 (vec_merge:V16SI
23364 (unspec:V16SI
23365 [(match_operand:V64SI 1 "register_operand" "v")
23366 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
23367 (match_operand:V16SI 3 "register_operand" "0")
23368 (match_operand:HI 4 "register_operand" "Yk")))]
23369 "TARGET_AVX5124VNNIW"
23370 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23371 [(set_attr ("type") ("ssemuladd"))
23372 (set_attr ("prefix") ("evex"))
23373 (set_attr ("mode") ("TI"))])
23374
23375 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
23376 [(set (match_operand:V16SI 0 "register_operand" "=v")
23377 (vec_merge:V16SI
23378 (unspec:V16SI
23379 [(match_operand:V16SI 1 "register_operand" "0")
23380 (match_operand:V64SI 2 "register_operand" "v")
23381 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
23382 (match_operand:V16SI 4 "const0_operand" "C")
23383 (match_operand:HI 5 "register_operand" "Yk")))]
23384 "TARGET_AVX5124VNNIW"
23385 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23386 [(set_attr ("type") ("ssemuladd"))
23387 (set_attr ("prefix") ("evex"))
23388 (set_attr ("mode") ("TI"))])
23389
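;; Per-element population count: AVX512VPOPCNTDQ provides the 32/64-bit
;; element forms here, AVX512BITALG the 8/16-bit element forms further below.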
23390 (define_expand "popcount<mode>2"
23391 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
23392 (popcount:VI48_AVX512VL
23393 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
23394 "TARGET_AVX512VPOPCNTDQ")
23395
23396 (define_insn "vpopcount<mode><mask_name>"
23397 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
23398 (popcount:VI48_AVX512VL
23399 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
23400 "TARGET_AVX512VPOPCNTDQ"
23401 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
23402
23403 ;; Save multiple registers out-of-line.
23404 (define_insn "*save_multiple<mode>"
23405 [(match_parallel 0 "save_multiple"
23406 [(use (match_operand:P 1 "symbol_operand"))])]
23407 "TARGET_SSE && TARGET_64BIT"
23408 "call\t%P1")
23409
23410 ;; Restore multiple registers out-of-line.
23411 (define_insn "*restore_multiple<mode>"
23412 [(match_parallel 0 "restore_multiple"
23413 [(use (match_operand:P 1 "symbol_operand"))])]
23414 "TARGET_SSE && TARGET_64BIT"
23415 "call\t%P1")
23416
23417 ;; Restore multiple registers out-of-line and return.
23418 (define_insn "*restore_multiple_and_return<mode>"
23419 [(match_parallel 0 "restore_multiple"
23420 [(return)
23421 (use (match_operand:P 1 "symbol_operand"))
23422 (set (reg:DI SP_REG) (reg:DI R10_REG))
23423 ])]
23424 "TARGET_SSE && TARGET_64BIT"
23425 "jmp\t%P1")
23426
23427 ;; Restore multiple registers out-of-line when the hard frame pointer is used,
23428 ;; and perform the leave operation prior to returning from the function.
23429 (define_insn "*restore_multiple_leave_return<mode>"
23430 [(match_parallel 0 "restore_multiple"
23431 [(return)
23432 (use (match_operand:P 1 "symbol_operand"))
23433 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
23434 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
23435 (clobber (mem:BLK (scratch)))
23436 ])]
23437 "TARGET_SSE && TARGET_64BIT"
23438 "jmp\t%P1")
23439
23440 (define_expand "popcount<mode>2"
23441 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
23442 (popcount:VI12_AVX512VL
23443 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
23444 "TARGET_AVX512BITALG")
23445
23446 (define_insn "vpopcount<mode><mask_name>"
23447 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
23448 (popcount:VI12_AVX512VL
23449 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
23450 "TARGET_AVX512BITALG"
23451 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
23452
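;; GFNI: gf2p8affineqb applies an affine transform over GF(2) (a bit-matrix
;; multiply plus a constant byte taken from the immediate) to each byte;
;; gf2p8affineinvqb first replaces each byte by its inverse in GF(2^8);
;; gf2p8mulb multiplies bytes in GF(2^8).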
23453 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
23454 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23455 (unspec:VI1_AVX512F
23456 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
23457 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
23458 (match_operand 3 "const_0_to_255_operand" "n,n")]
23459 UNSPEC_GF2P8AFFINEINV))]
23460 "TARGET_GFNI"
23461 "@
23462 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
23463 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
23464 [(set_attr "isa" "noavx,avx")
23465 (set_attr "prefix_data16" "1,*")
23466 (set_attr "prefix_extra" "1")
23467 (set_attr "prefix" "orig,maybe_evex")
23468 (set_attr "mode" "<sseinsnmode>")])
23469
23470 (define_insn "vgf2p8affineqb_<mode><mask_name>"
23471 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23472 (unspec:VI1_AVX512F
23473 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
23474 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
23475 (match_operand 3 "const_0_to_255_operand" "n,n")]
23476 UNSPEC_GF2P8AFFINE))]
23477 "TARGET_GFNI"
23478 "@
23479 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
23480 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
23481 [(set_attr "isa" "noavx,avx")
23482 (set_attr "prefix_data16" "1,*")
23483 (set_attr "prefix_extra" "1")
23484 (set_attr "prefix" "orig,maybe_evex")
23485 (set_attr "mode" "<sseinsnmode>")])
23486
23487 (define_insn "vgf2p8mulb_<mode><mask_name>"
23488 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23489 (unspec:VI1_AVX512F
23490 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
23491 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
23492 UNSPEC_GF2P8MUL))]
23493 "TARGET_GFNI"
23494 "@
23495 gf2p8mulb\t{%2, %0| %0, %2}
23496 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
23497 [(set_attr "isa" "noavx,avx")
23498 (set_attr "prefix_data16" "1,*")
23499 (set_attr "prefix_extra" "1")
23500 (set_attr "prefix" "orig,maybe_evex")
23501 (set_attr "mode" "<sseinsnmode>")])
23502
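;; AVX512VBMI2 double shifts: vpshld/vpshrd concatenate corresponding elements
;; of the two sources into a double-width value and shift it by the immediate,
;; keeping one element's worth of the result; the vpshldv/vpshrdv forms use
;; per-element variable shift counts instead.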
23503 (define_insn "vpshrd_<mode><mask_name>"
23504 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23505 (unspec:VI248_AVX512VL
23506 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
23507 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
23508 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23509 UNSPEC_VPSHRD))]
23510 "TARGET_AVX512VBMI2"
23511 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
23512 [(set_attr ("prefix") ("evex"))])
23513
23514 (define_insn "vpshld_<mode><mask_name>"
23515 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23516 (unspec:VI248_AVX512VL
23517 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
23518 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
23519 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23520 UNSPEC_VPSHLD))]
23521 "TARGET_AVX512VBMI2"
23522 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
23523 [(set_attr ("prefix") ("evex"))])
23524
23525 (define_insn "vpshrdv_<mode>"
23526 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23527 (unspec:VI248_AVX512VL
23528 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23529 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23530 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23531 UNSPEC_VPSHRDV))]
23532 "TARGET_AVX512VBMI2"
23533 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
23534 [(set_attr ("prefix") ("evex"))
23535 (set_attr "mode" "<sseinsnmode>")])
23536
23537 (define_insn "vpshrdv_<mode>_mask"
23538 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23539 (vec_merge:VI248_AVX512VL
23540 (unspec:VI248_AVX512VL
23541 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23542 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23543 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23544 UNSPEC_VPSHRDV)
23545 (match_dup 1)
23546 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23547 "TARGET_AVX512VBMI2"
23548 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
23549 [(set_attr ("prefix") ("evex"))
23550 (set_attr "mode" "<sseinsnmode>")])
23551
23552 (define_expand "vpshrdv_<mode>_maskz"
23553 [(match_operand:VI248_AVX512VL 0 "register_operand")
23554 (match_operand:VI248_AVX512VL 1 "register_operand")
23555 (match_operand:VI248_AVX512VL 2 "register_operand")
23556 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
23557 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23558 "TARGET_AVX512VBMI2"
23559 {
23560 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
23561 operands[2], operands[3],
23562 CONST0_RTX (<MODE>mode),
23563 operands[4]));
23564 DONE;
23565 })
23566
23567 (define_insn "vpshrdv_<mode>_maskz_1"
23568 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23569 (vec_merge:VI248_AVX512VL
23570 (unspec:VI248_AVX512VL
23571 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23572 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23573 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23574 UNSPEC_VPSHRDV)
23575 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
23576 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23577 "TARGET_AVX512VBMI2"
23578 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
23579 [(set_attr ("prefix") ("evex"))
23580 (set_attr "mode" "<sseinsnmode>")])
23581
23582 (define_insn "vpshldv_<mode>"
23583 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23584 (unspec:VI248_AVX512VL
23585 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23586 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23587 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23588 UNSPEC_VPSHLDV))]
23589 "TARGET_AVX512VBMI2"
23590 "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
23591 [(set_attr ("prefix") ("evex"))
23592 (set_attr "mode" "<sseinsnmode>")])
23593
23594 (define_insn "vpshldv_<mode>_mask"
23595 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23596 (vec_merge:VI248_AVX512VL
23597 (unspec:VI248_AVX512VL
23598 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23599 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23600 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23601 UNSPEC_VPSHLDV)
23602 (match_dup 1)
23603 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23604 "TARGET_AVX512VBMI2"
23605 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
23606 [(set_attr ("prefix") ("evex"))
23607 (set_attr "mode" "<sseinsnmode>")])
23608
23609 (define_expand "vpshldv_<mode>_maskz"
23610 [(match_operand:VI248_AVX512VL 0 "register_operand")
23611 (match_operand:VI248_AVX512VL 1 "register_operand")
23612 (match_operand:VI248_AVX512VL 2 "register_operand")
23613 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
23614 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23615 "TARGET_AVX512VBMI2"
23616 {
23617 emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
23618 operands[2], operands[3],
23619 CONST0_RTX (<MODE>mode),
23620 operands[4]));
23621 DONE;
23622 })
23623
23624 (define_insn "vpshldv_<mode>_maskz_1"
23625 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23626 (vec_merge:VI248_AVX512VL
23627 (unspec:VI248_AVX512VL
23628 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23629 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23630 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23631 UNSPEC_VPSHLDV)
23632 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
23633 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23634 "TARGET_AVX512VBMI2"
23635 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
23636 [(set_attr ("prefix") ("evex"))
23637 (set_attr "mode" "<sseinsnmode>")])
23638
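;; Vector neural network instructions: vpdpbusd multiplies groups of four
;; unsigned bytes of one source with the corresponding signed bytes of the
;; other and accumulates the dword sums into the destination; vpdpwssd does
;; the same for pairs of signed words; the *s forms saturate the accumulation.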
23639 (define_insn "vpdpbusd_v16si"
23640 [(set (match_operand:V16SI 0 "register_operand" "=v")
23641 (unspec:V16SI
23642 [(match_operand:V16SI 1 "register_operand" "0")
23643 (match_operand:V16SI 2 "register_operand" "v")
23644 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
23645 UNSPEC_VPMADDUBSWACCD))]
23646 "TARGET_AVX512VNNI"
23647 "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
23648 [(set_attr ("prefix") ("evex"))])
23649
23650 (define_insn "vpdpbusd_<mode>"
23651 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
23652 (unspec:VI4_AVX2
23653 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
23654 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
23655 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
23656 UNSPEC_VPMADDUBSWACCD))]
23657 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
23658 "@
23659 %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
23660 vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
23661 [(set_attr ("prefix") ("vex,evex"))
23662 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
23663
23664 (define_insn "vpdpbusd_<mode>_mask"
23665 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23666 (vec_merge:VI4_AVX512VL
23667 (unspec:VI4_AVX512VL
23668 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23669 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23670 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23671 UNSPEC_VPMADDUBSWACCD)
23672 (match_dup 1)
23673 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23674 "TARGET_AVX512VNNI"
23675 "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
23676 [(set_attr ("prefix") ("evex"))])
23677
23678 (define_expand "vpdpbusd_<mode>_maskz"
23679 [(match_operand:VI4_AVX512VL 0 "register_operand")
23680 (match_operand:VI4_AVX512VL 1 "register_operand")
23681 (match_operand:VI4_AVX512VL 2 "register_operand")
23682 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
23683 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23684 "TARGET_AVX512VNNI"
23685 {
23686 emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
23687 operands[2], operands[3],
23688 CONST0_RTX (<MODE>mode),
23689 operands[4]));
23690 DONE;
23691 })
23692
23693 (define_insn "vpdpbusd_<mode>_maskz_1"
23694 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23695 (vec_merge:VI4_AVX512VL
23696 (unspec:VI4_AVX512VL
23697 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23698 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23699 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")
23700 ] UNSPEC_VPMADDUBSWACCD)
23701 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
23702 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23703 "TARGET_AVX512VNNI"
23704 "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
23705 [(set_attr ("prefix") ("evex"))])
23706
23707 (define_insn "vpdpbusds_v16si"
23708 [(set (match_operand:V16SI 0 "register_operand" "=v")
23709 (unspec:V16SI
23710 [(match_operand:V16SI 1 "register_operand" "0")
23711 (match_operand:V16SI 2 "register_operand" "v")
23712 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
23713 UNSPEC_VPMADDUBSWACCSSD))]
23714 "TARGET_AVX512VNNI"
23715 "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
23716 [(set_attr ("prefix") ("evex"))])
23717
23718 (define_insn "vpdpbusds_<mode>"
23719 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
23720 (unspec:VI4_AVX2
23721 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
23722 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
23723 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
23724 UNSPEC_VPMADDUBSWACCSSD))]
23725 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
23726 "@
23727 %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
23728 vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
23729 [(set_attr ("prefix") ("vex,evex"))
23730 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
23731
23732 (define_insn "vpdpbusds_<mode>_mask"
23733 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23734 (vec_merge:VI4_AVX512VL
23735 (unspec:VI4_AVX512VL
23736 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23737 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23738 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23739 UNSPEC_VPMADDUBSWACCSSD)
23740 (match_dup 1)
23741 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23742 "TARGET_AVX512VNNI"
23743 "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
23744 [(set_attr ("prefix") ("evex"))])
23745
23746 (define_expand "vpdpbusds_<mode>_maskz"
23747 [(match_operand:VI4_AVX512VL 0 "register_operand")
23748 (match_operand:VI4_AVX512VL 1 "register_operand")
23749 (match_operand:VI4_AVX512VL 2 "register_operand")
23750 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
23751 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23752 "TARGET_AVX512VNNI"
23753 {
23754 emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
23755 operands[2], operands[3],
23756 CONST0_RTX (<MODE>mode),
23757 operands[4]));
23758 DONE;
23759 })
23760
23761 (define_insn "vpdpbusds_<mode>_maskz_1"
23762 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23763 (vec_merge:VI4_AVX512VL
23764 (unspec:VI4_AVX512VL
23765 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23766 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23767 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23768 UNSPEC_VPMADDUBSWACCSSD)
23769 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
23770 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23771 "TARGET_AVX512VNNI"
23772 "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
23773 [(set_attr ("prefix") ("evex"))])
23774
23775 (define_insn "vpdpwssd_v16si"
23776 [(set (match_operand:V16SI 0 "register_operand" "=v")
23777 (unspec:V16SI
23778 [(match_operand:V16SI 1 "register_operand" "0")
23779 (match_operand:V16SI 2 "register_operand" "v")
23780 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
23781 UNSPEC_VPMADDWDACCD))]
23782 "TARGET_AVX512VNNI"
23783 "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
23784 [(set_attr ("prefix") ("evex"))])
23785
23786 (define_insn "vpdpwssd_<mode>"
23787 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
23788 (unspec:VI4_AVX2
23789 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
23790 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
23791 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
23792 UNSPEC_VPMADDWDACCD))]
23793 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
23794 "@
23795 %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
23796 vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
23797 [(set_attr ("prefix") ("vex,evex"))
23798 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
23799
23800 (define_insn "vpdpwssd_<mode>_mask"
23801 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23802 (vec_merge:VI4_AVX512VL
23803 (unspec:VI4_AVX512VL
23804 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23805 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23806 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23807 UNSPEC_VPMADDWDACCD)
23808 (match_dup 1)
23809 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23810 "TARGET_AVX512VNNI"
23811 "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
23812 [(set_attr ("prefix") ("evex"))])
23813
23814 (define_expand "vpdpwssd_<mode>_maskz"
23815 [(match_operand:VI4_AVX512VL 0 "register_operand")
23816 (match_operand:VI4_AVX512VL 1 "register_operand")
23817 (match_operand:VI4_AVX512VL 2 "register_operand")
23818 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
23819 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23820 "TARGET_AVX512VNNI"
23821 {
23822 emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
23823 operands[2], operands[3],
23824 CONST0_RTX (<MODE>mode),
23825 operands[4]));
23826 DONE;
23827 })
23828
23829 (define_insn "vpdpwssd_<mode>_maskz_1"
23830 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23831 (vec_merge:VI4_AVX512VL
23832 (unspec:VI4_AVX512VL
23833 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23834 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23835 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23836 UNSPEC_VPMADDWDACCD)
23837 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
23838 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23839 "TARGET_AVX512VNNI"
23840 "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
23841 [(set_attr ("prefix") ("evex"))])
23842
23843 (define_insn "vpdpwssds_v16si"
23844 [(set (match_operand:V16SI 0 "register_operand" "=v")
23845 (unspec:V16SI
23846 [(match_operand:V16SI 1 "register_operand" "0")
23847 (match_operand:V16SI 2 "register_operand" "v")
23848 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
23849 UNSPEC_VPMADDWDACCSSD))]
23850 "TARGET_AVX512VNNI"
23851 "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
23852 [(set_attr ("prefix") ("evex"))])
23853
23854 (define_insn "vpdpwssds_<mode>"
23855 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
23856 (unspec:VI4_AVX2
23857 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
23858 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
23859 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
23860 UNSPEC_VPMADDWDACCSSD))]
23861 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
23862 "@
23863 %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
23864 vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
23865 [(set_attr ("prefix") ("vex,evex"))
23866 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
23867
23868 (define_insn "vpdpwssds_<mode>_mask"
23869 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23870 (vec_merge:VI4_AVX512VL
23871 (unspec:VI4_AVX512VL
23872 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23873 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23874 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23875 UNSPEC_VPMADDWDACCSSD)
23876 (match_dup 1)
23877 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23878 "TARGET_AVX512VNNI"
23879 "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
23880 [(set_attr ("prefix") ("evex"))])
23881
23882 (define_expand "vpdpwssds_<mode>_maskz"
23883 [(match_operand:VI4_AVX512VL 0 "register_operand")
23884 (match_operand:VI4_AVX512VL 1 "register_operand")
23885 (match_operand:VI4_AVX512VL 2 "register_operand")
23886 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
23887 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23888 "TARGET_AVX512VNNI"
23889 {
23890 emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
23891 operands[2], operands[3],
23892 CONST0_RTX (<MODE>mode),
23893 operands[4]));
23894 DONE;
23895 })
23896
23897 (define_insn "vpdpwssds_<mode>_maskz_1"
23898 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23899 (vec_merge:VI4_AVX512VL
23900 (unspec:VI4_AVX512VL
23901 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23902 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23903 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23904 UNSPEC_VPMADDWDACCSSD)
23905 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
23906 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23907 "TARGET_AVX512VNNI"
23908 "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
23909 [(set_attr ("prefix") ("evex"))])
23910
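;; VAES: 256-bit and 512-bit forms of the AES encryption/decryption round
;; instructions.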
23911 (define_insn "vaesdec_<mode>"
23912 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
23913 (unspec:VI1_AVX512VL_F
23914 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
23915 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
23916 UNSPEC_VAESDEC))]
23917 "TARGET_VAES"
23918 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
23919 )
23920
23921 (define_insn "vaesdeclast_<mode>"
23922 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
23923 (unspec:VI1_AVX512VL_F
23924 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
23925 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
23926 UNSPEC_VAESDECLAST))]
23927 "TARGET_VAES"
23928 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
23929 )
23930
23931 (define_insn "vaesenc_<mode>"
23932 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
23933 (unspec:VI1_AVX512VL_F
23934 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
23935 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
23936 UNSPEC_VAESENC))]
23937 "TARGET_VAES"
23938 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
23939 )
23940
23941 (define_insn "vaesenclast_<mode>"
23942 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
23943 (unspec:VI1_AVX512VL_F
23944 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
23945 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
23946 UNSPEC_VAESENCLAST))]
23947 "TARGET_VAES"
23948 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
23949 )
23950
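;; vpclmulqdq: carry-less multiplication of one quadword from each source,
;; selected by the immediate, yielding a 128-bit product per lane.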
23951 (define_insn "vpclmulqdq_<mode>"
23952 [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
23953 (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
23954 (match_operand:VI8_FVL 2 "vector_operand" "vm")
23955 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23956 UNSPEC_VPCLMULQDQ))]
23957 "TARGET_VPCLMULQDQ"
23958 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
23959 [(set_attr "mode" "DI")])
23960
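;; vpshufbitqmb: each byte of the second source selects a bit position within
;; the corresponding qword of the first source; the gathered bits form the
;; resulting mask register value.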
23961 (define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
23962 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
23963 (unspec:<avx512fmaskmode>
23964 [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
23965 (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
23966 UNSPEC_VPSHUFBIT))]
23967 "TARGET_AVX512BITALG"
23968 "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
23969 [(set_attr "prefix" "evex")
23970 (set_attr "mode" "<sseinsnmode>")])
23971
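;; AVX512VP2INTERSECT: vp2intersect{d,q} produces a pair of mask registers
;; (modeled by the P2QI/P2HI modes below), marking for each source operand
;; which of its elements also occur in the other operand.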
23972 (define_mode_iterator VI48_AVX512VP2VL
23973 [V8DI
23974 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
23975 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
23976
23977 (define_mode_iterator MASK_DWI [P2QI P2HI])
23978
23979 (define_expand "mov<mode>"
23980 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand")
23981 (match_operand:MASK_DWI 1 "nonimmediate_operand"))]
23982 "TARGET_AVX512VP2INTERSECT"
23983 {
23984 if (MEM_P (operands[0]) && MEM_P (operands[1]))
23985 operands[1] = force_reg (<MODE>mode, operands[1]);
23986 })
23987
23988 (define_insn_and_split "*mov<mode>_internal"
23989 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand" "=k,o")
23990 (match_operand:MASK_DWI 1 "nonimmediate_operand" "ko,k"))]
23991 "TARGET_AVX512VP2INTERSECT
23992 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23993 "#"
23994 "&& reload_completed"
23995 [(set (match_dup 0) (match_dup 1))
23996 (set (match_dup 2) (match_dup 3))]
23997 {
23998 split_double_mode (<MODE>mode, &operands[0], 2, &operands[0], &operands[2]);
23999 })
24000
24001 (define_insn "avx512vp2intersect_2intersect<mode>"
24002 [(set (match_operand:P2QI 0 "register_operand" "=k")
24003 (unspec:P2QI
24004 [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
24005 (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
24006 UNSPEC_VP2INTERSECT))]
24007 "TARGET_AVX512VP2INTERSECT"
24008 "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24009 [(set_attr ("prefix") ("evex"))])
24010
24011 (define_insn "avx512vp2intersect_2intersectv16si"
24012 [(set (match_operand:P2HI 0 "register_operand" "=k")
24013 (unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
24014 (match_operand:V16SI 2 "vector_operand" "vm")]
24015 UNSPEC_VP2INTERSECT))]
24016 "TARGET_AVX512VP2INTERSECT"
24017 "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
24018 [(set_attr ("prefix") ("evex"))])
24019
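;; AVX512BF16: vcvtne2ps2bf16 packs two single-precision sources into one
;; bfloat16 vector, vcvtneps2bf16 narrows a single source, and vdpbf16ps
;; multiplies pairs of bfloat16 elements and accumulates the products into
;; single-precision elements.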
24020 (define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
24021 ;; Converting from BF to SF
24022 (define_mode_attr bf16_cvt_2sf
24023 [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
24024 ;; Converting from SF to BF
24025 (define_mode_attr sf_cvt_bf16
24026 [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
24027 ;; Mapping from BF to SF
24028 (define_mode_attr sf_bf16
24029 [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
24030
24031 (define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
24032 [(match_operand:BF16 0 "register_operand")
24033 (match_operand:<bf16_cvt_2sf> 1 "register_operand")
24034 (match_operand:<bf16_cvt_2sf> 2 "register_operand")
24035 (match_operand:<avx512fmaskmode> 3 "register_operand")]
24036 "TARGET_AVX512BF16"
24037 {
24038 emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
24039 operands[2], CONST0_RTX(<MODE>mode), operands[3]));
24040 DONE;
24041 })
24042
24043 (define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
24044 [(set (match_operand:BF16 0 "register_operand" "=v")
24045 (unspec:BF16
24046 [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
24047 (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
24048 UNSPEC_VCVTNE2PS2BF16))]
24049 "TARGET_AVX512BF16"
24050 "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
24051
24052 (define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
24053 [(match_operand:<sf_cvt_bf16> 0 "register_operand")
24054 (match_operand:VF1_AVX512VL 1 "register_operand")
24055 (match_operand:<avx512fmaskmode> 2 "register_operand")]
24056 "TARGET_AVX512BF16"
24057 {
24058 emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
24059 CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
24060 DONE;
24061 })
24062
24063 (define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
24064 [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
24065 (unspec:<sf_cvt_bf16>
24066 [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
24067 UNSPEC_VCVTNEPS2BF16))]
24068 "TARGET_AVX512BF16"
24069 "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
24070
(define_expand "avx512f_dpbf16ps_<mode>_maskz"
  [(match_operand:VF1_AVX512VL 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<sf_bf16> 2 "register_operand")
   (match_operand:<sf_bf16> 3 "register_operand")
   (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1 (operands[0], operands[1],
	     operands[2], operands[3], CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})

(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(unspec:VF1_AVX512VL
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	   (match_operand:<sf_bf16> 2 "register_operand" "v")
	   (match_operand:<sf_bf16> 3 "register_operand" "v")]
	  UNSPEC_VDPBF16PS))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")

(define_insn "avx512f_dpbf16ps_<mode>_mask"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF1_AVX512VL
	  (unspec:VF1_AVX512VL
	    [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	     (match_operand:<sf_bf16> 2 "register_operand" "v")
	     (match_operand:<sf_bf16> 3 "register_operand" "v")]
	    UNSPEC_VDPBF16PS)
	  (match_dup 1)
	  (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")

;; KEYLOCKER
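;; LOADIWKEY loads the internal wrapping key (IWKey) used by the other
;; Key Locker instructions: the key material comes from the two explicit
;; XMM operands together with implicit XMM0, and EAX supplies the key
;; source and handle restriction controls.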
(define_insn "loadiwkey"
  [(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "v")
			  (match_operand:V2DI 1 "register_operand" "v")
			  (match_operand:V2DI 2 "register_operand" "Yz")
			  (match_operand:SI 3 "register_operand" "a")]
			 UNSPECV_LOADIWKEY)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_KL"
  "loadiwkey\t{%0, %1|%1, %0}"
  [(set_attr "type" "other")])

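;; ENCODEKEY128 wraps the 128-bit AES key currently in XMM0 into a key
;; handle written to XMM0-XMM2, zeroes XMM4-XMM6 and clobbers the flags.
;; The expander spells out each of those side effects in an 8-element
;; PARALLEL so the register allocator sees every hard register touched.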
(define_expand "encodekey128u32"
  [(match_par_dup 2
     [(set (match_operand:SI 0 "register_operand")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand")
	      (reg:V2DI XMM0_REG)]
	     UNSPECV_ENCODEKEY128U32))])]
  "TARGET_KL"
{
  rtx xmm_regs[7];
  rtx tmp_unspec;
  unsigned i;

  /* Parallel rtx for the encodekey128_operation predicate.  */
  operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));

  for (i = 0; i < 7; i++)
    xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (SImode,
			       gen_rtvec (2, operands[1], xmm_regs[0]),
			       UNSPECV_ENCODEKEY128U32);

  XVECEXP (operands[2], 0, 0)
    = gen_rtx_SET (operands[0], tmp_unspec);

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (V2DImode,
			       gen_rtvec (1, const0_rtx),
			       UNSPECV_ENCODEKEY128U32);

  for (i = 0; i < 3; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], tmp_unspec);

  for (i = 4; i < 7; i++)
    XVECEXP (operands[2], 0, i)
      = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));

  XVECEXP (operands[2], 0, 7)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
})

(define_insn "*encodekey128u32"
  [(match_parallel 2 "encodekey128_operation"
     [(set (match_operand:SI 0 "register_operand" "=r")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand" "r")
	      (reg:V2DI XMM0_REG)]
	     UNSPECV_ENCODEKEY128U32))])]
  "TARGET_KL"
  "encodekey128\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])

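;; ENCODEKEY256 is analogous, but the 256-bit key is supplied in XMM0:XMM1
;; and the resulting handle occupies XMM0-XMM3, so the PARALLEL below has
;; nine elements: the SET of the u32 result, four handle SETs, three
;; zeroing SETs and the flags clobber.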
(define_expand "encodekey256u32"
  [(match_par_dup 2
     [(set (match_operand:SI 0 "register_operand")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand")
	      (reg:V2DI XMM0_REG)
	      (reg:V2DI XMM1_REG)]
	     UNSPECV_ENCODEKEY256U32))])]
  "TARGET_KL"
{
  rtx xmm_regs[7];
  rtx tmp_unspec;
  unsigned i;

  /* Parallel rtx for the encodekey256_operation predicate.  */
  operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));

  for (i = 0; i < 7; i++)
    xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (SImode,
			       gen_rtvec (3, operands[1],
					  xmm_regs[0], xmm_regs[1]),
			       UNSPECV_ENCODEKEY256U32);

  XVECEXP (operands[2], 0, 0)
    = gen_rtx_SET (operands[0], tmp_unspec);

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (V2DImode,
			       gen_rtvec (1, const0_rtx),
			       UNSPECV_ENCODEKEY256U32);

  for (i = 0; i < 4; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], tmp_unspec);

  for (i = 4; i < 7; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));

  XVECEXP (operands[2], 0, 8)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
})

(define_insn "*encodekey256u32"
  [(match_parallel 2 "encodekey256_operation"
     [(set (match_operand:SI 0 "register_operand" "=r")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand" "r")
	      (reg:V2DI XMM0_REG)
	      (reg:V2DI XMM1_REG)]
	     UNSPECV_ENCODEKEY256U32))])]
  "TARGET_KL"
  "encodekey256\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])

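;; The aes{enc,dec}{128,256}kl instructions encrypt or decrypt an XMM value
;; using a wrapped key handle loaded from memory; ZF is set when the handle
;; is rejected, which is why each pattern also sets the flags register.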
(define_int_iterator AESDECENCKL
  [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
   UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])

(define_int_attr aesklvariant
  [(UNSPECV_AESDEC128KLU8 "dec128kl")
   (UNSPECV_AESDEC256KLU8 "dec256kl")
   (UNSPECV_AESENC128KLU8 "enc128kl")
   (UNSPECV_AESENC256KLU8 "enc256kl")])

(define_insn "aes<aesklvariant>u8"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
			       (match_operand:BLK 2 "memory_operand" "m")]
			      AESDECENCKL))
   (set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
  "TARGET_KL"
  "aes<aesklvariant>\t{%2, %0|%0, %2}"
  [(set_attr "type" "other")])

(define_int_iterator AESDECENCWIDEKL
  [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
   UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])

(define_int_attr aeswideklvariant
  [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
   (UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
   (UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
   (UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])

(define_int_attr AESWIDEKLVARIANT
  [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
   (UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
   (UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
   (UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])

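;; The "wide" Key Locker forms transform XMM0-XMM7 in place with a single
;; key handle, so the expander builds a nine-element PARALLEL: the flags SET
;; plus one SET for each of the eight hard XMM registers the insn rewrites.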
(define_expand "aes<aeswideklvariant>u8"
  [(match_par_dup 1
     [(set (reg:CCZ FLAGS_REG)
	   (unspec_volatile:CCZ
	     [(match_operand:BLK 0 "memory_operand")]
	     AESDECENCWIDEKL))])]
  "TARGET_WIDEKL"
{
  rtx tmp_unspec;
  unsigned i;

  /* Parallel rtx for the aeswidekl_operation predicate.  */
  operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (CCZmode,
			       gen_rtvec (1, operands[0]),
			       UNSPECV_<AESWIDEKLVARIANT>);

  XVECEXP (operands[1], 0, 0)
    = gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
		   tmp_unspec);

  for (i = 0; i < 8; i++)
    {
      rtx xmm_reg = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

      tmp_unspec
	= gen_rtx_UNSPEC_VOLATILE (V2DImode,
				   gen_rtvec (1, xmm_reg),
				   UNSPECV_<AESWIDEKLVARIANT>);
      XVECEXP (operands[1], 0, i + 1)
	= gen_rtx_SET (xmm_reg, tmp_unspec);
    }
})

(define_insn "*aes<aeswideklvariant>u8"
  [(match_parallel 1 "aeswidekl_operation"
     [(set (reg:CCZ FLAGS_REG)
	   (unspec_volatile:CCZ
	     [(match_operand:BLK 0 "memory_operand" "m")]
	     AESDECENCWIDEKL))])]
  "TARGET_WIDEKL"
  "aes<aeswideklvariant>\t{%0}"
  [(set_attr "type" "other")])