1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2021 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
56 UNSPEC_XOP_UNSIGNED_CMP
67 UNSPEC_AESKEYGENASSIST
88 ;; For AVX512F support
90 UNSPEC_UNSIGNED_FIX_NOTRUNC
105 UNSPEC_COMPRESS_STORE
117 ;; For embed. rounding feature
118 UNSPEC_EMBEDDED_ROUNDING
120 ;; For AVX512PF support
121 UNSPEC_GATHER_PREFETCH
122 UNSPEC_SCATTER_PREFETCH
124 ;; For AVX512ER support
138 ;; For AVX512BW support
146 ;; For AVX512DQ support
151 ;; For AVX512IFMA support
155 ;; For AVX512VBMI support
158 ;; For AVX5124FMAPS/AVX5124VNNIW support
165 UNSPEC_GF2P8AFFINEINV
169 ;; For AVX512VBMI2 support
175 ;; For AVX512VNNI support
176 UNSPEC_VPMADDUBSWACCD
177 UNSPEC_VPMADDUBSWACCSSD
179 UNSPEC_VPMADDWDACCSSD
187 ;; For VPCLMULQDQ support
190 ;; For AVX512BITALG support
193 ;; For VP2INTERSECT support
196 ;; For AVX512BF16 support
197 UNSPEC_VCVTNE2PS2BF16
202 (define_c_enum "unspecv" [
213 UNSPECV_AESDEC128KLU8
214 UNSPECV_AESENC128KLU8
215 UNSPECV_AESDEC256KLU8
216 UNSPECV_AESENC256KLU8
217 UNSPECV_AESDECWIDE128KLU8
218 UNSPECV_AESENCWIDE128KLU8
219 UNSPECV_AESDECWIDE256KLU8
220 UNSPECV_AESENCWIDE256KLU8
221 UNSPECV_ENCODEKEY128U32
222 UNSPECV_ENCODEKEY256U32
225 ;; All vector modes including V?TImode, used in move patterns.
226 (define_mode_iterator VMOVE
227 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
228 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
229 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
230 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
231 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
232 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
233 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
235 ;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
236 (define_mode_iterator V48_AVX512VL
237 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
238 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
239 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
240 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
242 ;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
243 (define_mode_iterator VI12_AVX512VL
244 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
245 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
247 ;; Same modes as VI12_AVX512VL, but without the implied TARGET_AVX512BW
;; baseline; each entry spells out its full condition instead.
248 (define_mode_iterator VI12_AVX512VLBW
249 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
250 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
251 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; 1 byte AVX-512{BW,VL} vector modes.  Supposed TARGET_AVX512BW baseline.
253 (define_mode_iterator VI1_AVX512VL
254 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; All vector modes (no V?TImode here, unlike VMOVE).
257 (define_mode_iterator V
258 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
259 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
260 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
261 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
262 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
263 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
265 ;; All 128bit vector modes
266 (define_mode_iterator V_128
267 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
269 ;; All 256bit vector modes
270 (define_mode_iterator V_256
271 [V32QI V16HI V8SI V4DI V8SF V4DF])
273 ;; All 128bit and 256bit vector modes
274 (define_mode_iterator V_128_256
275 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
277 ;; All 512bit vector modes
278 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
280 ;; All 256bit and 512bit vector modes
281 (define_mode_iterator V_256_512
282 [V32QI V16HI V8SI V4DI V8SF V4DF
283 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
284 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
286 ;; All vector float modes
287 (define_mode_iterator VF
288 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
289 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
291 ;; 128- and 256-bit float vector modes
292 (define_mode_iterator VF_128_256
293 [(V8SF "TARGET_AVX") V4SF
294 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
296 ;; All SFmode vector float modes
297 (define_mode_iterator VF1
298 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
;; Like VF1, but the 256-bit form requires AVX2 rather than AVX.
300 (define_mode_iterator VF1_AVX2
301 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
303 ;; 128- and 256-bit SF vector modes
304 (define_mode_iterator VF1_128_256
305 [(V8SF "TARGET_AVX") V4SF])
;; V8SF, plus V4SF when AVX512VL is enabled.
307 (define_mode_iterator VF1_128_256VL
308 [V8SF (V4SF "TARGET_AVX512VL")])
310 ;; All DFmode vector float modes
311 (define_mode_iterator VF2
312 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
314 ;; 128- and 256-bit DF vector modes
315 (define_mode_iterator VF2_128_256
316 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes.
318 (define_mode_iterator VF2_512_256
319 [(V8DF "TARGET_AVX512F") V4DF])
;; V8DF, plus V4DF when AVX512VL is enabled.
321 (define_mode_iterator VF2_512_256VL
322 [V8DF (V4DF "TARGET_AVX512VL")])
324 ;; All 128bit vector float modes
325 (define_mode_iterator VF_128
326 [V4SF (V2DF "TARGET_SSE2")])
328 ;; All 256bit vector float modes
329 (define_mode_iterator VF_256
332 ;; All 512bit vector float modes
333 (define_mode_iterator VF_512
;; 4- and 8-byte integer AVX-512{F,VL} vector modes.
;; Supposed TARGET_AVX512F baseline.
336 (define_mode_iterator VI48_AVX512VL
337 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
338 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; All float AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
340 (define_mode_iterator VF_AVX512VL
341 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
342 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
344 ;; AVX512ER SF plus 128- and 256-bit SF vector modes
345 (define_mode_iterator VF1_AVX512ER_128_256
346 [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
;; DF AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
348 (define_mode_iterator VF2_AVX512VL
349 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SF AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
351 (define_mode_iterator VF1_AVX512VL
352 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
354 ;; All vector integer modes
355 (define_mode_iterator VI
356 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
357 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
358 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
359 (V8SI "TARGET_AVX") V4SI
360 (V4DI "TARGET_AVX") V2DI])
;; Like VI, but the 256-bit forms require AVX2 rather than AVX.
362 (define_mode_iterator VI_AVX2
363 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
364 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
365 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
366 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
368 ;; All QImode vector integer modes
369 (define_mode_iterator VI1
370 [(V32QI "TARGET_AVX") V16QI])
372 ;; All 128-bit vector modes (integer and float), plus their 256-bit
;; forms under AVX.
373 (define_mode_iterator V_AVX
374 [V16QI V8HI V4SI V2DI V4SF V2DF
375 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
376 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
377 (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
379 (define_mode_iterator VI48_AVX
381 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
;; All DImode vector integer modes.
383 (define_mode_iterator VI8
384 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
385 
386 (define_mode_iterator VI8_FVL
387 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
;; DImode AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
389 (define_mode_iterator VI8_AVX512VL
390 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
392 (define_mode_iterator VI8_256_512
393 [V8DI (V4DI "TARGET_AVX512VL")])
;; QImode vector integer modes; the 256-bit form requires AVX2.
395 (define_mode_iterator VI1_AVX2
396 [(V32QI "TARGET_AVX2") V16QI])
398 (define_mode_iterator VI1_AVX512
399 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
401 (define_mode_iterator VI1_AVX512F
402 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
;; HImode vector integer modes.
404 (define_mode_iterator VI2_AVX2
405 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
407 (define_mode_iterator VI2_AVX512F
408 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SImode vector integer modes.
410 (define_mode_iterator VI4_AVX
411 [(V8SI "TARGET_AVX") V4SI])
413 (define_mode_iterator VI4_AVX2
414 [(V8SI "TARGET_AVX2") V4SI])
416 (define_mode_iterator VI4_AVX512F
417 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
419 (define_mode_iterator VI4_AVX512VL
420 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
422 (define_mode_iterator VI48_AVX512F_AVX512VL
423 [V4SI V8SI (V16SI "TARGET_AVX512F")
424 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
426 (define_mode_iterator VI2_AVX512VL
427 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
429 (define_mode_iterator VI1_AVX512VL_F
430 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
432 (define_mode_iterator VI8_AVX2_AVX512BW
433 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
435 (define_mode_iterator VI8_AVX2
436 [(V4DI "TARGET_AVX2") V2DI])
438 (define_mode_iterator VI8_AVX2_AVX512F
439 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
441 (define_mode_iterator VI8_AVX_AVX512F
442 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
444 (define_mode_iterator VI4_128_8_256
448 (define_mode_iterator V8FI
452 (define_mode_iterator V16FI
;; V?TImode vector modes, i.e. one mode per full-vector width.
455 ;; ??? We should probably use TImode instead.
456 (define_mode_iterator VIMAX_AVX2_AVX512BW
457 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
459 ;; Suppose TARGET_AVX512BW as baseline
460 (define_mode_iterator VIMAX_AVX512VL
461 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
463 (define_mode_iterator VIMAX_AVX2
464 [(V2TI "TARGET_AVX2") V1TI])
466 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
467 (define_mode_iterator SSESCALARMODE
468 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
;; 1- and 2-byte element vector modes; the 256-bit forms require AVX2.
470 (define_mode_iterator VI12_AVX2
471 [(V32QI "TARGET_AVX2") V16QI
472 (V16HI "TARGET_AVX2") V8HI])
474 (define_mode_iterator VI12_AVX2_AVX512BW
475 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
476 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; 2- and 4-byte element vector modes; the 256-bit forms require AVX2.
478 (define_mode_iterator VI24_AVX2
479 [(V16HI "TARGET_AVX2") V8HI
480 (V8SI "TARGET_AVX2") V4SI])
482 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
483 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
484 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
485 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
487 (define_mode_iterator VI124_AVX2
488 [(V32QI "TARGET_AVX2") V16QI
489 (V16HI "TARGET_AVX2") V8HI
490 (V8SI "TARGET_AVX2") V4SI])
492 (define_mode_iterator VI2_AVX2_AVX512BW
493 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
495 (define_mode_iterator VI248_AVX512VL
497 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
498 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
499 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 4- and 8-byte element vector modes; the 256-bit forms require AVX2.
501 (define_mode_iterator VI48_AVX2
502 [(V8SI "TARGET_AVX2") V4SI
503 (V4DI "TARGET_AVX2") V2DI])
;; 2-, 4- and 8-byte element vector modes; the 256-bit forms require AVX2.
505 (define_mode_iterator VI248_AVX2
506 [(V16HI "TARGET_AVX2") V8HI
507 (V8SI "TARGET_AVX2") V4SI
508 (V4DI "TARGET_AVX2") V2DI])
510 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
511 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
512 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
513 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; 512-bit 2-, 4- and 8-byte element vector modes.
515 (define_mode_iterator VI248_AVX512BW
516 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
518 (define_mode_iterator VI248_AVX512BW_AVX512VL
519 [(V32HI "TARGET_AVX512BW")
520 (V4DI "TARGET_AVX512VL") V16SI V8DI])
522 ;; Suppose TARGET_AVX512VL as baseline
523 (define_mode_iterator VI248_AVX512BW_1
524 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
528 (define_mode_iterator VI248_AVX512BW_2
529 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
;; 4- and 8-byte element vector modes; the 512-bit forms require AVX512F.
533 (define_mode_iterator VI48_AVX512F
534 [(V16SI "TARGET_AVX512F") V8SI V4SI
535 (V8DI "TARGET_AVX512F") V4DI V2DI])
;; As above, but with the 256-bit forms gated on AVX as well.
537 (define_mode_iterator VI48_AVX_AVX512F
538 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
539 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; 1- and 2-byte element vector modes; 256-bit needs AVX, 512-bit AVX512F.
541 (define_mode_iterator VI12_AVX_AVX512F
542 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
543 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
545 (define_mode_iterator V48_AVX2
548 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
549 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
551 (define_mode_iterator VI1_AVX512VLBW
552 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
553 (V16QI "TARGET_AVX512VL")])
;; AVX-512 feature-name string (avx512f/avx512vl/avx512bw) for each mode,
;; used to build pattern names such as <avx512>_load<mode>_mask.
555 (define_mode_attr avx512
556 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
557 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
558 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
559 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
560 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
561 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
563 (define_mode_attr sse2_avx_avx512f
564 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
565 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
566 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
567 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
568 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
569 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
571 (define_mode_attr sse2_avx2
572 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
573 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
574 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
575 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
576 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
578 (define_mode_attr ssse3_avx2
579 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
580 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
581 (V4SI "ssse3") (V8SI "avx2")
582 (V2DI "ssse3") (V4DI "avx2")
583 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
585 (define_mode_attr sse4_1_avx2
586 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
587 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
588 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
589 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
591 (define_mode_attr avx_avx2
592 [(V4SF "avx") (V2DF "avx")
593 (V8SF "avx") (V4DF "avx")
594 (V4SI "avx2") (V2DI "avx2")
595 (V8SI "avx2") (V4DI "avx2")])
597 (define_mode_attr vec_avx2
598 [(V16QI "vec") (V32QI "avx2")
599 (V8HI "vec") (V16HI "avx2")
600 (V4SI "vec") (V8SI "avx2")
601 (V2DI "vec") (V4DI "avx2")])
603 (define_mode_attr avx2_avx512
604 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
605 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
606 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
607 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
608 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; "f" for float modes, "i" for integer modes.
610 (define_mode_attr shuffletype
611 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
612 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
613 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
614 (V32HI "i") (V16HI "i") (V8HI "i")
615 (V64QI "i") (V32QI "i") (V16QI "i")
616 (V4TI "i") (V2TI "i") (V1TI "i")])
;; Quarter-width vector mode of each 512-bit mode.
618 (define_mode_attr ssequartermode
619 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
621 (define_mode_attr ssequarterinsnmode
622 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
;; {x}/{y}/{z} vector-length designator emitted for memory operands.
624 (define_mode_attr vecmemsuffix
625 [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
626 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
;; Same-element-count mode with elements of twice the width, lowercase.
628 (define_mode_attr ssedoublemodelower
629 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
630 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
631 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
633 (define_mode_attr ssedoublemode
634 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
635 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
636 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
637 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
638 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
639 (V4DI "V8DI") (V8DI "V16DI")])
;; QImode vector mode of the same total size as each DI/SI vector mode.
641 (define_mode_attr ssebytemode
642 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
643 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
645 ;; All 128bit vector integer modes
646 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
648 ;; All 256bit vector integer modes
649 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
651 ;; All 128 and 256bit vector integer modes
652 (define_mode_iterator VI_128_256 [V16QI V8HI V4SI V2DI V32QI V16HI V8SI V4DI])
654 ;; Various 128bit vector integer mode combinations
655 (define_mode_iterator VI12_128 [V16QI V8HI])
656 (define_mode_iterator VI14_128 [V16QI V4SI])
657 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
658 (define_mode_iterator VI24_128 [V8HI V4SI])
659 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
660 (define_mode_iterator VI48_128 [V4SI V2DI])
662 ;; Various 256bit and 512bit vector integer mode combinations
663 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
664 (define_mode_iterator VI124_256_AVX512F_AVX512BW
666 (V64QI "TARGET_AVX512BW")
667 (V32HI "TARGET_AVX512BW")
668 (V16SI "TARGET_AVX512F")])
669 (define_mode_iterator VI48_256 [V8SI V4DI])
670 (define_mode_iterator VI48_512 [V16SI V8DI])
671 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; All 512-bit vector integer modes; QI/HI elements require AVX512BW.
672 (define_mode_iterator VI_AVX512BW
673 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
675 ;; Int-float size matches
676 (define_mode_iterator VI4F_128 [V4SI V4SF])
677 (define_mode_iterator VI8F_128 [V2DI V2DF])
678 (define_mode_iterator VI4F_256 [V8SI V8SF])
679 (define_mode_iterator VI8F_256 [V4DI V4DF])
680 (define_mode_iterator VI4F_256_512
682 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
683 (define_mode_iterator VI48F_256_512
685 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
686 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
687 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
688 (define_mode_iterator VF48_I1248
689 [V16SI V16SF V8DI V8DF V32HI V64QI])
;; 4- and 8-byte integer and float vector modes; the 128- and 256-bit
;; forms require AVX512VL.
690 (define_mode_iterator VI48F
691 [V16SI V16SF V8DI V8DF
692 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
693 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
694 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
695 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
696 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
698 (define_mode_iterator VF_AVX512
699 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
700 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
;; EVEX embedded-broadcast qualifier for each mode, e.g. {1to8} for V8SI.
703 (define_mode_attr avx512bcst
704 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
705 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
706 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
707 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
708 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
709 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
711 ;; Mapping from float mode to required SSE level
712 (define_mode_attr sse
713 [(SF "sse") (DF "sse2")
714 (V4SF "sse") (V2DF "sse2")
715 (V16SF "avx512f") (V8SF "avx")
716 (V8DF "avx512f") (V4DF "avx")])
718 (define_mode_attr sse2
719 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
720 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
722 (define_mode_attr sse3
723 [(V16QI "sse3") (V32QI "avx")])
725 (define_mode_attr sse4_1
726 [(V4SF "sse4_1") (V2DF "sse4_1")
727 (V8SF "avx") (V4DF "avx")
729 (V4DI "avx") (V2DI "sse4_1")
730 (V8SI "avx") (V4SI "sse4_1")
731 (V16QI "sse4_1") (V32QI "avx")
732 (V8HI "sse4_1") (V16HI "avx")])
;; Vector-width suffix ("", "256" or "512") appended to pattern names.
734 (define_mode_attr avxsizesuffix
735 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
736 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
737 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
738 (V16SF "512") (V8DF "512")
739 (V8SF "256") (V4DF "256")
740 (V4SF "") (V2DF "")])
742 ;; SSE instruction mode
743 (define_mode_attr sseinsnmode
744 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
745 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
746 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
747 (V16SF "V16SF") (V8DF "V8DF")
748 (V8SF "V8SF") (V4DF "V4DF")
749 (V4SF "V4SF") (V2DF "V2DF")
752 ;; Mapping of vector modes to corresponding mask size
753 (define_mode_attr avx512fmaskmode
754 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
755 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
756 (V16SI "HI") (V8SI "QI") (V4SI "QI")
757 (V8DI "QI") (V4DI "QI") (V2DI "QI")
758 (V16SF "HI") (V8SF "QI") (V4SF "QI")
759 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
761 ;; Same mapping with the mask mode name in lowercase, for pattern names
762 (define_mode_attr avx512fmaskmodelower
763 [(V64QI "di") (V32QI "si") (V16QI "hi")
764 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
765 (V16SI "hi") (V8SI "qi") (V4SI "qi")
766 (V8DI "qi") (V4DI "qi") (V2DI "qi")
767 (V16SF "hi") (V8SF "qi") (V4SF "qi")
768 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
770 ;; Mapping of vector modes to corresponding mask half size
771 (define_mode_attr avx512fmaskhalfmode
772 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
773 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
774 (V16SI "QI") (V8SI "QI") (V4SI "QI")
775 (V8DI "QI") (V4DI "QI") (V2DI "QI")
776 (V16SF "QI") (V8SF "QI") (V4SF "QI")
777 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
779 ;; Mapping of vector float modes to an integer mode of the same size
780 (define_mode_attr sseintvecmode
781 [(V16SF "V16SI") (V8DF "V8DI")
782 (V8SF "V8SI") (V4DF "V4DI")
783 (V4SF "V4SI") (V2DF "V2DI")
784 (V16SI "V16SI") (V8DI "V8DI")
785 (V8SI "V8SI") (V4DI "V4DI")
786 (V4SI "V4SI") (V2DI "V2DI")
787 (V16HI "V16HI") (V8HI "V8HI")
788 (V32HI "V32HI") (V64QI "V64QI")
789 (V32QI "V32QI") (V16QI "V16QI")])
;; Scalar integer mode (TI/OI/XI) of the same size as a float vector mode.
791 (define_mode_attr sseintvecmode2
792 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
793 (V8SF "OI") (V4SF "TI")])
795 (define_mode_attr sseintvecmodelower
796 [(V16SF "v16si") (V8DF "v8di")
797 (V8SF "v8si") (V4DF "v4di")
798 (V4SF "v4si") (V2DF "v2di")
799 (V8SI "v8si") (V4DI "v4di")
800 (V4SI "v4si") (V2DI "v2di")
801 (V16HI "v16hi") (V8HI "v8hi")
802 (V32QI "v32qi") (V16QI "v16qi")])
804 ;; Mapping of vector modes to a vector mode of double size
805 (define_mode_attr ssedoublevecmode
806 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
807 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
808 (V8SF "V16SF") (V4DF "V8DF")
809 (V4SF "V8SF") (V2DF "V4DF")])
811 ;; Mapping of vector modes to a vector mode of half size
812 (define_mode_attr ssehalfvecmode
813 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
814 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
815 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
816 (V16SF "V8SF") (V8DF "V4DF")
817 (V8SF "V4SF") (V4DF "V2DF")
820 (define_mode_attr ssehalfvecmodelower
821 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
822 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
823 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
824 (V16SF "v8sf") (V8DF "v4df")
825 (V8SF "v4sf") (V4DF "v2df")
828 ;; Mapping of vector modes to packed single mode of the same size
829 (define_mode_attr ssePSmode
830 [(V16SI "V16SF") (V8DF "V16SF")
831 (V16SF "V16SF") (V8DI "V16SF")
832 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
833 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
834 (V8SI "V8SF") (V4SI "V4SF")
835 (V4DI "V8SF") (V2DI "V4SF")
836 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
837 (V8SF "V8SF") (V4SF "V4SF")
838 (V4DF "V8SF") (V2DF "V4SF")])
840 (define_mode_attr ssePSmode2
841 [(V8DI "V8SF") (V4DI "V4SF")])
843 ;; Mapping of vector modes back to the scalar modes
844 (define_mode_attr ssescalarmode
845 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
846 (V32HI "HI") (V16HI "HI") (V8HI "HI")
847 (V16SI "SI") (V8SI "SI") (V4SI "SI")
848 (V8DI "DI") (V4DI "DI") (V2DI "DI")
849 (V16SF "SF") (V8SF "SF") (V4SF "SF")
850 (V8DF "DF") (V4DF "DF") (V2DF "DF")
851 (V4TI "TI") (V2TI "TI")])
853 ;; Mapping of vector modes back to the scalar modes, in lowercase
854 (define_mode_attr ssescalarmodelower
855 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
856 (V32HI "hi") (V16HI "hi") (V8HI "hi")
857 (V16SI "si") (V8SI "si") (V4SI "si")
858 (V8DI "di") (V4DI "di") (V2DI "di")
859 (V16SF "sf") (V8SF "sf") (V4SF "sf")
860 (V8DF "df") (V4DF "df") (V2DF "df")
861 (V4TI "ti") (V2TI "ti")])
863 ;; Mapping of vector modes to the 128bit modes
864 (define_mode_attr ssexmmmode
865 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
866 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
867 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
868 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
869 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
870 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
872 ;; Pointer size override for scalar modes (Intel asm dialect)
873 (define_mode_attr iptr
874 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
875 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
876 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
877 (V16SF "k") (V8DF "q")
878 (V8SF "k") (V4DF "q")
879 (V4SF "k") (V2DF "q")
882 ;; Mapping of vector modes to VPTERNLOG suffix
;; (QI/HI element modes use the "d" form).
883 (define_mode_attr ternlogsuffix
884 [(V8DI "q") (V4DI "q") (V2DI "q")
885 (V16SI "d") (V8SI "d") (V4SI "d")
886 (V32HI "d") (V16HI "d") (V8HI "d")
887 (V64QI "d") (V32QI "d") (V16QI "d")])
889 ;; Number of scalar elements in each vector type
890 (define_mode_attr ssescalarnum
891 [(V64QI "64") (V16SI "16") (V8DI "8")
892 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
893 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
894 (V16SF "16") (V8DF "8")
895 (V8SF "8") (V4DF "4")
896 (V4SF "4") (V2DF "2")])
898 ;; Mask of scalar elements in each vector type
899 (define_mode_attr ssescalarnummask
900 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
901 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
902 (V8SF "7") (V4DF "3")
903 (V4SF "3") (V2DF "1")])
;; Scalar element size in bits (note: the V?TI modes map to 64).
905 (define_mode_attr ssescalarsize
906 [(V4TI "64") (V2TI "64") (V1TI "64")
907 (V8DI "64") (V4DI "64") (V2DI "64")
908 (V64QI "8") (V32QI "8") (V16QI "8")
909 (V32HI "16") (V16HI "16") (V8HI "16")
910 (V16SI "32") (V8SI "32") (V4SI "32")
911 (V16SF "32") (V8SF "32") (V4SF "32")
912 (V8DF "64") (V4DF "64") (V2DF "64")])
914 ;; SSE prefix for integer vector modes
915 (define_mode_attr sseintprefix
916 [(V2DI "p") (V2DF "")
921 (V16SI "p") (V16SF "")
922 (V16QI "p") (V8HI "p")
923 (V32QI "p") (V16HI "p")
924 (V64QI "p") (V32HI "p")])
926 ;; SSE scalar suffix for vector modes
927 (define_mode_attr ssescalarmodesuffix
929 (V16SF "ss") (V8DF "sd")
930 (V8SF "ss") (V4DF "sd")
931 (V4SF "ss") (V2DF "sd")
932 (V16SI "d") (V8DI "q")
933 (V8SI "d") (V4DI "q")
934 (V4SI "d") (V2DI "q")])
936 ;; Pack/unpack vector modes
937 (define_mode_attr sseunpackmode
938 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
939 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
940 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Inverse of sseunpackmode: the narrower mode produced by packs.
942 (define_mode_attr ssepackmode
943 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
944 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
945 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
947 ;; Mapping of the max integer size for xop rotate immediate constraint
948 (define_mode_attr sserotatemax
949 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
951 ;; Mapping of mode to cast intrinsic name
952 (define_mode_attr castmode
953 [(V8SI "si") (V8SF "ps") (V4DF "pd")
954 (V16SI "si") (V16SF "ps") (V8DF "pd")])
956 ;; Instruction suffix for sign and zero extensions.
957 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
959 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
960 ;; i64x4 or f64x4 for 512bit modes.
961 (define_mode_attr i128
962 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
963 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
964 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
966 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
967 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
968 (define_mode_attr i128vldq
969 [(V8SF "f32x4") (V4DF "f64x2")
970 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
973 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
974 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
976 ;; Mapping for dbpsadbw modes
977 (define_mode_attr dbpsadbwmode
978 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
980 ;; Mapping suffixes for broadcast
981 (define_mode_attr bcstscalarsuff
982 [(V64QI "b") (V32QI "b") (V16QI "b")
983 (V32HI "w") (V16HI "w") (V8HI "w")
984 (V16SI "d") (V8SI "d") (V4SI "d")
985 (V8DI "q") (V4DI "q") (V2DI "q")
986 (V16SF "ss") (V8SF "ss") (V4SF "ss")
987 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
989 ;; Tie mode of assembler operand to mode iterator
990 (define_mode_attr xtg_mode
991 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
992 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
993 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
995 ;; Half mask mode for unpacks
996 (define_mode_attr HALFMASKMODE
997 [(DI "SI") (SI "HI")])
999 ;; Double mask mode for packs
1000 (define_mode_attr DOUBLEMASKMODE
1001 [(HI "SI") (SI "DI")])
1004 ;; Include define_subst patterns for instructions with mask
1005 (include "subst.md")
1007 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
1009 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1015 ;; All of these patterns are enabled for SSE1 as well as SSE2.
1016 ;; This is essential for maintaining stable calling conventions.
1018 (define_expand "mov<mode>"
1019 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1020 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1023 ix86_expand_vector_move (<MODE>mode, operands);
1027 (define_insn "mov<mode>_internal"
1028 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
1030 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
1033 && (register_operand (operands[0], <MODE>mode)
1034 || register_operand (operands[1], <MODE>mode))"
1036 switch (get_attr_type (insn))
1039 return standard_sse_constant_opcode (insn, operands);
1042 return ix86_output_ssemov (insn, operands);
1048 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1049 (set_attr "prefix" "maybe_vex")
1051 (cond [(match_test "TARGET_AVX")
1052 (const_string "<sseinsnmode>")
1053 (ior (not (match_test "TARGET_SSE2"))
1054 (match_test "optimize_function_for_size_p (cfun)"))
1055 (const_string "V4SF")
1056 (and (match_test "<MODE>mode == V2DFmode")
1057 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1058 (const_string "V4SF")
1059 (and (eq_attr "alternative" "3")
1060 (match_test "TARGET_SSE_TYPELESS_STORES"))
1061 (const_string "V4SF")
1062 (and (eq_attr "alternative" "0")
1063 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1066 (const_string "<sseinsnmode>")))
1067 (set (attr "enabled")
1068 (cond [(and (match_test "<MODE_SIZE> == 16")
1069 (eq_attr "alternative" "1"))
1070 (symbol_ref "TARGET_SSE2")
1071 (and (match_test "<MODE_SIZE> == 32")
1072 (eq_attr "alternative" "1"))
1073 (symbol_ref "TARGET_AVX2")
1075 (symbol_ref "true")))])
1077 ;; If mem_addr points to a memory region with less than whole vector size bytes
1078 ;; of accessible memory and k is a mask that would prevent reading the inaccessible
1079 ;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to vpblendd
1081 (define_expand "<avx512>_load<mode>_mask"
1082 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1083 (vec_merge:V48_AVX512VL
1084 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
1085 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
1086 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1089 if (CONST_INT_P (operands[3]))
1091 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1094 else if (MEM_P (operands[1]))
1095 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1096 gen_rtvec(1, operands[1]),
;; Matching insn for the masked load: FP element modes emit
;; vmovu/vmova<suffix> (unaligned vs. aligned source), integer element
;; modes emit vmovdqu/vmovdqa<size>.  %{%3%} prints the mask register,
;; %N2 the zero/merge masking suffix (zeroing when operand 2 is 0).
1100 (define_insn "*<avx512>_load<mode>_mask"
1101 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1102 (vec_merge:V48_AVX512VL
1103 (unspec:V48_AVX512VL
1104 [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
1106 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
1107 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1110 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1112 if (misaligned_operand (operands[1], <MODE>mode))
1113 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1115 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1119 if (misaligned_operand (operands[1], <MODE>mode))
1120 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1122 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1125 [(set_attr "type" "ssemov")
1126 (set_attr "prefix" "evex")
1127 (set_attr "mode" "<sseinsnmode>")])
;; Once the UNSPEC wrapper is no longer needed, split an unmasked
;; UNSPEC load back into a plain (set reg mem).
1129 (define_insn_and_split "*<avx512>_load<mode>"
1130 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1131 (unspec:V48_AVX512VL
1132 [(match_operand:V48_AVX512VL 1 "memory_operand")]
1137 [(set (match_dup 0) (match_dup 1))])
;; Same masked-load expander for 1/2-byte-element AVX512VL vectors
;; (mirrors the V48 variant above): all-ones constant mask -> plain move,
;; memory source wrapped in an UNSPEC to forbid widening.
1139 (define_expand "<avx512>_load<mode>_mask"
1140 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
1141 (vec_merge:VI12_AVX512VL
1142 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
1143 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
1144 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1147 if (CONST_INT_P (operands[3]))
1149 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1152 else if (MEM_P (operands[1]))
1153 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1154 gen_rtvec(1, operands[1]),
;; Masked byte/word load insn; element alignment can never reach the
;; vector size, so only the unaligned form vmovdqu<size> is emitted.
1159 (define_insn "*<avx512>_load<mode>_mask"
1160 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1161 (vec_merge:VI12_AVX512VL
1162 (unspec:VI12_AVX512VL
1163 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1165 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
1166 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1168 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1169 [(set_attr "type" "ssemov")
1170 (set_attr "prefix" "evex")
1171 (set_attr "mode" "<sseinsnmode>")])
;; Split an unmasked UNSPEC byte/word load back into a plain move.
1173 (define_insn_and_split "*<avx512>_load<mode>"
1174 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1175 (unspec:VI12_AVX512VL
1176 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1181 [(set (match_dup 0) (match_dup 1))])
;; Masked scalar register-to-register move (vmovss/vmovsd with a %{k%}
;; mask): element 0 merged from operand 2 under mask 4, upper elements
;; taken from operand 1.
1183 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1184 [(set (match_operand:VF_128 0 "register_operand" "=v")
1187 (match_operand:VF_128 2 "register_operand" "v")
1188 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1189 (match_operand:QI 4 "register_operand" "Yk"))
1190 (match_operand:VF_128 1 "register_operand" "v")
1193 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1194 [(set_attr "type" "ssemov")
1195 (set_attr "prefix" "evex")
1196 (set_attr "mode" "<ssescalarmode>")])
;; Expander: masked scalar SF/DF load into element 0 of a vector, with
;; the remaining elements zeroed (operand 4 is materialized as const0).
1198 (define_expand "avx512f_load<mode>_mask"
1199 [(set (match_operand:<ssevecmode> 0 "register_operand")
1200 (vec_merge:<ssevecmode>
1201 (vec_merge:<ssevecmode>
1202 (vec_duplicate:<ssevecmode>
1203 (match_operand:MODEF 1 "memory_operand"))
1204 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1205 (match_operand:QI 3 "register_operand"))
1209 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
;; Masked scalar SF/DF load insn (vmovss/vmovsd): element 0 loaded from
;; memory under mask %3, merged with operand 2 (%N3-style zero-masking
;; when operand 2 is 0), upper elements zeroed via const0 operand 4.
;; Fix: the Intel-dialect half of the template said %{3%}, which prints
;; the literal text "{3}"; it must be %{%3%} so both dialects print the
;; mask register, matching the AT&T half and every sibling pattern.
1211 (define_insn "*avx512f_load<mode>_mask"
1212 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1213 (vec_merge:<ssevecmode>
1214 (vec_merge:<ssevecmode>
1215 (vec_duplicate:<ssevecmode>
1216 (match_operand:MODEF 1 "memory_operand" "m"))
1217 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1218 (match_operand:QI 3 "register_operand" "Yk"))
1219 (match_operand:<ssevecmode> 4 "const0_operand" "C")
1222 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1223 [(set_attr "type" "ssemov")
1224 (set_attr "prefix" "evex")
1225 (set_attr "memory" "load")
1226 (set_attr "mode" "<MODE>")])
;; Masked scalar SF/DF store: element 0 of operand 1 written to memory
;; only when bit 0 of mask register %2 is set.
1228 (define_insn "avx512f_store<mode>_mask"
1229 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1231 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1234 (match_operand:<ssevecmode> 1 "register_operand" "v")
1235 (parallel [(const_int 0)]))
1238 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1239 [(set_attr "type" "ssemov")
1240 (set_attr "prefix" "evex")
1241 (set_attr "memory" "store")
1242 (set_attr "mode" "<MODE>")])
;; Masked blend for 4/8-byte-element vectors.  If source and destination
;; registers differ the real vblendm insn is used; otherwise a masked
;; vmov (aligned/unaligned, FP or integer flavor) does the same merge.
1244 (define_insn "<avx512>_blendm<mode>"
1245 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1246 (vec_merge:V48_AVX512VL
1247 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1248 (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1249 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1252 if (REG_P (operands[1])
1253 && REGNO (operands[1]) != REGNO (operands[0]))
1254 return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
1256 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1258 if (misaligned_operand (operands[2], <MODE>mode))
1259 return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1261 return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1265 if (misaligned_operand (operands[2], <MODE>mode))
1266 return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1268 return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1271 [(set_attr "type" "ssemov")
1272 (set_attr "prefix" "evex")
1273 (set_attr "mode" "<sseinsnmode>")])
;; Masked blend for byte/word vectors: alternative 0 (dest matches
;; operand 1 or it is zero) emits a masked vmovdqu, alternative 1 the
;; three-operand vpblendm.
1275 (define_insn "<avx512>_blendm<mode>"
1276 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1277 (vec_merge:VI12_AVX512VL
1278 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1279 (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1280 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1283 vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
1284 vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1285 [(set_attr "type" "ssemov")
1286 (set_attr "prefix" "evex")
1287 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store for 4/8-byte-element modes: masked vmovu/vmova
;; for FP elements (by destination alignment), vmovdqu/vmovdqa<size>
;; for integer elements.
1289 (define_insn "<avx512>_store<mode>_mask"
1290 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1291 (vec_merge:V48_AVX512VL
1292 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1294 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1297 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1299 if (misaligned_operand (operands[0], <MODE>mode))
1300 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1302 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1306 if (misaligned_operand (operands[0], <MODE>mode))
1307 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1309 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1312 [(set_attr "type" "ssemov")
1313 (set_attr "prefix" "evex")
1314 (set_attr "memory" "store")
1315 (set_attr "mode" "<sseinsnmode>")])
;; Masked byte/word vector store; only the unaligned vmovdqu form is
;; needed (element alignment never guarantees vector alignment).
1317 (define_insn "<avx512>_store<mode>_mask"
1318 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1319 (vec_merge:VI12_AVX512VL
1320 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1322 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1324 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1325 [(set_attr "type" "ssemov")
1326 (set_attr "prefix" "evex")
1327 (set_attr "memory" "store")
1328 (set_attr "mode" "<sseinsnmode>")])
;; movq: copy the low 64-bit element of a V2DI, zeroing the high half.
1330 (define_insn "sse2_movq128"
1331 [(set (match_operand:V2DI 0 "register_operand" "=v")
1334 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1335 (parallel [(const_int 0)]))
1338 "%vmovq\t{%1, %0|%0, %q1}"
1339 [(set_attr "type" "ssemov")
1340 (set_attr "prefix" "maybe_vex")
1341 (set_attr "mode" "TI")])
1343 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1344 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1345 ;; from memory, we'd prefer to load the memory directly into the %xmm
1346 ;; register. To facilitate this happy circumstance, this pattern won't
1347 ;; split until after register allocation. If the 64-bit value didn't
1348 ;; come from memory, this is the best we can do. This is much better
1349 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; 32-bit target only: move a DImode value (register pair or memory)
;; into an xmm register.  Splits after reload: a register pair is
;; assembled with movd + pinsrd (SSE4) or movd/movd + punpckldq via the
;; scratch; a memory source becomes a direct 64-bit vector load.
1352 (define_insn_and_split "movdi_to_sse"
1353 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1354 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1355 UNSPEC_MOVDI_TO_SSE))
1356 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1357 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1359 "&& reload_completed"
1362 if (register_operand (operands[1], DImode))
1364 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1365 Assemble the 64-bit DImode value in an xmm register. */
1366 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1367 gen_lowpart (SImode, operands[1])));
1369 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1370 gen_highpart (SImode, operands[1]),
1374 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1375 gen_highpart (SImode, operands[1])));
1376 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1380 else if (memory_operand (operands[1], DImode))
1381 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1382 operands[1], const0_rtx));
1387 [(set_attr "isa" "sse4,*,*")])
;; Split a zero-extended scalar SF load-into-V4SF into a broadcast of
;; the scalar merged with a zero vector.
1390 [(set (match_operand:V4SF 0 "register_operand")
1391 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1392 "TARGET_SSE && reload_completed"
1395 (vec_duplicate:V4SF (match_dup 1))
1399 operands[1] = gen_lowpart (SFmode, operands[1]);
1400 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF analogue: concat the loaded DF scalar with a DF zero.
1404 [(set (match_operand:V2DF 0 "register_operand")
1405 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1406 "TARGET_SSE2 && reload_completed"
1407 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1409 operands[1] = gen_lowpart (DFmode, operands[1]);
1410 operands[2] = CONST0_RTX (DFmode);
;; Misaligned vector move: defer entirely to the C expander.
1413 (define_expand "movmisalign<mode>"
1414 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1415 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1418 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1422 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Peephole-style split: a movsd-load (low half, high zeroed) followed by
;; a movhpd-load of the adjacent memory word becomes one movupd when
;; unaligned loads are cheap on the target.
1424 [(set (match_operand:V2DF 0 "sse_reg_operand")
1425 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1426 (match_operand:DF 4 "const0_operand")))
1427 (set (match_operand:V2DF 2 "sse_reg_operand")
1428 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1429 (parallel [(const_int 0)]))
1430 (match_operand:DF 3 "memory_operand")))]
1431 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1432 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1433 [(set (match_dup 2) (match_dup 5))]
1434 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
;; Variant of the split above where the first insn is a scalar DF load
;; into the same SSE register later used by the vec_concat.
1437 [(set (match_operand:DF 0 "sse_reg_operand")
1438 (match_operand:DF 1 "memory_operand"))
1439 (set (match_operand:V2DF 2 "sse_reg_operand")
1440 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1441 (match_operand:DF 3 "memory_operand")))]
1442 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1443 && REGNO (operands[4]) == REGNO (operands[2])
1444 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1445 [(set (match_dup 2) (match_dup 5))]
1446 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1448 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; Store counterpart: adjacent movlpd/movhpd stores of both halves of a
;; V2DF merge into one movupd store.
1450 [(set (match_operand:DF 0 "memory_operand")
1451 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1452 (parallel [(const_int 0)])))
1453 (set (match_operand:DF 2 "memory_operand")
1454 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1455 (parallel [(const_int 1)])))]
1456 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1457 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1458 [(set (match_dup 4) (match_dup 1))]
1459 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
;; SSE3 lddqu: unaligned integer-vector load optimized for
;; cacheline-splitting accesses; kept as an unspec so it is not merged
;; with ordinary loads.
1461 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1462 [(set (match_operand:VI1 0 "register_operand" "=x")
1463 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1466 "%vlddqu\t{%1, %0|%0, %1}"
1467 [(set_attr "type" "ssemov")
1468 (set_attr "movu" "1")
1469 (set (attr "prefix_data16")
1471 (match_test "TARGET_AVX")
1473 (const_string "0")))
1474 (set (attr "prefix_rep")
1476 (match_test "TARGET_AVX")
1478 (const_string "1")))
1479 (set_attr "prefix" "maybe_vex")
1480 (set_attr "mode" "<sseinsnmode>")])
;; Non-temporal integer store (movnti) from a GPR, SI or DI.
1482 (define_insn "sse2_movnti<mode>"
1483 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1484 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1487 "movnti\t{%1, %0|%0, %1}"
1488 [(set_attr "type" "ssemov")
1489 (set_attr "prefix_data16" "0")
1490 (set_attr "mode" "<MODE>")])
;; Non-temporal FP vector store (movntps/movntpd and VEX/EVEX forms).
1492 (define_insn "<sse>_movnt<mode>"
1493 [(set (match_operand:VF 0 "memory_operand" "=m")
1495 [(match_operand:VF 1 "register_operand" "v")]
1498 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "ssemov")
1500 (set_attr "prefix" "maybe_vex")
1501 (set_attr "mode" "<MODE>")])
;; Non-temporal integer vector store (movntdq).
1503 (define_insn "<sse2>_movnt<mode>"
1504 [(set (match_operand:VI8 0 "memory_operand" "=m")
1505 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1508 "%vmovntdq\t{%1, %0|%0, %1}"
1509 [(set_attr "type" "ssecvt")
1510 (set (attr "prefix_data16")
1512 (match_test "TARGET_AVX")
1514 (const_string "1")))
1515 (set_attr "prefix" "maybe_vex")
1516 (set_attr "mode" "<sseinsnmode>")])
1518 ; Expand patterns for non-temporal stores. At the moment, only those
1519 ; that directly map to insns are defined; it would be possible to
1520 ; define patterns for other modes that would expand to several insns.
1522 ;; Modes handled by storent patterns.
;; Scalar modes gated on the ISA that provides their non-temporal store
;; (movnti needs SSE2, movntss/movntsd need SSE4A); vector modes gated
;; on their register width's ISA.
1523 (define_mode_iterator STORENT_MODE
1524 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1525 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1526 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1527 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1528 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Generic non-temporal store expander over STORENT_MODE.
1530 (define_expand "storent<mode>"
1531 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1532 (unspec:STORENT_MODE
1533 [(match_operand:STORENT_MODE 1 "register_operand")]
1537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Mode iterators for the kNN mask-register instruction patterns below.
;; The gating reflects which ISA introduced each mask width: AVX512F has
;; only 16-bit (HImode) k-ops, DQ adds 8-bit, BW adds 32/64-bit.
1543 ;; All integer modes with AVX512BW/DQ.
1544 (define_mode_iterator SWI1248_AVX512BWDQ
1545 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1547 ;; All integer modes with AVX512BW, where HImode operation
1548 ;; can be used instead of QImode.
1549 (define_mode_iterator SWI1248_AVX512BW
1550 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1552 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1553 (define_mode_iterator SWI1248_AVX512BWDQ2
1554 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1555 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
;; kmovb/w/d/q expander; forbids mem-to-mem moves.
1557 (define_expand "kmov<mskmodesuffix>"
1558 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1559 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1561 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
;; Mask-register and/ior/xor.  The UNSPEC_MASKOP marker keeps this insn
;; from being treated as ordinary GPR logic.  Without AVX512DQ there is
;; no byte-wide kop, so QImode falls back to the HImode (kNNw) form.
1563 (define_insn "k<code><mode>"
1564 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1565 (any_logic:SWI1248_AVX512BW
1566 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1567 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1568 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1571 if (get_attr_mode (insn) == MODE_HI)
1572 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1574 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1576 [(set_attr "type" "msklog")
1577 (set_attr "prefix" "vex")
1579 (cond [(and (match_test "<MODE>mode == QImode")
1580 (not (match_test "TARGET_AVX512DQ")))
1583 (const_string "<MODE>")))])
;; After reload, rewrite GPR-style logic on mask registers (which would
;; clobber flags) into the flag-free kop form tagged with UNSPEC_MASKOP.
1586 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1587 (any_logic:SWI1248_AVX512BW
1588 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
1589 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1590 (clobber (reg:CC FLAGS_REG))]
1591 "TARGET_AVX512F && reload_completed"
1594 (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
1595 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kandn: dest = ~op1 & op2; QImode uses kandnw when AVX512DQ is absent.
1597 (define_insn "kandn<mode>"
1598 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1599 (and:SWI1248_AVX512BW
1600 (not:SWI1248_AVX512BW
1601 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1602 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1603 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1606 if (get_attr_mode (insn) == MODE_HI)
1607 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1609 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1611 [(set_attr "type" "msklog")
1612 (set_attr "prefix" "vex")
1614 (cond [(and (match_test "<MODE>mode == QImode")
1615 (not (match_test "TARGET_AVX512DQ")))
1618 (const_string "<MODE>")))])
;; Post-reload split of an andn-on-mask-regs with a CC clobber into the
;; kandn form (no flags touched).
1621 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1622 (and:SWI1248_AVX512BW
1623 (not:SWI1248_AVX512BW
1624 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand"))
1625 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1626 (clobber (reg:CC FLAGS_REG))]
1627 "TARGET_AVX512F && reload_completed"
1630 (and:SWI1248_AVX512BW
1631 (not:SWI1248_AVX512BW (match_dup 1))
1633 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kxnor: dest = ~(op1 ^ op2); QImode uses kxnorw without AVX512DQ.
1635 (define_insn "kxnor<mode>"
1636 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1637 (not:SWI1248_AVX512BW
1638 (xor:SWI1248_AVX512BW
1639 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1640 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1641 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1644 if (get_attr_mode (insn) == MODE_HI)
1645 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1647 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1649 [(set_attr "type" "msklog")
1650 (set_attr "prefix" "vex")
1652 (cond [(and (match_test "<MODE>mode == QImode")
1653 (not (match_test "TARGET_AVX512DQ")))
1656 (const_string "<MODE>")))])
;; knot: bitwise complement of a mask register; QImode falls back to
;; knotw without AVX512DQ.
1658 (define_insn "knot<mode>"
1659 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1660 (not:SWI1248_AVX512BW
1661 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1662 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1665 if (get_attr_mode (insn) == MODE_HI)
1666 return "knotw\t{%1, %0|%0, %1}";
1668 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1670 [(set_attr "type" "msklog")
1671 (set_attr "prefix" "vex")
1673 (cond [(and (match_test "<MODE>mode == QImode")
1674 (not (match_test "TARGET_AVX512DQ")))
1677 (const_string "<MODE>")))])
;; Post-reload split: not-on-mask-reg becomes the knot mask op.
1680 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1681 (not:SWI1248_AVX512BW
1682 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")))]
1683 "TARGET_AVX512F && reload_completed"
1686 (not:SWI1248_AVX512BW (match_dup 1)))
1687 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; knotd with implicit zero-extension of the 32-bit result to DImode
;; (knotd always writes the full 64-bit mask register).
1689 (define_insn "*knotsi_1_zext"
1690 [(set (match_operand:DI 0 "register_operand" "=k")
1692 (not:SI (match_operand:SI 1 "register_operand" "k"))))
1693 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1695 "knotd\t{%1, %0|%0, %1}";
1696 [(set_attr "type" "msklog")
1697 (set_attr "prefix" "vex")
1698 (set_attr "mode" "SI")])
;; Post-reload split feeding the *knotsi_1_zext pattern above.
1701 [(set (match_operand:DI 0 "mask_reg_operand")
1703 (not:SI (match_operand:SI 1 "mask_reg_operand"))))]
1704 "TARGET_AVX512BW && reload_completed"
1708 (not:SI (match_dup 1))))
1709 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kadd: mask-register addition; even HImode requires DQ (iterator
;; SWI1248_AVX512BWDQ2), as kaddw is a DQ instruction.
1711 (define_insn "kadd<mode>"
1712 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1713 (plus:SWI1248_AVX512BWDQ2
1714 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1715 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1716 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1718 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1719 [(set_attr "type" "msklog")
1720 (set_attr "prefix" "vex")
1721 (set_attr "mode" "<MODE>")])
1723 ;; Mask variant shift mnemonics
;; Maps the RTL shift code to the kshiftl/kshiftr mnemonic stem.
1724 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
;; Mask-register shift by an immediate (kshiftl/kshiftr + width suffix).
1726 (define_insn "k<code><mode>"
1727 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1728 (any_lshift:SWI1248_AVX512BWDQ
1729 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1730 (match_operand 2 "const_0_to_255_operand" "n")))
1731 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1733 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1734 [(set_attr "type" "msklog")
1735 (set_attr "prefix" "vex")
1736 (set_attr "mode" "<MODE>")])
;; ktest: sets flags from two mask registers.
1738 (define_insn "ktest<mode>"
1739 [(set (reg:CC FLAGS_REG)
1741 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1742 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1745 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1746 [(set_attr "mode" "<MODE>")
1747 (set_attr "type" "msklog")
1748 (set_attr "prefix" "vex")])
;; kortest: flags from the OR of two mask registers.
1750 (define_insn "kortest<mode>"
1751 [(set (reg:CC FLAGS_REG)
1753 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1754 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1757 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1758 [(set_attr "mode" "<MODE>")
1759 (set_attr "type" "msklog")
1760 (set_attr "prefix" "vex")])
;; kunpckbw: concatenate two byte masks into a word mask.
1762 (define_insn "kunpckhi"
1763 [(set (match_operand:HI 0 "register_operand" "=k")
1766 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1768 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1770 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1771 [(set_attr "mode" "HI")
1772 (set_attr "type" "msklog")
1773 (set_attr "prefix" "vex")])
;; kunpckwd: concatenate two word masks into a dword mask.
1775 (define_insn "kunpcksi"
1776 [(set (match_operand:SI 0 "register_operand" "=k")
1779 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1781 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1783 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1784 [(set_attr "mode" "SI")])
;; kunpckdq: concatenate two dword masks into a qword mask.
1786 (define_insn "kunpckdi"
1787 [(set (match_operand:DI 0 "register_operand" "=k")
1790 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1792 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1794 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1795 [(set_attr "mode" "DI")])
1798 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1800 ;; Parallel floating point arithmetic
1802 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; abs/neg expander for FP vectors; the C helper builds the sign-bit
;; mask constant and emits the and/xor form.
1804 (define_expand "<code><mode>2"
1805 [(set (match_operand:VF 0 "register_operand")
1807 (match_operand:VF 1 "register_operand")))]
1809 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg via bitwise op with a sign-mask (the use operand).  Splits
;; after reload; the preparation code commutes operands so that a memory
;; source, or an operand matching the destination, lands in position 2.
1811 (define_insn_and_split "*<code><mode>2"
1812 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1814 (match_operand:VF 1 "vector_operand" "0,xBm,v,m")))
1815 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1818 "&& reload_completed"
1820 (<absneg_op>:VF (match_dup 1) (match_dup 2)))]
1824 if (MEM_P (operands[1]))
1825 std::swap (operands[1], operands[2]);
1829 if (operands_match_p (operands[0], operands[2]))
1830 std::swap (operands[1], operands[2]);
1833 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Negated-abs: ior with the sign-bit mask sets the sign bit; same
;; operand-commuting split as *<code><mode>2 above.
1835 (define_insn_and_split "*nabs<mode>2"
1836 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1839 (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))))
1840 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1843 "&& reload_completed"
1845 (ior:VF (match_dup 1) (match_dup 2)))]
1849 if (MEM_P (operands[1]))
1850 std::swap (operands[1], operands[2]);
1854 if (operands_match_p (operands[0], operands[2]))
1855 std::swap (operands[1], operands[2]);
1858 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; add/sub expander for FP vectors with optional masking/rounding.
1860 (define_expand "<insn><mode>3<mask_name><round_name>"
1861 [(set (match_operand:VF 0 "register_operand")
1863 (match_operand:VF 1 "<round_nimm_predicate>")
1864 (match_operand:VF 2 "<round_nimm_predicate>")))]
1865 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1866 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; add/sub insn: two-operand SSE form vs. three-operand AVX form with
;; mask/broadcast/rounding decoration.
1868 (define_insn "*<insn><mode>3<mask_name><round_name>"
1869 [(set (match_operand:VF 0 "register_operand" "=x,v")
1871 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
1872 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1873 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1874 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1876 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1877 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1878 [(set_attr "isa" "noavx,avx")
1879 (set_attr "type" "sseadd")
1880 (set_attr "prefix" "<bcst_mask_prefix3>")
1881 (set_attr "mode" "<MODE>")])
1883 ;; Standard scalar operation patterns which preserve the rest of the
1884 ;; vector for combiner.
;; Scalar addss/subss-style op in "preserve the rest of the vector"
;; shape: element 0 combined with a scalar operand, recognized by the
;; combiner.
1885 (define_insn "*<sse>_vm<insn><mode>3"
1886 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1888 (vec_duplicate:VF_128
1889 (plusminus:<ssescalarmode>
1890 (vec_select:<ssescalarmode>
1891 (match_operand:VF_128 1 "register_operand" "0,v")
1892 (parallel [(const_int 0)]))
1893 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1898 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1899 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1900 [(set_attr "isa" "noavx,avx")
1901 (set_attr "type" "sseadd")
1902 (set_attr "prefix" "orig,vex")
1903 (set_attr "mode" "<ssescalarmode>")])
;; Builtin-facing scalar add/sub with optional masking and embedded
;; rounding; %<iptr>2 prints the scalar-width memory operand.
1905 (define_insn "<sse>_vm<insn><mode>3<mask_scalar_name><round_scalar_name>"
1906 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1909 (match_operand:VF_128 1 "register_operand" "0,v")
1910 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1915 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1916 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1917 [(set_attr "isa" "noavx,avx")
1918 (set_attr "type" "sseadd")
1919 (set_attr "prefix" "<round_scalar_prefix>")
1920 (set_attr "mode" "<ssescalarmode>")])
;; Vector FP multiply expander with optional masking/rounding.
1922 (define_expand "mul<mode>3<mask_name><round_name>"
1923 [(set (match_operand:VF 0 "register_operand")
1925 (match_operand:VF 1 "<round_nimm_predicate>")
1926 (match_operand:VF 2 "<round_nimm_predicate>")))]
1927 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1928 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector FP multiply insn; "%0" in the noavx constraint marks the
;; commutative first operand.
1930 (define_insn "*mul<mode>3<mask_name><round_name>"
1931 [(set (match_operand:VF 0 "register_operand" "=x,v")
1933 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
1934 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1935 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
1936 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1938 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1939 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1940 [(set_attr "isa" "noavx,avx")
1941 (set_attr "type" "ssemul")
1942 (set_attr "prefix" "<bcst_mask_prefix3>")
1943 (set_attr "btver2_decode" "direct,double")
1944 (set_attr "mode" "<MODE>")])
1946 ;; Standard scalar operation patterns which preserve the rest of the
1947 ;; vector for combiner.
;; Combiner pattern for scalar mulss/divss-style ops preserving the
;; upper vector elements (mirrors *<sse>_vm<insn><mode>3 for add/sub).
1948 (define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
1949 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1951 (vec_duplicate:VF_128
1952 (multdiv:<ssescalarmode>
1953 (vec_select:<ssescalarmode>
1954 (match_operand:VF_128 1 "register_operand" "0,v")
1955 (parallel [(const_int 0)]))
1956 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1961 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1962 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1963 [(set_attr "isa" "noavx,avx")
1964 (set_attr "type" "sse<multdiv_mnemonic>")
1965 (set_attr "prefix" "orig,vex")
1966 (set_attr "btver2_decode" "direct,double")
1967 (set_attr "mode" "<ssescalarmode>")])
;; Builtin-facing scalar mul/div with optional masking and rounding.
1969 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1970 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1973 (match_operand:VF_128 1 "register_operand" "0,v")
1974 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1979 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1980 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1981 [(set_attr "isa" "noavx,avx")
1982 (set_attr "type" "sse<multdiv_mnemonic>")
1983 (set_attr "prefix" "<round_scalar_prefix>")
1984 (set_attr "btver2_decode" "direct,double")
1985 (set_attr "mode" "<ssescalarmode>")])
;; Double-precision vector divide: no reciprocal shortcut, just operand
;; fixup.
1987 (define_expand "div<mode>3"
1988 [(set (match_operand:VF2 0 "register_operand")
1989 (div:VF2 (match_operand:VF2 1 "register_operand")
1990 (match_operand:VF2 2 "vector_operand")))]
1992 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Single-precision vector divide: under -mrecip with unsafe-math the
;; division is replaced by a Newton-Raphson reciprocal sequence.
1994 (define_expand "div<mode>3"
1995 [(set (match_operand:VF1 0 "register_operand")
1996 (div:VF1 (match_operand:VF1 1 "register_operand")
1997 (match_operand:VF1 2 "vector_operand")))]
2000 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
2003 && TARGET_RECIP_VEC_DIV
2004 && !optimize_insn_for_size_p ()
2005 && flag_finite_math_only && !flag_trapping_math
2006 && flag_unsafe_math_optimizations)
2008 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; divps/divpd insn, SSE two-operand vs. AVX three-operand form with
;; mask/broadcast/rounding decoration.
2013 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
2014 [(set (match_operand:VF 0 "register_operand" "=x,v")
2016 (match_operand:VF 1 "register_operand" "0,v")
2017 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2018 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2020 div<ssemodesuffix>\t{%2, %0|%0, %2}
2021 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2022 [(set_attr "isa" "noavx,avx")
2023 (set_attr "type" "ssediv")
2024 (set_attr "prefix" "<bcst_mask_prefix3>")
2025 (set_attr "mode" "<MODE>")])
;; rcpps: approximate packed single-precision reciprocal (128/256-bit).
2027 (define_insn "<sse>_rcp<mode>2"
2028 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2030 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
2032 "%vrcpps\t{%1, %0|%0, %1}"
2033 [(set_attr "type" "sse")
2034 (set_attr "atom_sse_attr" "rcp")
2035 (set_attr "btver2_sse_attr" "rcp")
2036 (set_attr "prefix" "maybe_vex")
2037 (set_attr "mode" "<MODE>")])
;; rcpss: approximate scalar reciprocal of element 0, rest of the vector
;; taken from operand 2 (%k1 prints the low-element view of operand 1).
2039 (define_insn "sse_vmrcpv4sf2"
2040 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2042 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2044 (match_operand:V4SF 2 "register_operand" "0,x")
2048 rcpss\t{%1, %0|%0, %k1}
2049 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
2050 [(set_attr "isa" "noavx,avx")
2051 (set_attr "type" "sse")
2052 (set_attr "atom_sse_attr" "rcp")
2053 (set_attr "btver2_sse_attr" "rcp")
2054 (set_attr "prefix" "orig,vex")
2055 (set_attr "mode" "SF")])
;; Combiner form of rcpss where the source is already an SF scalar.
2057 (define_insn "*sse_vmrcpv4sf2"
2058 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2061 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2063 (match_operand:V4SF 2 "register_operand" "0,x")
2067 rcpss\t{%1, %0|%0, %1}
2068 vrcpss\t{%1, %2, %0|%0, %2, %1}"
2069 [(set_attr "isa" "noavx,avx")
2070 (set_attr "type" "sse")
2071 (set_attr "atom_sse_attr" "rcp")
2072 (set_attr "btver2_sse_attr" "rcp")
2073 (set_attr "prefix" "orig,vex")
2074 (set_attr "mode" "SF")])
;; AVX512 vrcp14: packed reciprocal approximation to 14 bits, optionally
;; masked.
2076 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
2077 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2079 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2082 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2083 [(set_attr "type" "sse")
2084 (set_attr "prefix" "evex")
2085 (set_attr "mode" "<MODE>")])
;; vrcp14ss/sd: scalar 14-bit reciprocal of element 0 of operand 1,
;; upper elements from operand 2.
2087 (define_insn "srcp14<mode>"
2088 [(set (match_operand:VF_128 0 "register_operand" "=v")
2091 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2093 (match_operand:VF_128 2 "register_operand" "v")
2096 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2097 [(set_attr "type" "sse")
2098 (set_attr "prefix" "evex")
2099 (set_attr "mode" "<MODE>")])
;; Masked variant of srcp14: result element 0 merged under mask %4 with
;; operand 3 (%N3 selects zero-masking when operand 3 is 0).
2101 (define_insn "srcp14<mode>_mask"
2102 [(set (match_operand:VF_128 0 "register_operand" "=v")
2106 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2108 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2109 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2110 (match_operand:VF_128 2 "register_operand" "v")
2113 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2114 [(set_attr "type" "sse")
2115 (set_attr "prefix" "evex")
2116 (set_attr "mode" "<MODE>")])
;; Vector sqrt expanders.  The DF (VF2) expander maps straight onto the
;; hardware insn; the SF (VF1) expander may instead emit a software
;; Newton-Raphson approximation (ix86_emit_swsqrtsf) when the reciprocal
;; optimization is enabled and the fast-math flags below permit it.
2118 (define_expand "sqrt<mode>2"
2119 [(set (match_operand:VF2 0 "register_operand")
2120 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
2123 (define_expand "sqrt<mode>2"
2124 [(set (match_operand:VF1 0 "register_operand")
2125 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2129 && TARGET_RECIP_VEC_SQRT
2130 && !optimize_insn_for_size_p ()
2131 && flag_finite_math_only && !flag_trapping_math
2132 && flag_unsafe_math_optimizations)
2134 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed sqrt: SQRTPS/PD (SSE) or VSQRTPS/PD (AVX/AVX-512), with optional
;; masking and embedded-rounding via the <mask_name>/<round_name> subst attrs.
2139 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2140 [(set (match_operand:VF 0 "register_operand" "=x,v")
2141 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2142 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2144 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2145 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2146 [(set_attr "isa" "noavx,avx")
2147 (set_attr "type" "sse")
2148 (set_attr "atom_sse_attr" "sqrt")
2149 (set_attr "btver2_sse_attr" "sqrt")
2150 (set_attr "prefix" "maybe_vex")
2151 (set_attr "mode" "<MODE>")])
;; Scalar sqrt, vector-merge form: SQRTSS/SD / VSQRTSS/SD.  Operand 1 is the
;; low-element source, operand 2 the pass-through vector; supports AVX-512
;; scalar masking and rounding substitutions.
2153 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2154 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2157 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2158 (match_operand:VF_128 2 "register_operand" "0,v")
2162 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2163 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2164 [(set_attr "isa" "noavx,avx")
2165 (set_attr "type" "sse")
2166 (set_attr "atom_sse_attr" "sqrt")
2167 (set_attr "prefix" "<round_scalar_prefix>")
2168 (set_attr "btver2_sse_attr" "sqrt")
2169 (set_attr "mode" "<ssescalarmode>")])
;; Combiner variant of the scalar sqrt: the source is a scalar
;; <ssescalarmode> operand duplicated into the vector (vec_duplicate form).
2171 (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2172 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2174 (vec_duplicate:VF_128
2175 (sqrt:<ssescalarmode>
2176 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2177 (match_operand:VF_128 2 "register_operand" "0,v")
2181 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2182 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2183 [(set_attr "isa" "noavx,avx")
2184 (set_attr "type" "sse")
2185 (set_attr "atom_sse_attr" "sqrt")
2186 (set_attr "prefix" "<round_scalar_prefix>")
2187 (set_attr "btver2_sse_attr" "sqrt")
2188 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square root.  The expander always lowers through the software
;; Newton-Raphson sequence (ix86_emit_swsqrtsf with recip=true); the insn
;; below is the raw RSQRTPS approximation it builds on.
2190 (define_expand "rsqrt<mode>2"
2191 [(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
2192 (unspec:VF1_AVX512ER_128_256
2193 [(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
2195 "TARGET_SSE && TARGET_SSE_MATH"
2197 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
2201 (define_insn "<sse>_rsqrt<mode>2"
2202 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2204 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2206 "%vrsqrtps\t{%1, %0|%0, %1}"
2207 [(set_attr "type" "sse")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "mode" "<MODE>")])
;; AVX-512 VRSQRT14PS/PD: packed reciprocal-sqrt approximation (2^-14
;; relative error), optionally masked.
2211 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2212 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2214 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2217 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2218 [(set_attr "type" "sse")
2219 (set_attr "prefix" "evex")
2220 (set_attr "mode" "<MODE>")])
;; AVX-512 VRSQRT14SS/SD: scalar reciprocal-sqrt of the low element of
;; operand 1, upper elements taken from operand 2.
2222 (define_insn "rsqrt14<mode>"
2223 [(set (match_operand:VF_128 0 "register_operand" "=v")
2226 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2228 (match_operand:VF_128 2 "register_operand" "v")
2231 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2232 [(set_attr "type" "sse")
2233 (set_attr "prefix" "evex")
2234 (set_attr "mode" "<MODE>")])
;; Masked scalar VRSQRT14: operand 4 is the k-mask, operand 3 the merge
;; source (register or zero); %N3 selects merge- vs zero-masking.
2236 (define_insn "rsqrt14_<mode>_mask"
2237 [(set (match_operand:VF_128 0 "register_operand" "=v")
2241 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2243 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2244 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2245 (match_operand:VF_128 2 "register_operand" "v")
2248 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2249 [(set_attr "type" "sse")
2250 (set_attr "prefix" "evex")
2251 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal-sqrt approximation, V4SF form: RSQRTSS / VRSQRTSS;
;; operand 2 is the pass-through vector.
2253 (define_insn "sse_vmrsqrtv4sf2"
2254 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2256 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2258 (match_operand:V4SF 2 "register_operand" "0,x")
2262 rsqrtss\t{%1, %0|%0, %k1}
2263 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2264 [(set_attr "isa" "noavx,avx")
2265 (set_attr "type" "sse")
2266 (set_attr "prefix" "orig,vex")
2267 (set_attr "mode" "SF")])
;; Combiner variant of sse_vmrsqrtv4sf2 with a scalar SF source operand.
2269 (define_insn "*sse_vmrsqrtv4sf2"
2270 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2273 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2275 (match_operand:V4SF 2 "register_operand" "0,x")
2279 rsqrtss\t{%1, %0|%0, %1}
2280 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2281 [(set_attr "isa" "noavx,avx")
2282 (set_attr "type" "sse")
2283 (set_attr "prefix" "orig,vex")
2284 (set_attr "mode" "SF")])
;; smax/smin expander.  When -0.0 or NaN semantics matter (no
;; finite-math-only, or signed zeros enabled), route to the IEEE-exact
;; non-commutative pattern; otherwise fall through to the generic
;; commutative insn via ix86_fixup_binary_operands_no_copy.
2286 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2287 [(set (match_operand:VF 0 "register_operand")
2289 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2290 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2291 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2293 if (!flag_finite_math_only || flag_signed_zeros)
2295 operands[1] = force_reg (<MODE>mode, operands[1]);
2296 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2297 (operands[0], operands[1], operands[2]
2298 <mask_operand_arg34>
2299 <round_saeonly_mask_arg3>));
2303 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2306 ;; These versions of the min/max patterns are intentionally ignorant of
2307 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2308 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2309 ;; are undefined in this condition, we're certain this is correct.
;; Commutative MAXPS/MINPS-style pattern ("%0" marks operand 1 commutative);
;; valid only because tree/RTL smax/smin are undefined for -0.0/NaN here.
2311 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2312 [(set (match_operand:VF 0 "register_operand" "=x,v")
2314 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2315 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2317 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2318 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2320 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2321 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2322 [(set_attr "isa" "noavx,avx")
2323 (set_attr "type" "sseadd")
2324 (set_attr "btver2_sse_attr" "maxmin")
2325 (set_attr "prefix" "<mask_prefix3>")
2326 (set_attr "mode" "<MODE>")])
2328 ;; These versions of the min/max patterns implement exactly the operations
2329 ;; min = (op1 < op2 ? op1 : op2)
2330 ;; max = (!(op1 < op2) ? op1 : op2)
2331 ;; Their operands are not commutative, and thus they may be used in the
2332 ;; presence of -0.0 and NaN.
;; IEEE-exact min/max (unspec, non-commutative): matches the hardware
;; MINPS/MAXPS semantics min = (op1 < op2 ? op1 : op2), so it is safe with
;; -0.0 and NaN.  Operand 1 must be a register (no operand swapping).
2334 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2335 [(set (match_operand:VF 0 "register_operand" "=x,v")
2337 [(match_operand:VF 1 "register_operand" "0,v")
2338 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2341 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2343 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2344 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2345 [(set_attr "isa" "noavx,avx")
2346 (set_attr "type" "sseadd")
2347 (set_attr "btver2_sse_attr" "maxmin")
2348 (set_attr "prefix" "<mask_prefix3>")
2349 (set_attr "mode" "<MODE>")])
2351 ;; Standard scalar operation patterns which preserve the rest of the
2352 ;; vector for combiner.
;; Scalar IEEE min/max that keeps the upper vector elements (vec_merge of a
;; duplicated low-element result), letting combine form MINSS/MAXSS.
2353 (define_insn "*ieee_<ieee_maxmin><mode>3"
2354 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2356 (vec_duplicate:VF_128
2357 (unspec:<ssescalarmode>
2358 [(vec_select:<ssescalarmode>
2359 (match_operand:VF_128 1 "register_operand" "0,v")
2360 (parallel [(const_int 0)]))
2361 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2367 <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2368 v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2369 [(set_attr "isa" "noavx,avx")
2370 (set_attr "type" "sseadd")
2371 (set_attr "btver2_sse_attr" "maxmin")
2372 (set_attr "prefix" "orig,vex")
2373 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic-level scalar MINSS/MAXSS (vector-merge form), with AVX-512
;; scalar masking/SAE substitutions.
2375 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2376 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2379 (match_operand:VF_128 1 "register_operand" "0,v")
2380 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2385 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2386 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2387 [(set_attr "isa" "noavx,avx")
2388 (set_attr "type" "sse")
2389 (set_attr "btver2_sse_attr" "maxmin")
2390 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2391 (set_attr "mode" "<ssescalarmode>")])
;; VADDSUBPD (256-bit): even lanes get op1-op2, odd lanes op1+op2
;; (vec_merge of minus and plus of the same operands).
2393 (define_insn "avx_addsubv4df3"
2394 [(set (match_operand:V4DF 0 "register_operand" "=x")
2397 (match_operand:V4DF 1 "register_operand" "x")
2398 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2399 (plus:V4DF (match_dup 1) (match_dup 2))
2402 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2403 [(set_attr "type" "sseadd")
2404 (set_attr "prefix" "vex")
2405 (set_attr "mode" "V4DF")])
;; ADDSUBPD / VADDSUBPD (128-bit): lane 0 = op1-op2, lane 1 = op1+op2.
2407 (define_insn "sse3_addsubv2df3"
2408 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2411 (match_operand:V2DF 1 "register_operand" "0,x")
2412 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2413 (plus:V2DF (match_dup 1) (match_dup 2))
2417 addsubpd\t{%2, %0|%0, %2}
2418 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2419 [(set_attr "isa" "noavx,avx")
2420 (set_attr "type" "sseadd")
2421 (set_attr "atom_unit" "complex")
2422 (set_attr "prefix" "orig,vex")
2423 (set_attr "mode" "V2DF")])
;; VADDSUBPS (256-bit): even lanes subtract, odd lanes add.
2425 (define_insn "avx_addsubv8sf3"
2426 [(set (match_operand:V8SF 0 "register_operand" "=x")
2429 (match_operand:V8SF 1 "register_operand" "x")
2430 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2431 (plus:V8SF (match_dup 1) (match_dup 2))
2434 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2435 [(set_attr "type" "sseadd")
2436 (set_attr "prefix" "vex")
2437 (set_attr "mode" "V8SF")])
;; ADDSUBPS / VADDSUBPS (128-bit): even lanes subtract, odd lanes add.
2439 (define_insn "sse3_addsubv4sf3"
2440 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2443 (match_operand:V4SF 1 "register_operand" "0,x")
2444 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2445 (plus:V4SF (match_dup 1) (match_dup 2))
2449 addsubps\t{%2, %0|%0, %2}
2450 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2451 [(set_attr "isa" "noavx,avx")
2452 (set_attr "type" "sseadd")
2453 (set_attr "prefix" "orig,vex")
2454 (set_attr "prefix_rep" "1,*")
2455 (set_attr "mode" "V4SF")])
;; Split: canonicalize a vec_merge of (minus op1 op2)/(plus opA opB) over
;; the same operand pair into the addsub vec_merge form above (minus first,
;; then plus), so the ADDSUB insns can match.
2458 [(set (match_operand:VF_128_256 0 "register_operand")
2459 (match_operator:VF_128_256 6 "addsub_vm_operator"
2461 (match_operand:VF_128_256 1 "register_operand")
2462 (match_operand:VF_128_256 2 "vector_operand"))
2464 (match_operand:VF_128_256 3 "vector_operand")
2465 (match_operand:VF_128_256 4 "vector_operand"))
2466 (match_operand 5 "const_int_operand")]))]
2468 && can_create_pseudo_p ()
2469 && ((rtx_equal_p (operands[1], operands[3])
2470 && rtx_equal_p (operands[2], operands[4]))
2471 || (rtx_equal_p (operands[1], operands[4])
2472 && rtx_equal_p (operands[2], operands[3])))"
2474 (vec_merge:VF_128_256
2475 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2476 (plus:VF_128_256 (match_dup 1) (match_dup 2))
;; Split, swapped variant: here plus/minus were matched in the opposite
;; order, so the merge-mask immediate (operand 5) must be bit-inverted
;; (masked to the vector's element count) to compensate.
2480 [(set (match_operand:VF_128_256 0 "register_operand")
2481 (match_operator:VF_128_256 6 "addsub_vm_operator"
2483 (match_operand:VF_128_256 1 "vector_operand")
2484 (match_operand:VF_128_256 2 "vector_operand"))
2486 (match_operand:VF_128_256 3 "register_operand")
2487 (match_operand:VF_128_256 4 "vector_operand"))
2488 (match_operand 5 "const_int_operand")]))]
2490 && can_create_pseudo_p ()
2491 && ((rtx_equal_p (operands[1], operands[3])
2492 && rtx_equal_p (operands[2], operands[4]))
2493 || (rtx_equal_p (operands[1], operands[4])
2494 && rtx_equal_p (operands[2], operands[3])))"
2496 (vec_merge:VF_128_256
2497 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2498 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2501 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2503 = GEN_INT (~INTVAL (operands[5])
2504 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
;; Split: recognize the vec_select-over-vec_concat (shuffle) encoding of
;; ADDSUB and rewrite it as the vec_merge form.  The new merge mask is
;; rebuilt from the selection parallel: lanes selecting from the first
;; (minus) half of the concat set their mask bit.
2508 [(set (match_operand:VF_128_256 0 "register_operand")
2509 (match_operator:VF_128_256 7 "addsub_vs_operator"
2510 [(vec_concat:<ssedoublemode>
2512 (match_operand:VF_128_256 1 "register_operand")
2513 (match_operand:VF_128_256 2 "vector_operand"))
2515 (match_operand:VF_128_256 3 "vector_operand")
2516 (match_operand:VF_128_256 4 "vector_operand")))
2517 (match_parallel 5 "addsub_vs_parallel"
2518 [(match_operand 6 "const_int_operand")])]))]
2520 && can_create_pseudo_p ()
2521 && ((rtx_equal_p (operands[1], operands[3])
2522 && rtx_equal_p (operands[2], operands[4]))
2523 || (rtx_equal_p (operands[1], operands[4])
2524 && rtx_equal_p (operands[2], operands[3])))"
2526 (vec_merge:VF_128_256
2527 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2528 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2531 int i, nelt = XVECLEN (operands[5], 0);
2532 HOST_WIDE_INT ival = 0;
2534 for (i = 0; i < nelt; i++)
2535 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2536 ival |= HOST_WIDE_INT_1 << i;
2538 operands[5] = GEN_INT (ival);
;; Split, swapped shuffle variant: minus/plus appear in the opposite halves
;; of the concat, so mask bits are set for lanes selecting from the SECOND
;; half (index >= NUNITS) instead.
2542 [(set (match_operand:VF_128_256 0 "register_operand")
2543 (match_operator:VF_128_256 7 "addsub_vs_operator"
2544 [(vec_concat:<ssedoublemode>
2546 (match_operand:VF_128_256 1 "vector_operand")
2547 (match_operand:VF_128_256 2 "vector_operand"))
2549 (match_operand:VF_128_256 3 "register_operand")
2550 (match_operand:VF_128_256 4 "vector_operand")))
2551 (match_parallel 5 "addsub_vs_parallel"
2552 [(match_operand 6 "const_int_operand")])]))]
2554 && can_create_pseudo_p ()
2555 && ((rtx_equal_p (operands[1], operands[3])
2556 && rtx_equal_p (operands[2], operands[4]))
2557 || (rtx_equal_p (operands[1], operands[4])
2558 && rtx_equal_p (operands[2], operands[3])))"
2560 (vec_merge:VF_128_256
2561 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2562 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2565 int i, nelt = XVECLEN (operands[5], 0);
2566 HOST_WIDE_INT ival = 0;
2568 for (i = 0; i < nelt; i++)
2569 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2570 ival |= HOST_WIDE_INT_1 << i;
2572 operands[5] = GEN_INT (ival);
;; VHADDPD/VHSUBPD (256-bit): horizontal add/sub of adjacent DF pairs,
;; operating within each 128-bit lane (elements 0-1 and 2-3 separately).
2575 (define_insn "avx_h<insn>v4df3"
2576 [(set (match_operand:V4DF 0 "register_operand" "=x")
2581 (match_operand:V4DF 1 "register_operand" "x")
2582 (parallel [(const_int 0)]))
2583 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2586 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2587 (parallel [(const_int 0)]))
2588 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2591 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2592 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2594 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2595 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2597 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2598 [(set_attr "type" "sseadd")
2599 (set_attr "prefix" "vex")
2600 (set_attr "mode" "V4DF")])
;; HADDPD expander: result = { op1[0]+op1[1], op2[0]+op2[1] }.
2602 (define_expand "sse3_haddv2df3"
2603 [(set (match_operand:V2DF 0 "register_operand")
2607 (match_operand:V2DF 1 "register_operand")
2608 (parallel [(const_int 0)]))
2609 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2612 (match_operand:V2DF 2 "vector_operand")
2613 (parallel [(const_int 0)]))
2614 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; HADDPD recognizer: element indices are match_operands so combine can
;; match either selection order; the condition only requires each pair of
;; indices to differ (addition is commutative).
2617 (define_insn "*sse3_haddv2df3"
2618 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2622 (match_operand:V2DF 1 "register_operand" "0,x")
2623 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2626 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2629 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2630 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2633 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2635 && INTVAL (operands[3]) != INTVAL (operands[4])
2636 && INTVAL (operands[5]) != INTVAL (operands[6])"
2638 haddpd\t{%2, %0|%0, %2}
2639 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2640 [(set_attr "isa" "noavx,avx")
2641 (set_attr "type" "sseadd")
2642 (set_attr "prefix" "orig,vex")
2643 (set_attr "mode" "V2DF")])
;; HSUBPD: result = { op1[0]-op1[1], op2[0]-op2[1] }.  Unlike haddpd the
;; element order is fixed because subtraction is not commutative.
2645 (define_insn "sse3_hsubv2df3"
2646 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2650 (match_operand:V2DF 1 "register_operand" "0,x")
2651 (parallel [(const_int 0)]))
2652 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2655 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2656 (parallel [(const_int 0)]))
2657 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2660 hsubpd\t{%2, %0|%0, %2}
2661 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2662 [(set_attr "isa" "noavx,avx")
2663 (set_attr "type" "sseadd")
2664 (set_attr "prefix" "orig,vex")
2665 (set_attr "mode" "V2DF")])
;; Scalar DF result = op1[0] + op1[1], implemented with haddpd on the same
;; register for both template operands (only the low result element is used).
2667 (define_insn "*sse3_haddv2df3_low"
2668 [(set (match_operand:DF 0 "register_operand" "=x,x")
2671 (match_operand:V2DF 1 "register_operand" "0,x")
2672 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2675 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2677 && INTVAL (operands[2]) != INTVAL (operands[3])"
2679 haddpd\t{%0, %0|%0, %0}
2680 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2681 [(set_attr "isa" "noavx,avx")
2682 (set_attr "type" "sseadd1")
2683 (set_attr "prefix" "orig,vex")
2684 (set_attr "mode" "V2DF")])
;; Scalar DF result = op1[0] - op1[1], via hsubpd with both sources equal.
2686 (define_insn "*sse3_hsubv2df3_low"
2687 [(set (match_operand:DF 0 "register_operand" "=x,x")
2690 (match_operand:V2DF 1 "register_operand" "0,x")
2691 (parallel [(const_int 0)]))
2694 (parallel [(const_int 1)]))))]
2697 hsubpd\t{%0, %0|%0, %0}
2698 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2699 [(set_attr "isa" "noavx,avx")
2700 (set_attr "type" "sseadd1")
2701 (set_attr "prefix" "orig,vex")
2702 (set_attr "mode" "V2DF")])
;; VHADDPS/VHSUBPS (256-bit): horizontal add/sub of adjacent SF pairs,
;; per 128-bit lane (elements 0-3 and 4-7 handled separately).
2704 (define_insn "avx_h<insn>v8sf3"
2705 [(set (match_operand:V8SF 0 "register_operand" "=x")
2711 (match_operand:V8SF 1 "register_operand" "x")
2712 (parallel [(const_int 0)]))
2713 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2715 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2716 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2720 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2721 (parallel [(const_int 0)]))
2722 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2724 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2725 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2729 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2730 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2732 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2733 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2736 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2737 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2739 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2740 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2742 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2743 [(set_attr "type" "sseadd")
2744 (set_attr "prefix" "vex")
2745 (set_attr "mode" "V8SF")])
;; HADDPS/HSUBPS (128-bit): result lanes are { op1[0] op op1[1],
;; op1[2] op op1[3], op2[0] op op2[1], op2[2] op op2[3] }.
2747 (define_insn "sse3_h<insn>v4sf3"
2748 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2753 (match_operand:V4SF 1 "register_operand" "0,x")
2754 (parallel [(const_int 0)]))
2755 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2757 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2758 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2762 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2763 (parallel [(const_int 0)]))
2764 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2766 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2767 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2770 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2771 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2772 [(set_attr "isa" "noavx,avx")
2773 (set_attr "type" "sseadd")
2774 (set_attr "atom_unit" "complex")
2775 (set_attr "prefix" "orig,vex")
2776 (set_attr "prefix_rep" "1,*")
2777 (set_attr "mode" "V4SF")])
;; Sum-reduction to scalar for 128-bit FP vectors: reduce pairwise with
;; ix86_expand_reduc, then extract element 0.
2779 (define_mode_iterator REDUC_SSE_PLUS_MODE
2780 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2782 (define_expand "reduc_plus_scal_<mode>"
2783 [(plus:REDUC_SSE_PLUS_MODE
2784 (match_operand:<ssescalarmode> 0 "register_operand")
2785 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2788 rtx tmp = gen_reg_rtx (<MODE>mode);
2789 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2790 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
;; Byte sum-reduction: fold the high half onto the low half with a V1TI
;; shift + add, then use PSADBW against zero to sum the remaining bytes,
;; and extract byte 0 of the result.
2795 (define_expand "reduc_plus_scal_v16qi"
2797 (match_operand:QI 0 "register_operand")
2798 (match_operand:V16QI 1 "register_operand"))]
2801 rtx tmp = gen_reg_rtx (V1TImode);
2802 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
2804 rtx tmp2 = gen_reg_rtx (V16QImode);
2805 emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
2806 rtx tmp3 = gen_reg_rtx (V16QImode);
2807 emit_move_insn (tmp3, CONST0_RTX (V16QImode));
2808 rtx tmp4 = gen_reg_rtx (V2DImode);
2809 emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
2810 tmp4 = gen_lowpart (V16QImode, tmp4);
2811 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
;; Sum-reduction for 256/512-bit vectors: extract the high half, add it to
;; the low half, then recurse into the half-width reduction expander.
2815 (define_mode_iterator REDUC_PLUS_MODE
2816 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2817 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2818 (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
2820 (define_expand "reduc_plus_scal_<mode>"
2821 [(plus:REDUC_PLUS_MODE
2822 (match_operand:<ssescalarmode> 0 "register_operand")
2823 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2826 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2827 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2828 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2829 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
2830 emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
2831 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2835 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Signed min/max reduction for 128-bit vectors: pairwise reduce, then
;; extract element 0.
2836 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2837 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2838 (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
2839 (V2DI "TARGET_SSE4_2")])
2841 (define_expand "reduc_<code>_scal_<mode>"
2842 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2843 (match_operand:<ssescalarmode> 0 "register_operand")
2844 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2847 rtx tmp = gen_reg_rtx (<MODE>mode);
2848 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2849 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
;; Signed min/max reduction for 256/512-bit vectors: halve via
;; extract-high + min/max, then recurse on the half-width mode.
2854 (define_mode_iterator REDUC_SMINMAX_MODE
2855 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2856 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2857 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2858 (V64QI "TARGET_AVX512BW")
2859 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2860 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2861 (V8DF "TARGET_AVX512F")])
2863 (define_expand "reduc_<code>_scal_<mode>"
2864 [(smaxmin:REDUC_SMINMAX_MODE
2865 (match_operand:<ssescalarmode> 0 "register_operand")
2866 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2869 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2870 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2871 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2872 emit_insn (gen_<code><ssehalfvecmodelower>3
2873 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2874 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; Unsigned min/max reduction (AVX-512BW integer modes): same
;; halve-and-recurse strategy as the signed variant above.
2878 (define_expand "reduc_<code>_scal_<mode>"
2879 [(umaxmin:VI_AVX512BW
2880 (match_operand:<ssescalarmode> 0 "register_operand")
2881 (match_operand:VI_AVX512BW 1 "register_operand"))]
2884 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2885 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2886 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2887 emit_insn (gen_<code><ssehalfvecmodelower>3
2888 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2889 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; 256-bit integer reduction: halve once (extract-high + op), finish the
;; 128-bit remainder with ix86_expand_reduc, and extract element 0.
2893 (define_expand "reduc_<code>_scal_<mode>"
2895 (match_operand:<ssescalarmode> 0 "register_operand")
2896 (match_operand:VI_256 1 "register_operand"))]
2899 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2900 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2901 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2902 emit_insn (gen_<code><ssehalfvecmodelower>3
2903 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2904 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2905 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2906 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2907 (operands[0], tmp3, const0_rtx));
;; Unsigned HI min reduction of V8HI: pairwise uminv8hi3 steps, then
;; extract element 0.
2911 (define_expand "reduc_umin_scal_v8hi"
2913 (match_operand:HI 0 "register_operand")
2914 (match_operand:V8HI 1 "register_operand"))]
2917 rtx tmp = gen_reg_rtx (V8HImode);
2918 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2919 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
;; AVX-512DQ VREDUCEPS/PD: packed reduce with an 8-bit control immediate
;; (operand 2); supports masking and SAE.
2923 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
2924 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2926 [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2927 (match_operand:SI 2 "const_0_to_255_operand")]
2930 "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
2931 [(set_attr "type" "sse")
2932 (set_attr "prefix" "evex")
2933 (set_attr "mode" "<MODE>")])
;; AVX-512DQ VREDUCESS/SD: scalar reduce of the low element of operand 2
;; with control immediate operand 3; upper elements come from operand 1.
2935 (define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
2936 [(set (match_operand:VF_128 0 "register_operand" "=v")
2939 [(match_operand:VF_128 1 "register_operand" "v")
2940 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
2941 (match_operand:SI 3 "const_0_to_255_operand")]
2946 "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
2947 [(set_attr "type" "sse")
2948 (set_attr "prefix" "evex")
2949 (set_attr "mode" "<MODE>")])
2951 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2953 ;; Parallel floating point comparisons
2955 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX VCMPPS/PD with full 5-bit predicate immediate (operand 3, 0..31);
;; produces an all-ones/all-zeros FP vector mask.
2957 (define_insn "avx_cmp<mode>3"
2958 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2960 [(match_operand:VF_128_256 1 "register_operand" "x")
2961 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2962 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2965 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2966 [(set_attr "type" "ssecmp")
2967 (set_attr "length_immediate" "1")
2968 (set_attr "prefix" "vex")
2969 (set_attr "mode" "<MODE>")])
;; Pre-reload split: an AVX-512 k-mask compare immediately expanded back to
;; an all-ones/zeros integer vector is rewritten to use the plain AVX
;; vector-result compare (avx_cmp) plus a subreg, avoiding the mask round
;; trip on AVX512VL.
2971 (define_insn_and_split "*avx_cmp<mode>3_1"
2972 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2973 (vec_merge:<sseintvecmode>
2974 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
2975 (match_operand:<sseintvecmode> 2 "const0_operand")
2976 (unspec:<avx512fmaskmode>
2977 [(match_operand:VF_128_256 3 "register_operand")
2978 (match_operand:VF_128_256 4 "nonimmediate_operand")
2979 (match_operand:SI 5 "const_0_to_31_operand")]
2981 "TARGET_AVX512VL && ix86_pre_reload_split ()"
2990 (set (match_dup 0) (match_dup 7))]
2992 operands[6] = gen_reg_rtx (<MODE>mode);
2994 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
;; As *avx_cmp<mode>3_1 but with a negated mask: the predicate immediate is
;; inverted by XORing bit 2 (e.g. LT <-> NLT) instead of negating the result.
2997 (define_insn_and_split "*avx_cmp<mode>3_2"
2998 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2999 (vec_merge:<sseintvecmode>
3000 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
3001 (match_operand:<sseintvecmode> 2 "const0_operand")
3002 (not:<avx512fmaskmode>
3003 (unspec:<avx512fmaskmode>
3004 [(match_operand:VF_128_256 3 "register_operand")
3005 (match_operand:VF_128_256 4 "nonimmediate_operand")
3006 (match_operand:SI 5 "const_0_to_31_operand")]
3008 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3017 (set (match_dup 0) (match_dup 7))]
3019 operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);
3020 operands[6] = gen_reg_rtx (<MODE>mode);
3022 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
;; Variant of *avx_cmp<mode>3_1 where the merge sources are FP all-ones /
;; zero vectors, so no integer subreg is needed.
3025 (define_insn_and_split "*avx_cmp<mode>3_3"
3026 [(set (match_operand:VF_128_256 0 "register_operand")
3027 (vec_merge:VF_128_256
3028 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3029 (match_operand:VF_128_256 2 "const0_operand")
3030 (unspec:<avx512fmaskmode>
3031 [(match_operand:VF_128_256 3 "register_operand")
3032 (match_operand:VF_128_256 4 "nonimmediate_operand")
3033 (match_operand:SI 5 "const_0_to_31_operand")]
3035 "TARGET_AVX512VL && ix86_pre_reload_split ()"
;; FP-merge variant with negated mask; compensates by flipping bit 2 of
;; the comparison predicate immediate.
3045 (define_insn_and_split "*avx_cmp<mode>3_4"
3046 [(set (match_operand:VF_128_256 0 "register_operand")
3047 (vec_merge:VF_128_256
3048 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3049 (match_operand:VF_128_256 2 "const0_operand")
3050 (not:<avx512fmaskmode>
3051 (unspec:<avx512fmaskmode>
3052 [(match_operand:VF_128_256 3 "register_operand")
3053 (match_operand:VF_128_256 4 "nonimmediate_operand")
3054 (match_operand:SI 5 "const_0_to_31_operand")]
3056 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3065 "operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);")
;; AVX VCMPSS/SD: scalar compare of the low elements with a 5-bit predicate
;; immediate; only the low result element is defined by the compare.
3067 (define_insn "avx_vmcmp<mode>3"
3068 [(set (match_operand:VF_128 0 "register_operand" "=x")
3071 [(match_operand:VF_128 1 "register_operand" "x")
3072 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
3073 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3078 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
3079 [(set_attr "type" "ssecmp")
3080 (set_attr "length_immediate" "1")
3081 (set_attr "prefix" "vex")
3082 (set_attr "mode" "<ssescalarmode>")])
;; CMPPS/PD with a comparison operator, commutative-only variant
;; ("%0" on operand 1; condition restricts to RTX_COMM_COMPARE codes).
3084 (define_insn "*<sse>_maskcmp<mode>3_comm"
3085 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3086 (match_operator:VF_128_256 3 "sse_comparison_operator"
3087 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
3088 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3090 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
3092 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3093 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3094 [(set_attr "isa" "noavx,avx")
3095 (set_attr "type" "ssecmp")
3096 (set_attr "length_immediate" "1")
3097 (set_attr "prefix" "orig,vex")
3098 (set_attr "mode" "<MODE>")])
;; CMPPS/PD, general (non-commutative) form; %D3 prints the predicate
;; suffix derived from the comparison operator.
3100 (define_insn "<sse>_maskcmp<mode>3"
3101 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3102 (match_operator:VF_128_256 3 "sse_comparison_operator"
3103 [(match_operand:VF_128_256 1 "register_operand" "0,x")
3104 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3107 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3108 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3109 [(set_attr "isa" "noavx,avx")
3110 (set_attr "type" "ssecmp")
3111 (set_attr "length_immediate" "1")
3112 (set_attr "prefix" "orig,vex")
3113 (set_attr "mode" "<MODE>")])
3115 (define_insn "<sse>_vmmaskcmp<mode>3"
3116 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3118 (match_operator:VF_128 3 "sse_comparison_operator"
3119 [(match_operand:VF_128 1 "register_operand" "0,x")
3120 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
3125 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
3126 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
3127 [(set_attr "isa" "noavx,avx")
3128 (set_attr "type" "ssecmp")
3129 (set_attr "length_immediate" "1,*")
3130 (set_attr "prefix" "orig,vex")
3131 (set_attr "mode" "<ssescalarmode>")])
;; cmp_imm_predicate: per-mode predicate for the comparison-predicate
;; immediate of the AVX512 compare patterns below.  Float vector modes
;; accept the full 0..31 encoding (used with vcmpp[sd], see the
;; V48_AVX512VL cmp pattern); integer vector modes accept only 0..7
;; (used with vpcmp[u][bwdq]).
3133 (define_mode_attr cmp_imm_predicate
3134 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
3135 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
3136 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
3137 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
3138 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
3139 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
3140 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
3141 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
3142 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
3144 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
3145 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3146 (unspec:<avx512fmaskmode>
3147 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
3148 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
3149 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3151 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
3152 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
3153 [(set_attr "type" "ssecmp")
3154 (set_attr "length_immediate" "1")
3155 (set_attr "prefix" "evex")
3156 (set_attr "mode" "<sseinsnmode>")])
3158 (define_insn_and_split "*<avx512>_cmp<mode>3"
3159 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3160 (not:<avx512fmaskmode>
3161 (unspec:<avx512fmaskmode>
3162 [(match_operand:V48_AVX512VL 1 "register_operand")
3163 (match_operand:V48_AVX512VL 2 "nonimmediate_operand")
3164 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3166 "TARGET_AVX512F && ix86_pre_reload_split ()"
3170 (unspec:<avx512fmaskmode>
3175 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3177 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
3178 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3179 (unspec:<avx512fmaskmode>
3180 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3181 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3182 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3185 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3186 [(set_attr "type" "ssecmp")
3187 (set_attr "length_immediate" "1")
3188 (set_attr "prefix" "evex")
3189 (set_attr "mode" "<sseinsnmode>")])
;; Iterate over the signed and unsigned integer-compare unspecs so a
;; single pattern can cover both the vpcmp and vpcmpu forms.
3191 (define_int_iterator UNSPEC_PCMP_ITER
3192 [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
3194 (define_insn_and_split "*<avx512>_cmp<mode>3"
3195 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3196 (not:<avx512fmaskmode>
3197 (unspec:<avx512fmaskmode>
3198 [(match_operand:VI12_AVX512VL 1 "register_operand")
3199 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
3200 (match_operand:SI 3 "<cmp_imm_predicate>")]
3201 UNSPEC_PCMP_ITER)))]
3202 "TARGET_AVX512BW && ix86_pre_reload_split ()"
3206 (unspec:<avx512fmaskmode>
3211 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3213 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3214 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3215 (unspec:<avx512fmaskmode>
3216 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3217 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3218 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3219 UNSPEC_UNSIGNED_PCMP))]
3221 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3222 [(set_attr "type" "ssecmp")
3223 (set_attr "length_immediate" "1")
3224 (set_attr "prefix" "evex")
3225 (set_attr "mode" "<sseinsnmode>")])
3227 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3228 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3229 (unspec:<avx512fmaskmode>
3230 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
3231 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
3232 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3233 UNSPEC_UNSIGNED_PCMP))]
3235 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3236 [(set_attr "type" "ssecmp")
3237 (set_attr "length_immediate" "1")
3238 (set_attr "prefix" "evex")
3239 (set_attr "mode" "<sseinsnmode>")])
3241 (define_insn_and_split "*<avx512>_ucmp<mode>3"
3242 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3243 (not:<avx512fmaskmode>
3244 (unspec:<avx512fmaskmode>
3245 [(match_operand:VI48_AVX512VL 1 "register_operand")
3246 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
3247 (match_operand:SI 3 "const_0_to_7_operand")]
3248 UNSPEC_UNSIGNED_PCMP)))]
3249 "TARGET_AVX512F && ix86_pre_reload_split ()"
3253 (unspec:<avx512fmaskmode>
3257 UNSPEC_UNSIGNED_PCMP))]
3258 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
;; pcmp_signed_mask: bits of the comparison-predicate immediate that
;; must be clear for the PR96906 us_minus-vs-zero split below (it tests
;; (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0).  Signed compares
;; permit only EQ (0) and NEQ (4), so both low bits must be clear
;; (mask 3); unsigned compares additionally permit LE (2) and NLE (6),
;; so only bit 0 must be clear (mask 1).
3260 (define_int_attr pcmp_signed_mask
3261 [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")])
3263 ;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw.
3264 ;; For signed comparison, handle only EQ (predicate 0) and NEQ (predicate 4);
3265 ;; for unsigned comparison, additionally handle LE (2) and NLE (6), which are equivalent to EQ and NEQ here.
3268 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3269 (unspec:<avx512fmaskmode>
3270 [(us_minus:VI12_AVX512VL
3271 (match_operand:VI12_AVX512VL 1 "vector_operand")
3272 (match_operand:VI12_AVX512VL 2 "vector_operand"))
3273 (match_operand:VI12_AVX512VL 3 "const0_operand")
3274 (match_operand:SI 4 "const_0_to_7_operand")]
3277 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)
3278 && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0"
3281 /* LE: 2, NLT: 5, NLE: 6, LT: 1 */
3282 int cmp_predicate = 2; /* LE */
3283 if (MEM_P (operands[1]))
3285 std::swap (operands[1], operands[2]);
3286 cmp_predicate = 5; /* NLT (GE) */
3288 if ((INTVAL (operands[4]) & 4) != 0)
3289 cmp_predicate ^= 4; /* Invert the comparison to NLE (GT) or LT. */
3290 emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],operands[2],
3291 GEN_INT (cmp_predicate)));
3295 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
3296 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3297 (and:<avx512fmaskmode>
3298 (unspec:<avx512fmaskmode>
3299 [(match_operand:VF_128 1 "register_operand" "v")
3300 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3301 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3305 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
3306 [(set_attr "type" "ssecmp")
3307 (set_attr "length_immediate" "1")
3308 (set_attr "prefix" "evex")
3309 (set_attr "mode" "<ssescalarmode>")])
3311 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
3312 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3313 (and:<avx512fmaskmode>
3314 (unspec:<avx512fmaskmode>
3315 [(match_operand:VF_128 1 "register_operand" "v")
3316 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3317 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3319 (and:<avx512fmaskmode>
3320 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
3323 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
3324 [(set_attr "type" "ssecmp")
3325 (set_attr "length_immediate" "1")
3326 (set_attr "prefix" "evex")
3327 (set_attr "mode" "<ssescalarmode>")])
3329 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
3330 [(set (reg:CCFP FLAGS_REG)
3333 (match_operand:<ssevecmode> 0 "register_operand" "v")
3334 (parallel [(const_int 0)]))
3336 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3337 (parallel [(const_int 0)]))))]
3338 "SSE_FLOAT_MODE_P (<MODE>mode)"
3339 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
3340 [(set_attr "type" "ssecomi")
3341 (set_attr "prefix" "maybe_vex")
3342 (set_attr "prefix_rep" "0")
3343 (set (attr "prefix_data16")
3344 (if_then_else (eq_attr "mode" "DF")
3346 (const_string "0")))
3347 (set_attr "mode" "<MODE>")])
3349 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3350 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3351 (match_operator:<avx512fmaskmode> 1 ""
3352 [(match_operand:V48_AVX512VL 2 "register_operand")
3353 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
3356 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3357 operands[2], operands[3]);
3362 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3363 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3364 (match_operator:<avx512fmaskmode> 1 ""
3365 [(match_operand:VI12_AVX512VL 2 "register_operand")
3366 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3369 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3370 operands[2], operands[3]);
3375 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3376 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3377 (match_operator:<sseintvecmode> 1 ""
3378 [(match_operand:VI_256 2 "register_operand")
3379 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3382 bool ok = ix86_expand_int_vec_cmp (operands);
3387 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3388 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3389 (match_operator:<sseintvecmode> 1 ""
3390 [(match_operand:VI124_128 2 "register_operand")
3391 (match_operand:VI124_128 3 "vector_operand")]))]
3394 bool ok = ix86_expand_int_vec_cmp (operands);
3399 (define_expand "vec_cmpv2div2di"
3400 [(set (match_operand:V2DI 0 "register_operand")
3401 (match_operator:V2DI 1 ""
3402 [(match_operand:V2DI 2 "register_operand")
3403 (match_operand:V2DI 3 "vector_operand")]))]
3406 bool ok = ix86_expand_int_vec_cmp (operands);
3411 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3412 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3413 (match_operator:<sseintvecmode> 1 ""
3414 [(match_operand:VF_256 2 "register_operand")
3415 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3418 bool ok = ix86_expand_fp_vec_cmp (operands);
3423 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3424 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3425 (match_operator:<sseintvecmode> 1 ""
3426 [(match_operand:VF_128 2 "register_operand")
3427 (match_operand:VF_128 3 "vector_operand")]))]
3430 bool ok = ix86_expand_fp_vec_cmp (operands);
3435 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3436 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3437 (match_operator:<avx512fmaskmode> 1 ""
3438 [(match_operand:VI48_AVX512VL 2 "register_operand")
3439 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3442 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3443 operands[2], operands[3]);
3448 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3449 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3450 (match_operator:<avx512fmaskmode> 1 ""
3451 [(match_operand:VI12_AVX512VL 2 "register_operand")
3452 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3455 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3456 operands[2], operands[3]);
3461 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3462 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3463 (match_operator:<sseintvecmode> 1 ""
3464 [(match_operand:VI_256 2 "register_operand")
3465 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3468 bool ok = ix86_expand_int_vec_cmp (operands);
3473 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3474 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3475 (match_operator:<sseintvecmode> 1 ""
3476 [(match_operand:VI124_128 2 "register_operand")
3477 (match_operand:VI124_128 3 "vector_operand")]))]
3480 bool ok = ix86_expand_int_vec_cmp (operands);
3485 (define_expand "vec_cmpuv2div2di"
3486 [(set (match_operand:V2DI 0 "register_operand")
3487 (match_operator:V2DI 1 ""
3488 [(match_operand:V2DI 2 "register_operand")
3489 (match_operand:V2DI 3 "vector_operand")]))]
3492 bool ok = ix86_expand_int_vec_cmp (operands);
3497 (define_expand "vec_cmpeqv2div2di"
3498 [(set (match_operand:V2DI 0 "register_operand")
3499 (match_operator:V2DI 1 ""
3500 [(match_operand:V2DI 2 "register_operand")
3501 (match_operand:V2DI 3 "vector_operand")]))]
3504 bool ok = ix86_expand_int_vec_cmp (operands);
3509 (define_expand "vcond<V_512:mode><VF_512:mode>"
3510 [(set (match_operand:V_512 0 "register_operand")
3512 (match_operator 3 ""
3513 [(match_operand:VF_512 4 "nonimmediate_operand")
3514 (match_operand:VF_512 5 "nonimmediate_operand")])
3515 (match_operand:V_512 1 "general_operand")
3516 (match_operand:V_512 2 "general_operand")))]
3518 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3519 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3521 bool ok = ix86_expand_fp_vcond (operands);
3526 (define_expand "vcond<V_256:mode><VF_256:mode>"
3527 [(set (match_operand:V_256 0 "register_operand")
3529 (match_operator 3 ""
3530 [(match_operand:VF_256 4 "nonimmediate_operand")
3531 (match_operand:VF_256 5 "nonimmediate_operand")])
3532 (match_operand:V_256 1 "general_operand")
3533 (match_operand:V_256 2 "general_operand")))]
3535 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3536 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3538 bool ok = ix86_expand_fp_vcond (operands);
3543 (define_expand "vcond<V_128:mode><VF_128:mode>"
3544 [(set (match_operand:V_128 0 "register_operand")
3546 (match_operator 3 ""
3547 [(match_operand:VF_128 4 "vector_operand")
3548 (match_operand:VF_128 5 "vector_operand")])
3549 (match_operand:V_128 1 "general_operand")
3550 (match_operand:V_128 2 "general_operand")))]
3552 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3553 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3555 bool ok = ix86_expand_fp_vcond (operands);
3560 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3561 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3562 (vec_merge:V48_AVX512VL
3563 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3564 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3565 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3568 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3569 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3570 (vec_merge:VI12_AVX512VL
3571 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3572 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3573 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3576 ;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
3577 ;; and their condition can be folded late into a constant, we need to
3578 ;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
3579 (define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
3582 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3583 [(set (match_operand:VI_256_AVX2 0 "register_operand")
3584 (vec_merge:VI_256_AVX2
3585 (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
3586 (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
3587 (match_operand:<sseintvecmode> 3 "register_operand")))]
3590 ix86_expand_sse_movcc (operands[0], operands[3],
3591 operands[1], operands[2]);
3595 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3596 [(set (match_operand:VI124_128 0 "register_operand")
3597 (vec_merge:VI124_128
3598 (match_operand:VI124_128 1 "vector_operand")
3599 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3600 (match_operand:<sseintvecmode> 3 "register_operand")))]
3603 ix86_expand_sse_movcc (operands[0], operands[3],
3604 operands[1], operands[2]);
3608 (define_expand "vcond_mask_v2div2di"
3609 [(set (match_operand:V2DI 0 "register_operand")
3611 (match_operand:V2DI 1 "vector_operand")
3612 (match_operand:V2DI 2 "nonimm_or_0_operand")
3613 (match_operand:V2DI 3 "register_operand")))]
3616 ix86_expand_sse_movcc (operands[0], operands[3],
3617 operands[1], operands[2]);
3621 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3622 [(set (match_operand:VF_256 0 "register_operand")
3624 (match_operand:VF_256 1 "nonimmediate_operand")
3625 (match_operand:VF_256 2 "nonimm_or_0_operand")
3626 (match_operand:<sseintvecmode> 3 "register_operand")))]
3629 ix86_expand_sse_movcc (operands[0], operands[3],
3630 operands[1], operands[2]);
3634 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3635 [(set (match_operand:VF_128 0 "register_operand")
3637 (match_operand:VF_128 1 "vector_operand")
3638 (match_operand:VF_128 2 "nonimm_or_0_operand")
3639 (match_operand:<sseintvecmode> 3 "register_operand")))]
3642 ix86_expand_sse_movcc (operands[0], operands[3],
3643 operands[1], operands[2]);
3647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3649 ;; Parallel floating point logical operations
3651 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3653 (define_insn "<sse>_andnot<mode>3<mask_name>"
3654 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3657 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3658 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3659 "TARGET_SSE && <mask_avx512vl_condition>"
3665 switch (which_alternative)
3668 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3673 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3679 switch (get_attr_mode (insn))
3687 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3688 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3689 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3692 suffix = "<ssemodesuffix>";
3695 snprintf (buf, sizeof (buf), ops, suffix);
3696 output_asm_insn (buf, operands);
3699 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3700 (set_attr "type" "sselog")
3701 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3703 (cond [(and (match_test "<mask_applied>")
3704 (and (eq_attr "alternative" "1")
3705 (match_test "!TARGET_AVX512DQ")))
3706 (const_string "<sseintvecmode2>")
3707 (eq_attr "alternative" "3")
3708 (const_string "<sseintvecmode2>")
3709 (match_test "TARGET_AVX")
3710 (const_string "<MODE>")
3711 (match_test "optimize_function_for_size_p (cfun)")
3712 (const_string "V4SF")
3713 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3714 (const_string "V4SF")
3716 (const_string "<MODE>")))])
3718 (define_insn "<sse>_andnot<mode>3<mask_name>"
3719 [(set (match_operand:VF_512 0 "register_operand" "=v")
3722 (match_operand:VF_512 1 "register_operand" "v"))
3723 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3730 suffix = "<ssemodesuffix>";
3733 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3734 if (!TARGET_AVX512DQ)
3736 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3740 snprintf (buf, sizeof (buf),
3741 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3743 output_asm_insn (buf, operands);
3746 [(set_attr "type" "sselog")
3747 (set_attr "prefix" "evex")
3749 (if_then_else (match_test "TARGET_AVX512DQ")
3750 (const_string "<sseinsnmode>")
3751 (const_string "XI")))])
3753 (define_expand "<code><mode>3<mask_name>"
3754 [(set (match_operand:VF_128_256 0 "register_operand")
3755 (any_logic:VF_128_256
3756 (match_operand:VF_128_256 1 "vector_operand")
3757 (match_operand:VF_128_256 2 "vector_operand")))]
3758 "TARGET_SSE && <mask_avx512vl_condition>"
3759 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3761 (define_expand "<code><mode>3<mask_name>"
3762 [(set (match_operand:VF_512 0 "register_operand")
3764 (match_operand:VF_512 1 "nonimmediate_operand")
3765 (match_operand:VF_512 2 "nonimmediate_operand")))]
3767 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3769 (define_insn "*<code><mode>3<mask_name>"
3770 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3771 (any_logic:VF_128_256
3772 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3773 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3774 "TARGET_SSE && <mask_avx512vl_condition>
3775 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3781 switch (which_alternative)
3784 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3789 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3795 switch (get_attr_mode (insn))
3803 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3804 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3805 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3808 suffix = "<ssemodesuffix>";
3811 snprintf (buf, sizeof (buf), ops, suffix);
3812 output_asm_insn (buf, operands);
3815 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3816 (set_attr "type" "sselog")
3817 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3819 (cond [(and (match_test "<mask_applied>")
3820 (and (eq_attr "alternative" "1")
3821 (match_test "!TARGET_AVX512DQ")))
3822 (const_string "<sseintvecmode2>")
3823 (eq_attr "alternative" "3")
3824 (const_string "<sseintvecmode2>")
3825 (match_test "TARGET_AVX")
3826 (const_string "<MODE>")
3827 (match_test "optimize_function_for_size_p (cfun)")
3828 (const_string "V4SF")
3829 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3830 (const_string "V4SF")
3832 (const_string "<MODE>")))])
3834 (define_insn "*<code><mode>3<mask_name>"
3835 [(set (match_operand:VF_512 0 "register_operand" "=v")
3837 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3838 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3839 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3845 suffix = "<ssemodesuffix>";
3848 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3849 if (!TARGET_AVX512DQ)
3851 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3855 snprintf (buf, sizeof (buf),
3856 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3858 output_asm_insn (buf, operands);
3861 [(set_attr "type" "sselog")
3862 (set_attr "prefix" "evex")
3864 (if_then_else (match_test "TARGET_AVX512DQ")
3865 (const_string "<sseinsnmode>")
3866 (const_string "XI")))])
3868 (define_expand "copysign<mode>3"
3871 (not:VF (match_dup 3))
3872 (match_operand:VF 1 "vector_operand")))
3874 (and:VF (match_dup 3)
3875 (match_operand:VF 2 "vector_operand")))
3876 (set (match_operand:VF 0 "register_operand")
3877 (ior:VF (match_dup 4) (match_dup 5)))]
3880 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3882 operands[4] = gen_reg_rtx (<MODE>mode);
3883 operands[5] = gen_reg_rtx (<MODE>mode);
3886 (define_expand "xorsign<mode>3"
3888 (and:VF (match_dup 3)
3889 (match_operand:VF 2 "vector_operand")))
3890 (set (match_operand:VF 0 "register_operand")
3891 (xor:VF (match_dup 4)
3892 (match_operand:VF 1 "vector_operand")))]
3895 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3897 operands[4] = gen_reg_rtx (<MODE>mode);
3900 (define_expand "signbit<mode>2"
3901 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3902 (lshiftrt:<sseintvecmode>
3903 (subreg:<sseintvecmode>
3904 (match_operand:VF1_AVX2 1 "register_operand") 0)
3907 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
3909 ;; Also define scalar versions. These are used for abs, neg, and
3910 ;; conditional move. Using subregs into vector modes causes register
3911 ;; allocation lossage. These patterns do not allow memory operands
3912 ;; because the native instructions read the full 128-bits.
3914 (define_insn "*andnot<mode>3"
3915 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3918 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3919 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3920 "SSE_FLOAT_MODE_P (<MODE>mode)"
3925 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3927 switch (which_alternative)
3930 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3933 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3936 if (TARGET_AVX512DQ)
3937 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3940 suffix = <MODE>mode == DFmode ? "q" : "d";
3941 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3945 if (TARGET_AVX512DQ)
3946 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3949 suffix = <MODE>mode == DFmode ? "q" : "d";
3950 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3957 snprintf (buf, sizeof (buf), ops, suffix);
3958 output_asm_insn (buf, operands);
3961 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3962 (set_attr "type" "sselog")
3963 (set_attr "prefix" "orig,vex,evex,evex")
3965 (cond [(eq_attr "alternative" "2")
3966 (if_then_else (match_test "TARGET_AVX512DQ")
3967 (const_string "<ssevecmode>")
3968 (const_string "TI"))
3969 (eq_attr "alternative" "3")
3970 (if_then_else (match_test "TARGET_AVX512DQ")
3971 (const_string "<avx512fvecmode>")
3972 (const_string "XI"))
3973 (match_test "TARGET_AVX")
3974 (const_string "<ssevecmode>")
3975 (match_test "optimize_function_for_size_p (cfun)")
3976 (const_string "V4SF")
3977 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3978 (const_string "V4SF")
3980 (const_string "<ssevecmode>")))])
3982 (define_insn "*andnottf3"
3983 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3985 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3986 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3992 = (which_alternative >= 2 ? "pandnq"
3993 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3995 switch (which_alternative)
3998 ops = "%s\t{%%2, %%0|%%0, %%2}";
4002 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4005 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4011 snprintf (buf, sizeof (buf), ops, tmp);
4012 output_asm_insn (buf, operands);
4015 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4016 (set_attr "type" "sselog")
4017 (set (attr "prefix_data16")
4019 (and (eq_attr "alternative" "0")
4020 (eq_attr "mode" "TI"))
4022 (const_string "*")))
4023 (set_attr "prefix" "orig,vex,evex,evex")
4025 (cond [(eq_attr "alternative" "2")
4027 (eq_attr "alternative" "3")
4029 (match_test "TARGET_AVX")
4031 (ior (not (match_test "TARGET_SSE2"))
4032 (match_test "optimize_function_for_size_p (cfun)"))
4033 (const_string "V4SF")
4034 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4035 (const_string "V4SF")
4037 (const_string "TI")))])
4039 (define_insn "*<code><mode>3"
4040 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
4042 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
4043 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
4044 "SSE_FLOAT_MODE_P (<MODE>mode)"
4049 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
4051 switch (which_alternative)
4054 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
4057 if (!TARGET_AVX512DQ)
4059 suffix = <MODE>mode == DFmode ? "q" : "d";
4060 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4065 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4068 if (TARGET_AVX512DQ)
4069 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4072 suffix = <MODE>mode == DFmode ? "q" : "d";
4073 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4080 snprintf (buf, sizeof (buf), ops, suffix);
4081 output_asm_insn (buf, operands);
4084 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4085 (set_attr "type" "sselog")
4086 (set_attr "prefix" "orig,vex,evex,evex")
4088 (cond [(eq_attr "alternative" "2")
4089 (if_then_else (match_test "TARGET_AVX512DQ")
4090 (const_string "<ssevecmode>")
4091 (const_string "TI"))
4092 (eq_attr "alternative" "3")
4093 (if_then_else (match_test "TARGET_AVX512DQ")
4094 (const_string "<avx512fvecmode>")
4095 (const_string "XI"))
4096 (match_test "TARGET_AVX")
4097 (const_string "<ssevecmode>")
4098 (match_test "optimize_function_for_size_p (cfun)")
4099 (const_string "V4SF")
4100 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4101 (const_string "V4SF")
4103 (const_string "<ssevecmode>")))])
4105 (define_expand "<code>tf3"
4106 [(set (match_operand:TF 0 "register_operand")
4108 (match_operand:TF 1 "vector_operand")
4109 (match_operand:TF 2 "vector_operand")))]
4111 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
4113 (define_insn "*<code>tf3"
4114 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
4116 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
4117 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
4118 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4123 = (which_alternative >= 2 ? "p<logic>q"
4124 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
4126 switch (which_alternative)
4129 ops = "%s\t{%%2, %%0|%%0, %%2}";
4133 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4136 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4142 snprintf (buf, sizeof (buf), ops, tmp);
4143 output_asm_insn (buf, operands);
4146 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4147 (set_attr "type" "sselog")
4148 (set (attr "prefix_data16")
4150 (and (eq_attr "alternative" "0")
4151 (eq_attr "mode" "TI"))
4153 (const_string "*")))
4154 (set_attr "prefix" "orig,vex,evex,evex")
4156 (cond [(eq_attr "alternative" "2")
4158 (eq_attr "alternative" "3")
4160 (match_test "TARGET_AVX")
4162 (ior (not (match_test "TARGET_SSE2"))
4163 (match_test "optimize_function_for_size_p (cfun)"))
4164 (const_string "V4SF")
4165 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4166 (const_string "V4SF")
4168 (const_string "TI")))])
4170 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4172 ;; FMA floating point multiply/accumulate instructions. These include
4173 ;; scalar versions of the instructions as well as vector versions.
4175 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4177 ;; The standard names for scalar FMA are only available with SSE math enabled.
4178 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
4179 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
4180 ;; and TARGET_FMA4 are both false.
4181 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
4182 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
4183 ;; GAS to allow proper prefix selection. However, for the moment all hardware
4184 ;; that supports AVX512F also supports FMA so we can ignore this for now.
4185 (define_mode_iterator FMAMODEM
4186 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4187 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4188 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4189 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4190 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4191 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4192 (V16SF "TARGET_AVX512F")
4193 (V8DF "TARGET_AVX512F")])
;; Standard-named expanders: fma = a*b+c, fms = a*b-c, fnma = -(a*b)+c,
;; fnms = -(a*b)-c.  The negations visible below (neg:FMAMODEM on operand 1
;; and/or 3) select the variant.
;; NOTE(review): the (fma:FMAMODEM ...) wrapper lines appear to be missing
;; from this extraction — restore from upstream before relying on this chunk.
4195 (define_expand "fma<mode>4"
4196 [(set (match_operand:FMAMODEM 0 "register_operand")
4198 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4199 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4200 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; fms: operand 3 negated.
4202 (define_expand "fms<mode>4"
4203 [(set (match_operand:FMAMODEM 0 "register_operand")
4205 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4206 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4207 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; fnma: operand 1 negated.
4209 (define_expand "fnma<mode>4"
4210 [(set (match_operand:FMAMODEM 0 "register_operand")
4212 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4213 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4214 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; fnms: operands 1 and 3 negated.
4216 (define_expand "fnms<mode>4"
4217 [(set (match_operand:FMAMODEM 0 "register_operand")
4219 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4220 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4221 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
4223 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode set as FMAMODEM but without the TARGET_SSE_MATH requirement
;; on the scalar modes — used by the intrinsic (builtin) expanders.
4224 (define_mode_iterator FMAMODE_AVX512
4225 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4226 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4227 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4228 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4229 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4230 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4231 (V16SF "TARGET_AVX512F")
4232 (V8DF "TARGET_AVX512F")])
;; Unconditional scalar + 128/256-bit vector FP modes for the plain
;; TARGET_FMA/TARGET_FMA4 insn patterns.
4234 (define_mode_iterator FMAMODE
4235 [SF DF V4SF V2DF V8SF V4DF])
;; Intrinsic-facing expanders over FMAMODE_AVX512 (no SSE-math gate);
;; same fmadd/fmsub/fnmadd/fnmsub negation scheme as the standard names.
;; NOTE(review): interior (fma:...)/(neg:...) lines look truncated here.
4237 (define_expand "fma4i_fmadd_<mode>"
4238 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4240 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4241 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4242 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
4244 (define_expand "fma4i_fmsub_<mode>"
4245 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4247 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4248 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4250 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
4252 (define_expand "fma4i_fnmadd_<mode>"
4253 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4256 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4257 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4258 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
4260 (define_expand "fma4i_fnmsub_<mode>"
4261 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4264 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4265 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4267 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
;; Zero-masked fmadd intrinsic expander: forwards to the maskz_1 insn with
;; a zero merge vector (CONST0_RTX) and the mask in operand 4.
4269 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
4270 [(match_operand:VF_AVX512VL 0 "register_operand")
4271 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4272 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4273 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4274 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4275 "TARGET_AVX512F && <round_mode512bit_condition>"
4277 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
4278 operands[0], operands[1], operands[2], operands[3],
4279 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unmasked fmadd for FMA3/FMA4: alternatives 0-2 pick the 132/213/231
;; FMA3 form by which input shares the destination; alternatives 3-4 are
;; the 4-operand FMA4 encoding.
4283 (define_insn "*fma_fmadd_<mode>"
4284 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4286 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4287 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4288 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4289 "TARGET_FMA || TARGET_FMA4"
4291 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4292 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4293 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4294 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4295 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4296 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4297 (set_attr "type" "ssemuladd")
4298 (set_attr "mode" "<MODE>")])
4300 ;; Suppose AVX-512F as baseline
;; Scalar + 512-bit modes always; 128/256-bit only with AVX512VL.
4301 (define_mode_iterator VF_SF_AVX512VL
4302 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
4303 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; EVEX fmadd, optionally zero-masked / with embedded rounding via the
;; sd_maskz / round define_subst machinery.
4305 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
4306 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4308 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4309 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4310 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4311 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4313 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4314 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4315 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4316 [(set_attr "type" "ssemuladd")
4317 (set_attr "mode" "<MODE>")])
;; Merge-masked fmadd: result merged with operand 1 (the accumulator that
;; shares the destination) under mask operand 4; only the 132/213 forms
;; keep operand 1 in the destination, hence two alternatives.
4319 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
4320 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4321 (vec_merge:VF_AVX512VL
4323 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4324 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4325 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4327 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4328 "TARGET_AVX512F && <round_mode512bit_condition>"
4330 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4331 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4332 [(set_attr "type" "ssemuladd")
4333 (set_attr "mode" "<MODE>")])
;; mask3 variant: merge with operand 3, which shares the destination, so
;; only the 231 form applies.
;; NOTE(review): the insn condition string line appears to be missing from
;; this extraction (siblings use "TARGET_AVX512F && <round_mode512bit_condition>").
4335 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
4336 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4337 (vec_merge:VF_AVX512VL
4339 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4340 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4341 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4343 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4345 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4346 [(set_attr "type" "ssemuladd")
4347 (set_attr "mode" "<MODE>")])
;; Unmasked fmsub (a*b - c) for FMA3 (alternatives 0-2, 132/213/231) and
;; FMA4 (alternatives 3-4); operand 3 is negated in the RTL.
4349 (define_insn "*fma_fmsub_<mode>"
4350 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4352 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4353 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4355 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4356 "TARGET_FMA || TARGET_FMA4"
4358 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4359 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4360 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4361 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4362 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4363 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4364 (set_attr "type" "ssemuladd")
4365 (set_attr "mode" "<MODE>")])
;; Zero-masked fmsub expander: forwards to fma_fmsub_<mode>_maskz_1 with a
;; zero merge vector and mask operand 4.
4367 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4368 [(match_operand:VF_AVX512VL 0 "register_operand")
4369 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4370 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4371 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4372 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4373 "TARGET_AVX512F && <round_mode512bit_condition>"
4375 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4376 operands[0], operands[1], operands[2], operands[3],
4377 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; EVEX fmsub, optionally zero-masked / with embedded rounding.
4381 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4382 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4384 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4385 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4387 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4388 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4390 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4391 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4392 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4393 [(set_attr "type" "ssemuladd")
4394 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub: merge with operand 1 (destination), 132/213 forms.
4396 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4397 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4398 (vec_merge:VF_AVX512VL
4400 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4401 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4403 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4405 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4408 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4409 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}
4410 [(set_attr "type" "ssemuladd")
4411 (set_attr "mode" "<MODE>")])
;; mask3 fmsub: merge with operand 3 (destination), 231 form only.
4413 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4414 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4415 (vec_merge:VF_AVX512VL
4417 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4418 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4420 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4422 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4423 "TARGET_AVX512F && <round_mode512bit_condition>"
4424 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4425 [(set_attr "type" "ssemuladd")
4426 (set_attr "mode" "<MODE>")])
;; Unmasked fnmadd (-(a*b) + c): operand 1 negated in the RTL; FMA3
;; 132/213/231 forms plus two FMA4 alternatives.
4428 (define_insn "*fma_fnmadd_<mode>"
4429 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4432 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4433 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4434 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4435 "TARGET_FMA || TARGET_FMA4"
4437 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4438 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4439 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4440 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4441 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4442 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4443 (set_attr "type" "ssemuladd")
4444 (set_attr "mode" "<MODE>")])
;; Zero-masked fnmadd expander: forwards to fma_fnmadd_<mode>_maskz_1 with
;; a zero merge vector and mask operand 4.
4446 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4447 [(match_operand:VF_AVX512VL 0 "register_operand")
4448 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4449 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4450 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4451 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4452 "TARGET_AVX512F && <round_mode512bit_condition>"
4454 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4455 operands[0], operands[1], operands[2], operands[3],
4456 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; EVEX fnmadd, optionally zero-masked / with embedded rounding.
4460 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4461 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4464 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4465 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4466 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4467 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4469 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4470 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4471 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4472 [(set_attr "type" "ssemuladd")
4473 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd: merge with operand 1 (destination), 132/213 forms.
4475 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4476 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4477 (vec_merge:VF_AVX512VL
4480 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4481 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4482 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4484 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4485 "TARGET_AVX512F && <round_mode512bit_condition>"
4487 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4488 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}
4489 [(set_attr "type" "ssemuladd")
4490 (set_attr "mode" "<MODE>")])
;; mask3 fnmadd: merge with operand 3 (destination), 231 form only.
4492 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4493 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4494 (vec_merge:VF_AVX512VL
4497 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4498 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4499 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4501 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4502 "TARGET_AVX512F && <round_mode512bit_condition>"
4503 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4504 [(set_attr "type" "ssemuladd")
4505 (set_attr "mode" "<MODE>")])
;; Unmasked fnmsub (-(a*b) - c) for FMA3/FMA4: operands 1 and 3 are negated
;; in the RTL.  Alternatives 0-2 are FMA3 132/213/231 forms (chosen by which
;; input shares the destination register); alternatives 3-4 are the
;; 4-operand FMA4 encoding.
;; Fixed: the FMA3 templates previously used <round_sd_mask_op4> /
;; <sd_mask_op4> substitution operands, which belong to the masked/rounding
;; <sd_maskz_name><round_name> pattern and are meaningless in this plain
;; TARGET_FMA || TARGET_FMA4 pattern — now consistent with the sibling
;; *fma_fmadd_/*fma_fmsub_/*fma_fnmadd_ patterns.
(define_insn "*fma_fnmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
	(fma:FMAMODE
	  (neg:FMAMODE
	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
	  (neg:FMAMODE
	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
;; Zero-masked fnmsub expander: forwards to fma_fnmsub_<mode>_maskz_1 with
;; a zero merge vector and mask operand 4.
4526 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4527 [(match_operand:VF_AVX512VL 0 "register_operand")
4528 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4529 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4530 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4531 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4532 "TARGET_AVX512F && <round_mode512bit_condition>"
4534 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4535 operands[0], operands[1], operands[2], operands[3],
4536 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; EVEX fnmsub, optionally zero-masked / with embedded rounding.
4540 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4541 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4544 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4545 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4547 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4548 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4550 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4551 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4552 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4553 [(set_attr "type" "ssemuladd")
4554 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub: merge with operand 1 (destination), 132/213 forms.
4556 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4557 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4558 (vec_merge:VF_AVX512VL
4561 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4562 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4564 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4566 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4567 "TARGET_AVX512F && <round_mode512bit_condition>"
4569 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4570 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}
4571 [(set_attr "type" "ssemuladd")
4572 (set_attr "mode" "<MODE>")])
;; mask3 fnmsub: merge with operand 3 (destination), 231 form only.
4574 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4575 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4576 (vec_merge:VF_AVX512VL
4579 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4580 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4582 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4584 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4586 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4587 [(set_attr "type" "ssemuladd")
4588 (set_attr "mode" "<MODE>")])
4590 ;; FMA parallel floating point multiply addsub and subadd operations.
4592 ;; It would be possible to represent these without the UNSPEC as
4595 ;; (fma op1 op2 op3)
4596 ;; (fma op1 op2 (neg op3))
4599 ;; But this doesn't seem useful in practice.
;; fmaddsub: represented with an UNSPEC (see comment above); operands are
;; the three FMA inputs.
4601 (define_expand "fmaddsub_<mode>"
4602 [(set (match_operand:VF 0 "register_operand")
4604 [(match_operand:VF 1 "nonimmediate_operand")
4605 (match_operand:VF 2 "nonimmediate_operand")
4606 (match_operand:VF 3 "nonimmediate_operand")]
4608 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masked fmaddsub expander: forwards to fma_fmaddsub_<mode>_maskz_1
;; with a zero merge vector and mask operand 4.
4610 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4611 [(match_operand:VF_AVX512VL 0 "register_operand")
4612 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4613 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4614 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4615 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4618 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4619 operands[0], operands[1], operands[2], operands[3],
4620 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unmasked fmaddsub for FMA3/FMA4 (128/256-bit only): FMA3 132/213/231
;; forms plus two FMA4 alternatives.
4624 (define_insn "*fma_fmaddsub_<mode>"
4625 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4627 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4628 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4629 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4631 "TARGET_FMA || TARGET_FMA4"
4633 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4634 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4635 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4636 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4637 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4638 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4639 (set_attr "type" "ssemuladd")
4640 (set_attr "mode" "<MODE>")])
;; EVEX fmaddsub, optionally zero-masked / with embedded rounding.
4642 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4643 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4644 (unspec:VF_SF_AVX512VL
4645 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4646 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4647 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4649 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4651 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4652 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4653 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4654 [(set_attr "type" "ssemuladd")
4655 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub: merge with operand 1 (destination), 132/213 forms.
4657 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4658 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4659 (vec_merge:VF_AVX512VL
4661 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4662 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4663 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4666 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4669 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4670 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}
4671 [(set_attr "type" "ssemuladd")
4672 (set_attr "mode" "<MODE>")])
;; mask3 fmaddsub: merge with operand 3 (destination), 231 form only.
4674 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4675 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4676 (vec_merge:VF_AVX512VL
4678 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4679 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4680 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4683 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4685 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4686 [(set_attr "type" "ssemuladd")
4687 (set_attr "mode" "<MODE>")])
;; Unmasked fmsubadd for FMA3/FMA4 (128/256-bit only); operand 3 is
;; negated inside the unspec argument list.
4689 (define_insn "*fma_fmsubadd_<mode>"
4690 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4692 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4693 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4695 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4697 "TARGET_FMA || TARGET_FMA4"
4699 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4700 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4701 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4702 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4703 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4704 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4705 (set_attr "type" "ssemuladd")
4706 (set_attr "mode" "<MODE>")])
;; EVEX fmsubadd, optionally zero-masked / with embedded rounding.
4708 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4709 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4710 (unspec:VF_SF_AVX512VL
4711 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4712 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4714 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4716 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4718 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4719 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4720 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4721 [(set_attr "type" "ssemuladd")
4722 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd: merge with operand 1 (destination), 132/213 forms.
4724 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4725 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4726 (vec_merge:VF_AVX512VL
4728 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4729 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4731 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
4734 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4737 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4738 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}
4739 [(set_attr "type" "ssemuladd")
4740 (set_attr "mode" "<MODE>")])
;; mask3 fmsubadd: merge with operand 3 (destination), 231 form only.
4742 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4743 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4744 (vec_merge:VF_AVX512VL
4746 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4747 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4749 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4752 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4754 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4755 [(set_attr "type" "ssemuladd")
4756 (set_attr "mode" "<MODE>")])
4758 ;; FMA3 floating point scalar intrinsics. These merge result with
4759 ;; high-order elements from the destination register.
;; Scalar FMA3 intrinsic expanders (VF_128): the FMA is computed on the
;; low element and merged with operand 1's upper elements (see the section
;; comment above).  fnmadd/fnmsub variants negate operand 2 and swap its
;; position with operand 1.
;; NOTE(review): the (vec_merge ...) wrapper lines appear truncated here.
4761 (define_expand "fmai_vmfmadd_<mode><round_name>"
4762 [(set (match_operand:VF_128 0 "register_operand")
4765 (match_operand:VF_128 1 "register_operand")
4766 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
4767 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
4772 (define_expand "fmai_vmfmsub_<mode><round_name>"
4773 [(set (match_operand:VF_128 0 "register_operand")
4776 (match_operand:VF_128 1 "register_operand")
4777 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
4779 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
4784 (define_expand "fmai_vmfnmadd_<mode><round_name>"
4785 [(set (match_operand:VF_128 0 "register_operand")
4789 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
4790 (match_operand:VF_128 1 "register_operand")
4791 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
4796 (define_expand "fmai_vmfnmsub_<mode><round_name>"
4797 [(set (match_operand:VF_128 0 "register_operand")
4801 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
4802 (match_operand:VF_128 1 "register_operand")
4804 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
;; Scalar fmadd insn: low-element FMA merged into operand 1's register;
;; 132/213 forms (operand 1 shares the destination).
4809 (define_insn "*fmai_fmadd_<mode>"
4810 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4813 (match_operand:VF_128 1 "register_operand" "0,0")
4814 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
4815 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4818 "TARGET_FMA || TARGET_AVX512F"
4820 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4821 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4822 [(set_attr "type" "ssemuladd")
4823 (set_attr "mode" "<MODE>")])
;; Scalar fmsub insn: operand 3 negated.
4825 (define_insn "*fmai_fmsub_<mode>"
4826 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4829 (match_operand:VF_128 1 "register_operand" "0,0")
4830 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4832 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4835 "TARGET_FMA || TARGET_AVX512F"
4837 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4838 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4839 [(set_attr "type" "ssemuladd")
4840 (set_attr "mode" "<MODE>")])
;; Scalar fnmadd insn: operand 2 negated (multiplier), result merged with
;; operand 1's upper elements.
4842 (define_insn "*fmai_fnmadd_<mode><round_name>"
4843 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4847 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4848 (match_operand:VF_128 1 "register_operand" "0,0")
4849 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4852 "TARGET_FMA || TARGET_AVX512F"
4854 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4855 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4856 [(set_attr "type" "ssemuladd")
4857 (set_attr "mode" "<MODE>")])
;; Scalar fnmsub insn: operands 2 and 3 negated.
4859 (define_insn "*fmai_fnmsub_<mode><round_name>"
4860 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4864 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4865 (match_operand:VF_128 1 "register_operand" "0,0")
4867 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4870 "TARGET_FMA || TARGET_AVX512F"
4872 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4873 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4874 [(set_attr "type" "ssemuladd")
4875 (set_attr "mode" "<MODE>")])
;; Masked scalar fmadd: low element selected by mask operand 4, merged with
;; operand 1 (destination); 132/213 forms.
4877 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
4878 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4882 (match_operand:VF_128 1 "register_operand" "0,0")
4883 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4884 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4886 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4891 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4892 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4893 [(set_attr "type" "ssemuladd")
4894 (set_attr "mode" "<MODE>")])
;; mask3 scalar fmadd: operand 3 shares the destination, 231 form.
;; NOTE(review): the Intel-syntax operand list reads "%<iptr>3, %<iptr>2"
;; while the AT&T side uses "%2, %1" — expected "%<iptr>1, %<iptr>2";
;; confirm against upstream before changing.
4896 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
4897 [(set (match_operand:VF_128 0 "register_operand" "=v")
4901 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4902 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
4903 (match_operand:VF_128 3 "register_operand" "0"))
4905 (match_operand:QI 4 "register_operand" "Yk"))
4909 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4910 [(set_attr "type" "ssemuladd")
4911 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fmadd expander: forwards to the maskz_1 insn with a
;; zero merge vector and mask operand 4.
4913 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
4914 [(match_operand:VF_128 0 "register_operand")
4915 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
4916 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
4917 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
4918 (match_operand:QI 4 "register_operand")]
4921 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
4922 operands[0], operands[1], operands[2], operands[3],
4923 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Zero-masked scalar fmadd insn: merges with constant-zero operand 4
;; under mask operand 5 ({z} in the template); 132/213 forms.
4927 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
4928 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4932 (match_operand:VF_128 1 "register_operand" "0,0")
4933 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4934 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4935 (match_operand:VF_128 4 "const0_operand" "C,C")
4936 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4941 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4942 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4943 [(set_attr "type" "ssemuladd")
4944 (set_attr "mode" "<MODE>")])
;; Masked scalar fmsub (operand 3 negated): merge with operand 1
;; (destination), 132/213 forms.
4946 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
4947 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4951 (match_operand:VF_128 1 "register_operand" "0,0")
4952 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4954 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4956 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4961 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4962 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4963 [(set_attr "type" "ssemuladd")
4964 (set_attr "mode" "<MODE>")])
;; mask3 scalar fmsub: operand 3 shares the destination, 231 form.
;; NOTE(review): Intel side reads "%<iptr>3, %<iptr>2" vs AT&T "%2, %1" —
;; expected "%<iptr>1, %<iptr>2"; confirm against upstream before changing.
4966 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
4967 [(set (match_operand:VF_128 0 "register_operand" "=v")
4971 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4972 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
4974 (match_operand:VF_128 3 "register_operand" "0")))
4976 (match_operand:QI 4 "register_operand" "Yk"))
4980 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4981 [(set_attr "type" "ssemuladd")
4982 (set_attr "mode" "<MODE>")])
4984 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
4985 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4989 (match_operand:VF_128 1 "register_operand" "0,0")
4990 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4992 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4993 (match_operand:VF_128 4 "const0_operand" "C,C")
4994 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4999 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5000 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5001 [(set_attr "type" "ssemuladd")
5002 (set_attr "mode" "<MODE>")])
5004 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
5005 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5010 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5011 (match_operand:VF_128 1 "register_operand" "0,0")
5012 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5014 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5019 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5020 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5021 [(set_attr "type" "ssemuladd")
5022 (set_attr "mode" "<MODE>")])
5024 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
5025 [(set (match_operand:VF_128 0 "register_operand" "=v")
5030 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
5031 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5032 (match_operand:VF_128 3 "register_operand" "0"))
5034 (match_operand:QI 4 "register_operand" "Yk"))
5038 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5039 [(set_attr "type" "ssemuladd")
5040 (set_attr "mode" "<MODE>")])
5042 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
5043 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5048 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5049 (match_operand:VF_128 1 "register_operand" "0,0")
5050 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5051 (match_operand:VF_128 4 "const0_operand" "C,C")
5052 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5057 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5058 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5059 [(set_attr "type" "ssemuladd")
5060 (set_attr "mode" "<MODE>")])
5062 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
5063 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5068 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5069 (match_operand:VF_128 1 "register_operand" "0,0")
5071 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5073 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5078 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5079 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5080 [(set_attr "type" "ssemuladd")
5081 (set_attr "mode" "<MODE>")])
5083 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
5084 [(set (match_operand:VF_128 0 "register_operand" "=v")
5089 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
5090 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5092 (match_operand:VF_128 3 "register_operand" "0")))
5094 (match_operand:QI 4 "register_operand" "Yk"))
5098 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5099 [(set_attr "type" "ssemuladd")
5100 (set_attr "mode" "<MODE>")])
5102 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
5103 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5108 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5109 (match_operand:VF_128 1 "register_operand" "0,0")
5111 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5112 (match_operand:VF_128 4 "const0_operand" "C,C")
5113 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5118 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5119 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5120 [(set_attr "type" "ssemuladd")
5121 (set_attr "mode" "<MODE>")])
5123 ;; FMA4 floating point scalar intrinsics. These write the
5124 ;; entire destination register, with the high-order elements zeroed.
5126 (define_expand "fma4i_vmfmadd_<mode>"
5127 [(set (match_operand:VF_128 0 "register_operand")
5130 (match_operand:VF_128 1 "nonimmediate_operand")
5131 (match_operand:VF_128 2 "nonimmediate_operand")
5132 (match_operand:VF_128 3 "nonimmediate_operand"))
5136 "operands[4] = CONST0_RTX (<MODE>mode);")
5138 (define_insn "*fma4i_vmfmadd_<mode>"
5139 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5142 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5143 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5144 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5145 (match_operand:VF_128 4 "const0_operand")
5148 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5149 [(set_attr "type" "ssemuladd")
5150 (set_attr "mode" "<MODE>")])
5152 (define_insn "*fma4i_vmfmsub_<mode>"
5153 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5156 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5157 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5159 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5160 (match_operand:VF_128 4 "const0_operand")
5163 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5164 [(set_attr "type" "ssemuladd")
5165 (set_attr "mode" "<MODE>")])
5167 (define_insn "*fma4i_vmfnmadd_<mode>"
5168 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5172 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5173 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5174 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5175 (match_operand:VF_128 4 "const0_operand")
5178 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5179 [(set_attr "type" "ssemuladd")
5180 (set_attr "mode" "<MODE>")])
5182 (define_insn "*fma4i_vmfnmsub_<mode>"
5183 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5187 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5188 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5190 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5191 (match_operand:VF_128 4 "const0_operand")
5194 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5195 [(set_attr "type" "ssemuladd")
5196 (set_attr "mode" "<MODE>")])
5198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5200 ;; Parallel single-precision floating point conversion operations
5202 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5204 (define_insn_and_split "sse_cvtpi2ps"
5205 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
5208 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5209 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5211 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5212 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5214 cvtpi2ps\t{%2, %0|%0, %2}
5217 "TARGET_SSE2 && reload_completed
5218 && SSE_REG_P (operands[2])"
5221 rtx op2 = lowpart_subreg (V4SImode, operands[2],
5222 GET_MODE (operands[2]));
5223 /* Generate SSE2 cvtdq2ps. */
5224 emit_insn (gen_floatv4siv4sf2 (operands[3], op2));
5226 /* Merge operands[3] with operands[0]. */
5230 mask = gen_rtx_PARALLEL (VOIDmode,
5231 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5232 GEN_INT (6), GEN_INT (7)));
5233 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5234 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5235 emit_insn (gen_rtx_SET (operands[0], op2));
5239 /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
5240 mask = gen_rtx_PARALLEL (VOIDmode,
5241 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5242 GEN_INT (4), GEN_INT (5)));
5243 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5244 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5245 emit_insn (gen_rtx_SET (operands[0], op2));
5247 /* Swap bits 0:63 with bits 64:127. */
5248 mask = gen_rtx_PARALLEL (VOIDmode,
5249 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5250 GEN_INT (0), GEN_INT (1)));
5251 rtx dest = lowpart_subreg (V4SImode, operands[0],
5252 GET_MODE (operands[0]));
5253 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5254 emit_insn (gen_rtx_SET (dest, op1));
5258 [(set_attr "mmx_isa" "native,sse_noavx,avx")
5259 (set_attr "type" "ssecvt")
5260 (set_attr "mode" "V4SF")])
5262 (define_insn_and_split "sse_cvtps2pi"
5263 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5265 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
5267 (parallel [(const_int 0) (const_int 1)])))]
5268 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5270 cvtps2pi\t{%1, %0|%0, %q1}
5272 "TARGET_SSE2 && reload_completed
5273 && SSE_REG_P (operands[0])"
5276 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5277 GET_MODE (operands[1]));
5278 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5279 GET_MODE (operands[0]));
5281 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5282 emit_insn (gen_rtx_SET (tmp, op1));
5284 rtx dest = lowpart_subreg (V4SImode, operands[0],
5285 GET_MODE (operands[0]));
5286 emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
5289 [(set_attr "isa" "*,sse2")
5290 (set_attr "mmx_isa" "native,*")
5291 (set_attr "type" "ssecvt")
5292 (set_attr "unit" "mmx,*")
5293 (set_attr "mode" "DI")])
5295 (define_insn_and_split "sse_cvttps2pi"
5296 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5298 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
5299 (parallel [(const_int 0) (const_int 1)])))]
5300 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5302 cvttps2pi\t{%1, %0|%0, %q1}
5304 "TARGET_SSE2 && reload_completed
5305 && SSE_REG_P (operands[0])"
5308 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5309 GET_MODE (operands[1]));
5310 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5311 GET_MODE (operands[0]));
5313 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5314 emit_insn (gen_rtx_SET (tmp, op1));
5316 rtx dest = lowpart_subreg (V4SImode, operands[0],
5317 GET_MODE (operands[0]));
5318 emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
5321 [(set_attr "isa" "*,sse2")
5322 (set_attr "mmx_isa" "native,*")
5323 (set_attr "type" "ssecvt")
5324 (set_attr "unit" "mmx,*")
5325 (set_attr "prefix_rep" "0")
5326 (set_attr "mode" "SF")])
5328 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5329 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5332 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5333 (match_operand:V4SF 1 "register_operand" "0,0,v")
5337 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5338 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5339 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5340 [(set_attr "isa" "noavx,noavx,avx")
5341 (set_attr "type" "sseicvt")
5342 (set_attr "athlon_decode" "vector,double,*")
5343 (set_attr "amdfam10_decode" "vector,double,*")
5344 (set_attr "bdver1_decode" "double,direct,*")
5345 (set_attr "btver2_decode" "double,double,double")
5346 (set_attr "znver1_decode" "double,double,double")
5347 (set (attr "length_vex")
5349 (and (match_test "<MODE>mode == DImode")
5350 (eq_attr "alternative" "2"))
5352 (const_string "*")))
5353 (set (attr "prefix_rex")
5355 (and (match_test "<MODE>mode == DImode")
5356 (eq_attr "alternative" "0,1"))
5358 (const_string "*")))
5359 (set_attr "prefix" "orig,orig,maybe_evex")
5360 (set_attr "mode" "SF")])
5362 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5363 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5366 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5367 (parallel [(const_int 0)]))]
5368 UNSPEC_FIX_NOTRUNC))]
5370 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5371 [(set_attr "type" "sseicvt")
5372 (set_attr "athlon_decode" "double,vector")
5373 (set_attr "bdver1_decode" "double,double")
5374 (set_attr "prefix_rep" "1")
5375 (set_attr "prefix" "maybe_vex")
5376 (set_attr "mode" "<MODE>")])
5378 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5379 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5380 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5381 UNSPEC_FIX_NOTRUNC))]
5383 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
5384 [(set_attr "type" "sseicvt")
5385 (set_attr "athlon_decode" "double,vector")
5386 (set_attr "amdfam10_decode" "double,double")
5387 (set_attr "bdver1_decode" "double,double")
5388 (set_attr "prefix_rep" "1")
5389 (set_attr "prefix" "maybe_vex")
5390 (set_attr "mode" "<MODE>")])
5392 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5393 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5396 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5397 (parallel [(const_int 0)]))))]
5399 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5400 [(set_attr "type" "sseicvt")
5401 (set_attr "athlon_decode" "double,vector")
5402 (set_attr "amdfam10_decode" "double,double")
5403 (set_attr "bdver1_decode" "double,double")
5404 (set_attr "prefix_rep" "1")
5405 (set_attr "prefix" "maybe_vex")
5406 (set_attr "mode" "<MODE>")])
5408 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5409 [(set (match_operand:VF_128 0 "register_operand" "=v")
5411 (vec_duplicate:VF_128
5412 (unsigned_float:<ssescalarmode>
5413 (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5414 (match_operand:VF_128 1 "register_operand" "v")
5416 "TARGET_AVX512F && <round_modev4sf_condition>"
5417 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5418 [(set_attr "type" "sseicvt")
5419 (set_attr "prefix" "evex")
5420 (set_attr "mode" "<ssescalarmode>")])
5422 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5423 [(set (match_operand:VF_128 0 "register_operand" "=v")
5425 (vec_duplicate:VF_128
5426 (unsigned_float:<ssescalarmode>
5427 (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5428 (match_operand:VF_128 1 "register_operand" "v")
5430 "TARGET_AVX512F && TARGET_64BIT"
5431 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5432 [(set_attr "type" "sseicvt")
5433 (set_attr "prefix" "evex")
5434 (set_attr "mode" "<ssescalarmode>")])
5436 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5437 [(set (match_operand:VF1 0 "register_operand" "=x,v")
5439 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5440 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5442 cvtdq2ps\t{%1, %0|%0, %1}
5443 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5444 [(set_attr "isa" "noavx,avx")
5445 (set_attr "type" "ssecvt")
5446 (set_attr "prefix" "maybe_vex")
5447 (set_attr "mode" "<sseinsnmode>")])
5449 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5450 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5451 (unsigned_float:VF1_AVX512VL
5452 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5454 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5455 [(set_attr "type" "ssecvt")
5456 (set_attr "prefix" "evex")
5457 (set_attr "mode" "<MODE>")])
5459 (define_expand "floatuns<sseintvecmodelower><mode>2"
5460 [(match_operand:VF1 0 "register_operand")
5461 (match_operand:<sseintvecmode> 1 "register_operand")]
5462 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5464 if (<MODE>mode == V16SFmode)
5465 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5467 if (TARGET_AVX512VL)
5469 if (<MODE>mode == V4SFmode)
5470 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5472 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5475 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5481 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
5482 (define_mode_attr sf2simodelower
5483 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
5485 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5486 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5488 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5489 UNSPEC_FIX_NOTRUNC))]
5490 "TARGET_SSE2 && <mask_mode512bit_condition>"
5491 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5492 [(set_attr "type" "ssecvt")
5493 (set (attr "prefix_data16")
5495 (match_test "TARGET_AVX")
5497 (const_string "1")))
5498 (set_attr "prefix" "maybe_vex")
5499 (set_attr "mode" "<sseinsnmode>")])
5501 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5502 [(set (match_operand:V16SI 0 "register_operand" "=v")
5504 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5505 UNSPEC_FIX_NOTRUNC))]
5507 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5508 [(set_attr "type" "ssecvt")
5509 (set_attr "prefix" "evex")
5510 (set_attr "mode" "XI")])
5512 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5513 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5514 (unspec:VI4_AVX512VL
5515 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5516 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5518 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5519 [(set_attr "type" "ssecvt")
5520 (set_attr "prefix" "evex")
5521 (set_attr "mode" "<sseinsnmode>")])
5523 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5524 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5525 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5526 UNSPEC_FIX_NOTRUNC))]
5527 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5528 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5529 [(set_attr "type" "ssecvt")
5530 (set_attr "prefix" "evex")
5531 (set_attr "mode" "<sseinsnmode>")])
5533 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5534 [(set (match_operand:V2DI 0 "register_operand" "=v")
5537 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5538 (parallel [(const_int 0) (const_int 1)]))]
5539 UNSPEC_FIX_NOTRUNC))]
5540 "TARGET_AVX512DQ && TARGET_AVX512VL"
5541 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5542 [(set_attr "type" "ssecvt")
5543 (set_attr "prefix" "evex")
5544 (set_attr "mode" "TI")])
5546 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5547 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5548 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5549 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5550 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5551 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5552 [(set_attr "type" "ssecvt")
5553 (set_attr "prefix" "evex")
5554 (set_attr "mode" "<sseinsnmode>")])
5556 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5557 [(set (match_operand:V2DI 0 "register_operand" "=v")
5560 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5561 (parallel [(const_int 0) (const_int 1)]))]
5562 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5563 "TARGET_AVX512DQ && TARGET_AVX512VL"
5564 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5565 [(set_attr "type" "ssecvt")
5566 (set_attr "prefix" "evex")
5567 (set_attr "mode" "TI")])
5569 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5570 [(set (match_operand:V16SI 0 "register_operand" "=v")
5572 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5574 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5575 [(set_attr "type" "ssecvt")
5576 (set_attr "prefix" "evex")
5577 (set_attr "mode" "XI")])
5579 (define_insn "fix_truncv8sfv8si2<mask_name>"
5580 [(set (match_operand:V8SI 0 "register_operand" "=v")
5581 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5582 "TARGET_AVX && <mask_avx512vl_condition>"
5583 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5584 [(set_attr "type" "ssecvt")
5585 (set_attr "prefix" "<mask_prefix>")
5586 (set_attr "mode" "OI")])
5588 (define_insn "fix_truncv4sfv4si2<mask_name>"
5589 [(set (match_operand:V4SI 0 "register_operand" "=v")
5590 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5591 "TARGET_SSE2 && <mask_avx512vl_condition>"
5592 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5593 [(set_attr "type" "ssecvt")
5594 (set (attr "prefix_rep")
5596 (match_test "TARGET_AVX")
5598 (const_string "1")))
5599 (set (attr "prefix_data16")
5601 (match_test "TARGET_AVX")
5603 (const_string "0")))
5604 (set_attr "prefix_data16" "0")
5605 (set_attr "prefix" "<mask_prefix2>")
5606 (set_attr "mode" "TI")])
5608 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5609 [(match_operand:<sseintvecmode> 0 "register_operand")
5610 (match_operand:VF1 1 "register_operand")]
5613 if (<MODE>mode == V16SFmode)
5614 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5619 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5620 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5621 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5622 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5629 ;; Parallel double-precision floating point conversion operations
5631 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5633 (define_insn "sse2_cvtpi2pd"
5634 [(set (match_operand:V2DF 0 "register_operand" "=v,?!x")
5635 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,yBm")))]
5638 %vcvtdq2pd\t{%1, %0|%0, %1}
5639 cvtpi2pd\t{%1, %0|%0, %1}"
5640 [(set_attr "mmx_isa" "*,native")
5641 (set_attr "type" "ssecvt")
5642 (set_attr "unit" "*,mmx")
5643 (set_attr "prefix_data16" "*,1")
5644 (set_attr "prefix" "maybe_vex,*")
5645 (set_attr "mode" "V2DF")])
5647 (define_expand "floatv2siv2df2"
5648 [(set (match_operand:V2DF 0 "register_operand")
5649 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand")))]
5650 "TARGET_MMX_WITH_SSE")
5652 (define_insn "floatunsv2siv2df2"
5653 [(set (match_operand:V2DF 0 "register_operand" "=v")
5654 (unsigned_float:V2DF
5655 (match_operand:V2SI 1 "nonimmediate_operand" "vm")))]
5656 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5657 "vcvtudq2pd\t{%1, %0|%0, %1}"
5658 [(set_attr "type" "ssecvt")
5659 (set_attr "prefix" "evex")
5660 (set_attr "mode" "V2DF")])
5662 (define_insn "sse2_cvtpd2pi"
5663 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5664 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
5665 UNSPEC_FIX_NOTRUNC))]
5668 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5669 cvtpd2pi\t{%1, %0|%0, %1}"
5670 [(set_attr "mmx_isa" "*,native")
5671 (set_attr "type" "ssecvt")
5672 (set_attr "unit" "*,mmx")
5673 (set_attr "amdfam10_decode" "double")
5674 (set_attr "athlon_decode" "vector")
5675 (set_attr "bdver1_decode" "double")
5676 (set_attr "prefix_data16" "*,1")
5677 (set_attr "prefix" "maybe_vex,*")
5678 (set_attr "mode" "TI")])
5680 (define_insn "sse2_cvttpd2pi"
5681 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5682 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
5685 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5686 cvttpd2pi\t{%1, %0|%0, %1}"
5687 [(set_attr "mmx_isa" "*,native")
5688 (set_attr "type" "ssecvt")
5689 (set_attr "unit" "*,mmx")
5690 (set_attr "amdfam10_decode" "double")
5691 (set_attr "athlon_decode" "vector")
5692 (set_attr "bdver1_decode" "double")
5693 (set_attr "prefix_data16" "*,1")
5694 (set_attr "prefix" "maybe_vex,*")
5695 (set_attr "mode" "TI")])
5697 (define_expand "fix_truncv2dfv2si2"
5698 [(set (match_operand:V2SI 0 "register_operand")
5699 (fix:V2SI (match_operand:V2DF 1 "vector_operand")))]
5700 "TARGET_MMX_WITH_SSE")
5702 (define_insn "fixuns_truncv2dfv2si2"
5703 [(set (match_operand:V2SI 0 "register_operand" "=v")
5705 (match_operand:V2DF 1 "nonimmediate_operand" "vm")))]
5706 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5707 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
5708 [(set_attr "type" "ssecvt")
5709 (set_attr "prefix" "evex")
5710 (set_attr "mode" "TI")])
5712 (define_insn "sse2_cvtsi2sd"
5713 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5716 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5717 (match_operand:V2DF 1 "register_operand" "0,0,v")
5721 cvtsi2sd{l}\t{%2, %0|%0, %2}
5722 cvtsi2sd{l}\t{%2, %0|%0, %2}
5723 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
5724 [(set_attr "isa" "noavx,noavx,avx")
5725 (set_attr "type" "sseicvt")
5726 (set_attr "athlon_decode" "double,direct,*")
5727 (set_attr "amdfam10_decode" "vector,double,*")
5728 (set_attr "bdver1_decode" "double,direct,*")
5729 (set_attr "btver2_decode" "double,double,double")
5730 (set_attr "znver1_decode" "double,double,double")
5731 (set_attr "prefix" "orig,orig,maybe_evex")
5732 (set_attr "mode" "DF")])
5734 (define_insn "sse2_cvtsi2sdq<round_name>"
5735 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5738 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5739 (match_operand:V2DF 1 "register_operand" "0,0,v")
5741 "TARGET_SSE2 && TARGET_64BIT"
5743 cvtsi2sd{q}\t{%2, %0|%0, %2}
5744 cvtsi2sd{q}\t{%2, %0|%0, %2}
5745 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5746 [(set_attr "isa" "noavx,noavx,avx")
5747 (set_attr "type" "sseicvt")
5748 (set_attr "athlon_decode" "double,direct,*")
5749 (set_attr "amdfam10_decode" "vector,double,*")
5750 (set_attr "bdver1_decode" "double,direct,*")
5751 (set_attr "length_vex" "*,*,4")
5752 (set_attr "prefix_rex" "1,1,*")
5753 (set_attr "prefix" "orig,orig,maybe_evex")
5754 (set_attr "mode" "DF")])
5756 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
5757 [(set (match_operand:SWI48 0 "register_operand" "=r")
5760 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
5761 (parallel [(const_int 0)]))]
5762 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5764 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5765 [(set_attr "type" "sseicvt")
5766 (set_attr "prefix" "evex")
5767 (set_attr "mode" "<MODE>")])
5769 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
5770 [(set (match_operand:SWI48 0 "register_operand" "=r")
5773 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5774 (parallel [(const_int 0)]))))]
5776 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5777 [(set_attr "type" "sseicvt")
5778 (set_attr "prefix" "evex")
5779 (set_attr "mode" "<MODE>")])
5781 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
5782 [(set (match_operand:SWI48 0 "register_operand" "=r")
5785 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
5786 (parallel [(const_int 0)]))]
5787 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5789 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5790 [(set_attr "type" "sseicvt")
5791 (set_attr "prefix" "evex")
5792 (set_attr "mode" "<MODE>")])
5794 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
5795 [(set (match_operand:SWI48 0 "register_operand" "=r")
5798 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5799 (parallel [(const_int 0)]))))]
5801 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5802 [(set_attr "type" "sseicvt")
5803 (set_attr "prefix" "evex")
5804 (set_attr "mode" "<MODE>")])
5806 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
5807 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5810 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5811 (parallel [(const_int 0)]))]
5812 UNSPEC_FIX_NOTRUNC))]
5814 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5815 [(set_attr "type" "sseicvt")
5816 (set_attr "athlon_decode" "double,vector")
5817 (set_attr "bdver1_decode" "double,double")
5818 (set_attr "btver2_decode" "double,double")
5819 (set_attr "prefix_rep" "1")
5820 (set_attr "prefix" "maybe_vex")
5821 (set_attr "mode" "<MODE>")])
5823 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
5824 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5825 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
5826 UNSPEC_FIX_NOTRUNC))]
5828 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
5829 [(set_attr "type" "sseicvt")
5830 (set_attr "athlon_decode" "double,vector")
5831 (set_attr "amdfam10_decode" "double,double")
5832 (set_attr "bdver1_decode" "double,double")
5833 (set_attr "prefix_rep" "1")
5834 (set_attr "prefix" "maybe_vex")
5835 (set_attr "mode" "<MODE>")])
5837 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
5838 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5841 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
5842 (parallel [(const_int 0)]))))]
5844 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5845 [(set_attr "type" "sseicvt")
5846 (set_attr "athlon_decode" "double,vector")
5847 (set_attr "amdfam10_decode" "double,double")
5848 (set_attr "bdver1_decode" "double,double")
5849 (set_attr "btver2_decode" "double,double")
5850 (set_attr "prefix_rep" "1")
5851 (set_attr "prefix" "maybe_vex")
5852 (set_attr "mode" "<MODE>")])
;; Mode attributes mapping a double vector mode to the SImode vector
;; holding the same number of elements, used by the int->double
;; conversion patterns below.
5854 ;; For float<si2dfmode><mode>2 insn pattern
5855 (define_mode_attr si2dfmode
5856 [(V8DF "V8SI") (V4DF "V4SI")])
5857 (define_mode_attr si2dfmodelower
5858 [(V8DF "v8si") (V4DF "v4si")])

;; Signed SImode-vector -> double-vector conversion (vcvtdq2pd),
;; optionally masked via the <mask_name> subst.
5860 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
5861 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5862 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5863 "TARGET_AVX && <mask_mode512bit_condition>"
5864 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5865 [(set_attr "type" "ssecvt")
5866 (set_attr "prefix" "maybe_vex")
5867 (set_attr "mode" "<MODE>")])

;; Signed/unsigned QImode-quadword-vector -> double conversion
;; (vcvtqq2pd / vcvtuqq2pd), with masking and embedded rounding.
;; NOTE(review): the insn condition (originally line 5873) is missing
;; from this excerpt.
5869 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
5870 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5871 (any_float:VF2_AVX512VL
5872 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5874 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5875 [(set_attr "type" "ssecvt")
5876 (set_attr "prefix" "evex")
5877 (set_attr "mode" "<MODE>")])

;; Attributes for the DImode-vector -> SFmode-vector patterns:
;; qq2pssuff selects the {y} mnemonic suffix for the 256-bit source.
5879 ;; For float<floatunssuffix><sselongvecmodelower><mode> insn patterns
5880 (define_mode_attr qq2pssuff
5881 [(V8SF "") (V4SF "{y}")])

5883 (define_mode_attr sselongvecmode
5884 [(V8SF "V8DI") (V4SF "V4DI")])

5886 (define_mode_attr sselongvecmodelower
5887 [(V8SF "v8di") (V4SF "v4di")])

5889 (define_mode_attr sseintvecmode3
5890 [(V8SF "XI") (V4SF "OI")
5891 (V8DF "OI") (V4DF "TI")])
;; Signed/unsigned DImode-vector -> SFmode-vector conversion
;; (vcvtqq2ps / vcvtuqq2ps), AVX512DQ.
5893 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5894 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5895 (any_float:VF1_128_256VL
5896 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5897 "TARGET_AVX512DQ && <round_modev8sf_condition>"
5898 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5899 [(set_attr "type" "ssecvt")
5900 (set_attr "prefix" "evex")
5901 (set_attr "mode" "<MODE>")])

;; V2DI -> V2SF conversion.  The hardware instruction produces a V4SF
;; result, so the expander zeroes the upper half (operand 2 below).
;; NOTE(review): lines 5905, 5907 (the vec_concat wrapper and close of
;; the RTL) are missing from this excerpt.
5903 (define_expand "avx512dq_float<floatunssuffix>v2div2sf2"
5904 [(set (match_operand:V4SF 0 "register_operand" "=v")
5906 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5908 "TARGET_AVX512DQ && TARGET_AVX512VL"
5909 "operands[2] = CONST0_RTX (V2SFmode);")

;; Matching insn: converted low half concatenated with zeros.
5911 (define_insn "*avx512dq_float<floatunssuffix>v2div2sf2"
5912 [(set (match_operand:V4SF 0 "register_operand" "=v")
5914 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5915 (match_operand:V2SF 2 "const0_operand" "C")))]
5916 "TARGET_AVX512DQ && TARGET_AVX512VL"
5917 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5918 [(set_attr "type" "ssecvt")
5919 (set_attr "prefix" "evex")
5920 (set_attr "mode" "V4SF")])

;; Standard-named expander: view the V2SF destination as the low half
;; of a V4SF register and delegate to the AVX512DQ pattern above.
;; NOTE(review): the trailing "DONE;" / brace lines (5930-5931) are
;; missing from this excerpt.
5922 (define_expand "float<floatunssuffix>v2div2sf2"
5923 [(set (match_operand:V2SF 0 "register_operand")
5924 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))]
5925 "TARGET_AVX512DQ && TARGET_AVX512VL"
5927 operands[0] = simplify_gen_subreg (V4SFmode, operands[0], V2SFmode, 0);
5928 emit_insn (gen_avx512dq_float<floatunssuffix>v2div2sf2
5929 (operands[0], operands[1]));
;; Helper mode attributes for vec_pack<floatprefix>_float_<mode>:
;; the concat mode, the temporary per-half result mode, and the name
;; fragment of the per-half conversion pattern.
5933 (define_mode_attr vpckfloat_concat_mode
5934 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5935 (define_mode_attr vpckfloat_temp_mode
5936 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5937 (define_mode_attr vpckfloat_op_mode
5938 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])

;; Convert two DImode vectors to floats and pack them into one
;; SFmode vector: convert each operand separately, then combine with
;; movlhps (V2DI case) or vec_concat (wider cases).
;; NOTE(review): several lines are missing from this excerpt (the
;; "else" branches and the closing arguments/braces, e.g. 5953, 5959,
;; 5961-5963) -- confirm against upstream sse.md.
5940 (define_expand "vec_pack<floatprefix>_float_<mode>"
5941 [(match_operand:<ssePSmode> 0 "register_operand")
5942 (any_float:<ssePSmode>
5943 (match_operand:VI8_AVX512VL 1 "register_operand"))
5944 (match_operand:VI8_AVX512VL 2 "register_operand")]
5947 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5948 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5949 rtx (*gen) (rtx, rtx);
5951 if (<MODE>mode == V2DImode)
5952 gen = gen_avx512dq_float<floatunssuffix>v2div2sf2;
5954 gen = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5955 emit_insn (gen (r1, operands[1]));
5956 emit_insn (gen (r2, operands[2]));
5957 if (<MODE>mode == V2DImode)
5958 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
5960 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],

;; Masked variant of the V2DI -> V2SF conversion: merge with the low
;; half of operand 2 under mask operand 3; upper half zeroed
;; (operand 4, created by the expander).
5965 (define_expand "float<floatunssuffix>v2div2sf2_mask"
5966 [(set (match_operand:V4SF 0 "register_operand" "=v")
5969 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5971 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5972 (parallel [(const_int 0) (const_int 1)]))
5973 (match_operand:QI 3 "register_operand" "Yk"))
5975 "TARGET_AVX512DQ && TARGET_AVX512VL"
5976 "operands[4] = CONST0_RTX (V2SFmode);")

;; Matching masked insn (merge-masking: %N2 prints /z for a zero op2).
5978 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
5979 [(set (match_operand:V4SF 0 "register_operand" "=v")
5982 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5984 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5985 (parallel [(const_int 0) (const_int 1)]))
5986 (match_operand:QI 3 "register_operand" "Yk"))
5987 (match_operand:V2SF 4 "const0_operand" "C")))]
5988 "TARGET_AVX512DQ && TARGET_AVX512VL"
5989 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5990 [(set_attr "type" "ssecvt")
5991 (set_attr "prefix" "evex")
5992 (set_attr "mode" "V4SF")])

;; Zero-masking variant: masked-out lanes are zeroed ({z} modifier).
5994 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5995 [(set (match_operand:V4SF 0 "register_operand" "=v")
5998 (any_float:V2SF (match_operand:V2DI 1
5999 "nonimmediate_operand" "vm"))
6000 (match_operand:V2SF 3 "const0_operand" "C")
6001 (match_operand:QI 2 "register_operand" "Yk"))
6002 (match_operand:V2SF 4 "const0_operand" "C")))]
6003 "TARGET_AVX512DQ && TARGET_AVX512VL"
6004 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6005 [(set_attr "type" "ssecvt")
6006 (set_attr "prefix" "evex")
6007 (set_attr "mode" "V4SF")])
;; Unsigned SImode-vector -> double-vector conversion (vcvtudq2pd).
;; NOTE(review): the insn condition (originally line 6013) is missing
;; from this excerpt.
6009 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
6010 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
6011 (unsigned_float:VF2_512_256VL
6012 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
6014 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6015 [(set_attr "type" "ssecvt")
6016 (set_attr "prefix" "evex")
6017 (set_attr "mode" "<MODE>")])

;; 128-bit form: convert the low two elements of a V4SI (a vec_select
;; whose opening line is not visible here) to V2DF.
6019 (define_insn "ufloatv2siv2df2<mask_name>"
6020 [(set (match_operand:V2DF 0 "register_operand" "=v")
6021 (unsigned_float:V2DF
6023 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
6024 (parallel [(const_int 0) (const_int 1)]))))]
6026 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6027 [(set_attr "type" "ssecvt")
6028 (set_attr "prefix" "evex")
6029 (set_attr "mode" "V2DF")])

;; Widening signed int -> double conversions that take the low half of
;; a wider integer vector (%t1 / %x1 print the low 256/128 bits).
6031 (define_insn "avx512f_cvtdq2pd512_2"
6032 [(set (match_operand:V8DF 0 "register_operand" "=v")
6035 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
6036 (parallel [(const_int 0) (const_int 1)
6037 (const_int 2) (const_int 3)
6038 (const_int 4) (const_int 5)
6039 (const_int 6) (const_int 7)]))))]
6041 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
6042 [(set_attr "type" "ssecvt")
6043 (set_attr "prefix" "evex")
6044 (set_attr "mode" "V8DF")])

6046 (define_insn "avx_cvtdq2pd256_2"
6047 [(set (match_operand:V4DF 0 "register_operand" "=v")
6050 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
6051 (parallel [(const_int 0) (const_int 1)
6052 (const_int 2) (const_int 3)]))))]
6054 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
6055 [(set_attr "type" "ssecvt")
6056 (set_attr "prefix" "maybe_evex")
6057 (set_attr "mode" "V4DF")])

;; SSE2 baseline: low two V4SI elements -> V2DF.
6059 (define_insn "sse2_cvtdq2pd<mask_name>"
6060 [(set (match_operand:V2DF 0 "register_operand" "=v")
6063 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
6064 (parallel [(const_int 0) (const_int 1)]))))]
6065 "TARGET_SSE2 && <mask_avx512vl_condition>"
6066 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6067 [(set_attr "type" "ssecvt")
6068 (set_attr "prefix" "maybe_vex")
6069 (set_attr "mode" "V2DF")])
;; Double-vector -> signed-int-vector conversions using the current
;; rounding mode (UNSPEC_FIX_NOTRUNC -> vcvtpd2dq).
6071 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
6072 [(set (match_operand:V8SI 0 "register_operand" "=v")
6074 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
6075 UNSPEC_FIX_NOTRUNC))]
6077 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6078 [(set_attr "type" "ssecvt")
6079 (set_attr "prefix" "evex")
6080 (set_attr "mode" "OI")])

;; 256-bit source ({y} disambiguates the AT&T mnemonic).
6082 (define_insn "avx_cvtpd2dq256<mask_name>"
6083 [(set (match_operand:V4SI 0 "register_operand" "=v")
6084 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
6085 UNSPEC_FIX_NOTRUNC))]
6086 "TARGET_AVX && <mask_avx512vl_condition>"
6087 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6088 [(set_attr "type" "ssecvt")
6089 (set_attr "prefix" "<mask_prefix>")
6090 (set_attr "mode" "OI")])

;; Variant producing a V8SI with the upper half zeroed (operand 2 is
;; created by the expander).
;; NOTE(review): several lines of this expand/insn pair (e.g. 6094,
;; 6096-6098, 6103, 6105, 6107) are missing from this excerpt.
6092 (define_expand "avx_cvtpd2dq256_2"
6093 [(set (match_operand:V8SI 0 "register_operand")
6095 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
6099 "operands[2] = CONST0_RTX (V4SImode);")

6101 (define_insn "*avx_cvtpd2dq256_2"
6102 [(set (match_operand:V8SI 0 "register_operand" "=v")
6104 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
6106 (match_operand:V4SI 2 "const0_operand")))]
6108 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
6109 [(set_attr "type" "ssecvt")
6110 (set_attr "prefix" "vex")
6111 (set_attr "btver2_decode" "vector")
6112 (set_attr "mode" "OI")])

;; SSE2 baseline: V2DF -> low half of V4SI, upper half zeroed.
;; Emits the VEX form when available, legacy encoding otherwise.
6114 (define_insn "sse2_cvtpd2dq"
6115 [(set (match_operand:V4SI 0 "register_operand" "=v")
6117 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
6119 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6123 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
6125 return "cvtpd2dq\t{%1, %0|%0, %1}";
6127 [(set_attr "type" "ssecvt")
6128 (set_attr "prefix_rep" "1")
6129 (set_attr "prefix_data16" "0")
6130 (set_attr "prefix" "maybe_vex")
6131 (set_attr "mode" "TI")
6132 (set_attr "amdfam10_decode" "double")
6133 (set_attr "athlon_decode" "vector")
6134 (set_attr "bdver1_decode" "double")])

;; Merge-masked variant: result lanes merged with the low half of
;; operand 2 under mask operand 3.
6136 (define_insn "sse2_cvtpd2dq_mask"
6137 [(set (match_operand:V4SI 0 "register_operand" "=v")
6140 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6143 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6144 (parallel [(const_int 0) (const_int 1)]))
6145 (match_operand:QI 3 "register_operand" "Yk"))
6146 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6148 "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6149 [(set_attr "type" "ssecvt")
6150 (set_attr "prefix" "evex")
6151 (set_attr "mode" "TI")])

;; Zero-masking variant ({z}).
6153 (define_insn "*sse2_cvtpd2dq_mask_1"
6154 [(set (match_operand:V4SI 0 "register_operand" "=v")
6157 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6159 (const_vector:V2SI [(const_int 0) (const_int 0)])
6160 (match_operand:QI 2 "register_operand" "Yk"))
6161 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6163 "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6164 [(set_attr "type" "ssecvt")
6165 (set_attr "prefix" "evex")
6166 (set_attr "mode" "TI")])
;; Double-vector -> unsigned-int-vector conversions with the current
;; rounding mode (UNSPEC_UNSIGNED_FIX_NOTRUNC -> vcvtpd2udq).
6168 ;; For ufix_notrunc* insn patterns
6169 (define_mode_attr pd2udqsuff
6170 [(V8DF "") (V4DF "{y}")])

6172 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
6173 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
6175 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
6176 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6178 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6179 [(set_attr "type" "ssecvt")
6180 (set_attr "prefix" "evex")
6181 (set_attr "mode" "<sseinsnmode>")])

;; 128-bit form: V2DF -> low half of V4SI, upper half zeroed.
6183 (define_insn "ufix_notruncv2dfv2si2"
6184 [(set (match_operand:V4SI 0 "register_operand" "=v")
6187 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6188 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6189 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6191 "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
6192 [(set_attr "type" "ssecvt")
6193 (set_attr "prefix" "evex")
6194 (set_attr "mode" "TI")])

;; Merge-masked variant.
6196 (define_insn "ufix_notruncv2dfv2si2_mask"
6197 [(set (match_operand:V4SI 0 "register_operand" "=v")
6201 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6202 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6204 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6205 (parallel [(const_int 0) (const_int 1)]))
6206 (match_operand:QI 3 "register_operand" "Yk"))
6207 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6209 "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6210 [(set_attr "type" "ssecvt")
6211 (set_attr "prefix" "evex")
6212 (set_attr "mode" "TI")])

;; Zero-masking variant ({z}).
6214 (define_insn "*ufix_notruncv2dfv2si2_mask_1"
6215 [(set (match_operand:V4SI 0 "register_operand" "=v")
6219 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6220 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6221 (const_vector:V2SI [(const_int 0) (const_int 0)])
6222 (match_operand:QI 2 "register_operand" "Yk"))
6223 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6225 "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6226 [(set_attr "type" "ssecvt")
6227 (set_attr "prefix" "evex")
6228 (set_attr "mode" "TI")])
;; Truncating double -> signed/unsigned int conversions (vcvttpd2dq /
;; vcvttpd2udq), plus the AVX512DQ pd<->qq conversions.
6230 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
6231 [(set (match_operand:V8SI 0 "register_operand" "=v")
6233 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6235 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6236 [(set_attr "type" "ssecvt")
6237 (set_attr "prefix" "evex")
6238 (set_attr "mode" "OI")])

;; 128-bit unsigned truncation: V2DF -> low half of V4SI, upper zeroed.
6240 (define_insn "ufix_truncv2dfv2si2"
6241 [(set (match_operand:V4SI 0 "register_operand" "=v")
6243 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6244 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6246 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
6247 [(set_attr "type" "ssecvt")
6248 (set_attr "prefix" "evex")
6249 (set_attr "mode" "TI")])

;; Merge-masked variant.
6251 (define_insn "ufix_truncv2dfv2si2_mask"
6252 [(set (match_operand:V4SI 0 "register_operand" "=v")
6255 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6257 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6258 (parallel [(const_int 0) (const_int 1)]))
6259 (match_operand:QI 3 "register_operand" "Yk"))
6260 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6262 "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6263 [(set_attr "type" "ssecvt")
6264 (set_attr "prefix" "evex")
6265 (set_attr "mode" "TI")])

;; Zero-masking variant ({z}).
6267 (define_insn "*ufix_truncv2dfv2si2_mask_1"
6268 [(set (match_operand:V4SI 0 "register_operand" "=v")
6271 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6272 (const_vector:V2SI [(const_int 0) (const_int 0)])
6273 (match_operand:QI 2 "register_operand" "Yk"))
6274 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6276 "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6277 [(set_attr "type" "ssecvt")
6278 (set_attr "prefix" "evex")
6279 (set_attr "mode" "TI")])

;; 256-bit signed/unsigned truncation.
6281 (define_insn "fix_truncv4dfv4si2<mask_name>"
6282 [(set (match_operand:V4SI 0 "register_operand" "=v")
6283 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6284 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
6285 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6286 [(set_attr "type" "ssecvt")
6287 (set_attr "prefix" "maybe_evex")
6288 (set_attr "mode" "OI")])

6290 (define_insn "ufix_truncv4dfv4si2<mask_name>"
6291 [(set (match_operand:V4SI 0 "register_operand" "=v")
6292 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6293 "TARGET_AVX512VL && TARGET_AVX512F"
6294 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6295 [(set_attr "type" "ssecvt")
6296 (set_attr "prefix" "maybe_evex")
6297 (set_attr "mode" "OI")])

;; AVX512DQ: double -> 64-bit integer, truncating (vcvttpd2qq/uqq).
6299 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
6300 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6301 (any_fix:<sseintvecmode>
6302 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6303 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
6304 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6305 [(set_attr "type" "ssecvt")
6306 (set_attr "prefix" "evex")
6307 (set_attr "mode" "<sseintvecmode2>")])

;; AVX512DQ: double -> 64-bit integer, current rounding mode
;; (vcvtpd2qq / vcvtpd2uqq).
6309 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6310 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6311 (unspec:<sseintvecmode>
6312 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
6313 UNSPEC_FIX_NOTRUNC))]
6314 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6315 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6316 [(set_attr "type" "ssecvt")
6317 (set_attr "prefix" "evex")
6318 (set_attr "mode" "<sseintvecmode2>")])

6320 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6321 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6322 (unspec:<sseintvecmode>
6323 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
6324 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6325 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6326 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6327 [(set_attr "type" "ssecvt")
6328 (set_attr "prefix" "evex")
6329 (set_attr "mode" "<sseintvecmode2>")])
;; Truncating float -> 64-bit integer conversions (vcvttps2qq/uqq)
;; and the vec_unpack fix-trunc expanders built on them.
6331 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
6332 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
6333 (any_fix:<sselongvecmode>
6334 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6335 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
6336 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6337 [(set_attr "type" "ssecvt")
6338 (set_attr "prefix" "evex")
6339 (set_attr "mode" "<sseintvecmode3>")])

;; Low two elements of a V4SF -> V2DI, truncating.
6341 (define_insn "avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
6342 [(set (match_operand:V2DI 0 "register_operand" "=v")
6345 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6346 (parallel [(const_int 0) (const_int 1)]))))]
6347 "TARGET_AVX512DQ && TARGET_AVX512VL"
6348 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6349 [(set_attr "type" "ssecvt")
6350 (set_attr "prefix" "evex")
6351 (set_attr "mode" "TI")])

;; Standard-named expander: view the V2SF source as the low half of a
;; V4SF register and delegate to the AVX512DQ pattern above.
;; NOTE(review): trailing "DONE;" / brace lines (6362-6363) are missing
;; from this excerpt.
6353 (define_expand "fix<fixunssuffix>_truncv2sfv2di2"
6354 [(set (match_operand:V2DI 0 "register_operand")
6356 (match_operand:V2SF 1 "register_operand")))]
6357 "TARGET_AVX512DQ && TARGET_AVX512VL"
6359 operands[1] = simplify_gen_subreg (V4SFmode, operands[1], V2SFmode, 0);
6360 emit_insn (gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2
6361 (operands[0], operands[1]));

;; Helper mode attributes for the vec_unpack fix-trunc expanders.
6365 (define_mode_attr vunpckfixt_mode
6366 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
6367 (define_mode_attr vunpckfixt_model
6368 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
6369 (define_mode_attr vunpckfixt_extract_mode
6370 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])

;; Unpack the low half of a float vector and fix-truncate it to a
;; double-width integer vector.  For V4SF the low half is reached via
;; the v2sf pattern directly; wider modes extract the low half first.
;; NOTE(review): some lines (e.g. the else branch at 6388, closing
;; braces) are missing from this excerpt.
6372 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
6373 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6374 (any_fix:<vunpckfixt_mode>
6375 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6378 rtx tem = operands[1];
6379 rtx (*gen) (rtx, rtx);
6381 if (<MODE>mode != V4SFmode)
6383 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6384 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
6386 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6389 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6391 emit_insn (gen (operands[0], tem));

;; High-half counterpart; for V4SF the upper two elements are swapped
;; into the low half with vpermilps (imm 0x4e) first.
6395 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6396 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6397 (any_fix:<vunpckfixt_mode>
6398 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6402 rtx (*gen) (rtx, rtx);
6404 if (<MODE>mode != V4SFmode)
6406 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6407 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6409 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6413 tem = gen_reg_rtx (V4SFmode);
6414 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6415 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6418 emit_insn (gen (operands[0], tem));

;; Truncating float -> unsigned int (vcvttps2udq).
;; NOTE(review): the insn condition (originally line 6426) is missing.
6422 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6423 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6424 (unsigned_fix:<sseintvecmode>
6425 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6427 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6428 [(set_attr "type" "ssecvt")
6429 (set_attr "prefix" "evex")
6430 (set_attr "mode" "<sseintvecmode2>")])

;; V4DF truncation into a V8SI with zeroed upper half (operand 2 is
;; created by the expander).
;; NOTE(review): lines 6434, 6436-6437 are missing from this excerpt.
6432 (define_expand "avx_cvttpd2dq256_2"
6433 [(set (match_operand:V8SI 0 "register_operand")
6435 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6438 "operands[2] = CONST0_RTX (V4SImode);")
;; SSE2 baseline truncating conversion: V2DF -> low half of V4SI,
;; upper half zeroed.  VEX form when available, legacy otherwise.
6440 (define_insn "sse2_cvttpd2dq"
6441 [(set (match_operand:V4SI 0 "register_operand" "=v")
6443 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6444 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6448 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
6450 return "cvttpd2dq\t{%1, %0|%0, %1}";
6452 [(set_attr "type" "ssecvt")
6453 (set_attr "amdfam10_decode" "double")
6454 (set_attr "athlon_decode" "vector")
6455 (set_attr "bdver1_decode" "double")
6456 (set_attr "prefix" "maybe_vex")
6457 (set_attr "mode" "TI")])

;; Merge-masked variant: merged with the low half of operand 2 under
;; mask operand 3 (%N2 prints /z for a zero op2).
6459 (define_insn "sse2_cvttpd2dq_mask"
6460 [(set (match_operand:V4SI 0 "register_operand" "=v")
6463 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6465 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6466 (parallel [(const_int 0) (const_int 1)]))
6467 (match_operand:QI 3 "register_operand" "Yk"))
6468 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6470 "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6471 [(set_attr "type" "ssecvt")
6472 (set_attr "prefix" "evex")
6473 (set_attr "mode" "TI")])

;; Zero-masking variant ({z}).
6475 (define_insn "*sse2_cvttpd2dq_mask_1"
6476 [(set (match_operand:V4SI 0 "register_operand" "=v")
6479 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6480 (const_vector:V2SI [(const_int 0) (const_int 0)])
6481 (match_operand:QI 2 "register_operand" "Yk"))
6482 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6484 "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6485 [(set_attr "type" "ssecvt")
6486 (set_attr "prefix" "evex")
6487 (set_attr "mode" "TI")])
;; Scalar float <-> double conversions that merge the result into the
;; low element of a vector (cvtsd2ss / cvtss2sd and their VEX/EVEX
;; three-operand forms).
6489 (define_insn "sse2_cvtsd2ss<mask_name><round_name>"
6490 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6493 (float_truncate:V2SF
6494 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6495 (match_operand:V4SF 1 "register_operand" "0,0,v")
6499 cvtsd2ss\t{%2, %0|%0, %2}
6500 cvtsd2ss\t{%2, %0|%0, %q2}
6501 vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}"
6502 [(set_attr "isa" "noavx,noavx,avx")
6503 (set_attr "type" "ssecvt")
6504 (set_attr "athlon_decode" "vector,double,*")
6505 (set_attr "amdfam10_decode" "vector,double,*")
6506 (set_attr "bdver1_decode" "direct,direct,*")
6507 (set_attr "btver2_decode" "double,double,double")
6508 (set_attr "prefix" "orig,orig,<round_prefix>")
6509 (set_attr "mode" "SF")])

;; Scalar DF source variant (no vector wrapper around the source).
6511 (define_insn "*sse2_vd_cvtsd2ss"
6512 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6515 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6516 (match_operand:V4SF 1 "register_operand" "0,0,v")
6520 cvtsd2ss\t{%2, %0|%0, %2}
6521 cvtsd2ss\t{%2, %0|%0, %2}
6522 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6523 [(set_attr "isa" "noavx,noavx,avx")
6524 (set_attr "type" "ssecvt")
6525 (set_attr "athlon_decode" "vector,double,*")
6526 (set_attr "amdfam10_decode" "vector,double,*")
6527 (set_attr "bdver1_decode" "direct,direct,*")
6528 (set_attr "btver2_decode" "double,double,double")
6529 (set_attr "prefix" "orig,orig,vex")
6530 (set_attr "mode" "SF")])

;; Widening counterpart: low element of a V4SF extended to DF and
;; merged into a V2DF.  Supports the SAE modifier.
6532 (define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>"
6533 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6537 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6538 (parallel [(const_int 0) (const_int 1)])))
6539 (match_operand:V2DF 1 "register_operand" "0,0,v")
6543 cvtss2sd\t{%2, %0|%0, %2}
6544 cvtss2sd\t{%2, %0|%0, %k2}
6545 vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}"
6546 [(set_attr "isa" "noavx,noavx,avx")
6547 (set_attr "type" "ssecvt")
6548 (set_attr "amdfam10_decode" "vector,double,*")
6549 (set_attr "athlon_decode" "direct,direct,*")
6550 (set_attr "bdver1_decode" "direct,direct,*")
6551 (set_attr "btver2_decode" "double,double,double")
6552 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6553 (set_attr "mode" "DF")])

;; Scalar SF source variant.
6555 (define_insn "*sse2_vd_cvtss2sd"
6556 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6559 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6560 (match_operand:V2DF 1 "register_operand" "0,0,v")
6564 cvtss2sd\t{%2, %0|%0, %2}
6565 cvtss2sd\t{%2, %0|%0, %2}
6566 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6567 [(set_attr "isa" "noavx,noavx,avx")
6568 (set_attr "type" "ssecvt")
6569 (set_attr "amdfam10_decode" "vector,double,*")
6570 (set_attr "athlon_decode" "direct,direct,*")
6571 (set_attr "bdver1_decode" "direct,direct,*")
6572 (set_attr "btver2_decode" "double,double,double")
6573 (set_attr "prefix" "orig,orig,vex")
6574 (set_attr "mode" "DF")])
;; Narrowing double-vector -> float-vector conversions (vcvtpd2ps).
6576 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6577 [(set (match_operand:V8SF 0 "register_operand" "=v")
6578 (float_truncate:V8SF
6579 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6581 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6582 [(set_attr "type" "ssecvt")
6583 (set_attr "prefix" "evex")
6584 (set_attr "mode" "V8SF")])

;; 256-bit source ({y} disambiguates the AT&T mnemonic).
6586 (define_insn "avx_cvtpd2ps256<mask_name>"
6587 [(set (match_operand:V4SF 0 "register_operand" "=v")
6588 (float_truncate:V4SF
6589 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6590 "TARGET_AVX && <mask_avx512vl_condition>"
6591 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6592 [(set_attr "type" "ssecvt")
6593 (set_attr "prefix" "maybe_evex")
6594 (set_attr "btver2_decode" "vector")
6595 (set_attr "mode" "V4SF")])

;; SSE2 baseline expanders: V2DF -> low half of V4SF, upper half
;; zeroed (operand 2 / operand 4 created here).
;; NOTE(review): parts of the RTL templates (e.g. lines 6599, 6602-6603,
;; 6608-6609, 6612, 6616-6617) are missing from this excerpt.
6597 (define_expand "sse2_cvtpd2ps"
6598 [(set (match_operand:V4SF 0 "register_operand")
6600 (float_truncate:V2SF
6601 (match_operand:V2DF 1 "vector_operand"))
6604 "operands[2] = CONST0_RTX (V2SFmode);")

6606 (define_expand "sse2_cvtpd2ps_mask"
6607 [(set (match_operand:V4SF 0 "register_operand")
6610 (float_truncate:V2SF
6611 (match_operand:V2DF 1 "vector_operand"))
6613 (match_operand:V4SF 2 "nonimm_or_0_operand")
6614 (parallel [(const_int 0) (const_int 1)]))
6615 (match_operand:QI 3 "register_operand"))
6618 "operands[4] = CONST0_RTX (V2SFmode);")

;; Matching insn; VEX form when available, legacy otherwise.
6620 (define_insn "*sse2_cvtpd2ps"
6621 [(set (match_operand:V4SF 0 "register_operand" "=v")
6623 (float_truncate:V2SF
6624 (match_operand:V2DF 1 "vector_operand" "vBm"))
6625 (match_operand:V2SF 2 "const0_operand" "C")))]
6629 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6631 return "cvtpd2ps\t{%1, %0|%0, %1}";
6633 [(set_attr "type" "ssecvt")
6634 (set_attr "amdfam10_decode" "double")
6635 (set_attr "athlon_decode" "vector")
6636 (set_attr "bdver1_decode" "double")
6637 (set_attr "prefix_data16" "1")
6638 (set_attr "prefix" "maybe_vex")
6639 (set_attr "mode" "V4SF")])

;; V2DF -> V2SF for MMX-with-SSE targets (result lives in the low half
;; of an XMM register).
6641 (define_insn "truncv2dfv2sf2"
6642 [(set (match_operand:V2SF 0 "register_operand" "=v")
6643 (float_truncate:V2SF
6644 (match_operand:V2DF 1 "vector_operand" "vBm")))]
6645 "TARGET_MMX_WITH_SSE"
6648 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6650 return "cvtpd2ps\t{%1, %0|%0, %1}";
6652 [(set_attr "type" "ssecvt")
6653 (set_attr "amdfam10_decode" "double")
6654 (set_attr "athlon_decode" "vector")
6655 (set_attr "bdver1_decode" "double")
6656 (set_attr "prefix_data16" "1")
6657 (set_attr "prefix" "maybe_vex")
6658 (set_attr "mode" "V4SF")])

;; Merge-masked variant.
6660 (define_insn "*sse2_cvtpd2ps_mask"
6661 [(set (match_operand:V4SF 0 "register_operand" "=v")
6664 (float_truncate:V2SF
6665 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6667 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6668 (parallel [(const_int 0) (const_int 1)]))
6669 (match_operand:QI 3 "register_operand" "Yk"))
6670 (match_operand:V2SF 4 "const0_operand" "C")))]
6672 "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6673 [(set_attr "type" "ssecvt")
6674 (set_attr "prefix" "evex")
6675 (set_attr "mode" "V4SF")])

;; Zero-masking variant ({z}).
6677 (define_insn "*sse2_cvtpd2ps_mask_1"
6678 [(set (match_operand:V4SF 0 "register_operand" "=v")
6681 (float_truncate:V2SF
6682 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6683 (match_operand:V2SF 3 "const0_operand" "C")
6684 (match_operand:QI 2 "register_operand" "Yk"))
6685 (match_operand:V2SF 4 "const0_operand" "C")))]
6687 "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6688 [(set_attr "type" "ssecvt")
6689 (set_attr "prefix" "evex")
6690 (set_attr "mode" "V4SF")])
;; Widening float-vector -> double-vector conversions (vcvtps2pd).
6692 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
6693 (define_mode_attr sf2dfmode
6694 [(V8DF "V8SF") (V4DF "V4SF")])
6695 (define_mode_attr sf2dfmode_lower
6696 [(V8DF "v8sf") (V4DF "v4sf")])

;; Standard-named trunc/extend expanders; bodies (conditions and any
;; preparation code, originally lines 6702-6703 and 6708-6709) are not
;; visible in this excerpt.
6698 (define_expand "trunc<mode><sf2dfmode_lower>2"
6699 [(set (match_operand:<sf2dfmode> 0 "register_operand")
6700 (float_truncate:<sf2dfmode>
6701 (match_operand:VF2_512_256 1 "vector_operand")))]

6704 (define_expand "extend<sf2dfmode_lower><mode>2"
6705 [(set (match_operand:VF2_512_256 0 "register_operand")
6706 (float_extend:VF2_512_256
6707 (match_operand:<sf2dfmode> 1 "vector_operand")))]

;; The insn itself, with masking and SAE support.
6710 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6711 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6712 (float_extend:VF2_512_256
6713 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6714 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6715 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6716 [(set_attr "type" "ssecvt")
6717 (set_attr "prefix" "maybe_vex")
6718 (set_attr "mode" "<MODE>")])

;; Low half of a V8SF -> V4DF (%x1 prints the low 128 bits).
6720 (define_insn "*avx_cvtps2pd256_2"
6721 [(set (match_operand:V4DF 0 "register_operand" "=v")
6724 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6725 (parallel [(const_int 0) (const_int 1)
6726 (const_int 2) (const_int 3)]))))]
6728 "vcvtps2pd\t{%x1, %0|%0, %x1}"
6729 [(set_attr "type" "ssecvt")
6730 (set_attr "prefix" "vex")
6731 (set_attr "mode" "V4DF")])

;; Low half of a V16SF -> V8DF (%t1 prints the low 256 bits).
6733 (define_insn "vec_unpacks_lo_v16sf"
6734 [(set (match_operand:V8DF 0 "register_operand" "=v")
6737 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6738 (parallel [(const_int 0) (const_int 1)
6739 (const_int 2) (const_int 3)
6740 (const_int 4) (const_int 5)
6741 (const_int 6) (const_int 7)]))))]
6743 "vcvtps2pd\t{%t1, %0|%0, %t1}"
6744 [(set_attr "type" "ssecvt")
6745 (set_attr "prefix" "evex")
6746 (set_attr "mode" "V8DF")])
;; Vector <-> mask-register conversions (vpmov{b,w,d,q}2m and
;; vpmovm2{b,w,d,q}), AVX512BW/AVX512DQ.
;; NOTE(review): the insn/expand conditions (e.g. lines 6753, 6763,
;; 6774-6775) are missing from this excerpt.
6748 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6749 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6750 (unspec:<avx512fmaskmode>
6751 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6752 UNSPEC_CVTINT2MASK))]
6754 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6755 [(set_attr "prefix" "evex")
6756 (set_attr "mode" "<sseinsnmode>")])

6758 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6759 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6760 (unspec:<avx512fmaskmode>
6761 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6762 UNSPEC_CVTINT2MASK))]
6764 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6765 [(set_attr "prefix" "evex")
6766 (set_attr "mode" "<sseinsnmode>")])

;; Mask -> vector: each mask bit selects all-ones (op 2) or zero
;; (op 3); the expander materializes those constants.
6768 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6769 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6770 (vec_merge:VI12_AVX512VL
6773 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6776 operands[2] = CONSTM1_RTX (<MODE>mode);
6777 operands[3] = CONST0_RTX (<MODE>mode);

6780 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6781 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6782 (vec_merge:VI12_AVX512VL
6783 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6784 (match_operand:VI12_AVX512VL 3 "const0_operand")
6785 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6787 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6788 [(set_attr "prefix" "evex")
6789 (set_attr "mode" "<sseinsnmode>")])

6791 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6792 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6793 (vec_merge:VI48_AVX512VL
6796 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6799 operands[2] = CONSTM1_RTX (<MODE>mode);
6800 operands[3] = CONST0_RTX (<MODE>mode);

;; D/Q element version: uses vpmovm2d/q with AVX512DQ, otherwise a
;; zero-masked vpternlog (imm 0x81 = all-ones) as fallback.
6803 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6804 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
6805 (vec_merge:VI48_AVX512VL
6806 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6807 (match_operand:VI48_AVX512VL 3 "const0_operand")
6808 (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
6811 vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
6812 vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
6813 [(set_attr "isa" "avx512dq,*")
6814 (set_attr "length_immediate" "0,1")
6815 (set_attr "prefix" "evex")
6816 (set_attr "mode" "<sseinsnmode>")])
;; SF->DF widening of the two low elements of a V4SF (cvtps2pd), with
;; optional AVX-512 masking via the <mask_name>/<mask_operand2> machinery.
;; %q1 prints the memory operand at quarter (64-bit) width in Intel syntax.
6818 (define_insn "sse2_cvtps2pd<mask_name>"
6819 [(set (match_operand:V2DF 0 "register_operand" "=v")
6822 (match_operand:V4SF 1 "vector_operand" "vm")
6823 (parallel [(const_int 0) (const_int 1)]))))]
6824 "TARGET_SSE2 && <mask_avx512vl_condition>"
6825 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6826 [(set_attr "type" "ssecvt")
6827 (set_attr "amdfam10_decode" "direct")
6828 (set_attr "athlon_decode" "double")
6829 (set_attr "bdver1_decode" "double")
6830 (set_attr "prefix_data16" "0")
6831 (set_attr "prefix" "maybe_vex")
6832 (set_attr "mode" "V2DF")])
;; V2SF (MMX-sized vector held in an SSE register) -> V2DF extension,
;; available when 64-bit vectors are emulated with SSE
;; (TARGET_MMX_WITH_SSE); also uses cvtps2pd.
6834 (define_insn "extendv2sfv2df2"
6835 [(set (match_operand:V2DF 0 "register_operand" "=v")
6837 (match_operand:V2SF 1 "register_operand" "v")))]
6838 "TARGET_MMX_WITH_SSE"
6839 "%vcvtps2pd\t{%1, %0|%0, %1}"
6840 [(set_attr "type" "ssecvt")
6841 (set_attr "amdfam10_decode" "direct")
6842 (set_attr "athlon_decode" "double")
6843 (set_attr "bdver1_decode" "double")
6844 (set_attr "prefix_data16" "0")
6845 (set_attr "prefix" "maybe_vex")
6846 (set_attr "mode" "V2DF")])
;; vec_unpacks_hi_*: widen the HIGH half of an SF vector to DF.  The hi
;; variants first move the high elements into a scratch register
;; (operands[2]) with a vec_select, then convert; the lo variants map
;; directly onto the cvtps2pd patterns above.
6848 (define_expand "vec_unpacks_hi_v4sf"
6853 (match_operand:V4SF 1 "vector_operand"))
6854 (parallel [(const_int 6) (const_int 7)
6855 (const_int 2) (const_int 3)])))
6856 (set (match_operand:V2DF 0 "register_operand")
6860 (parallel [(const_int 0) (const_int 1)]))))]
6862 "operands[2] = gen_reg_rtx (V4SFmode);")
6864 (define_expand "vec_unpacks_hi_v8sf"
6867 (match_operand:V8SF 1 "register_operand")
6868 (parallel [(const_int 4) (const_int 5)
6869 (const_int 6) (const_int 7)])))
6870 (set (match_operand:V4DF 0 "register_operand")
6874 "operands[2] = gen_reg_rtx (V4SFmode);")
6876 (define_expand "vec_unpacks_hi_v16sf"
6879 (match_operand:V16SF 1 "register_operand")
6880 (parallel [(const_int 8) (const_int 9)
6881 (const_int 10) (const_int 11)
6882 (const_int 12) (const_int 13)
6883 (const_int 14) (const_int 15)])))
6884 (set (match_operand:V8DF 0 "register_operand")
6888 "operands[2] = gen_reg_rtx (V8SFmode);")
;; vec_unpacks_lo_*: widen the LOW half; no scratch needed, the
;; vec_select of the low elements matches the cvtps2pd insns directly.
6890 (define_expand "vec_unpacks_lo_v4sf"
6891 [(set (match_operand:V2DF 0 "register_operand")
6894 (match_operand:V4SF 1 "vector_operand")
6895 (parallel [(const_int 0) (const_int 1)]))))]
6898 (define_expand "vec_unpacks_lo_v8sf"
6899 [(set (match_operand:V4DF 0 "register_operand")
6902 (match_operand:V8SF 1 "nonimmediate_operand")
6903 (parallel [(const_int 0) (const_int 1)
6904 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float mode produced when half of
;; its elements are unpacked and converted (e.g. V8HI -> V4SF).
6907 (define_mode_attr sseunpackfltmode
6908 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
6909 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Signed HI-element unpack-and-convert, high half: widen via
;; vec_unpacks_hi, then FLOAT the widened integers into
;; <sseunpackfltmode>.  tmp holds the intermediate integer vector.
6911 (define_expand "vec_unpacks_float_hi_<mode>"
6912 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6913 (match_operand:VI2_AVX512F 1 "register_operand")]
6916 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6918 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6919 emit_insn (gen_rtx_SET (operands[0],
6920 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Same, low half.
6924 (define_expand "vec_unpacks_float_lo_<mode>"
6925 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6926 (match_operand:VI2_AVX512F 1 "register_operand")]
6929 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6931 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6932 emit_insn (gen_rtx_SET (operands[0],
6933 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned variants: zero-extend with vec_unpacku_*, then FLOAT.
;; The widened value always fits in the signed range of the wider
;; integer mode, so a plain signed FLOAT is correct here.
6937 (define_expand "vec_unpacku_float_hi_<mode>"
6938 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6939 (match_operand:VI2_AVX512F 1 "register_operand")]
6942 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6944 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6945 emit_insn (gen_rtx_SET (operands[0],
6946 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6950 (define_expand "vec_unpacku_float_lo_<mode>"
6951 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6952 (match_operand:VI2_AVX512F 1 "register_operand")]
6955 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6957 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6958 emit_insn (gen_rtx_SET (operands[0],
6959 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed SI -> DF unpack-and-convert expands.  The hi variants first
;; select the high SI elements into a fresh scratch (operands[2]), then
;; convert; the lo variants select the low elements and convert directly.
6963 (define_expand "vec_unpacks_float_hi_v4si"
6966 (match_operand:V4SI 1 "vector_operand")
6967 (parallel [(const_int 2) (const_int 3)
6968 (const_int 2) (const_int 3)])))
6969 (set (match_operand:V2DF 0 "register_operand")
6973 (parallel [(const_int 0) (const_int 1)]))))]
6975 "operands[2] = gen_reg_rtx (V4SImode);")
6977 (define_expand "vec_unpacks_float_lo_v4si"
6978 [(set (match_operand:V2DF 0 "register_operand")
6981 (match_operand:V4SI 1 "vector_operand")
6982 (parallel [(const_int 0) (const_int 1)]))))]
6985 (define_expand "vec_unpacks_float_hi_v8si"
6988 (match_operand:V8SI 1 "vector_operand")
6989 (parallel [(const_int 4) (const_int 5)
6990 (const_int 6) (const_int 7)])))
6991 (set (match_operand:V4DF 0 "register_operand")
6995 "operands[2] = gen_reg_rtx (V4SImode);")
6997 (define_expand "vec_unpacks_float_lo_v8si"
6998 [(set (match_operand:V4DF 0 "register_operand")
7001 (match_operand:V8SI 1 "nonimmediate_operand")
7002 (parallel [(const_int 0) (const_int 1)
7003 (const_int 2) (const_int 3)]))))]
;; 512-bit forms (AVX-512F): V16SI high/low eight elements -> V8DF.
7006 (define_expand "vec_unpacks_float_hi_v16si"
7009 (match_operand:V16SI 1 "nonimmediate_operand")
7010 (parallel [(const_int 8) (const_int 9)
7011 (const_int 10) (const_int 11)
7012 (const_int 12) (const_int 13)
7013 (const_int 14) (const_int 15)])))
7014 (set (match_operand:V8DF 0 "register_operand")
7018 "operands[2] = gen_reg_rtx (V8SImode);")
7020 (define_expand "vec_unpacks_float_lo_v16si"
7021 [(set (match_operand:V8DF 0 "register_operand")
7024 (match_operand:V16SI 1 "nonimmediate_operand")
7025 (parallel [(const_int 0) (const_int 1)
7026 (const_int 2) (const_int 3)
7027 (const_int 4) (const_int 5)
7028 (const_int 6) (const_int 7)]))))]
;; Unsigned SI -> DF conversion without a native unsigned-convert insn:
;; convert as SIGNED, then fix up negative results by adding 2^32.
;; Sequence: cvt (signed) -> compare result < 0 -> AND the mask with a
;; vector of 2^32 -> add the correction.  operands[3] is the zero
;; vector, operands[4] the 2^32 vector, 5..8 are scratch registers.
7031 (define_expand "vec_unpacku_float_hi_v4si"
7034 (match_operand:V4SI 1 "vector_operand")
7035 (parallel [(const_int 2) (const_int 3)
7036 (const_int 2) (const_int 3)])))
7041 (parallel [(const_int 0) (const_int 1)]))))
7043 (lt:V2DF (match_dup 6) (match_dup 3)))
7045 (and:V2DF (match_dup 7) (match_dup 4)))
7046 (set (match_operand:V2DF 0 "register_operand")
7047 (plus:V2DF (match_dup 6) (match_dup 8)))]
7050 REAL_VALUE_TYPE TWO32r;
7054 real_ldexp (&TWO32r, &dconst1, 32);
7055 x = const_double_from_real_value (TWO32r, DFmode);
7057 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
7058 operands[4] = force_reg (V2DFmode,
7059 ix86_build_const_vector (V2DFmode, 1, x));
7061 operands[5] = gen_reg_rtx (V4SImode);
7063 for (i = 6; i < 9; i++)
7064 operands[i] = gen_reg_rtx (V2DFmode);
;; Low-half version of the same signed-convert-plus-fixup trick; no
;; element shuffle is needed first, so the scratch set starts at 5.
7067 (define_expand "vec_unpacku_float_lo_v4si"
7071 (match_operand:V4SI 1 "vector_operand")
7072 (parallel [(const_int 0) (const_int 1)]))))
7074 (lt:V2DF (match_dup 5) (match_dup 3)))
7076 (and:V2DF (match_dup 6) (match_dup 4)))
7077 (set (match_operand:V2DF 0 "register_operand")
7078 (plus:V2DF (match_dup 5) (match_dup 7)))]
7081 REAL_VALUE_TYPE TWO32r;
7085 real_ldexp (&TWO32r, &dconst1, 32);
7086 x = const_double_from_real_value (TWO32r, DFmode);
7088 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
7089 operands[4] = force_reg (V2DFmode,
7090 ix86_build_const_vector (V2DFmode, 1, x));
7092 for (i = 5; i < 8; i++)
7093 operands[i] = gen_reg_rtx (V2DFmode);
;; 256-bit version, emitted entirely from C code: extract high V4SI,
;; signed-convert to V4DF, build a (result < 0) mask, AND it with the
;; 2^32 vector and add the correction.
7096 (define_expand "vec_unpacku_float_hi_v8si"
7097 [(match_operand:V4DF 0 "register_operand")
7098 (match_operand:V8SI 1 "register_operand")]
7101 REAL_VALUE_TYPE TWO32r;
7105 real_ldexp (&TWO32r, &dconst1, 32);
7106 x = const_double_from_real_value (TWO32r, DFmode);
7108 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7109 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7110 tmp[5] = gen_reg_rtx (V4SImode);
7112 for (i = 2; i < 5; i++)
7113 tmp[i] = gen_reg_rtx (V4DFmode);
7114 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
7115 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
7116 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7117 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7118 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; 512-bit unsigned hi conversion: same 2^32-fixup idea, but the
;; compare produces a mask register (k) and the correction is applied
;; with a masked add (gen_addv8df3_mask) instead of an AND.
7122 (define_expand "vec_unpacku_float_hi_v16si"
7123 [(match_operand:V8DF 0 "register_operand")
7124 (match_operand:V16SI 1 "register_operand")]
7127 REAL_VALUE_TYPE TWO32r;
7130 real_ldexp (&TWO32r, &dconst1, 32);
7131 x = const_double_from_real_value (TWO32r, DFmode);
7133 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7134 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7135 tmp[2] = gen_reg_rtx (V8DFmode);
7136 tmp[3] = gen_reg_rtx (V8SImode);
7137 k = gen_reg_rtx (QImode);
7139 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
7140 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
7141 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7142 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7143 emit_move_insn (operands[0], tmp[2]);
;; 256-bit low-half version; gen_avx_cvtdq2pd256_2 converts the low
;; four SI elements directly, then the AND-mask fixup is applied.
7147 (define_expand "vec_unpacku_float_lo_v8si"
7148 [(match_operand:V4DF 0 "register_operand")
7149 (match_operand:V8SI 1 "nonimmediate_operand")]
7152 REAL_VALUE_TYPE TWO32r;
7156 real_ldexp (&TWO32r, &dconst1, 32);
7157 x = const_double_from_real_value (TWO32r, DFmode);
7159 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7160 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7162 for (i = 2; i < 5; i++)
7163 tmp[i] = gen_reg_rtx (V4DFmode);
7164 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
7165 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7166 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7167 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; 512-bit low-half version with the masked-add fixup.
7171 (define_expand "vec_unpacku_float_lo_v16si"
7172 [(match_operand:V8DF 0 "register_operand")
7173 (match_operand:V16SI 1 "nonimmediate_operand")]
7176 REAL_VALUE_TYPE TWO32r;
7179 real_ldexp (&TWO32r, &dconst1, 32);
7180 x = const_double_from_real_value (TWO32r, DFmode);
7182 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7183 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7184 tmp[2] = gen_reg_rtx (V8DFmode);
7185 k = gen_reg_rtx (QImode);
7187 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
7188 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7189 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7190 emit_move_insn (operands[0], tmp[2]);
;; Narrow two DF vectors to one SF vector: truncate each half into a
;; scratch (operands[3]/[4]) and concatenate.
7194 (define_expand "vec_pack_trunc_<mode>"
7196 (float_truncate:<sf2dfmode>
7197 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
7199 (float_truncate:<sf2dfmode>
7200 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
7201 (set (match_operand:<ssePSmode> 0 "register_operand")
7202 (vec_concat:<ssePSmode>
7207 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
7208 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; V2DF pair -> V4SF.  With AVX (and 256-bit ops not discouraged),
;; concatenate to V4DF and do one cvtpd2ps256; otherwise convert each
;; half with cvtpd2ps and join the halves with movlhps.
7211 (define_expand "vec_pack_trunc_v2df"
7212 [(match_operand:V4SF 0 "register_operand")
7213 (match_operand:V2DF 1 "vector_operand")
7214 (match_operand:V2DF 2 "vector_operand")]
7219 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7221 tmp0 = gen_reg_rtx (V4DFmode);
7222 tmp1 = force_reg (V2DFmode, operands[1]);
7224 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7225 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
7229 tmp0 = gen_reg_rtx (V4SFmode);
7230 tmp1 = gen_reg_rtx (V4SFmode);
7232 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
7233 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
7234 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; DF -> signed SI with truncation, packing two inputs into one result:
;; fix_trunc each half, then vec_concat.  512-bit version.
7239 (define_expand "vec_pack_sfix_trunc_v8df"
7240 [(match_operand:V16SI 0 "register_operand")
7241 (match_operand:V8DF 1 "nonimmediate_operand")
7242 (match_operand:V8DF 2 "nonimmediate_operand")]
7247 r1 = gen_reg_rtx (V8SImode);
7248 r2 = gen_reg_rtx (V8SImode);
7250 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
7251 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
7252 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; 256-bit version of the same.
7256 (define_expand "vec_pack_sfix_trunc_v4df"
7257 [(match_operand:V8SI 0 "register_operand")
7258 (match_operand:V4DF 1 "nonimmediate_operand")
7259 (match_operand:V4DF 2 "nonimmediate_operand")]
7264 r1 = gen_reg_rtx (V4SImode);
7265 r2 = gen_reg_rtx (V4SImode);
7267 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
7268 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
7269 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; 128-bit version: AVX path concatenates to V4DF and truncates once;
;; SSE2 path converts each half with cvttpd2dq (results in the low
;; 64 bits) and interleaves the low qwords to form the V4SI result.
7273 (define_expand "vec_pack_sfix_trunc_v2df"
7274 [(match_operand:V4SI 0 "register_operand")
7275 (match_operand:V2DF 1 "vector_operand")
7276 (match_operand:V2DF 2 "vector_operand")]
7279 rtx tmp0, tmp1, tmp2;
7281 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7283 tmp0 = gen_reg_rtx (V4DFmode);
7284 tmp1 = force_reg (V2DFmode, operands[1]);
7286 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7287 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
7291 tmp0 = gen_reg_rtx (V4SImode);
7292 tmp1 = gen_reg_rtx (V4SImode);
7293 tmp2 = gen_reg_rtx (V2DImode);
7295 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
7296 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
7297 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7298 gen_lowpart (V2DImode, tmp0),
7299 gen_lowpart (V2DImode, tmp1)));
7300 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; DF mode -> SI vector mode holding twice as many elements.
7305 (define_mode_attr ssepackfltmode
7306 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; DF -> UNSIGNED SI truncating pack.  V8DF uses the native AVX-512
;; fixuns_trunc insns.  Smaller modes have no unsigned convert: the
;; inputs are pre-adjusted into signed range
;; (ix86_expand_adjust_ufix_to_sfix_si, which also returns an XOR
;; correction in tmp[2]/tmp[3]), packed via the signed sfix_trunc
;; expander, and the result is fixed up with XOR.  The even/odd
;; extraction reorders the per-half corrections to match the packed
;; element order; without AVX2 the V8SI case goes through V8SF lowparts
;; because 256-bit integer shuffles are unavailable.
7308 (define_expand "vec_pack_ufix_trunc_<mode>"
7309 [(match_operand:<ssepackfltmode> 0 "register_operand")
7310 (match_operand:VF2 1 "register_operand")
7311 (match_operand:VF2 2 "register_operand")]
7314 if (<MODE>mode == V8DFmode)
7318 r1 = gen_reg_rtx (V8SImode);
7319 r2 = gen_reg_rtx (V8SImode);
7321 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
7322 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
7323 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7328 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
7329 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
7330 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
7331 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
7332 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
7334 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
7335 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
7339 tmp[5] = gen_reg_rtx (V8SFmode);
7340 ix86_expand_vec_extract_even_odd (tmp[5],
7341 gen_lowpart (V8SFmode, tmp[2]),
7342 gen_lowpart (V8SFmode, tmp[3]), 0);
7343 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
7345 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
7346 operands[0], 0, OPTAB_DIRECT);
7347 if (tmp[6] != operands[0])
7348 emit_move_insn (operands[0], tmp[6]);
;; DF -> SI pack with ROUNDING (cvtpd2dq, not the truncating cvttpd2dq).
;; 512-bit: convert each V8DF input and concatenate the V8SI halves.
7354 (define_expand "avx512f_vec_pack_sfix_v8df"
7355 [(match_operand:V16SI 0 "register_operand")
7356 (match_operand:V8DF 1 "nonimmediate_operand")
7357 (match_operand:V8DF 2 "nonimmediate_operand")]
7362 r1 = gen_reg_rtx (V8SImode);
7363 r2 = gen_reg_rtx (V8SImode);
7365 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
7366 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
7367 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; 256-bit version.
7371 (define_expand "vec_pack_sfix_v4df"
7372 [(match_operand:V8SI 0 "register_operand")
7373 (match_operand:V4DF 1 "nonimmediate_operand")
7374 (match_operand:V4DF 2 "nonimmediate_operand")]
7379 r1 = gen_reg_rtx (V4SImode);
7380 r2 = gen_reg_rtx (V4SImode);
7382 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
7383 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
7384 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; 128-bit version: AVX path widens to V4DF for a single cvtpd2dq256;
;; SSE2 path converts halves and interleaves the low qwords.
7388 (define_expand "vec_pack_sfix_v2df"
7389 [(match_operand:V4SI 0 "register_operand")
7390 (match_operand:V2DF 1 "vector_operand")
7391 (match_operand:V2DF 2 "vector_operand")]
7394 rtx tmp0, tmp1, tmp2;
7396 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7398 tmp0 = gen_reg_rtx (V4DFmode);
7399 tmp1 = force_reg (V2DFmode, operands[1]);
7401 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7402 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
7406 tmp0 = gen_reg_rtx (V4SImode);
7407 tmp1 = gen_reg_rtx (V4SImode);
7408 tmp2 = gen_reg_rtx (V2DImode);
7410 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
7411 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
7412 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7413 gen_lowpart (V2DImode, tmp0),
7414 gen_lowpart (V2DImode, tmp1)));
7415 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7422 ;; Parallel single-precision floating point element swizzling
7424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps expander: legitimize the binary operands (at most one may be
;; memory), emit the insn, and copy back if the destination was fixed up.
7426 (define_expand "sse_movhlps_exp"
7427 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7430 (match_operand:V4SF 1 "nonimmediate_operand")
7431 (match_operand:V4SF 2 "nonimmediate_operand"))
7432 (parallel [(const_int 6)
7438 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7440 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
7442 /* Fix up the destination if needed. */
7443 if (dst != operands[0])
7444 emit_move_insn (operands[0], dst);
;; movhlps insn: move the high pair of op2 into the low pair of the
;; result, keeping op1's high pair.  Alternatives cover SSE/AVX
;; register forms, memory-source forms (movlps of the high half via
;; %H2), and a store form (movhps).
7449 (define_insn "sse_movhlps"
7450 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7453 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7454 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
7455 (parallel [(const_int 6)
7459 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7461 movhlps\t{%2, %0|%0, %2}
7462 vmovhlps\t{%2, %1, %0|%0, %1, %2}
7463 movlps\t{%H2, %0|%0, %H2}
7464 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
7465 %vmovhps\t{%2, %0|%q0, %2}"
7466 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7467 (set_attr "type" "ssemov")
7468 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7469 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movlhps expander: mirror of sse_movhlps_exp for the low-to-high move.
7471 (define_expand "sse_movlhps_exp"
7472 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7475 (match_operand:V4SF 1 "nonimmediate_operand")
7476 (match_operand:V4SF 2 "nonimmediate_operand"))
7477 (parallel [(const_int 0)
7483 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7485 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
7487 /* Fix up the destination if needed. */
7488 if (dst != operands[0])
7489 emit_move_insn (operands[0], dst);
;; movlhps insn: keep op1's low pair, move op2's low pair into the
;; high pair.  Memory forms use movhps/movlps on the appropriate half
;; (%H0 addresses the high 8 bytes of the destination).
7494 (define_insn "sse_movlhps"
7495 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7498 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7499 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
7500 (parallel [(const_int 0)
7504 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
7506 movlhps\t{%2, %0|%0, %2}
7507 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7508 movhps\t{%2, %0|%0, %q2}
7509 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7510 %vmovlps\t{%2, %H0|%H0, %2}"
7511 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7512 (set_attr "type" "ssemov")
7513 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7514 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps: interleave the high SF elements of each 128-bit
;; lane of op1 and op2 (selection indices pair lane-local highs 2,3 /
;; 6,7 / 10,11 / 14,15 with the corresponding elements of op2 at +16).
7516 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
7517 [(set (match_operand:V16SF 0 "register_operand" "=v")
7520 (match_operand:V16SF 1 "register_operand" "v")
7521 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7522 (parallel [(const_int 2) (const_int 18)
7523 (const_int 3) (const_int 19)
7524 (const_int 6) (const_int 22)
7525 (const_int 7) (const_int 23)
7526 (const_int 10) (const_int 26)
7527 (const_int 11) (const_int 27)
7528 (const_int 14) (const_int 30)
7529 (const_int 15) (const_int 31)])))]
7531 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7532 [(set_attr "type" "sselog")
7533 (set_attr "prefix" "evex")
7534 (set_attr "mode" "V16SF")])
7536 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7537 (define_insn "avx_unpckhps256<mask_name>"
7538 [(set (match_operand:V8SF 0 "register_operand" "=v")
7541 (match_operand:V8SF 1 "register_operand" "v")
7542 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7543 (parallel [(const_int 2) (const_int 10)
7544 (const_int 3) (const_int 11)
7545 (const_int 6) (const_int 14)
7546 (const_int 7) (const_int 15)])))]
7547 "TARGET_AVX && <mask_avx512vl_condition>"
7548 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7549 [(set_attr "type" "sselog")
7550 (set_attr "prefix" "vex")
7551 (set_attr "mode" "V8SF")])
;; True cross-lane high interleave for V8SF: since vunpckhps only works
;; within 128-bit lanes, build lane-local low and high unpacks into
;; scratches (operands[3]/[4]) and then select the halves that
;; correspond to a whole-vector high interleave.
7553 (define_expand "vec_interleave_highv8sf"
7557 (match_operand:V8SF 1 "register_operand")
7558 (match_operand:V8SF 2 "nonimmediate_operand"))
7559 (parallel [(const_int 0) (const_int 8)
7560 (const_int 1) (const_int 9)
7561 (const_int 4) (const_int 12)
7562 (const_int 5) (const_int 13)])))
7568 (parallel [(const_int 2) (const_int 10)
7569 (const_int 3) (const_int 11)
7570 (const_int 6) (const_int 14)
7571 (const_int 7) (const_int 15)])))
7572 (set (match_operand:V8SF 0 "register_operand")
7577 (parallel [(const_int 4) (const_int 5)
7578 (const_int 6) (const_int 7)
7579 (const_int 12) (const_int 13)
7580 (const_int 14) (const_int 15)])))]
7583 operands[3] = gen_reg_rtx (V8SFmode);
7584 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave: plain (v)unpckhps, optionally masked.
7587 (define_insn "vec_interleave_highv4sf<mask_name>"
7588 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7591 (match_operand:V4SF 1 "register_operand" "0,v")
7592 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7593 (parallel [(const_int 2) (const_int 6)
7594 (const_int 3) (const_int 7)])))]
7595 "TARGET_SSE && <mask_avx512vl_condition>"
7597 unpckhps\t{%2, %0|%0, %2}
7598 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7599 [(set_attr "isa" "noavx,avx")
7600 (set_attr "type" "sselog")
7601 (set_attr "prefix" "orig,vex")
7602 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps: interleave the low SF elements of each 128-bit
;; lane of op1 and op2.
7604 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7605 [(set (match_operand:V16SF 0 "register_operand" "=v")
7608 (match_operand:V16SF 1 "register_operand" "v")
7609 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7610 (parallel [(const_int 0) (const_int 16)
7611 (const_int 1) (const_int 17)
7612 (const_int 4) (const_int 20)
7613 (const_int 5) (const_int 21)
7614 (const_int 8) (const_int 24)
7615 (const_int 9) (const_int 25)
7616 (const_int 12) (const_int 28)
7617 (const_int 13) (const_int 29)])))]
7619 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7620 [(set_attr "type" "sselog")
7621 (set_attr "prefix" "evex")
7622 (set_attr "mode" "V16SF")])
7624 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7625 (define_insn "avx_unpcklps256<mask_name>"
7626 [(set (match_operand:V8SF 0 "register_operand" "=v")
7629 (match_operand:V8SF 1 "register_operand" "v")
7630 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7631 (parallel [(const_int 0) (const_int 8)
7632 (const_int 1) (const_int 9)
7633 (const_int 4) (const_int 12)
7634 (const_int 5) (const_int 13)])))]
7635 "TARGET_AVX && <mask_avx512vl_condition>"
7636 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7637 [(set_attr "type" "sselog")
7638 (set_attr "prefix" "vex")
7639 (set_attr "mode" "V8SF")])
;; Explicitly-masked 128-bit vunpcklps: result merged with operand 3
;; (register or zero) under mask operand 4.
7641 (define_insn "unpcklps128_mask"
7642 [(set (match_operand:V4SF 0 "register_operand" "=v")
7646 (match_operand:V4SF 1 "register_operand" "v")
7647 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7648 (parallel [(const_int 0) (const_int 4)
7649 (const_int 1) (const_int 5)]))
7650 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7651 (match_operand:QI 4 "register_operand" "Yk")))]
7653 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7654 [(set_attr "type" "sselog")
7655 (set_attr "prefix" "evex")
7656 (set_attr "mode" "V4SF")])
;; True cross-lane low interleave for V8SF, built like the high variant:
;; lane-local low/high unpacks into operands[3]/[4], then a vec_select
;; picks the halves forming the whole-vector low interleave.
7658 (define_expand "vec_interleave_lowv8sf"
7662 (match_operand:V8SF 1 "register_operand")
7663 (match_operand:V8SF 2 "nonimmediate_operand"))
7664 (parallel [(const_int 0) (const_int 8)
7665 (const_int 1) (const_int 9)
7666 (const_int 4) (const_int 12)
7667 (const_int 5) (const_int 13)])))
7673 (parallel [(const_int 2) (const_int 10)
7674 (const_int 3) (const_int 11)
7675 (const_int 6) (const_int 14)
7676 (const_int 7) (const_int 15)])))
7677 (set (match_operand:V8SF 0 "register_operand")
7682 (parallel [(const_int 0) (const_int 1)
7683 (const_int 2) (const_int 3)
7684 (const_int 8) (const_int 9)
7685 (const_int 10) (const_int 11)])))]
7688 operands[3] = gen_reg_rtx (V8SFmode);
7689 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave: plain (v)unpcklps.
7692 (define_insn "vec_interleave_lowv4sf"
7693 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7696 (match_operand:V4SF 1 "register_operand" "0,v")
7697 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7698 (parallel [(const_int 0) (const_int 4)
7699 (const_int 1) (const_int 5)])))]
7702 unpcklps\t{%2, %0|%0, %2}
7703 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7704 [(set_attr "isa" "noavx,avx")
7705 (set_attr "type" "sselog")
7706 (set_attr "prefix" "orig,maybe_evex")
7707 (set_attr "mode" "V4SF")])
7709 ;; These are modeled with the same vec_concat as the others so that we
7710 ;; capture users of shufps that can use the new instructions
;; vmovshdup: duplicate each odd-indexed SF element into the even slot
;; below it (selection indices 1,1,3,3,...).  256-bit form.
7711 (define_insn "avx_movshdup256<mask_name>"
7712 [(set (match_operand:V8SF 0 "register_operand" "=v")
7715 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7717 (parallel [(const_int 1) (const_int 1)
7718 (const_int 3) (const_int 3)
7719 (const_int 5) (const_int 5)
7720 (const_int 7) (const_int 7)])))]
7721 "TARGET_AVX && <mask_avx512vl_condition>"
7722 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7723 [(set_attr "type" "sse")
7724 (set_attr "prefix" "vex")
7725 (set_attr "mode" "V8SF")])
;; 128-bit SSE3 form.
7727 (define_insn "sse3_movshdup<mask_name>"
7728 [(set (match_operand:V4SF 0 "register_operand" "=v")
7731 (match_operand:V4SF 1 "vector_operand" "vBm")
7733 (parallel [(const_int 1)
7737 "TARGET_SSE3 && <mask_avx512vl_condition>"
7738 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7739 [(set_attr "type" "sse")
7740 (set_attr "prefix_rep" "1")
7741 (set_attr "prefix" "maybe_vex")
7742 (set_attr "mode" "V4SF")])
;; 512-bit AVX-512F form.
7744 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7745 [(set (match_operand:V16SF 0 "register_operand" "=v")
7748 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7750 (parallel [(const_int 1) (const_int 1)
7751 (const_int 3) (const_int 3)
7752 (const_int 5) (const_int 5)
7753 (const_int 7) (const_int 7)
7754 (const_int 9) (const_int 9)
7755 (const_int 11) (const_int 11)
7756 (const_int 13) (const_int 13)
7757 (const_int 15) (const_int 15)])))]
7759 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7760 [(set_attr "type" "sse")
7761 (set_attr "prefix" "evex")
7762 (set_attr "mode" "V16SF")])
;; vmovsldup: duplicate each even-indexed SF element into the odd slot
;; above it (selection indices 0,0,2,2,...).  256-bit form.
7764 (define_insn "avx_movsldup256<mask_name>"
7765 [(set (match_operand:V8SF 0 "register_operand" "=v")
7768 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7770 (parallel [(const_int 0) (const_int 0)
7771 (const_int 2) (const_int 2)
7772 (const_int 4) (const_int 4)
7773 (const_int 6) (const_int 6)])))]
7774 "TARGET_AVX && <mask_avx512vl_condition>"
7775 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7776 [(set_attr "type" "sse")
7777 (set_attr "prefix" "vex")
7778 (set_attr "mode" "V8SF")])
;; 128-bit SSE3 form.
7780 (define_insn "sse3_movsldup<mask_name>"
7781 [(set (match_operand:V4SF 0 "register_operand" "=v")
7784 (match_operand:V4SF 1 "vector_operand" "vBm")
7786 (parallel [(const_int 0)
7790 "TARGET_SSE3 && <mask_avx512vl_condition>"
7791 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7792 [(set_attr "type" "sse")
7793 (set_attr "prefix_rep" "1")
7794 (set_attr "prefix" "maybe_vex")
7795 (set_attr "mode" "V4SF")])
;; 512-bit AVX-512F form.
7797 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7798 [(set (match_operand:V16SF 0 "register_operand" "=v")
7801 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7803 (parallel [(const_int 0) (const_int 0)
7804 (const_int 2) (const_int 2)
7805 (const_int 4) (const_int 4)
7806 (const_int 6) (const_int 6)
7807 (const_int 8) (const_int 8)
7808 (const_int 10) (const_int 10)
7809 (const_int 12) (const_int 12)
7810 (const_int 14) (const_int 14)])))]
7812 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7813 [(set_attr "type" "sse")
7814 (set_attr "prefix" "evex")
7815 (set_attr "mode" "V16SF")])
;; 256-bit shufps builtin expander: decompose the 8-bit immediate into
;; the eight per-element selection indices the insn pattern wants.
;; Each 2-bit field selects within a 128-bit lane; +4/+8/+12 offsets
;; place the selections in the correct source half and lane.
7817 (define_expand "avx_shufps256<mask_expand4_name>"
7818 [(match_operand:V8SF 0 "register_operand")
7819 (match_operand:V8SF 1 "register_operand")
7820 (match_operand:V8SF 2 "nonimmediate_operand")
7821 (match_operand:SI 3 "const_int_operand")]
7824 int mask = INTVAL (operands[3]);
7825 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7828 GEN_INT ((mask >> 0) & 3),
7829 GEN_INT ((mask >> 2) & 3),
7830 GEN_INT (((mask >> 4) & 3) + 8),
7831 GEN_INT (((mask >> 6) & 3) + 8),
7832 GEN_INT (((mask >> 0) & 3) + 4),
7833 GEN_INT (((mask >> 2) & 3) + 4),
7834 GEN_INT (((mask >> 4) & 3) + 12),
7835 GEN_INT (((mask >> 6) & 3) + 12)
7836 <mask_expand4_args>));
7840 ;; One bit in mask selects 2 elements.
;; vshufps256 insn: the condition checks that the high-lane selections
;; mirror the low-lane ones (offset by 4), i.e. the select is
;; expressible with a single 8-bit immediate, which the output code
;; reconstructs from operands 3-6.
7841 (define_insn "avx_shufps256_1<mask_name>"
7842 [(set (match_operand:V8SF 0 "register_operand" "=v")
7845 (match_operand:V8SF 1 "register_operand" "v")
7846 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7847 (parallel [(match_operand 3 "const_0_to_3_operand" )
7848 (match_operand 4 "const_0_to_3_operand" )
7849 (match_operand 5 "const_8_to_11_operand" )
7850 (match_operand 6 "const_8_to_11_operand" )
7851 (match_operand 7 "const_4_to_7_operand" )
7852 (match_operand 8 "const_4_to_7_operand" )
7853 (match_operand 9 "const_12_to_15_operand")
7854 (match_operand 10 "const_12_to_15_operand")])))]
7856 && <mask_avx512vl_condition>
7857 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7858 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7859 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7860 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7863 mask = INTVAL (operands[3]);
7864 mask |= INTVAL (operands[4]) << 2;
7865 mask |= (INTVAL (operands[5]) - 8) << 4;
7866 mask |= (INTVAL (operands[6]) - 8) << 6;
7867 operands[3] = GEN_INT (mask);
7869 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7871 [(set_attr "type" "sseshuf")
7872 (set_attr "length_immediate" "1")
7873 (set_attr "prefix" "<mask_prefix>")
7874 (set_attr "mode" "V8SF")])
;; 128-bit shufps builtin expander: split the immediate into four
;; selection indices (low two select from op1, high two from op2 at +4).
7876 (define_expand "sse_shufps<mask_expand4_name>"
7877 [(match_operand:V4SF 0 "register_operand")
7878 (match_operand:V4SF 1 "register_operand")
7879 (match_operand:V4SF 2 "vector_operand")
7880 (match_operand:SI 3 "const_int_operand")]
7883 int mask = INTVAL (operands[3]);
7884 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
7887 GEN_INT ((mask >> 0) & 3),
7888 GEN_INT ((mask >> 2) & 3),
7889 GEN_INT (((mask >> 4) & 3) + 4),
7890 GEN_INT (((mask >> 6) & 3) + 4)
7891 <mask_expand4_args>));
;; Masked vshufps (AVX-512VL): vec_merge with operand 7 (register or
;; zero) under mask operand 8; output code rebuilds the immediate from
;; the four index operands.
7895 (define_insn "sse_shufps_v4sf_mask"
7896 [(set (match_operand:V4SF 0 "register_operand" "=v")
7900 (match_operand:V4SF 1 "register_operand" "v")
7901 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7902 (parallel [(match_operand 3 "const_0_to_3_operand")
7903 (match_operand 4 "const_0_to_3_operand")
7904 (match_operand 5 "const_4_to_7_operand")
7905 (match_operand 6 "const_4_to_7_operand")]))
7906 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
7907 (match_operand:QI 8 "register_operand" "Yk")))]
7911 mask |= INTVAL (operands[3]) << 0;
7912 mask |= INTVAL (operands[4]) << 2;
7913 mask |= (INTVAL (operands[5]) - 4) << 4;
7914 mask |= (INTVAL (operands[6]) - 4) << 6;
7915 operands[3] = GEN_INT (mask);
7917 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
7919 [(set_attr "type" "sseshuf")
7920 (set_attr "length_immediate" "1")
7921 (set_attr "prefix" "evex")
7922 (set_attr "mode" "V4SF")])
;; SHUFPS for 128-bit int/float modes (VI4F_128).  Alternative 0 is the
;; legacy two-operand SSE form, alternative 1 the three-operand AVX form.
7924 (define_insn "sse_shufps_<mode>"
7925 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
7926 (vec_select:VI4F_128
7927 (vec_concat:<ssedoublevecmode>
7928 (match_operand:VI4F_128 1 "register_operand" "0,v")
7929 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
7930 (parallel [(match_operand 3 "const_0_to_3_operand")
7931 (match_operand 4 "const_0_to_3_operand")
7932 (match_operand 5 "const_4_to_7_operand")
7933 (match_operand 6 "const_4_to_7_operand")])))]
;; Re-pack the four 2-bit selectors into the instruction immediate;
;; selectors 4..7 address operand 2, so rebase them by -4.
7937 mask |= INTVAL (operands[3]) << 0;
7938 mask |= INTVAL (operands[4]) << 2;
7939 mask |= (INTVAL (operands[5]) - 4) << 4;
7940 mask |= (INTVAL (operands[6]) - 4) << 6;
7941 operands[3] = GEN_INT (mask);
7943 switch (which_alternative)
7946 return "shufps\t{%3, %2, %0|%0, %2, %3}";
7948 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7953 [(set_attr "isa" "noavx,avx")
7954 (set_attr "type" "sseshuf")
7955 (set_attr "length_immediate" "1")
7956 (set_attr "prefix" "orig,maybe_evex")
7957 (set_attr "mode" "V4SF")])
;; Extract the high two SF elements (lanes 2,3) of a V4SF to memory or
;; register; either operand may be memory, but not both.
7959 (define_insn "sse_storehps"
7960 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7962 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7963 (parallel [(const_int 2) (const_int 3)])))]
7964 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7966 %vmovhps\t{%1, %0|%q0, %1}
7967 %vmovhlps\t{%1, %d0|%d0, %1}
7968 %vmovlps\t{%H1, %d0|%d0, %H1}"
7969 [(set_attr "type" "ssemov")
7970 (set_attr "prefix" "maybe_vex")
7971 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper: canonicalizes operands via ix86_fixup_binary_operands
;; before emitting sse_loadhps, then copies back if a temp was used.
7973 (define_expand "sse_loadhps_exp"
7974 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7977 (match_operand:V4SF 1 "nonimmediate_operand")
7978 (parallel [(const_int 0) (const_int 1)]))
7979 (match_operand:V2SF 2 "nonimmediate_operand")))]
7982 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7984 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7986 /* Fix up the destination if needed. */
7987 if (dst != operands[0])
7988 emit_move_insn (operands[0], dst);
;; Replace the high half of a V4SF: keeps lanes 0,1 of operand 1 and
;; loads operand 2 into lanes 2,3 (movhps/movlhps family).
7993 (define_insn "sse_loadhps"
7994 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7997 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7998 (parallel [(const_int 0) (const_int 1)]))
7999 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
8002 movhps\t{%2, %0|%0, %q2}
8003 vmovhps\t{%2, %1, %0|%0, %1, %q2}
8004 movlhps\t{%2, %0|%0, %2}
8005 vmovlhps\t{%2, %1, %0|%0, %1, %2}
8006 %vmovlps\t{%2, %H0|%H0, %2}"
8007 [(set_attr "isa" "noavx,avx,noavx,avx,*")
8008 (set_attr "type" "ssemov")
8009 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
8010 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract the low two SF elements (lanes 0,1) of a V4SF.
8012 (define_insn "sse_storelps"
8013 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
8015 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
8016 (parallel [(const_int 0) (const_int 1)])))]
8017 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8019 %vmovlps\t{%1, %0|%q0, %1}
8020 %vmovaps\t{%1, %0|%0, %1}
8021 %vmovlps\t{%1, %d0|%d0, %q1}"
8022 [(set_attr "type" "ssemov")
8023 (set_attr "prefix" "maybe_vex")
8024 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadlps, mirroring sse_loadhps_exp above:
;; fix up operands, emit, copy back to the real destination if needed.
8026 (define_expand "sse_loadlps_exp"
8027 [(set (match_operand:V4SF 0 "nonimmediate_operand")
8029 (match_operand:V2SF 2 "nonimmediate_operand")
8031 (match_operand:V4SF 1 "nonimmediate_operand")
8032 (parallel [(const_int 2) (const_int 3)]))))]
8035 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
8037 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
8039 /* Fix up the destination if needed. */
8040 if (dst != operands[0])
8041 emit_move_insn (operands[0], dst);
;; Replace the low half of a V4SF: operand 2 goes to lanes 0,1, lanes 2,3
;; come from operand 1.  Reg-reg forms use shufps with the fixed 0xe4
;; (identity-order) immediate.
8046 (define_insn "sse_loadlps"
8047 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
8049 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
8051 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
8052 (parallel [(const_int 2) (const_int 3)]))))]
8055 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
8056 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
8057 movlps\t{%2, %0|%0, %q2}
8058 vmovlps\t{%2, %1, %0|%0, %1, %q2}
8059 %vmovlps\t{%2, %0|%q0, %2}"
8060 [(set_attr "isa" "noavx,avx,noavx,avx,*")
8061 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
8062 (set (attr "length_immediate")
8063 (if_then_else (eq_attr "alternative" "0,1")
8065 (const_string "*")))
8066 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
8067 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; MOVSS merge: scalar element 0 from operand 2, remaining lanes from
;; operand 1 (SSE two-operand and AVX three-operand alternatives).
8069 (define_insn "sse_movss"
8070 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
8072 (match_operand:V4SF 2 "register_operand" " x,v")
8073 (match_operand:V4SF 1 "register_operand" " 0,v")
8077 movss\t{%2, %0|%0, %2}
8078 vmovss\t{%2, %1, %0|%0, %1, %2}"
8079 [(set_attr "isa" "noavx,avx")
8080 (set_attr "type" "ssemov")
8081 (set_attr "prefix" "orig,maybe_evex")
8082 (set_attr "mode" "SF")])
;; Broadcast lane 0 of a V4SF register to all lanes (vbroadcastss).
8084 (define_insn "avx2_vec_dup<mode>"
8085 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
8086 (vec_duplicate:VF1_128_256
8088 (match_operand:V4SF 1 "register_operand" "v")
8089 (parallel [(const_int 0)]))))]
8091 "vbroadcastss\t{%1, %0|%0, %1}"
8092 [(set_attr "type" "sselog1")
8093 (set_attr "prefix" "maybe_evex")
8094 (set_attr "mode" "<MODE>")])
;; As above but the source is a V8SF; %x1 selects its low 128-bit half.
8096 (define_insn "avx2_vec_dupv8sf_1"
8097 [(set (match_operand:V8SF 0 "register_operand" "=v")
8100 (match_operand:V8SF 1 "register_operand" "v")
8101 (parallel [(const_int 0)]))))]
8103 "vbroadcastss\t{%x1, %0|%0, %x1}"
8104 [(set_attr "type" "sselog1")
8105 (set_attr "prefix" "maybe_evex")
8106 (set_attr "mode" "V8SF")])
;; 512-bit broadcast of the scalar in lane 0 (vbroadcastss/vbroadcastsd
;; chosen by <bcstscalarsuff>).
8108 (define_insn "avx512f_vec_dup<mode>_1"
8109 [(set (match_operand:VF_512 0 "register_operand" "=v")
8110 (vec_duplicate:VF_512
8111 (vec_select:<ssescalarmode>
8112 (match_operand:VF_512 1 "register_operand" "v")
8113 (parallel [(const_int 0)]))))]
8115 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
8116 [(set_attr "type" "sselog1")
8117 (set_attr "prefix" "evex")
8118 (set_attr "mode" "<MODE>")])
8120 ;; Although insertps takes register source, we prefer
8121 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF scalars; SSE4.1 adds insertps alternatives,
;; plus MMX (punpckldq/movd) and zero-high (movss) forms.
8122 (define_insn "*vec_concatv2sf_sse4_1"
8123 [(set (match_operand:V2SF 0 "register_operand"
8124 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
8126 (match_operand:SF 1 "nonimmediate_operand"
8127 " 0, 0,Yv, 0,0, v,m, 0 , m")
8128 (match_operand:SF 2 "nonimm_or_0_operand"
8129 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
8130 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8132 unpcklps\t{%2, %0|%0, %2}
8133 unpcklps\t{%2, %0|%0, %2}
8134 vunpcklps\t{%2, %1, %0|%0, %1, %2}
;; insertps imm 0x10 = place source scalar into element 1.
8135 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
8136 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
8137 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
8138 %vmovss\t{%1, %0|%0, %1}
8139 punpckldq\t{%2, %0|%0, %2}
8140 movd\t{%1, %0|%0, %1}"
8142 (cond [(eq_attr "alternative" "0,1,3,4")
8143 (const_string "noavx")
8144 (eq_attr "alternative" "2,5")
8145 (const_string "avx")
8147 (const_string "*")))
8149 (cond [(eq_attr "alternative" "6")
8150 (const_string "ssemov")
8151 (eq_attr "alternative" "7")
8152 (const_string "mmxcvt")
8153 (eq_attr "alternative" "8")
8154 (const_string "mmxmov")
8156 (const_string "sselog")))
8157 (set (attr "mmx_isa")
8158 (if_then_else (eq_attr "alternative" "7,8")
8159 (const_string "native")
8160 (const_string "*")))
8161 (set (attr "prefix_data16")
8162 (if_then_else (eq_attr "alternative" "3,4")
8164 (const_string "*")))
8165 (set (attr "prefix_extra")
8166 (if_then_else (eq_attr "alternative" "3,4,5")
8168 (const_string "*")))
8169 (set (attr "length_immediate")
8170 (if_then_else (eq_attr "alternative" "3,4,5")
8172 (const_string "*")))
8173 (set (attr "prefix")
8174 (cond [(eq_attr "alternative" "2,5")
8175 (const_string "maybe_evex")
8176 (eq_attr "alternative" "6")
8177 (const_string "maybe_vex")
8179 (const_string "orig")))
8180 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
8182 ;; ??? In theory we can match memory for the MMX alternative, but allowing
8183 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
8184 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 fallback: unpcklps/movss for SSE regs, punpckldq/movd for MMX.
8185 (define_insn "*vec_concatv2sf_sse"
8186 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
8188 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
8189 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
8192 unpcklps\t{%2, %0|%0, %2}
8193 movss\t{%1, %0|%0, %1}
8194 punpckldq\t{%2, %0|%0, %2}
8195 movd\t{%1, %0|%0, %1}"
8196 [(set_attr "mmx_isa" "*,*,native,native")
8197 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
8198 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Concatenate two V2SF halves into a V4SF: movlhps for reg-reg,
;; movhps for a memory second half.
8200 (define_insn "*vec_concatv4sf"
8201 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
8203 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
8204 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
8207 movlhps\t{%2, %0|%0, %2}
8208 vmovlhps\t{%2, %1, %0|%0, %1, %2}
8209 movhps\t{%2, %0|%0, %q2}
8210 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
8211 [(set_attr "isa" "noavx,avx,noavx,avx")
8212 (set_attr "type" "ssemov")
8213 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
8214 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Special case: high half is constant zero, so a plain movq (which
;; zero-extends to 128 bits) suffices.
8216 (define_insn "*vec_concatv4sf_0"
8217 [(set (match_operand:V4SF 0 "register_operand" "=v")
8219 (match_operand:V2SF 1 "nonimmediate_operand" "vm")
8220 (match_operand:V2SF 2 "const0_operand" " C")))]
8222 "%vmovq\t{%1, %0|%0, %1}"
8223 [(set_attr "type" "ssemov")
8224 (set_attr "prefix" "maybe_vex")
8225 (set_attr "mode" "DF")])
8227 ;; Avoid combining registers from different units in a single alternative,
8228 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 128-bit int/float vector, merging with operand 1
;; (often zero, "C").  Many alternatives cover SSE4.1 insertps, scalar
;; moves, pinsrd, and GPR/x87 spills to memory.
8229 (define_insn "vec_set<mode>_0"
8230 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
8231 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
8233 (vec_duplicate:VI4F_128
8234 (match_operand:<ssescalarmode> 2 "general_operand"
8235 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
8236 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
8237 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
;; insertps imm 0xe: take src element 0, write dest element 0, zero 1-3.
8241 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8242 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8243 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
8244 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
8245 %vmovd\t{%2, %0|%0, %2}
8246 movss\t{%2, %0|%0, %2}
8247 movss\t{%2, %0|%0, %2}
8248 vmovss\t{%2, %1, %0|%0, %1, %2}
8249 pinsrd\t{$0, %2, %0|%0, %2, 0}
8250 pinsrd\t{$0, %2, %0|%0, %2, 0}
8251 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
8256 (cond [(eq_attr "alternative" "0,1,8,9")
8257 (const_string "sse4_noavx")
8258 (eq_attr "alternative" "2,7,10")
8259 (const_string "avx")
8260 (eq_attr "alternative" "3,4")
8261 (const_string "sse2")
8262 (eq_attr "alternative" "5,6")
8263 (const_string "noavx")
8265 (const_string "*")))
8267 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
8268 (const_string "sselog")
8269 (eq_attr "alternative" "12")
8270 (const_string "imov")
8271 (eq_attr "alternative" "13")
8272 (const_string "fmov")
8274 (const_string "ssemov")))
8275 (set (attr "prefix_extra")
8276 (if_then_else (eq_attr "alternative" "8,9,10")
8278 (const_string "*")))
8279 (set (attr "length_immediate")
8280 (if_then_else (eq_attr "alternative" "8,9,10")
8282 (const_string "*")))
8283 (set (attr "prefix")
8284 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
8285 (const_string "orig")
8286 (eq_attr "alternative" "2")
8287 (const_string "maybe_evex")
8288 (eq_attr "alternative" "3,4")
8289 (const_string "maybe_vex")
8290 (eq_attr "alternative" "7,10")
8291 (const_string "vex")
8293 (const_string "*")))
8294 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
;; GPR->vector moves (alt 4) are gated on TARGET_INTER_UNIT_MOVES_TO_VEC.
8295 (set (attr "preferred_for_speed")
8296 (cond [(eq_attr "alternative" "4")
8297 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8299 (symbol_ref "true")))])
8301 ;; A subset is vec_setv4sf.
;; insertps-based element set: operand 3 is a one-hot merge mask, so
;; exact_log2 gives the target lane, which is shifted into bits 5:4 of
;; the insertps immediate (the COUNT_D field).
8302 (define_insn "*vec_setv4sf_sse4_1"
8303 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8306 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
8307 (match_operand:V4SF 1 "register_operand" "0,0,v")
8308 (match_operand:SI 3 "const_int_operand")))]
8310 && ((unsigned) exact_log2 (INTVAL (operands[3]))
8311 < GET_MODE_NUNITS (V4SFmode))"
8313 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
8314 switch (which_alternative)
8318 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8320 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8325 [(set_attr "isa" "noavx,noavx,avx")
8326 (set_attr "type" "sselog")
8327 (set_attr "prefix_data16" "1,1,*")
8328 (set_attr "prefix_extra" "1")
8329 (set_attr "length_immediate" "1")
8330 (set_attr "prefix" "orig,orig,maybe_evex")
8331 (set_attr "mode" "V4SF")])
8333 ;; All of vinsertps, vmovss, vmovd clear also the higher bits.
;; 256/512-bit variant: element 0 set while the rest of the vector is
;; zero (operand 1 is const0), exploiting the implicit upper-bit clearing.
8334 (define_insn "vec_set<mode>_0"
8335 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
8336 (vec_merge:VI4F_256_512
8337 (vec_duplicate:VI4F_256_512
8338 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
8339 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
8343 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
8344 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
8345 vmovd\t{%2, %x0|%x0, %2}"
8347 (if_then_else (eq_attr "alternative" "0")
8348 (const_string "sselog")
8349 (const_string "ssemov")))
8350 (set_attr "prefix" "maybe_evex")
8351 (set_attr "mode" "SF,<ssescalarmode>,SI")
8352 (set (attr "preferred_for_speed")
8353 (cond [(eq_attr "alternative" "2")
8354 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8356 (symbol_ref "true")))])
;; The INSERTPS builtin.  For a memory source the hardware ignores the
;; COUNT_S (source-select) field, so fold it into the memory address
;; (count_s * 4 bytes) and clear those immediate bits instead.
8358 (define_insn "sse4_1_insertps"
8359 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8360 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
8361 (match_operand:V4SF 1 "register_operand" "0,0,v")
8362 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
8366 if (MEM_P (operands[2]))
8368 unsigned count_s = INTVAL (operands[3]) >> 6;
8370 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
8371 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
8373 switch (which_alternative)
8377 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8379 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8384 [(set_attr "isa" "noavx,noavx,avx")
8385 (set_attr "type" "sselog")
8386 (set_attr "prefix_data16" "1,1,*")
8387 (set_attr "prefix_extra" "1")
8388 (set_attr "length_immediate" "1")
8389 (set_attr "prefix" "orig,orig,maybe_evex")
8390 (set_attr "mode" "V4SF")])
;; Split (header elided in this extract): after reload, a vec_merge of a
;; duplicated scalar into a memory destination becomes a scalar store.
8393 [(set (match_operand:VI4F_128 0 "memory_operand")
8395 (vec_duplicate:VI4F_128
8396 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
8399 "TARGET_SSE && reload_completed"
8400 [(set (match_dup 0) (match_dup 1))]
8401 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8403 ;; Standard scalar operation patterns which preserve the rest of the
8404 ;; vector for combiner.
;; Set element 0 of a V2DF, keeping element 1 (movsd/movlpd family).
8405 (define_insn "vec_setv2df_0"
8406 [(set (match_operand:V2DF 0 "register_operand" "=x,v,x,v")
8409 (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
8410 (match_operand:V2DF 1 "register_operand" " 0,v,0,v")
8414 movsd\t{%2, %0|%0, %2}
8415 vmovsd\t{%2, %1, %0|%0, %1, %2}
8416 movlpd\t{%2, %0|%0, %2}
8417 vmovlpd\t{%2, %1, %0|%0, %1, %2}"
8418 [(set_attr "isa" "noavx,avx,noavx,avx")
8419 (set_attr "type" "ssemov")
8420 (set_attr "mode" "DF")])
;; Generic vec_set entry point: constant index goes through
;; ix86_expand_vector_set, variable index through the _var helper.
8422 (define_expand "vec_set<mode>"
8423 [(match_operand:V 0 "register_operand")
8424 (match_operand:<ssescalarmode> 1 "register_operand")
8425 (match_operand 2 "vec_setm_operand")]
8428 if (CONST_INT_P (operands[2]))
8429 ix86_expand_vector_set (false, operands[0], operands[1],
8430 INTVAL (operands[2]));
8432 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
;; Extracting element 0 of a V4SF is just a lowpart move; split it to a
;; plain SF move after reload.
8436 (define_insn_and_split "*vec_extractv4sf_0"
8437 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
8439 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
8440 (parallel [(const_int 0)])))]
8441 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8443 "&& reload_completed"
8444 [(set (match_dup 0) (match_dup 1))]
8445 "operands[1] = gen_lowpart (SFmode, operands[1]);")
;; extractps for a GPR/memory destination; when the destination is an
;; SSE register, split to shufps / unpckhps instead.
8447 (define_insn_and_split "*sse4_1_extractps"
8448 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
8450 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
8451 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
8454 extractps\t{%2, %1, %0|%0, %1, %2}
8455 extractps\t{%2, %1, %0|%0, %1, %2}
8456 vextractps\t{%2, %1, %0|%0, %1, %2}
8459 "&& reload_completed && SSE_REG_P (operands[0])"
8462 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
8463 switch (INTVAL (operands[2]))
;; Lanes 1 (and presumably 3; case labels elided): broadcast the wanted
;; lane via shufps into the destination's low element.
8467 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
8468 operands[2], operands[2],
8469 GEN_INT (INTVAL (operands[2]) + 4),
8470 GEN_INT (INTVAL (operands[2]) + 4)));
8473 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
8476 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
8481 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
8482 (set_attr "type" "sselog,sselog,sselog,*,*")
8483 (set_attr "prefix_data16" "1,1,1,*,*")
8484 (set_attr "prefix_extra" "1,1,1,*,*")
8485 (set_attr "length_immediate" "1,1,1,*,*")
8486 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
8487 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
;; Extract from a memory V4SF: fold the lane index into the address.
8489 (define_insn_and_split "*vec_extractv4sf_mem"
8490 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
8492 (match_operand:V4SF 1 "memory_operand" "o,o,o")
8493 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
8496 "&& reload_completed"
8497 [(set (match_dup 0) (match_dup 1))]
8499 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; ISA-name and suffix maps for quarter-width vextract patterns.
8502 (define_mode_attr extract_type
8503 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
8505 (define_mode_attr extract_suf
8506 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
8508 (define_mode_iterator AVX512_VEC
8509 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Masked 128-bit extract builtin expander: operand 2 selects which of
;; the four 128-bit chunks; the element indices passed down are the
;; chunk base times 4 (dword modes) or 2 (qword modes).  A memory
;; destination that differs from the merge source gets a temp register.
8511 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
8512 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
8513 (match_operand:AVX512_VEC 1 "register_operand")
8514 (match_operand:SI 2 "const_0_to_3_operand")
8515 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
8516 (match_operand:QI 4 "register_operand")]
8520 mask = INTVAL (operands[2]);
8521 rtx dest = operands[0];
8523 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
8524 dest = gen_reg_rtx (<ssequartermode>mode);
8526 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
8527 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
8528 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
8529 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
8532 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
8533 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
8535 if (dest != operands[0])
8536 emit_move_insn (operands[0], dest);
;; Masked vextract*64x2: indices must be an even/odd consecutive pair;
;; the chunk number for the instruction immediate is index >> 1.
8540 (define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
8541 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8542 (vec_merge:<ssequartermode>
8543 (vec_select:<ssequartermode>
8544 (match_operand:V8FI 1 "register_operand" "v,v")
8545 (parallel [(match_operand 2 "const_0_to_7_operand")
8546 (match_operand 3 "const_0_to_7_operand")]))
8547 (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
8548 (match_operand:QI 5 "register_operand" "Yk,Yk")))]
8550 && INTVAL (operands[2]) % 2 == 0
8551 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
;; A masked store must merge with the same memory location.
8552 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
8554 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8555 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2}";
8557 [(set_attr "type" "sselog1")
8558 (set_attr "prefix_extra" "1")
8559 (set_attr "length_immediate" "1")
8560 (set_attr "prefix" "evex")
8561 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked form of the above.
8563 (define_insn "*avx512dq_vextract<shuffletype>64x2_1"
8564 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8565 (vec_select:<ssequartermode>
8566 (match_operand:V8FI 1 "register_operand" "v")
8567 (parallel [(match_operand 2 "const_0_to_7_operand")
8568 (match_operand 3 "const_0_to_7_operand")])))]
8570 && INTVAL (operands[2]) % 2 == 0
8571 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8573 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8574 return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
8576 [(set_attr "type" "sselog1")
8577 (set_attr "prefix_extra" "1")
8578 (set_attr "length_immediate" "1")
8579 (set_attr "prefix" "evex")
8580 (set_attr "mode" "<sseinsnmode>")])
;; Split (header elided in this extract): extracting elements 0,1 is a
;; lowpart move unless the source is an extended (xmm16+) register
;; without AVX512VL, in which case widen the destination instead.
8583 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8584 (vec_select:<ssequartermode>
8585 (match_operand:V8FI 1 "register_operand")
8586 (parallel [(const_int 0) (const_int 1)])))]
8590 || REG_P (operands[0])
8591 || !EXT_REX_SSE_REG_P (operands[1]))"
8592 [(set (match_dup 0) (match_dup 1))]
8594 if (!TARGET_AVX512VL
8595 && REG_P (operands[0])
8596 && EXT_REX_SSE_REG_P (operands[1]))
8598 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8600 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
;; Masked vextract*32x4: indices must be four consecutive values with a
;; multiple-of-4 base; the immediate chunk number is index >> 2.
8603 (define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
8604 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8605 (vec_merge:<ssequartermode>
8606 (vec_select:<ssequartermode>
8607 (match_operand:V16FI 1 "register_operand" "v,v")
8608 (parallel [(match_operand 2 "const_0_to_15_operand")
8609 (match_operand 3 "const_0_to_15_operand")
8610 (match_operand 4 "const_0_to_15_operand")
8611 (match_operand 5 "const_0_to_15_operand")]))
8612 (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
8613 (match_operand:QI 7 "register_operand" "Yk,Yk")))]
8615 && INTVAL (operands[2]) % 4 == 0
8616 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8617 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8618 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
;; A masked store must merge with the same memory location.
8619 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
8621 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8622 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, %2}";
8624 [(set_attr "type" "sselog1")
8625 (set_attr "prefix_extra" "1")
8626 (set_attr "length_immediate" "1")
8627 (set_attr "prefix" "evex")
8628 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked form of the above.
8630 (define_insn "*avx512f_vextract<shuffletype>32x4_1"
8631 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8632 (vec_select:<ssequartermode>
8633 (match_operand:V16FI 1 "register_operand" "v")
8634 (parallel [(match_operand 2 "const_0_to_15_operand")
8635 (match_operand 3 "const_0_to_15_operand")
8636 (match_operand 4 "const_0_to_15_operand")
8637 (match_operand 5 "const_0_to_15_operand")])))]
8639 && INTVAL (operands[2]) % 4 == 0
8640 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8641 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8642 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8644 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8645 return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
8647 [(set_attr "type" "sselog1")
8648 (set_attr "prefix_extra" "1")
8649 (set_attr "length_immediate" "1")
8650 (set_attr "prefix" "evex")
8651 (set_attr "mode" "<sseinsnmode>")])
;; Split (header elided in this extract): extracting elements 0..3 of a
;; V16FI is a lowpart move, with the same EXT_REX/AVX512VL caveat as the
;; 64x2 split above.
8654 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8655 (vec_select:<ssequartermode>
8656 (match_operand:V16FI 1 "register_operand")
8657 (parallel [(const_int 0) (const_int 1)
8658 (const_int 2) (const_int 3)])))]
8662 || REG_P (operands[0])
8663 || !EXT_REX_SSE_REG_P (operands[1]))"
8664 [(set (match_dup 0) (match_dup 1))]
8666 if (!TARGET_AVX512VL
8667 && REG_P (operands[0])
8668 && EXT_REX_SSE_REG_P (operands[1]))
8670 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8672 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
;; ISA-name and suffix maps for half-width (256-bit) vextract patterns.
8675 (define_mode_attr extract_type_2
8676 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8678 (define_mode_attr extract_suf_2
8679 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8681 (define_mode_iterator AVX512_VEC_2
8682 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Masked 256-bit extract builtin expander: dispatch on operand 2 (0 =
;; low half, 1 = high half) to the lo/hi mask patterns.
8684 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8685 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8686 (match_operand:AVX512_VEC_2 1 "register_operand")
8687 (match_operand:SI 2 "const_0_to_1_operand")
8688 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8689 (match_operand:QI 4 "register_operand")]
8692 rtx (*insn)(rtx, rtx, rtx, rtx);
8693 rtx dest = operands[0];
8695 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8696 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8698 switch (INTVAL (operands[2]))
8701 insn = gen_vec_extract_lo_<mode>_mask;
8704 insn = gen_vec_extract_hi_<mode>_mask;
8710 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8711 if (dest != operands[0])
8712 emit_move_insn (operands[0], dest);
;; Split (header elided in this extract): low-half extract of a V8FI is
;; a lowpart move when AVX512VL or a non-extended register is involved.
8717 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8718 (vec_select:<ssehalfvecmode>
8719 (match_operand:V8FI 1 "nonimmediate_operand")
8720 (parallel [(const_int 0) (const_int 1)
8721 (const_int 2) (const_int 3)])))]
8722 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8725 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8726 [(set (match_dup 0) (match_dup 1))]
8727 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the low 256-bit half (elements 0..3) of a V8FI.
8729 (define_insn "vec_extract_lo_<mode>_mask"
8730 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8731 (vec_merge:<ssehalfvecmode>
8732 (vec_select:<ssehalfvecmode>
8733 (match_operand:V8FI 1 "register_operand" "v,v")
8734 (parallel [(const_int 0) (const_int 1)
8735 (const_int 2) (const_int 3)]))
8736 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8737 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8739 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8740 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8741 [(set_attr "type" "sselog1")
8742 (set_attr "prefix_extra" "1")
8743 (set_attr "length_immediate" "1")
8744 (set_attr "memory" "none,store")
8745 (set_attr "prefix" "evex")
8746 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low-half extract; with AVX512VL or a memory source this can
;; become a plain move (alternative output elided in this extract).
8748 (define_insn "vec_extract_lo_<mode>"
8749 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
8750 (vec_select:<ssehalfvecmode>
8751 (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
8752 (parallel [(const_int 0) (const_int 1)
8753 (const_int 2) (const_int 3)])))]
8754 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8756 if (!TARGET_AVX512VL && !MEM_P (operands[1]))
8757 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8761 [(set_attr "type" "sselog1")
8762 (set_attr "prefix_extra" "1")
8763 (set_attr "length_immediate" "1")
8764 (set_attr "memory" "none,store,load")
8765 (set_attr "prefix" "evex")
8766 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of the high 256-bit half (elements 4..7) of a V8FI.
8768 (define_insn "vec_extract_hi_<mode>_mask"
8769 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8770 (vec_merge:<ssehalfvecmode>
8771 (vec_select:<ssehalfvecmode>
8772 (match_operand:V8FI 1 "register_operand" "v,v")
8773 (parallel [(const_int 4) (const_int 5)
8774 (const_int 6) (const_int 7)]))
8775 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8776 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8778 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8779 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8780 [(set_attr "type" "sselog1")
8781 (set_attr "prefix_extra" "1")
8782 (set_attr "length_immediate" "1")
8783 (set_attr "prefix" "evex")
8784 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-half extract of a V8FI.
8786 (define_insn "vec_extract_hi_<mode>"
8787 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
8788 (vec_select:<ssehalfvecmode>
8789 (match_operand:V8FI 1 "register_operand" "v")
8790 (parallel [(const_int 4) (const_int 5)
8791 (const_int 6) (const_int 7)])))]
8793 "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8794 [(set_attr "type" "sselog1")
8795 (set_attr "prefix_extra" "1")
8796 (set_attr "length_immediate" "1")
8797 (set_attr "prefix" "evex")
8798 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of the high half (elements 8..15) of a V16FI; requires
;; the 32x8 (AVX512DQ) encoding for the masked form.
8800 (define_insn "vec_extract_hi_<mode>_mask"
8801 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8802 (vec_merge:<ssehalfvecmode>
8803 (vec_select:<ssehalfvecmode>
8804 (match_operand:V16FI 1 "register_operand" "v,v")
8805 (parallel [(const_int 8) (const_int 9)
8806 (const_int 10) (const_int 11)
8807 (const_int 12) (const_int 13)
8808 (const_int 14) (const_int 15)]))
8809 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8810 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8812 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8813 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8814 [(set_attr "type" "sselog1")
8815 (set_attr "prefix_extra" "1")
8816 (set_attr "length_immediate" "1")
8817 (set_attr "prefix" "evex")
8818 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-half extract: 32x8 when AVX512DQ is available, else the
;; AVX512F-only vextracti64x4 encoding.
8820 (define_insn "vec_extract_hi_<mode>"
8821 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
8822 (vec_select:<ssehalfvecmode>
8823 (match_operand:V16FI 1 "register_operand" "v,v")
8824 (parallel [(const_int 8) (const_int 9)
8825 (const_int 10) (const_int 11)
8826 (const_int 12) (const_int 13)
8827 (const_int 14) (const_int 15)])))]
8830 vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
8831 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8832 [(set_attr "type" "sselog1")
8833 (set_attr "prefix_extra" "1")
8834 (set_attr "isa" "avx512dq,noavx512dq")
8835 (set_attr "length_immediate" "1")
8836 (set_attr "prefix" "evex")
8837 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit modes usable by the AVX512VL vextractf128-style expander;
;; the 64-bit-element variants additionally need AVX512DQ.
8839 (define_mode_iterator VI48F_256_DQ
8840 [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
;; Masked 128-bit extract from a 256-bit vector (AVX512VL builtin).
;; A temp register is used when the destination is memory and the
;; underlying lo/hi pattern's constraints cannot express the merge;
;; the conditions mirror the maskm insn constraints described below.
8842 (define_expand "avx512vl_vextractf128<mode>"
8843 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8844 (match_operand:VI48F_256_DQ 1 "register_operand")
8845 (match_operand:SI 2 "const_0_to_1_operand")
8846 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8847 (match_operand:QI 4 "register_operand")]
8850 rtx (*insn)(rtx, rtx, rtx, rtx);
8851 rtx dest = operands[0];
8854 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8855 /* For V8S[IF]mode there are maskm insns with =m and 0
8857 ? !rtx_equal_p (dest, operands[3])
8858 /* For V4D[IF]mode, hi insns don't allow memory, and
8859 lo insns have =m and 0C constraints. */
8860 : (operands[2] != const0_rtx
8861 || (!rtx_equal_p (dest, operands[3])
8862 && GET_CODE (operands[3]) != CONST_VECTOR))))
8863 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8864 switch (INTVAL (operands[2]))
8867 insn = gen_vec_extract_lo_<mode>_mask;
8870 insn = gen_vec_extract_hi_<mode>_mask;
8876 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8877 if (dest != operands[0])
8878 emit_move_insn (operands[0], dest);
;; Unmasked AVX vextractf128 builtin: dispatch to the lo/hi pattern.
8882 (define_expand "avx_vextractf128<mode>"
8883 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8884 (match_operand:V_256 1 "register_operand")
8885 (match_operand:SI 2 "const_0_to_1_operand")]
8888 rtx (*insn)(rtx, rtx);
8890 switch (INTVAL (operands[2]))
8893 insn = gen_vec_extract_lo_<mode>;
8896 insn = gen_vec_extract_hi_<mode>;
8902 emit_insn (insn (operands[0], operands[1]));
;; Masked extract of the low 256 bits of a 512-bit V16SF/V16SI vector
;; (vextract{f,i}32x8).  A memory destination (alternative 1) is only
;; valid when it is the same rtx as the merge source operands[2] --
;; enforced by the MEM_P / rtx_equal_p check in the insn condition.
8906 (define_insn "vec_extract_lo_<mode>_mask"
8907 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8908 (vec_merge:<ssehalfvecmode>
8909 (vec_select:<ssehalfvecmode>
8910 (match_operand:V16FI 1 "register_operand" "v,v")
8911 (parallel [(const_int 0) (const_int 1)
8912 (const_int 2) (const_int 3)
8913 (const_int 4) (const_int 5)
8914 (const_int 6) (const_int 7)]))
8915 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8916 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8918 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8919 "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8920 [(set_attr "type" "sselog1")
8921 (set_attr "prefix_extra" "1")
8922 (set_attr "length_immediate" "1")
8923 (set_attr "memory" "none,store")
8924 (set_attr "prefix" "evex")
8925 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low-256-bit extract from V16SF/V16SI.  Only emits an actual
;; vextract when storing from a zmm16+ register without AVX512VL (those
;; registers cannot be accessed as ymm there); 32x8 needs AVX512DQ,
;; otherwise fall back to 64x4.  The companion define_split below turns
;; the remaining cases into a plain lowpart move.
8927 (define_insn "vec_extract_lo_<mode>"
8928 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
8929 (vec_select:<ssehalfvecmode>
8930 (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
8931 (parallel [(const_int 0) (const_int 1)
8932 (const_int 2) (const_int 3)
8933 (const_int 4) (const_int 5)
8934 (const_int 6) (const_int 7)])))]
8936 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8938 if (!TARGET_AVX512VL
8939 && !REG_P (operands[0])
8940 && EXT_REX_SSE_REG_P (operands[1]))
8942 if (TARGET_AVX512DQ)
8943 return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
8945 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8950 [(set_attr "type" "sselog1")
8951 (set_attr "prefix_extra" "1")
8952 (set_attr "length_immediate" "1")
8953 (set_attr "memory" "none,load,store")
8954 (set_attr "prefix" "evex")
8955 (set_attr "mode" "<sseinsnmode>")])
;; Split pattern for the V16FI low-half extract: rewrite it as a simple
;; move of the low half.  When the source is a zmm16+ register without
;; AVX512VL, the destination is instead widened to full-vector mode via
;; lowpart_subreg so the move stays representable.
;; NOTE(review): the define_split header line itself is absent from this
;; dump (numbering jumps to 8958) -- confirm against upstream sse.md.
8958 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8959 (vec_select:<ssehalfvecmode>
8960 (match_operand:V16FI 1 "nonimmediate_operand")
8961 (parallel [(const_int 0) (const_int 1)
8962 (const_int 2) (const_int 3)
8963 (const_int 4) (const_int 5)
8964 (const_int 6) (const_int 7)])))]
8965 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8968 || REG_P (operands[0])
8969 || !EXT_REX_SSE_REG_P (operands[1]))"
8970 [(set (match_dup 0) (match_dup 1))]
8972 if (!TARGET_AVX512VL
8973 && REG_P (operands[0])
8974 && EXT_REX_SSE_REG_P (operands[1]))
8976 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8978 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
;; Masked extract of the low 128 bits of a 256-bit V4DI/V4DF vector via
;; vextract{f,i}64x2.  A memory destination (alternative 1) is only
;; valid when it equals the merge source operands[2], per the MEM_P /
;; rtx_equal_p check in the insn condition.
8981 (define_insn "vec_extract_lo_<mode>_mask"
8982 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8983 (vec_merge:<ssehalfvecmode>
8984 (vec_select:<ssehalfvecmode>
8985 (match_operand:VI8F_256 1 "register_operand" "v,v")
8986 (parallel [(const_int 0) (const_int 1)]))
8987 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8988 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8991 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8992 "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8993 [(set_attr "type" "sselog1")
8994 (set_attr "prefix_extra" "1")
8995 (set_attr "length_immediate" "1")
8996 (set_attr "memory" "none,store")
8997 (set_attr "prefix" "evex")
;; Fixed: operand 1 is 256-bit (VI8F_256), so the insn mode is the
;; iterator's <sseinsnmode> (OI), not the 512-bit XI it claimed before;
;; every sibling extract pattern in this family uses <sseinsnmode>.
8998 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low-128-bit extract from a 256-bit V4DI/V4DF vector; the
;; split below (its header line is missing from this dump -- numbering
;; jumps past 9007-9009) reduces it to a lowpart move after reload.
9000 (define_insn "vec_extract_lo_<mode>"
9001 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
9002 (vec_select:<ssehalfvecmode>
9003 (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
9004 (parallel [(const_int 0) (const_int 1)])))]
9006 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9010 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9011 (vec_select:<ssehalfvecmode>
9012 (match_operand:VI8F_256 1 "nonimmediate_operand")
9013 (parallel [(const_int 0) (const_int 1)])))]
9014 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9015 && reload_completed"
9016 [(set (match_dup 0) (match_dup 1))]
9017 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the high 128 bits of a 256-bit V4DI/V4DF vector
;; (vextract{f,i}64x2 with immediate 1).  Memory destination only when
;; it equals the merge source operands[2].
9019 (define_insn "vec_extract_hi_<mode>_mask"
9020 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9021 (vec_merge:<ssehalfvecmode>
9022 (vec_select:<ssehalfvecmode>
9023 (match_operand:VI8F_256 1 "register_operand" "v,v")
9024 (parallel [(const_int 2) (const_int 3)]))
9025 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9026 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9029 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9030 "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9031 [(set_attr "type" "sselog1")
9032 (set_attr "prefix_extra" "1")
9033 (set_attr "length_immediate" "1")
;; Fixed: this template uses k-mask/zeroing syntax (%{%3%}%N2), which
;; only exists in the EVEX encoding; "vex" was wrong.  All sibling
;; masked extract patterns here (e.g. 8924, 9086) set "evex".
9034 (set_attr "prefix" "evex")
9035 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-128-bit extract from 256-bit V4DI/V4DF.  With AVX512VL
;; prefer the 64x2 (DQ) or 32x4 form; otherwise plain vextract[fi]128.
9037 (define_insn "vec_extract_hi_<mode>"
9038 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
9039 (vec_select:<ssehalfvecmode>
9040 (match_operand:VI8F_256 1 "register_operand" "v")
9041 (parallel [(const_int 2) (const_int 3)])))]
9044 if (TARGET_AVX512VL)
9046 if (TARGET_AVX512DQ)
9047 return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
9049 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
9052 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
9054 [(set_attr "type" "sselog1")
9055 (set_attr "prefix_extra" "1")
9056 (set_attr "length_immediate" "1")
9057 (set_attr "prefix" "vex")
9058 (set_attr "mode" "<sseinsnmode>")])
;; Split for VI4F_256 low-half extract: lowpart move after reload.
;; (The define_split header line is missing from this dump.)
9061 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9062 (vec_select:<ssehalfvecmode>
9063 (match_operand:VI4F_256 1 "nonimmediate_operand")
9064 (parallel [(const_int 0) (const_int 1)
9065 (const_int 2) (const_int 3)])))]
9066 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9067 && reload_completed"
9068 [(set (match_dup 0) (match_dup 1))]
9069 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked low-128-bit extract from 256-bit V8SI/V8SF (vextract{f,i}32x4).
;; Memory destination only when it equals the merge source operands[2].
9071 (define_insn "vec_extract_lo_<mode>_mask"
9072 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9073 (vec_merge:<ssehalfvecmode>
9074 (vec_select:<ssehalfvecmode>
9075 (match_operand:VI4F_256 1 "register_operand" "v,v")
9076 (parallel [(const_int 0) (const_int 1)
9077 (const_int 2) (const_int 3)]))
9078 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9079 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9081 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9082 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9083 [(set_attr "type" "sselog1")
9084 (set_attr "prefix_extra" "1")
9085 (set_attr "length_immediate" "1")
9086 (set_attr "prefix" "evex")
9087 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked VI4F_256 low-half extract; output template line is missing
;; from this dump (numbering jumps over 9097).
9089 (define_insn "vec_extract_lo_<mode>"
9090 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
9091 (vec_select:<ssehalfvecmode>
9092 (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
9093 (parallel [(const_int 0) (const_int 1)
9094 (const_int 2) (const_int 3)])))]
9096 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9098 [(set_attr "type" "sselog1")
9099 (set_attr "prefix_extra" "1")
9100 (set_attr "length_immediate" "1")
9101 (set_attr "prefix" "evex")
9102 (set_attr "mode" "<sseinsnmode>")])
;; Masked high-128-bit extract from 256-bit V8SI/V8SF (vextract{f,i}32x4
;; with immediate 1).  Memory destination (alternative 1) only when it
;; equals the merge source operands[2], as the condition enforces.
9104 (define_insn "vec_extract_hi_<mode>_mask"
;; Fixed: the "=v,m" constraints and the MEM_P (operands[0]) check in
;; the condition both allow a memory destination, so the predicate must
;; be nonimmediate_operand; register_operand contradicted the "m"
;; alternative and matches no sibling pattern in this family.
9105 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9106 (vec_merge:<ssehalfvecmode>
9107 (vec_select:<ssehalfvecmode>
9108 (match_operand:VI4F_256 1 "register_operand" "v,v")
9109 (parallel [(const_int 4) (const_int 5)
9110 (const_int 6) (const_int 7)]))
9111 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9112 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9114 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9115 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9116 [(set_attr "type" "sselog1")
;; Added prefix_extra for consistency with the sibling extract patterns
;; (e.g. 9084, 9033), which all carry it.
(set_attr "prefix_extra" "1")
9117 (set_attr "length_immediate" "1")
9118 (set_attr "prefix" "evex")
9119 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-128-bit extract from 256-bit V8SI/V8SF: vextract[fi]128
;; for plain AVX, the EVEX 32x4 form when AVX512VL is available (which
;; also allows xmm16+ destinations, hence the x/v constraint split).
9121 (define_insn "vec_extract_hi_<mode>"
9122 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
9123 (vec_select:<ssehalfvecmode>
9124 (match_operand:VI4F_256 1 "register_operand" "x, v")
9125 (parallel [(const_int 4) (const_int 5)
9126 (const_int 6) (const_int 7)])))]
9129 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
9130 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9131 [(set_attr "isa" "*, avx512vl")
9132 (set_attr "prefix" "vex, evex")
9133 (set_attr "type" "sselog1")
9134 (set_attr "length_immediate" "1")
9135 (set_attr "mode" "<sseinsnmode>")])
;; Low 256 bits of a 512-bit V32HI vector.  Emits vextracti64x4 only
;; when storing from a zmm16+ source without AVX512VL; otherwise splits
;; into a lowpart move after reload (widening operands[0] for the
;; zmm16+ register-to-register case).
;; NOTE(review): numbering gaps (e.g. 9139, 9150-9155) show interior
;; lines are missing from this dump -- confirm against upstream sse.md.
9137 (define_insn_and_split "vec_extract_lo_v32hi"
9138 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
9140 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
9141 (parallel [(const_int 0) (const_int 1)
9142 (const_int 2) (const_int 3)
9143 (const_int 4) (const_int 5)
9144 (const_int 6) (const_int 7)
9145 (const_int 8) (const_int 9)
9146 (const_int 10) (const_int 11)
9147 (const_int 12) (const_int 13)
9148 (const_int 14) (const_int 15)])))]
9149 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9152 || REG_P (operands[0])
9153 || !EXT_REX_SSE_REG_P (operands[1]))
9156 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9158 "&& reload_completed
9160 || REG_P (operands[0])
9161 || !EXT_REX_SSE_REG_P (operands[1]))"
9162 [(set (match_dup 0) (match_dup 1))]
9164 if (!TARGET_AVX512VL
9165 && REG_P (operands[0])
9166 && EXT_REX_SSE_REG_P (operands[1]))
9167 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
9169 operands[1] = gen_lowpart (V16HImode, operands[1]);
9171 [(set_attr "type" "sselog1")
9172 (set_attr "prefix_extra" "1")
9173 (set_attr "length_immediate" "1")
9174 (set_attr "memory" "none,load,store")
9175 (set_attr "prefix" "evex")
9176 (set_attr "mode" "XI")])
;; High 256 bits of V32HI: always a real vextracti64x4 with imm 1.
9178 (define_insn "vec_extract_hi_v32hi"
9179 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
9181 (match_operand:V32HI 1 "register_operand" "v")
9182 (parallel [(const_int 16) (const_int 17)
9183 (const_int 18) (const_int 19)
9184 (const_int 20) (const_int 21)
9185 (const_int 22) (const_int 23)
9186 (const_int 24) (const_int 25)
9187 (const_int 26) (const_int 27)
9188 (const_int 28) (const_int 29)
9189 (const_int 30) (const_int 31)])))]
9191 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9192 [(set_attr "type" "sselog1")
9193 (set_attr "prefix_extra" "1")
9194 (set_attr "length_immediate" "1")
9195 (set_attr "prefix" "evex")
9196 (set_attr "mode" "XI")])
;; Low half of a 256-bit V16HI: split to a lowpart move after reload.
9198 (define_insn_and_split "vec_extract_lo_v16hi"
9199 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
9201 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
9202 (parallel [(const_int 0) (const_int 1)
9203 (const_int 2) (const_int 3)
9204 (const_int 4) (const_int 5)
9205 (const_int 6) (const_int 7)])))]
9206 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9208 "&& reload_completed"
9209 [(set (match_dup 0) (match_dup 1))]
9210 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
;; High half of V16HI: AVX2-style vextract128, EVEX 32x4 with AVX512DQ
;; (+VL), or 32x4 from the containing zmm (%g1) for bare AVX512F.
9212 (define_insn "vec_extract_hi_v16hi"
9213 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
9215 (match_operand:V16HI 1 "register_operand" "x,v,v")
9216 (parallel [(const_int 8) (const_int 9)
9217 (const_int 10) (const_int 11)
9218 (const_int 12) (const_int 13)
9219 (const_int 14) (const_int 15)])))]
9222 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9223 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9224 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9225 [(set_attr "type" "sselog1")
9226 (set_attr "prefix_extra" "1")
9227 (set_attr "length_immediate" "1")
9228 (set_attr "isa" "*,avx512dq,avx512f")
9229 (set_attr "prefix" "vex,evex,evex")
9230 (set_attr "mode" "OI")])
;; Low 256 bits of a 512-bit V64QI vector; same shape as the V32HI
;; variant above: real vextracti64x4 only for a zmm16+ store without
;; AVX512VL, otherwise split to a lowpart move after reload.
9232 (define_insn_and_split "vec_extract_lo_v64qi"
9233 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
9235 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
9236 (parallel [(const_int 0) (const_int 1)
9237 (const_int 2) (const_int 3)
9238 (const_int 4) (const_int 5)
9239 (const_int 6) (const_int 7)
9240 (const_int 8) (const_int 9)
9241 (const_int 10) (const_int 11)
9242 (const_int 12) (const_int 13)
9243 (const_int 14) (const_int 15)
9244 (const_int 16) (const_int 17)
9245 (const_int 18) (const_int 19)
9246 (const_int 20) (const_int 21)
9247 (const_int 22) (const_int 23)
9248 (const_int 24) (const_int 25)
9249 (const_int 26) (const_int 27)
9250 (const_int 28) (const_int 29)
9251 (const_int 30) (const_int 31)])))]
9252 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9255 || REG_P (operands[0])
9256 || !EXT_REX_SSE_REG_P (operands[1]))
9259 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9261 "&& reload_completed
9263 || REG_P (operands[0])
9264 || !EXT_REX_SSE_REG_P (operands[1]))"
9265 [(set (match_dup 0) (match_dup 1))]
9267 if (!TARGET_AVX512VL
9268 && REG_P (operands[0])
9269 && EXT_REX_SSE_REG_P (operands[1]))
9270 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
9272 operands[1] = gen_lowpart (V32QImode, operands[1]);
9274 [(set_attr "type" "sselog1")
9275 (set_attr "prefix_extra" "1")
9276 (set_attr "length_immediate" "1")
9277 (set_attr "memory" "none,load,store")
9278 (set_attr "prefix" "evex")
9279 (set_attr "mode" "XI")])
;; High 256 bits of V64QI: always vextracti64x4 with immediate 1.
9281 (define_insn "vec_extract_hi_v64qi"
9282 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
9284 (match_operand:V64QI 1 "register_operand" "v")
9285 (parallel [(const_int 32) (const_int 33)
9286 (const_int 34) (const_int 35)
9287 (const_int 36) (const_int 37)
9288 (const_int 38) (const_int 39)
9289 (const_int 40) (const_int 41)
9290 (const_int 42) (const_int 43)
9291 (const_int 44) (const_int 45)
9292 (const_int 46) (const_int 47)
9293 (const_int 48) (const_int 49)
9294 (const_int 50) (const_int 51)
9295 (const_int 52) (const_int 53)
9296 (const_int 54) (const_int 55)
9297 (const_int 56) (const_int 57)
9298 (const_int 58) (const_int 59)
9299 (const_int 60) (const_int 61)
9300 (const_int 62) (const_int 63)])))]
9302 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9303 [(set_attr "type" "sselog1")
9304 (set_attr "prefix_extra" "1")
9305 (set_attr "length_immediate" "1")
9306 (set_attr "prefix" "evex")
9307 (set_attr "mode" "XI")])
;; Low half of a 256-bit V32QI: split to a lowpart move after reload.
9309 (define_insn_and_split "vec_extract_lo_v32qi"
9310 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
9312 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
9313 (parallel [(const_int 0) (const_int 1)
9314 (const_int 2) (const_int 3)
9315 (const_int 4) (const_int 5)
9316 (const_int 6) (const_int 7)
9317 (const_int 8) (const_int 9)
9318 (const_int 10) (const_int 11)
9319 (const_int 12) (const_int 13)
9320 (const_int 14) (const_int 15)])))]
9321 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9323 "&& reload_completed"
9324 [(set (match_dup 0) (match_dup 1))]
9325 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
;; High half of V32QI: vextract128 (AVX2), 32x4 (AVX512DQ+VL), or 32x4
;; from the containing zmm via %g1 (bare AVX512F) -- parallels v16hi.
9327 (define_insn "vec_extract_hi_v32qi"
9328 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
9330 (match_operand:V32QI 1 "register_operand" "x,v,v")
9331 (parallel [(const_int 16) (const_int 17)
9332 (const_int 18) (const_int 19)
9333 (const_int 20) (const_int 21)
9334 (const_int 22) (const_int 23)
9335 (const_int 24) (const_int 25)
9336 (const_int 26) (const_int 27)
9337 (const_int 28) (const_int 29)
9338 (const_int 30) (const_int 31)])))]
9341 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9342 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9343 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9344 [(set_attr "type" "sselog1")
9345 (set_attr "prefix_extra" "1")
9346 (set_attr "length_immediate" "1")
9347 (set_attr "isa" "*,avx512dq,avx512f")
9348 (set_attr "prefix" "vex,evex,evex")
9349 (set_attr "mode" "OI")])
9351 ;; Modes handled by vec_extract patterns.
9352 (define_mode_iterator VEC_EXTRACT_MODE
9353 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
9354 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
9355 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
9356 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
9357 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
9358 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
9359 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
;; Scalar-element extract: defer entirely to the expander helper.
9361 (define_expand "vec_extract<mode><ssescalarmodelower>"
9362 [(match_operand:<ssescalarmode> 0 "register_operand")
9363 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
9364 (match_operand 2 "const_int_operand")]
9367 ix86_expand_vector_extract (false, operands[0], operands[1],
9368 INTVAL (operands[2]));
;; Half-vector extract: pick the hi or lo pattern from the selector.
9372 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
9373 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9374 (match_operand:V_256_512 1 "register_operand")
9375 (match_operand 2 "const_0_to_1_operand")]
9378 if (INTVAL (operands[2]))
9379 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]))
9381 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
9385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9387 ;; Parallel double-precision floating point element swizzling
9389 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd: interleave the high double of each 128-bit lane
;; of operands 1 and 2 (optionally masked via <mask_name>).
9391 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
9392 [(set (match_operand:V8DF 0 "register_operand" "=v")
9395 (match_operand:V8DF 1 "register_operand" "v")
9396 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9397 (parallel [(const_int 1) (const_int 9)
9398 (const_int 3) (const_int 11)
9399 (const_int 5) (const_int 13)
9400 (const_int 7) (const_int 15)])))]
9402 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9403 [(set_attr "type" "sselog")
9404 (set_attr "prefix" "evex")
9405 (set_attr "mode" "V8DF")])
9407 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9408 (define_insn "avx_unpckhpd256<mask_name>"
9409 [(set (match_operand:V4DF 0 "register_operand" "=v")
9412 (match_operand:V4DF 1 "register_operand" "v")
9413 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9414 (parallel [(const_int 1) (const_int 5)
9415 (const_int 3) (const_int 7)])))]
9416 "TARGET_AVX && <mask_avx512vl_condition>"
9417 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9418 [(set_attr "type" "sselog")
9419 (set_attr "prefix" "vex")
9420 (set_attr "mode" "V4DF")])
;; Cross-lane high interleave for V4DF: composed from two in-lane unpck
;; results (temporaries operands[3]/[4]) followed by a lane shuffle.
;; NOTE(review): several interior lines of this expand are missing from
;; this dump (numbering gaps) -- confirm against upstream sse.md.
9422 (define_expand "vec_interleave_highv4df"
9426 (match_operand:V4DF 1 "register_operand")
9427 (match_operand:V4DF 2 "nonimmediate_operand"))
9428 (parallel [(const_int 0) (const_int 4)
9429 (const_int 2) (const_int 6)])))
9435 (parallel [(const_int 1) (const_int 5)
9436 (const_int 3) (const_int 7)])))
9437 (set (match_operand:V4DF 0 "register_operand")
9442 (parallel [(const_int 2) (const_int 3)
9443 (const_int 6) (const_int 7)])))]
9446 operands[3] = gen_reg_rtx (V4DFmode);
9447 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpckhpd: merge under k-mask operand 4.
9451 (define_insn "avx512vl_unpckhpd128_mask"
9452 [(set (match_operand:V2DF 0 "register_operand" "=v")
9456 (match_operand:V2DF 1 "register_operand" "v")
9457 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9458 (parallel [(const_int 1) (const_int 3)]))
9459 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9460 (match_operand:QI 4 "register_operand" "Yk")))]
9462 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9463 [(set_attr "type" "sselog")
9464 (set_attr "prefix" "evex")
9465 (set_attr "mode" "V2DF")])
;; V2DF high interleave.  The expander forces operand 2 into a register
;; when the operand combination is not directly representable.
9467 (define_expand "vec_interleave_highv2df"
9468 [(set (match_operand:V2DF 0 "register_operand")
9471 (match_operand:V2DF 1 "nonimmediate_operand")
9472 (match_operand:V2DF 2 "nonimmediate_operand"))
9473 (parallel [(const_int 1)
9477 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
9478 operands[2] = force_reg (V2DFmode, operands[2]);
;; Six alternatives: SSE2 unpckhpd, AVX vunpckhpd, SSE3 movddup from
;; the high memory half, and movlpd/movhpd forms for scalar moves.
9481 (define_insn "*vec_interleave_highv2df"
9482 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
9485 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
9486 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
9487 (parallel [(const_int 1)
9489 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
9491 unpckhpd\t{%2, %0|%0, %2}
9492 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
9493 %vmovddup\t{%H1, %0|%0, %H1}
9494 movlpd\t{%H1, %0|%0, %H1}
9495 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
9496 %vmovhpd\t{%1, %0|%q0, %1}"
9497 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9498 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9499 (set (attr "prefix_data16")
9500 (if_then_else (eq_attr "alternative" "3,5")
9502 (const_string "*")))
9503 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9504 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; 512-bit movddup: duplicate the even-indexed double of each lane.
9506 (define_expand "avx512f_movddup512<mask_name>"
9507 [(set (match_operand:V8DF 0 "register_operand")
9510 (match_operand:V8DF 1 "nonimmediate_operand")
9512 (parallel [(const_int 0) (const_int 8)
9513 (const_int 2) (const_int 10)
9514 (const_int 4) (const_int 12)
9515 (const_int 6) (const_int 14)])))]
;; 512-bit vunpcklpd expander: low-double interleave per 128-bit lane.
9518 (define_expand "avx512f_unpcklpd512<mask_name>"
9519 [(set (match_operand:V8DF 0 "register_operand")
9522 (match_operand:V8DF 1 "register_operand")
9523 (match_operand:V8DF 2 "nonimmediate_operand"))
9524 (parallel [(const_int 0) (const_int 8)
9525 (const_int 2) (const_int 10)
9526 (const_int 4) (const_int 12)
9527 (const_int 6) (const_int 14)])))]
;; Matcher: when operand 2 duplicates operand 1 ("1" constraint) emit
;; vmovddup, otherwise the two-source vunpcklpd.
9530 (define_insn "*avx512f_unpcklpd512<mask_name>"
9531 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
9534 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
9535 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
9536 (parallel [(const_int 0) (const_int 8)
9537 (const_int 2) (const_int 10)
9538 (const_int 4) (const_int 12)
9539 (const_int 6) (const_int 14)])))]
9542 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
9543 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9544 [(set_attr "type" "sselog")
9545 (set_attr "prefix" "evex")
9546 (set_attr "mode" "V8DF")])
9548 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit movddup: duplicate even-indexed doubles, per lane.
9549 (define_expand "avx_movddup256<mask_name>"
9550 [(set (match_operand:V4DF 0 "register_operand")
9553 (match_operand:V4DF 1 "nonimmediate_operand")
9555 (parallel [(const_int 0) (const_int 4)
9556 (const_int 2) (const_int 6)])))]
9557 "TARGET_AVX && <mask_avx512vl_condition>")
;; 256-bit vunpcklpd expander.
9559 (define_expand "avx_unpcklpd256<mask_name>"
9560 [(set (match_operand:V4DF 0 "register_operand")
9563 (match_operand:V4DF 1 "register_operand")
9564 (match_operand:V4DF 2 "nonimmediate_operand"))
9565 (parallel [(const_int 0) (const_int 4)
9566 (const_int 2) (const_int 6)])))]
9567 "TARGET_AVX && <mask_avx512vl_condition>")
;; Matcher: duplicated-operand case ("1" constraint) becomes vmovddup.
9569 (define_insn "*avx_unpcklpd256<mask_name>"
9570 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9573 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9574 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9575 (parallel [(const_int 0) (const_int 4)
9576 (const_int 2) (const_int 6)])))]
9577 "TARGET_AVX && <mask_avx512vl_condition>"
9579 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9580 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9581 [(set_attr "type" "sselog")
9582 (set_attr "prefix" "vex")
9583 (set_attr "mode" "V4DF")])
;; Cross-lane low interleave for V4DF, built (like the high variant)
;; from two in-lane unpck temporaries plus a final lane shuffle.
;; NOTE(review): interior lines of this expand are missing from this
;; dump (numbering gaps) -- confirm against upstream sse.md.
9585 (define_expand "vec_interleave_lowv4df"
9589 (match_operand:V4DF 1 "register_operand")
9590 (match_operand:V4DF 2 "nonimmediate_operand"))
9591 (parallel [(const_int 0) (const_int 4)
9592 (const_int 2) (const_int 6)])))
9598 (parallel [(const_int 1) (const_int 5)
9599 (const_int 3) (const_int 7)])))
9600 (set (match_operand:V4DF 0 "register_operand")
9605 (parallel [(const_int 0) (const_int 1)
9606 (const_int 4) (const_int 5)])))]
9609 operands[3] = gen_reg_rtx (V4DFmode);
9610 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpcklpd: merge under k-mask operand 4.
9613 (define_insn "avx512vl_unpcklpd128_mask"
9614 [(set (match_operand:V2DF 0 "register_operand" "=v")
9618 (match_operand:V2DF 1 "register_operand" "v")
9619 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9620 (parallel [(const_int 0) (const_int 2)]))
9621 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9622 (match_operand:QI 4 "register_operand" "Yk")))]
9624 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9625 [(set_attr "type" "sselog")
9626 (set_attr "prefix" "evex")
9627 (set_attr "mode" "V2DF")])
;; V2DF low interleave.  The expander forces operand 1 into a register
;; when the operand combination is not directly representable.
9629 (define_expand "vec_interleave_lowv2df"
9630 [(set (match_operand:V2DF 0 "register_operand")
9633 (match_operand:V2DF 1 "nonimmediate_operand")
9634 (match_operand:V2DF 2 "nonimmediate_operand"))
9635 (parallel [(const_int 0)
9639 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9640 operands[1] = force_reg (V2DFmode, operands[1]);
;; Six alternatives mirroring *vec_interleave_highv2df: SSE2 unpcklpd,
;; AVX vunpcklpd, SSE3 movddup from memory, and movhpd/movlpd forms.
9643 (define_insn "*vec_interleave_lowv2df"
9644 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9647 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9648 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9649 (parallel [(const_int 0)
9651 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9653 unpcklpd\t{%2, %0|%0, %2}
9654 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9655 %vmovddup\t{%1, %0|%0, %q1}
9656 movhpd\t{%2, %0|%0, %q2}
9657 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9658 %vmovlpd\t{%2, %H0|%H0, %2}"
9659 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9660 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9661 (set (attr "prefix_data16")
9662 (if_then_else (eq_attr "alternative" "3,5")
9664 (const_string "*")))
9665 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9666 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Split: a duplicated low double stored to memory becomes two scalar
;; DF stores of the same value.  (The define_split header line itself is
;; missing from this dump -- confirm against upstream sse.md.)
9669 [(set (match_operand:V2DF 0 "memory_operand")
9672 (match_operand:V2DF 1 "register_operand")
9674 (parallel [(const_int 0)
9676 "TARGET_SSE3 && reload_completed"
9679 rtx low = gen_lowpart (DFmode, operands[1]);
9681 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9682 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split: selecting element i twice (indices i and i+2) from a memory
;; V2DF becomes a vec_duplicate of the single scalar at offset i*8.
9687 [(set (match_operand:V2DF 0 "register_operand")
9690 (match_operand:V2DF 1 "memory_operand")
9692 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9693 (match_operand:SI 3 "const_int_operand")])))]
9694 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9695 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9697 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalefss/sd (masking and embedded rounding via subst attrs).
9700 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9701 [(set (match_operand:VF_128 0 "register_operand" "=v")
9704 [(match_operand:VF_128 1 "register_operand" "v")
9705 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9710 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9711 [(set_attr "prefix" "evex")
9712 (set_attr "mode" "<ssescalarmode>")])
;; Packed vscalefps/pd over all AVX512VL vector widths.
9714 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9715 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9717 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9718 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9721 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9722 [(set_attr "prefix" "evex")
9723 (set_attr "mode" "<MODE>")])
;; Zero-masked vpternlog expander: forwards to the maskz_1 pattern with
;; a zero vector as the merge operand.
9725 (define_expand "<avx512>_vternlog<mode>_maskz"
9726 [(match_operand:VI48_AVX512VL 0 "register_operand")
9727 (match_operand:VI48_AVX512VL 1 "register_operand")
9728 (match_operand:VI48_AVX512VL 2 "register_operand")
9729 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9730 (match_operand:SI 4 "const_0_to_255_operand")
9731 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9734 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9735 operands[0], operands[1], operands[2], operands[3],
9736 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog: ternary bitwise logic selected by the imm8 operand 4;
;; operand 1 is both a source and the destination ("0" constraint).
9740 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9741 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9742 (unspec:VI48_AVX512VL
9743 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9744 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9745 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9746 (match_operand:SI 4 "const_0_to_255_operand")]
9749 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9750 [(set_attr "type" "sselog")
9751 (set_attr "prefix" "evex")
9752 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpternlog under k-mask operand 5.
9754 (define_insn "<avx512>_vternlog<mode>_mask"
9755 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9756 (vec_merge:VI48_AVX512VL
9757 (unspec:VI48_AVX512VL
9758 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9759 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9760 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9761 (match_operand:SI 4 "const_0_to_255_operand")]
9764 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9766 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9767 [(set_attr "type" "sselog")
9768 (set_attr "prefix" "evex")
9769 (set_attr "mode" "<sseinsnmode>")])
;; vgetexpps/pd: extract biased exponents as floats (masking/SAE via
;; subst attributes).
9771 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9772 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9773 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9776 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9777 [(set_attr "prefix" "evex")
9778 (set_attr "mode" "<MODE>")])
;; Scalar vgetexpss/sd: exponent of operand 2, upper elements from 1.
9780 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9781 [(set (match_operand:VF_128 0 "register_operand" "=v")
9784 [(match_operand:VF_128 1 "register_operand" "v")
9785 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9790 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9791 [(set_attr "prefix" "evex")
9792 (set_attr "mode" "<ssescalarmode>")])
;; valignd/q: concatenate sources and shift right by the imm8 element
;; count (operand 3).
9794 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9795 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9796 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9797 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9798 (match_operand:SI 3 "const_0_to_255_operand")]
9801 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9802 [(set_attr "prefix" "evex")
9803 (set_attr "mode" "<sseinsnmode>")])
9805 (define_expand "avx512f_shufps512_mask"
9806 [(match_operand:V16SF 0 "register_operand")
9807 (match_operand:V16SF 1 "register_operand")
9808 (match_operand:V16SF 2 "nonimmediate_operand")
9809 (match_operand:SI 3 "const_0_to_255_operand")
9810 (match_operand:V16SF 4 "register_operand")
9811 (match_operand:HI 5 "register_operand")]
9814 int mask = INTVAL (operands[3]);
9815 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9816 GEN_INT ((mask >> 0) & 3),
9817 GEN_INT ((mask >> 2) & 3),
9818 GEN_INT (((mask >> 4) & 3) + 16),
9819 GEN_INT (((mask >> 6) & 3) + 16),
9820 GEN_INT (((mask >> 0) & 3) + 4),
9821 GEN_INT (((mask >> 2) & 3) + 4),
9822 GEN_INT (((mask >> 4) & 3) + 20),
9823 GEN_INT (((mask >> 6) & 3) + 20),
9824 GEN_INT (((mask >> 0) & 3) + 8),
9825 GEN_INT (((mask >> 2) & 3) + 8),
9826 GEN_INT (((mask >> 4) & 3) + 24),
9827 GEN_INT (((mask >> 6) & 3) + 24),
9828 GEN_INT (((mask >> 0) & 3) + 12),
9829 GEN_INT (((mask >> 2) & 3) + 12),
9830 GEN_INT (((mask >> 4) & 3) + 28),
9831 GEN_INT (((mask >> 6) & 3) + 28),
9832 operands[4], operands[5]));
;; Zero-masked vfixupimm expander: forwards to the maskz_1 pattern with
;; a zero merge vector (SAE handled by the round_saeonly substs).
9837 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9838 [(match_operand:VF_AVX512VL 0 "register_operand")
9839 (match_operand:VF_AVX512VL 1 "register_operand")
9840 (match_operand:VF_AVX512VL 2 "register_operand")
9841 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9842 (match_operand:SI 4 "const_0_to_255_operand")
9843 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9846 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9847 operands[0], operands[1], operands[2], operands[3],
9848 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9849 <round_saeonly_expand_operand6>));
;; vfixupimmps/pd: fix up special values per the table in the integer
;; operand 3 and the imm8 operand 4; operand 1 is src/dest ("0").
9853 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9854 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9856 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9857 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9858 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9859 (match_operand:SI 4 "const_0_to_255_operand")]
9862 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9863 [(set_attr "prefix" "evex")
9864 (set_attr "mode" "<MODE>")])
;; Merge-masked vfixupimm under k-mask operand 5.
9866 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9867 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9868 (vec_merge:VF_AVX512VL
9870 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9871 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9872 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9873 (match_operand:SI 4 "const_0_to_255_operand")]
9876 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9878 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9879 [(set_attr "prefix" "evex")
9880 (set_attr "mode" "<MODE>")])
9882 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9883 [(match_operand:VF_128 0 "register_operand")
9884 (match_operand:VF_128 1 "register_operand")
9885 (match_operand:VF_128 2 "register_operand")
9886 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9887 (match_operand:SI 4 "const_0_to_255_operand")
9888 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9891 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9892 operands[0], operands[1], operands[2], operands[3],
9893 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9894 <round_saeonly_expand_operand6>));
9898 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9899 [(set (match_operand:VF_128 0 "register_operand" "=v")
9902 [(match_operand:VF_128 1 "register_operand" "0")
9903 (match_operand:VF_128 2 "register_operand" "v")
9904 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9905 (match_operand:SI 4 "const_0_to_255_operand")]
9910 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9911 [(set_attr "prefix" "evex")
9912 (set_attr "mode" "<ssescalarmode>")])
9914 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9915 [(set (match_operand:VF_128 0 "register_operand" "=v")
9919 [(match_operand:VF_128 1 "register_operand" "0")
9920 (match_operand:VF_128 2 "register_operand" "v")
9921 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9922 (match_operand:SI 4 "const_0_to_255_operand")]
9927 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9929 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9930 [(set_attr "prefix" "evex")
9931 (set_attr "mode" "<ssescalarmode>")])
9933 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9934 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9936 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9937 (match_operand:SI 2 "const_0_to_255_operand")]
9940 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9941 [(set_attr "length_immediate" "1")
9942 (set_attr "prefix" "evex")
9943 (set_attr "mode" "<MODE>")])
9945 (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
9946 [(set (match_operand:VF_128 0 "register_operand" "=v")
9949 [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
9950 (match_operand:SI 3 "const_0_to_255_operand")]
9952 (match_operand:VF_128 1 "register_operand" "v")
9955 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
9956 [(set_attr "length_immediate" "1")
9957 (set_attr "prefix" "evex")
9958 (set_attr "mode" "<MODE>")])
9960 (define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
9961 [(set (match_operand:VF_128 0 "register_operand" "=v")
9963 (vec_duplicate:VF_128
9964 (unspec:<ssescalarmode>
9965 [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9966 (match_operand:SI 3 "const_0_to_255_operand")]
9968 (match_operand:VF_128 1 "register_operand" "v")
9971 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
9972 [(set_attr "length_immediate" "1")
9973 (set_attr "prefix" "evex")
9974 (set_attr "mode" "<MODE>")])
;; One bit in mask selects 2 elements.
;; Recognizer for the explicit vec_select form produced by the expander
;; above.  The insn condition (partly elided here) checks that every
;; 128-bit lane repeats the first lane's selection pattern, offset by
;; 4/8/12; the output routine then re-packs operands 3-6 into the 8-bit
;; VSHUFPS immediate (operands 5/6 are biased by 16 because they index
;; the second half of the vec_concat).
9977 (define_insn "avx512f_shufps512_1<mask_name>"
9978 [(set (match_operand:V16SF 0 "register_operand" "=v")
9981 (match_operand:V16SF 1 "register_operand" "v")
9982 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9983 (parallel [(match_operand 3 "const_0_to_3_operand")
9984 (match_operand 4 "const_0_to_3_operand")
9985 (match_operand 5 "const_16_to_19_operand")
9986 (match_operand 6 "const_16_to_19_operand")
9987 (match_operand 7 "const_4_to_7_operand")
9988 (match_operand 8 "const_4_to_7_operand")
9989 (match_operand 9 "const_20_to_23_operand")
9990 (match_operand 10 "const_20_to_23_operand")
9991 (match_operand 11 "const_8_to_11_operand")
9992 (match_operand 12 "const_8_to_11_operand")
9993 (match_operand 13 "const_24_to_27_operand")
9994 (match_operand 14 "const_24_to_27_operand")
9995 (match_operand 15 "const_12_to_15_operand")
9996 (match_operand 16 "const_12_to_15_operand")
9997 (match_operand 17 "const_28_to_31_operand")
9998 (match_operand 18 "const_28_to_31_operand")])))]
10000 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
10001 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
10002 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
10003 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
10004 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
10005 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
10006 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
10007 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
10008 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
10009 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
10010 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
10011 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
10014 mask = INTVAL (operands[3]);
10015 mask |= INTVAL (operands[4]) << 2;
10016 mask |= (INTVAL (operands[5]) - 16) << 4;
10017 mask |= (INTVAL (operands[6]) - 16) << 6;
10018 operands[3] = GEN_INT (mask);
10020 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
10022 [(set_attr "type" "sselog")
10023 (set_attr "length_immediate" "1")
10024 (set_attr "prefix" "evex")
10025 (set_attr "mode" "V16SF")])
;; Masked 512-bit VSHUFPD.  One immediate bit selects one element:
;; even destination elements come from operand 1, odd ones from
;; operand 2 (indices 8..15 of the concatenated pair).

;; Expander: unpacks the 8 immediate bits into explicit indices.
10027 (define_expand "avx512f_shufpd512_mask"
10028 [(match_operand:V8DF 0 "register_operand")
10029 (match_operand:V8DF 1 "register_operand")
10030 (match_operand:V8DF 2 "nonimmediate_operand")
10031 (match_operand:SI 3 "const_0_to_255_operand")
10032 (match_operand:V8DF 4 "register_operand")
10033 (match_operand:QI 5 "register_operand")]
10036 int mask = INTVAL (operands[3]);
10037 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
10038 GEN_INT (mask & 1),
10039 GEN_INT (mask & 2 ? 9 : 8),
10040 GEN_INT (mask & 4 ? 3 : 2),
10041 GEN_INT (mask & 8 ? 11 : 10),
10042 GEN_INT (mask & 16 ? 5 : 4),
10043 GEN_INT (mask & 32 ? 13 : 12),
10044 GEN_INT (mask & 64 ? 7 : 6),
10045 GEN_INT (mask & 128 ? 15 : 14),
10046 operands[4], operands[5]));

;; Recognizer: rebuilds the immediate from the selection indices, each
;; de-biased by its lane/source offset.
10050 (define_insn "avx512f_shufpd512_1<mask_name>"
10051 [(set (match_operand:V8DF 0 "register_operand" "=v")
10054 (match_operand:V8DF 1 "register_operand" "v")
10055 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
10056 (parallel [(match_operand 3 "const_0_to_1_operand")
10057 (match_operand 4 "const_8_to_9_operand")
10058 (match_operand 5 "const_2_to_3_operand")
10059 (match_operand 6 "const_10_to_11_operand")
10060 (match_operand 7 "const_4_to_5_operand")
10061 (match_operand 8 "const_12_to_13_operand")
10062 (match_operand 9 "const_6_to_7_operand")
10063 (match_operand 10 "const_14_to_15_operand")])))]
10067 mask = INTVAL (operands[3]);
10068 mask |= (INTVAL (operands[4]) - 8) << 1;
10069 mask |= (INTVAL (operands[5]) - 2) << 2;
10070 mask |= (INTVAL (operands[6]) - 10) << 3;
10071 mask |= (INTVAL (operands[7]) - 4) << 4;
10072 mask |= (INTVAL (operands[8]) - 12) << 5;
10073 mask |= (INTVAL (operands[9]) - 6) << 6;
10074 mask |= (INTVAL (operands[10]) - 14) << 7;
10075 operands[3] = GEN_INT (mask);
10077 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
10079 [(set_attr "type" "sselog")
10080 (set_attr "length_immediate" "1")
10081 (set_attr "prefix" "evex")
10082 (set_attr "mode" "V8DF")])
;; 256-bit VSHUFPD: 4 immediate bits, one per element; odd elements
;; select from operand 2 (indices 4..7 of the concatenation).
10084 (define_expand "avx_shufpd256<mask_expand4_name>"
10085 [(match_operand:V4DF 0 "register_operand")
10086 (match_operand:V4DF 1 "register_operand")
10087 (match_operand:V4DF 2 "nonimmediate_operand")
10088 (match_operand:SI 3 "const_int_operand")]
10091 int mask = INTVAL (operands[3]);
10092 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
10095 GEN_INT (mask & 1),
10096 GEN_INT (mask & 2 ? 5 : 4),
10097 GEN_INT (mask & 4 ? 3 : 2),
10098 GEN_INT (mask & 8 ? 7 : 6)
10099 <mask_expand4_args>));

;; Recognizer for the explicit-index form; re-encodes the immediate.
10103 (define_insn "avx_shufpd256_1<mask_name>"
10104 [(set (match_operand:V4DF 0 "register_operand" "=v")
10107 (match_operand:V4DF 1 "register_operand" "v")
10108 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
10109 (parallel [(match_operand 3 "const_0_to_1_operand")
10110 (match_operand 4 "const_4_to_5_operand")
10111 (match_operand 5 "const_2_to_3_operand")
10112 (match_operand 6 "const_6_to_7_operand")])))]
10113 "TARGET_AVX && <mask_avx512vl_condition>"
10116 mask = INTVAL (operands[3]);
10117 mask |= (INTVAL (operands[4]) - 4) << 1;
10118 mask |= (INTVAL (operands[5]) - 2) << 2;
10119 mask |= (INTVAL (operands[6]) - 6) << 3;
10120 operands[3] = GEN_INT (mask);
10122 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
10124 [(set_attr "type" "sseshuf")
10125 (set_attr "length_immediate" "1")
10126 (set_attr "prefix" "vex")
10127 (set_attr "mode" "V4DF")])
;; 128-bit SHUFPD expander: 2 immediate bits, low element from op1,
;; high element from op2 (index 2 or 3 of the concatenation).
10129 (define_expand "sse2_shufpd<mask_expand4_name>"
10130 [(match_operand:V2DF 0 "register_operand")
10131 (match_operand:V2DF 1 "register_operand")
10132 (match_operand:V2DF 2 "vector_operand")
10133 (match_operand:SI 3 "const_int_operand")]
10136 int mask = INTVAL (operands[3]);
10137 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
10138 operands[2], GEN_INT (mask & 1),
10139 GEN_INT (mask & 2 ? 3 : 2)
10140 <mask_expand4_args>));

;; Merge-masked V2DF VSHUFPD (AVX512VL): vec_merge with mask reg op 6
;; and merge source op 5 (register or zero, "0C").
10144 (define_insn "sse2_shufpd_v2df_mask"
10145 [(set (match_operand:V2DF 0 "register_operand" "=v")
10149 (match_operand:V2DF 1 "register_operand" "v")
10150 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
10151 (parallel [(match_operand 3 "const_0_to_1_operand")
10152 (match_operand 4 "const_2_to_3_operand")]))
10153 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
10154 (match_operand:QI 6 "register_operand" "Yk")))]
10158 mask = INTVAL (operands[3]);
10159 mask |= (INTVAL (operands[4]) - 2) << 1;
10160 operands[3] = GEN_INT (mask);
10162 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
10164 [(set_attr "type" "sseshuf")
10165 (set_attr "length_immediate" "1")
10166 (set_attr "prefix" "evex")
10167 (set_attr "mode" "V2DF")])
;; punpcklqdq and punpckhqdq are shorter than shufpd.

;; Interleave the high quadwords of two sources (VPUNPCKHQDQ), at 256,
;; 512 and 128 bits respectively.
10170 (define_insn "avx2_interleave_highv4di<mask_name>"
10171 [(set (match_operand:V4DI 0 "register_operand" "=v")
10174 (match_operand:V4DI 1 "register_operand" "v")
10175 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10176 (parallel [(const_int 1)
10180 "TARGET_AVX2 && <mask_avx512vl_condition>"
10181 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10182 [(set_attr "type" "sselog")
10183 (set_attr "prefix" "vex")
10184 (set_attr "mode" "OI")])

;; 512-bit form; the parallel lists odd indices of the concatenated
;; pair: elt 1/9, 3/11, 5/13, 7/15 interleaved per 128-bit lane.
10186 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
10187 [(set (match_operand:V8DI 0 "register_operand" "=v")
10190 (match_operand:V8DI 1 "register_operand" "v")
10191 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10192 (parallel [(const_int 1) (const_int 9)
10193 (const_int 3) (const_int 11)
10194 (const_int 5) (const_int 13)
10195 (const_int 7) (const_int 15)])))]
10197 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10198 [(set_attr "type" "sselog")
10199 (set_attr "prefix" "evex")
10200 (set_attr "mode" "XI")])

;; 128-bit form: two alternatives, legacy SSE2 (two-operand, op1 tied
;; to dest) and AVX three-operand encoding.
10202 (define_insn "vec_interleave_highv2di<mask_name>"
10203 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10206 (match_operand:V2DI 1 "register_operand" "0,v")
10207 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10208 (parallel [(const_int 1)
10210 "TARGET_SSE2 && <mask_avx512vl_condition>"
10212 punpckhqdq\t{%2, %0|%0, %2}
10213 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10214 [(set_attr "isa" "noavx,avx")
10215 (set_attr "type" "sselog")
10216 (set_attr "prefix_data16" "1,*")
10217 (set_attr "prefix" "orig,<mask_prefix>")
10218 (set_attr "mode" "TI")])
;; Interleave the low quadwords of two sources (VPUNPCKLQDQ), mirroring
;; the high-interleave patterns above; even indices are selected.
10220 (define_insn "avx2_interleave_lowv4di<mask_name>"
10221 [(set (match_operand:V4DI 0 "register_operand" "=v")
10224 (match_operand:V4DI 1 "register_operand" "v")
10225 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10226 (parallel [(const_int 0)
10230 "TARGET_AVX2 && <mask_avx512vl_condition>"
10231 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10232 [(set_attr "type" "sselog")
10233 (set_attr "prefix" "vex")
10234 (set_attr "mode" "OI")])

10236 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
10237 [(set (match_operand:V8DI 0 "register_operand" "=v")
10240 (match_operand:V8DI 1 "register_operand" "v")
10241 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10242 (parallel [(const_int 0) (const_int 8)
10243 (const_int 2) (const_int 10)
10244 (const_int 4) (const_int 12)
10245 (const_int 6) (const_int 14)])))]
10247 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10248 [(set_attr "type" "sselog")
10249 (set_attr "prefix" "evex")
10250 (set_attr "mode" "XI")])

;; 128-bit form with legacy-SSE2 and AVX alternatives.
10252 (define_insn "vec_interleave_lowv2di<mask_name>"
10253 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10256 (match_operand:V2DI 1 "register_operand" "0,v")
10257 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10258 (parallel [(const_int 0)
10260 "TARGET_SSE2 && <mask_avx512vl_condition>"
10262 punpcklqdq\t{%2, %0|%0, %2}
10263 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10264 [(set_attr "isa" "noavx,avx")
10265 (set_attr "type" "sselog")
10266 (set_attr "prefix_data16" "1,*")
10267 (set_attr "prefix" "orig,vex")
10268 (set_attr "mode" "TI")])
;; Unmasked 128-bit SHUFPD over both V2DF and V2DI (VI8F_128 iterator).
;; Rebuilds the 2-bit immediate, then picks the legacy two-operand or
;; the VEX three-operand form by alternative.
10270 (define_insn "sse2_shufpd_<mode>"
10271 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
10272 (vec_select:VI8F_128
10273 (vec_concat:<ssedoublevecmode>
10274 (match_operand:VI8F_128 1 "register_operand" "0,v")
10275 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
10276 (parallel [(match_operand 3 "const_0_to_1_operand")
10277 (match_operand 4 "const_2_to_3_operand")])))]
10281 mask = INTVAL (operands[3]);
10282 mask |= (INTVAL (operands[4]) - 2) << 1;
10283 operands[3] = GEN_INT (mask);
10285 switch (which_alternative)
10288 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
10290 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10292 gcc_unreachable ();
10295 [(set_attr "isa" "noavx,avx")
10296 (set_attr "type" "sseshuf")
10297 (set_attr "length_immediate" "1")
10298 (set_attr "prefix" "orig,maybe_evex")
10299 (set_attr "mode" "V2DF")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c

;; Extract the high (index 1) DF element of a V2DF.  Alternatives cover
;; store-to-memory (movhpd), SSE register shuffles, and x87/GPR loads
;; from a memory-resident vector.
10303 (define_insn "sse2_storehpd"
10304 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
10306 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
10307 (parallel [(const_int 1)])))]
10308 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10310 %vmovhpd\t{%1, %0|%0, %1}
10312 vunpckhpd\t{%d1, %0|%0, %d1}
10316 [(set_attr "isa" "*,noavx,avx,*,*,*")
10317 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
10318 (set (attr "prefix_data16")
10320 (and (eq_attr "alternative" "0")
10321 (not (match_test "TARGET_AVX")))
10323 (const_string "*")))
10324 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
10325 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])

;; After reload, turn a high-element extract from memory into a plain
;; DF load at offset 8.
10328 [(set (match_operand:DF 0 "register_operand")
10330 (match_operand:V2DF 1 "memory_operand")
10331 (parallel [(const_int 1)])))]
10332 "TARGET_SSE2 && reload_completed"
10333 [(set (match_dup 0) (match_dup 1))]
10334 "operands[1] = adjust_address (operands[1], DFmode, 8);")

;; Pre-SSE2 fallback: extract the high DF element using SSE1 single-
;; precision moves (%H1 addresses the high 8 bytes).
10336 (define_insn "*vec_extractv2df_1_sse"
10337 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10339 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
10340 (parallel [(const_int 1)])))]
10341 "!TARGET_SSE2 && TARGET_SSE
10342 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10344 movhps\t{%1, %0|%0, %1}
10345 movhlps\t{%1, %0|%0, %1}
10346 movlps\t{%H1, %0|%0, %H1}"
10347 [(set_attr "type" "ssemov")
10348 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c

;; Extract the low (index 0) DF element of a V2DF.
10352 (define_insn "sse2_storelpd"
10353 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
10355 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
10356 (parallel [(const_int 0)])))]
10357 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10359 %vmovlpd\t{%1, %0|%0, %1}
10364 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
10365 (set (attr "prefix_data16")
10366 (if_then_else (eq_attr "alternative" "0")
10368 (const_string "*")))
10369 (set_attr "prefix" "maybe_vex")
10370 (set_attr "mode" "V1DF,DF,DF,DF,DF")])

;; After reload, a low-element extract is just a DF view of the vector.
10373 [(set (match_operand:DF 0 "register_operand")
10375 (match_operand:V2DF 1 "nonimmediate_operand")
10376 (parallel [(const_int 0)])))]
10377 "TARGET_SSE2 && reload_completed"
10378 [(set (match_dup 0) (match_dup 1))]
10379 "operands[1] = gen_lowpart (DFmode, operands[1]);")

;; Pre-SSE2 fallback for the low-element extract, via SSE1 moves.
10381 (define_insn "*vec_extractv2df_0_sse"
10382 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10384 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
10385 (parallel [(const_int 0)])))]
10386 "!TARGET_SSE2 && TARGET_SSE
10387 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10389 movlps\t{%1, %0|%0, %1}
10390 movaps\t{%1, %0|%0, %1}
10391 movlps\t{%1, %0|%0, %q1}"
10392 [(set_attr "type" "ssemov")
10393 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Replace the high DF element of a V2DF (MOVHPD and friends).

;; Expander wrapper: legalizes operands via ix86_fixup_binary_operands
;; before emitting sse2_loadhpd, then copies back if a temp was used.
10395 (define_expand "sse2_loadhpd_exp"
10396 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10399 (match_operand:V2DF 1 "nonimmediate_operand")
10400 (parallel [(const_int 0)]))
10401 (match_operand:DF 2 "nonimmediate_operand")))]
10404 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10406 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
10408 /* Fix up the destination if needed.  */
10409 if (dst != operands[0])
10410 emit_move_insn (operands[0], dst);

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
10417 (define_insn "sse2_loadhpd"
10418 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10419 "=x,v,x,v ,o,o ,o")
10422 (match_operand:V2DF 1 "nonimmediate_operand"
10423 " 0,v,0,v ,0,0 ,0")
10424 (parallel [(const_int 0)]))
10425 (match_operand:DF 2 "nonimmediate_operand"
10426 " m,m,x,Yv,x,*f,r")))]
10427 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10429 movhpd\t{%2, %0|%0, %2}
10430 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10431 unpcklpd\t{%2, %0|%0, %2}
10432 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10436 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10437 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
10438 (set (attr "prefix_data16")
10439 (if_then_else (eq_attr "alternative" "0")
10441 (const_string "*")))
10442 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
10443 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])

;; After reload, storing a reg into the high half of a memory-resident
;; V2DF is a plain DF store at offset 8.
10446 [(set (match_operand:V2DF 0 "memory_operand")
10448 (vec_concat:V2DF
10448 hold)
;; Replace the low DF element of a V2DF (MOVLPD/MOVSD and friends).

;; Expander wrapper, same legalization scheme as sse2_loadhpd_exp.
10454 (define_expand "sse2_loadlpd_exp"
10455 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10457 (match_operand:DF 2 "nonimmediate_operand")
10459 (match_operand:V2DF 1 "nonimmediate_operand")
10460 (parallel [(const_int 1)]))))]
10463 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10465 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
10467 /* Fix up the destination if needed.  */
10468 if (dst != operands[0])
10469 emit_move_insn (operands[0], dst);

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;; Alternative 0 ("C" = zero vector op1) is a zero-extending vmovq.
10476 (define_insn "sse2_loadlpd"
10477 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10478 "=v,x,v,x,v,x,x,v,m,m ,m")
10480 (match_operand:DF 2 "nonimmediate_operand"
10481 "vm,m,m,x,v,0,0,v,x,*f,r")
10483 (match_operand:V2DF 1 "nonimm_or_0_operand"
10484 " C,0,v,0,v,x,o,o,0,0 ,0")
10485 (parallel [(const_int 1)]))))]
10486 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10488 %vmovq\t{%2, %0|%0, %2}
10489 movlpd\t{%2, %0|%0, %2}
10490 vmovlpd\t{%2, %1, %0|%0, %1, %2}
10491 movsd\t{%2, %0|%0, %2}
10492 vmovsd\t{%2, %1, %0|%0, %1, %2}
10493 shufpd\t{$2, %1, %0|%0, %1, 2}
10494 movhpd\t{%H1, %0|%0, %H1}
10495 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
10499 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
10501 (cond [(eq_attr "alternative" "5")
10502 (const_string "sselog")
10503 (eq_attr "alternative" "9")
10504 (const_string "fmov")
10505 (eq_attr "alternative" "10")
10506 (const_string "imov")
10508 (const_string "ssemov")))
10509 (set (attr "prefix_data16")
10510 (if_then_else (eq_attr "alternative" "1,6")
10512 (const_string "*")))
10513 (set (attr "length_immediate")
10514 (if_then_else (eq_attr "alternative" "5")
10516 (const_string "*")))
10517 (set (attr "prefix")
10518 (cond [(eq_attr "alternative" "0")
10519 (const_string "maybe_vex")
10520 (eq_attr "alternative" "1,3,5,6")
10521 (const_string "orig")
10522 (eq_attr "alternative" "2,4,7")
10523 (const_string "maybe_evex")
10525 (const_string "*")))
10526 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])

;; After reload, storing a reg into the low half of a memory-resident
;; V2DF is a plain DF store at offset 0.
10529 [(set (match_operand:V2DF 0 "memory_operand")
10531 (match_operand:DF 1 "register_operand")
10532 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
10533 "TARGET_SSE2 && reload_completed"
10534 [(set (match_dup 0) (match_dup 1))]
10535 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; MOVSD-style merge: low element from operand 2, high element kept
;; from operand 1.  Wide alternative list covers reg/reg, reg/mem and
;; mem/reg placements with legacy, VEX and %v-prefixed encodings.
10537 (define_insn "sse2_movsd"
10538 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
10540 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
10541 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
10545 movsd\t{%2, %0|%0, %2}
10546 vmovsd\t{%2, %1, %0|%0, %1, %2}
10547 movlpd\t{%2, %0|%0, %q2}
10548 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
10549 %vmovlpd\t{%2, %0|%q0, %2}
10550 shufpd\t{$2, %1, %0|%0, %1, 2}
10551 movhps\t{%H1, %0|%0, %H1}
10552 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
10553 %vmovhps\t{%1, %H0|%H0, %1}"
10554 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
10557 (eq_attr "alternative" "5")
10558 (const_string "sselog")
10559 (const_string "ssemov")))
10560 (set (attr "prefix_data16")
10562 (and (eq_attr "alternative" "2,4")
10563 (not (match_test "TARGET_AVX")))
10565 (const_string "*")))
10566 (set (attr "length_immediate")
10567 (if_then_else (eq_attr "alternative" "5")
10569 (const_string "*")))
10570 (set (attr "prefix")
10571 (cond [(eq_attr "alternative" "1,3,7")
10572 (const_string "maybe_evex")
10573 (eq_attr "alternative" "4,8")
10574 (const_string "maybe_vex")
10576 (const_string "orig")))
10577 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Broadcast one DF across a V2DF.  SSE2 fallback (alt 0), SSE3
;; movddup, and EVEX-masked AVX512VL movddup.
10579 (define_insn "vec_dupv2df<mask_name>"
10580 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
10581 (vec_duplicate:V2DF
10582 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10583 "TARGET_SSE2 && <mask_avx512vl_condition>"
10586 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10587 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10588 [(set_attr "isa" "noavx,sse3,avx512vl")
10589 (set_attr "type" "sselog1")
10590 (set_attr "prefix" "orig,maybe_vex,evex")
10591 (set_attr "mode" "V2DF,DF,DF")])

;; Build a V2DF from two DF values.  The "1" constraints (alts 3,4)
;; match op2 == op1 so a movddup can be used; "C" (alt 7) means op2 is
;; zero, yielding a zero-extending vmovq.  The insn condition allows
;; mem/mem only for the matched-duplicate movddup case.
10593 (define_insn "vec_concatv2df"
10594 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
10596 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10597 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
10599 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10600 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10602 unpcklpd\t{%2, %0|%0, %2}
10603 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10604 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10605 %vmovddup\t{%1, %0|%0, %1}
10606 vmovddup\t{%1, %0|%0, %1}
10607 movhpd\t{%2, %0|%0, %2}
10608 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10609 %vmovq\t{%1, %0|%0, %1}
10610 movlhps\t{%2, %0|%0, %2}
10611 movhps\t{%2, %0|%0, %2}"
10613 (cond [(eq_attr "alternative" "0,5")
10614 (const_string "sse2_noavx")
10615 (eq_attr "alternative" "1,6")
10616 (const_string "avx")
10617 (eq_attr "alternative" "2,4")
10618 (const_string "avx512vl")
10619 (eq_attr "alternative" "3")
10620 (const_string "sse3")
10621 (eq_attr "alternative" "7")
10622 (const_string "sse2")
10624 (const_string "noavx")))
10627 (eq_attr "alternative" "0,1,2,3,4")
10628 (const_string "sselog")
10629 (const_string "ssemov")))
10630 (set (attr "prefix_data16")
10631 (if_then_else (eq_attr "alternative" "5")
10633 (const_string "*")))
10634 (set (attr "prefix")
10635 (cond [(eq_attr "alternative" "1,6")
10636 (const_string "vex")
10637 (eq_attr "alternative" "2,4")
10638 (const_string "evex")
10639 (eq_attr "alternative" "3,7")
10640 (const_string "maybe_vex")
10642 (const_string "orig")))
10643 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])

;; vmovq clears also the higher bits.
;; Insert a scalar into element 0 of an all-zero wide vector: a single
;; zero-extending vmovq on the 128-bit low part (%x0).
10646 (define_insn "vec_set<mode>_0"
10647 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10648 (vec_merge:VF2_512_256
10649 (vec_duplicate:VF2_512_256
10650 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
10651 (match_operand:VF2_512_256 1 "const0_operand" "C")
10654 "vmovq\t{%2, %x0|%x0, %2}"
10655 [(set_attr "type" "ssemov")
10656 (set_attr "prefix" "maybe_evex")
10657 (set_attr "mode" "DF")])
10659 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10661 ;; Parallel integer down-conversion operations
10663 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10665 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10666 (define_mode_attr pmov_src_mode
10667 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10668 (define_mode_attr pmov_src_lower
10669 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10670 (define_mode_attr pmov_suff_1
10671 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
10673 (define_expand "trunc<pmov_src_lower><mode>2"
10674 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
10675 (truncate:PMOV_DST_MODE_1
10676 (match_operand:<pmov_src_mode> 1 "register_operand")))]
10679 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10680 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10681 (any_truncate:PMOV_DST_MODE_1
10682 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10684 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10685 [(set_attr "type" "ssemov")
10686 (set_attr "memory" "none,store")
10687 (set_attr "prefix" "evex")
10688 (set_attr "mode" "<sseinsnmode>")])
10690 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10691 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10692 (vec_merge:PMOV_DST_MODE_1
10693 (any_truncate:PMOV_DST_MODE_1
10694 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10695 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10696 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10698 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10699 [(set_attr "type" "ssemov")
10700 (set_attr "memory" "none,store")
10701 (set_attr "prefix" "evex")
10702 (set_attr "mode" "<sseinsnmode>")])
10704 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10705 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10706 (vec_merge:PMOV_DST_MODE_1
10707 (any_truncate:PMOV_DST_MODE_1
10708 (match_operand:<pmov_src_mode> 1 "register_operand"))
10710 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10713 (define_expand "truncv32hiv32qi2"
10714 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10716 (match_operand:V32HI 1 "register_operand")))]
10719 (define_insn "avx512bw_<code>v32hiv32qi2"
10720 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10721 (any_truncate:V32QI
10722 (match_operand:V32HI 1 "register_operand" "v,v")))]
10724 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
10725 [(set_attr "type" "ssemov")
10726 (set_attr "memory" "none,store")
10727 (set_attr "prefix" "evex")
10728 (set_attr "mode" "XI")])
10730 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
10731 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10733 (any_truncate:V32QI
10734 (match_operand:V32HI 1 "register_operand" "v,v"))
10735 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
10736 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
10738 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10739 [(set_attr "type" "ssemov")
10740 (set_attr "memory" "none,store")
10741 (set_attr "prefix" "evex")
10742 (set_attr "mode" "XI")])
10744 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
10745 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10747 (any_truncate:V32QI
10748 (match_operand:V32HI 1 "register_operand"))
10750 (match_operand:SI 2 "register_operand")))]
;; Destination modes for the half-width vpmov truncations where the result
;; fills half of an XMM register (V16QI needs AVX512BW for the wb form).
10753 (define_mode_iterator PMOV_DST_MODE_2
10754 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
;; Maps each destination mode to the vpmov source/dest suffix pair.
10755 (define_mode_attr pmov_suff_2
10756 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Generic truncation entry point for double-width -> PMOV_DST_MODE_2.
10758 (define_expand "trunc<ssedoublemodelower><mode>2"
10759 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10760 (truncate:PMOV_DST_MODE_2
10761 (match_operand:<ssedoublemode> 1 "register_operand")))]
;; Unmasked vpmov{,s,us}<pmov_suff_2>: register or direct-to-memory form.
10764 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
10765 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10766 (any_truncate:PMOV_DST_MODE_2
10767 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
10769 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
10770 [(set_attr "type" "ssemov")
10771 (set_attr "memory" "none,store")
10772 (set_attr "prefix" "evex")
10773 (set_attr "mode" "<sseinsnmode>")])
;; Masked variant: operand 2 is the merge source (const0 selects
;; zero-masking via %N2), operand 3 the mask register.
10775 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
10776 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10777 (vec_merge:PMOV_DST_MODE_2
10778 (any_truncate:PMOV_DST_MODE_2
10779 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
10780 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
10781 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10783 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10784 [(set_attr "type" "ssemov")
10785 (set_attr "memory" "none,store")
10786 (set_attr "prefix" "evex")
10787 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked-store form of the truncation above.
10789 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
10790 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10791 (vec_merge:PMOV_DST_MODE_2
10792 (any_truncate:PMOV_DST_MODE_2
10793 (match_operand:<ssedoublemode> 1 "register_operand"))
10795 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Source modes whose QImode truncation is narrower than a full vector
;; register; the result occupies only the low part of an XMM register,
;; the rest being zeroed (see pmov_dst_zeroed_3 below).
10798 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
;; Lower-case destination mode name, used to build pattern names.
10799 (define_mode_attr pmov_dst_3_lower
10800 [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
;; Actual (partial) destination mode of the truncation.
10801 (define_mode_attr pmov_dst_3
10802 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
;; Mode of the zero padding that fills the V16QI result up to 16 bytes.
10803 (define_mode_attr pmov_dst_zeroed_3
10804 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
;; vpmov suffix (source element -> byte) for each source mode.
10805 (define_mode_attr pmov_suff_3
10806 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Truncation expander: rewrites the partial-vector destination as a
;; V16QI subreg and emits the avx512vl insn with explicit zero padding.
10808 (define_expand "trunc<mode><pmov_dst_3_lower>2"
10809 [(set (match_operand:<pmov_dst_3> 0 "register_operand")
10810 (truncate:<pmov_dst_3>
10811 (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
10814 operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0);
10815 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0],
10817 CONST0_RTX (<pmov_dst_zeroed_3>mode)));
;; vpmov{,s,us}<pmov_suff_3> producing a V16QI whose low lanes hold the
;; truncated elements and whose high lanes are zero (operand 2).
10821 (define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
10822 [(set (match_operand:V16QI 0 "register_operand" "=v")
10824 (any_truncate:<pmov_dst_3>
10825 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
10826 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
10828 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10829 [(set_attr "type" "ssemov")
10830 (set_attr "prefix" "evex")
10831 (set_attr "mode" "TI")])
;; Direct V2DI -> V2QI truncating store (vpmov{,s,us}qb to a 2-byte
;; memory destination).
10833 (define_insn "*avx512vl_<code>v2div2qi2_store_1"
10834 [(set (match_operand:V2QI 0 "memory_operand" "=m")
10836 (match_operand:V2DI 1 "register_operand" "v")))]
10838 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10839 [(set_attr "type" "ssemov")
10840 (set_attr "memory" "store")
10841 (set_attr "prefix" "evex")
10842 (set_attr "mode" "TI")])
;; Pre-reload splitter: an HImode store of the truncation result is
;; rewritten as the V2QI store pattern above (destination re-typed with
;; adjust_address_nv).
10844 (define_insn_and_split "*avx512vl_<code>v2div2qi2_store_2"
10845 [(set (match_operand:HI 0 "memory_operand")
10848 (match_operand:V2DI 1 "register_operand")) 0))]
10849 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10852 [(set (match_dup 0)
10853 (any_truncate:V2QI (match_dup 1)))]
10854 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
;; Masked V2DI->V2QI truncation into a V16QI register: low 2 bytes are
;; merged under mask operand 3 with operand 2, upper 14 bytes are zero.
10856 (define_insn "avx512vl_<code>v2div2qi2_mask"
10857 [(set (match_operand:V16QI 0 "register_operand" "=v")
10861 (match_operand:V2DI 1 "register_operand" "v"))
10863 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10864 (parallel [(const_int 0) (const_int 1)]))
10865 (match_operand:QI 3 "register_operand" "Yk"))
10866 (const_vector:V14QI [(const_int 0) (const_int 0)
10867 (const_int 0) (const_int 0)
10868 (const_int 0) (const_int 0)
10869 (const_int 0) (const_int 0)
10870 (const_int 0) (const_int 0)
10871 (const_int 0) (const_int 0)
10872 (const_int 0) (const_int 0)])))]
10874 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10875 [(set_attr "type" "ssemov")
10876 (set_attr "prefix" "evex")
10877 (set_attr "mode" "TI")])
;; Zero-masking variant of the above (merge source is constant zero, so
;; the template prints an explicit {z}).
10879 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
10880 [(set (match_operand:V16QI 0 "register_operand" "=v")
10884 (match_operand:V2DI 1 "register_operand" "v"))
10885 (const_vector:V2QI [(const_int 0) (const_int 0)])
10886 (match_operand:QI 2 "register_operand" "Yk"))
10887 (const_vector:V14QI [(const_int 0) (const_int 0)
10888 (const_int 0) (const_int 0)
10889 (const_int 0) (const_int 0)
10890 (const_int 0) (const_int 0)
10891 (const_int 0) (const_int 0)
10892 (const_int 0) (const_int 0)
10893 (const_int 0) (const_int 0)])))]
10895 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10896 [(set_attr "type" "ssemov")
10897 (set_attr "prefix" "evex")
10898 (set_attr "mode" "TI")])
;; Masked truncating store to a V2QI memory destination.
10900 (define_insn "*avx512vl_<code>v2div2qi2_mask_store_1"
10901 [(set (match_operand:V2QI 0 "memory_operand" "=m")
10904 (match_operand:V2DI 1 "register_operand" "v"))
10906 (match_operand:QI 2 "register_operand" "Yk")))]
10908 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
10909 [(set_attr "type" "ssemov")
10910 (set_attr "memory" "store")
10911 (set_attr "prefix" "evex")
10912 (set_attr "mode" "TI")])
;; Pre-reload splitter: masked HImode store form -> masked V2QI store.
10914 (define_insn_and_split "avx512vl_<code>v2div2qi2_mask_store_2"
10915 [(set (match_operand:HI 0 "memory_operand")
10919 (match_operand:V2DI 1 "register_operand"))
10925 (parallel [(const_int 0) (const_int 1)]))
10926 (match_operand:QI 2 "register_operand")) 0))]
10927 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10930 [(set (match_dup 0)
10932 (any_truncate:V2QI (match_dup 1))
10935 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
;; Direct truncating store of a 4-element source (VI4_128_8_256) to a
;; 4-byte memory destination.
10937 (define_insn "*avx512vl_<code><mode>v4qi2_store_1"
10938 [(set (match_operand:V4QI 0 "memory_operand" "=m")
10940 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
10942 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10943 [(set_attr "type" "ssemov")
10944 (set_attr "memory" "store")
10945 (set_attr "prefix" "evex")
10946 (set_attr "mode" "TI")])
;; Pre-reload splitter: SImode store of the truncation -> V4QI store.
10948 (define_insn_and_split "*avx512vl_<code><mode>v4qi2_store_2"
10949 [(set (match_operand:SI 0 "memory_operand")
10952 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
10953 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10956 [(set (match_dup 0)
10957 (any_truncate:V4QI (match_dup 1)))]
10958 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
;; Masked truncation into a V16QI register: low 4 bytes merged under
;; mask operand 3 with operand 2; upper 12 bytes zeroed.
10960 (define_insn "avx512vl_<code><mode>v4qi2_mask"
10961 [(set (match_operand:V16QI 0 "register_operand" "=v")
10965 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10967 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10968 (parallel [(const_int 0) (const_int 1)
10969 (const_int 2) (const_int 3)]))
10970 (match_operand:QI 3 "register_operand" "Yk"))
10971 (const_vector:V12QI [(const_int 0) (const_int 0)
10972 (const_int 0) (const_int 0)
10973 (const_int 0) (const_int 0)
10974 (const_int 0) (const_int 0)
10975 (const_int 0) (const_int 0)
10976 (const_int 0) (const_int 0)])))]
10978 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10979 [(set_attr "type" "ssemov")
10980 (set_attr "prefix" "evex")
10981 (set_attr "mode" "TI")])
;; Zero-masking variant (merge source is constant zero, explicit {z}).
10983 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
10984 [(set (match_operand:V16QI 0 "register_operand" "=v")
10988 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10989 (const_vector:V4QI [(const_int 0) (const_int 0)
10990 (const_int 0) (const_int 0)])
10991 (match_operand:QI 2 "register_operand" "Yk"))
10992 (const_vector:V12QI [(const_int 0) (const_int 0)
10993 (const_int 0) (const_int 0)
10994 (const_int 0) (const_int 0)
10995 (const_int 0) (const_int 0)
10996 (const_int 0) (const_int 0)
10997 (const_int 0) (const_int 0)])))]
10999 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11000 [(set_attr "type" "ssemov")
11001 (set_attr "prefix" "evex")
11002 (set_attr "mode" "TI")])
;; Masked truncating store to a V4QI memory destination.
11004 (define_insn "*avx512vl_<code><mode>v4qi2_mask_store_1"
11005 [(set (match_operand:V4QI 0 "memory_operand" "=m")
11008 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11010 (match_operand:QI 2 "register_operand" "Yk")))]
11012 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11013 [(set_attr "type" "ssemov")
11014 (set_attr "memory" "store")
11015 (set_attr "prefix" "evex")
11016 (set_attr "mode" "TI")])
;; Pre-reload splitter: masked SImode store form -> masked V4QI store.
11018 (define_insn_and_split "avx512vl_<code><mode>v4qi2_mask_store_2"
11019 [(set (match_operand:SI 0 "memory_operand")
11023 (match_operand:VI4_128_8_256 1 "register_operand"))
11029 (parallel [(const_int 0) (const_int 1)
11030 (const_int 2) (const_int 3)]))
11031 (match_operand:QI 2 "register_operand")) 0))]
11032 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11035 [(set (match_dup 0)
11037 (any_truncate:V4QI (match_dup 1))
11040 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
;; 8-element sources whose byte truncation yields V8QI (V8HI form needs
;; AVX512BW for vpmovwb).
11042 (define_mode_iterator VI2_128_BW_4_256
11043 [(V8HI "TARGET_AVX512BW") V8SI])
;; Direct truncating store to an 8-byte memory destination.
11045 (define_insn "*avx512vl_<code><mode>v8qi2_store_1"
11046 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11048 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")))]
11050 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
11051 [(set_attr "type" "ssemov")
11052 (set_attr "memory" "store")
11053 (set_attr "prefix" "evex")
11054 (set_attr "mode" "TI")])
;; Pre-reload splitter: DImode store of the truncation -> V8QI store.
11056 (define_insn_and_split "*avx512vl_<code><mode>v8qi2_store_2"
11057 [(set (match_operand:DI 0 "memory_operand" "=m")
11060 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) 0))]
11061 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11064 [(set (match_dup 0)
11065 (any_truncate:V8QI (match_dup 1)))]
11066 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Masked truncation into a V16QI register: low 8 bytes merged under
;; mask operand 3 with operand 2; upper 8 bytes zeroed.
11068 (define_insn "avx512vl_<code><mode>v8qi2_mask"
11069 [(set (match_operand:V16QI 0 "register_operand" "=v")
11073 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11075 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11076 (parallel [(const_int 0) (const_int 1)
11077 (const_int 2) (const_int 3)
11078 (const_int 4) (const_int 5)
11079 (const_int 6) (const_int 7)]))
11080 (match_operand:QI 3 "register_operand" "Yk"))
11081 (const_vector:V8QI [(const_int 0) (const_int 0)
11082 (const_int 0) (const_int 0)
11083 (const_int 0) (const_int 0)
11084 (const_int 0) (const_int 0)])))]
11086 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11087 [(set_attr "type" "ssemov")
11088 (set_attr "prefix" "evex")
11089 (set_attr "mode" "TI")])
;; Zero-masking variant (merge source is constant zero, explicit {z}).
11091 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
11092 [(set (match_operand:V16QI 0 "register_operand" "=v")
11096 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11097 (const_vector:V8QI [(const_int 0) (const_int 0)
11098 (const_int 0) (const_int 0)
11099 (const_int 0) (const_int 0)
11100 (const_int 0) (const_int 0)])
11101 (match_operand:QI 2 "register_operand" "Yk"))
11102 (const_vector:V8QI [(const_int 0) (const_int 0)
11103 (const_int 0) (const_int 0)
11104 (const_int 0) (const_int 0)
11105 (const_int 0) (const_int 0)])))]
11107 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11108 [(set_attr "type" "ssemov")
11109 (set_attr "prefix" "evex")
11110 (set_attr "mode" "TI")])
;; Masked truncating store to a V8QI memory destination.
11112 (define_insn "*avx512vl_<code><mode>v8qi2_mask_store_1"
11113 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11116 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11118 (match_operand:QI 2 "register_operand" "Yk")))]
11120 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11121 [(set_attr "type" "ssemov")
11122 (set_attr "memory" "store")
11123 (set_attr "prefix" "evex")
11124 (set_attr "mode" "TI")])
;; Pre-reload splitter: masked DImode store form -> masked V8QI store.
11126 (define_insn_and_split "avx512vl_<code><mode>v8qi2_mask_store_2"
11127 [(set (match_operand:DI 0 "memory_operand")
11131 (match_operand:VI2_128_BW_4_256 1 "register_operand"))
11137 (parallel [(const_int 0) (const_int 1)
11138 (const_int 2) (const_int 3)
11139 (const_int 4) (const_int 5)
11140 (const_int 6) (const_int 7)]))
11141 (match_operand:QI 2 "register_operand")) 0))]
11142 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11145 [(set (match_dup 0)
11147 (any_truncate:V8QI (match_dup 1))
11150 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Source modes whose HImode truncation is narrower than a full vector
;; register (result lands in the low part of a V8HI).
11152 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
;; Partial destination mode of the truncation.
11153 (define_mode_attr pmov_dst_4
11154 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
;; Mode of the zero padding filling the V8HI result to 16 bytes.
11155 (define_mode_attr pmov_dst_zeroed_4
11156 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
;; vpmov suffix (source element -> word) for each source mode.
11157 (define_mode_attr pmov_suff_4
11158 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Truncation expander: rewrites the partial-vector destination as a
;; V8HI subreg and emits the avx512vl insn with explicit zero padding.
11160 (define_expand "trunc<mode><pmov_dst_4>2"
11161 [(set (match_operand:<pmov_dst_4> 0 "register_operand")
11162 (truncate:<pmov_dst_4>
11163 (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
11166 operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0);
11167 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0],
11169 CONST0_RTX (<pmov_dst_zeroed_4>mode)));
;; vpmov{,s,us}<pmov_suff_4> producing a V8HI whose low lanes hold the
;; truncated elements and whose high lanes are zero (operand 2).
11174 (define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
11175 [(set (match_operand:V8HI 0 "register_operand" "=v")
11177 (any_truncate:<pmov_dst_4>
11178 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
11179 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
11181 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11182 [(set_attr "type" "ssemov")
11183 (set_attr "prefix" "evex")
11184 (set_attr "mode" "TI")])
;; Direct truncating store of a 4-element source to a V4HI (8-byte)
;; memory destination.
11186 (define_insn "*avx512vl_<code><mode>v4hi2_store_1"
11187 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11189 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
11191 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11192 [(set_attr "type" "ssemov")
11193 (set_attr "memory" "store")
11194 (set_attr "prefix" "evex")
11195 (set_attr "mode" "TI")])
;; Pre-reload splitter: DImode store of the truncation -> V4HI store.
11197 (define_insn_and_split "*avx512vl_<code><mode>v4hi2_store_2"
11198 [(set (match_operand:DI 0 "memory_operand")
11201 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
11202 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11205 [(set (match_dup 0)
11206 (any_truncate:V4HI (match_dup 1)))]
11207 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
;; Masked truncation into a V8HI register: low 4 words merged under
;; mask operand 3 with operand 2; upper 4 words zeroed.
11209 (define_insn "avx512vl_<code><mode>v4hi2_mask"
11210 [(set (match_operand:V8HI 0 "register_operand" "=v")
11214 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11216 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11217 (parallel [(const_int 0) (const_int 1)
11218 (const_int 2) (const_int 3)]))
11219 (match_operand:QI 3 "register_operand" "Yk"))
11220 (const_vector:V4HI [(const_int 0) (const_int 0)
11221 (const_int 0) (const_int 0)])))]
11223 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11224 [(set_attr "type" "ssemov")
11225 (set_attr "prefix" "evex")
11226 (set_attr "mode" "TI")])
;; Zero-masking variant (merge source is constant zero, explicit {z}).
11228 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
11229 [(set (match_operand:V8HI 0 "register_operand" "=v")
11233 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11234 (const_vector:V4HI [(const_int 0) (const_int 0)
11235 (const_int 0) (const_int 0)])
11236 (match_operand:QI 2 "register_operand" "Yk"))
11237 (const_vector:V4HI [(const_int 0) (const_int 0)
11238 (const_int 0) (const_int 0)])))]
11240 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11241 [(set_attr "type" "ssemov")
11242 (set_attr "prefix" "evex")
11243 (set_attr "mode" "TI")])
;; Masked truncating store to a V4HI memory destination.  In Intel
;; syntax the source register is printed at a wider view (%t1 = 256-bit
;; for a 4-byte-element source, %g1 = 512-bit otherwise).
11245 (define_insn "*avx512vl_<code><mode>v4hi2_mask_store_1"
11246 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11249 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11251 (match_operand:QI 2 "register_operand" "Yk")))]
11254 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
11255 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
11256 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
11258 [(set_attr "type" "ssemov")
11259 (set_attr "memory" "store")
11260 (set_attr "prefix" "evex")
11261 (set_attr "mode" "TI")])
;; Pre-reload splitter: masked DImode store form -> masked V4HI store.
11263 (define_insn_and_split "avx512vl_<code><mode>v4hi2_mask_store_2"
11264 [(set (match_operand:DI 0 "memory_operand")
11268 (match_operand:VI4_128_8_256 1 "register_operand"))
11274 (parallel [(const_int 0) (const_int 1)
11275 (const_int 2) (const_int 3)]))
11276 (match_operand:QI 2 "register_operand")) 0))]
11277 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11280 [(set (match_dup 0)
11282 (any_truncate:V4HI (match_dup 1))
11285 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
;; Direct V2DI -> V2HI truncating store (vpmov{,s,us}qw, 4-byte store).
11288 (define_insn "*avx512vl_<code>v2div2hi2_store_1"
11289 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11291 (match_operand:V2DI 1 "register_operand" "v")))]
11293 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
11294 [(set_attr "type" "ssemov")
11295 (set_attr "memory" "store")
11296 (set_attr "prefix" "evex")
11297 (set_attr "mode" "TI")])
;; Pre-reload splitter: SImode store of the truncation -> V2HI store.
11299 (define_insn_and_split "*avx512vl_<code>v2div2hi2_store_2"
11300 [(set (match_operand:SI 0 "memory_operand")
11303 (match_operand:V2DI 1 "register_operand")) 0))]
11304 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11307 [(set (match_dup 0)
11308 (any_truncate:V2HI (match_dup 1)))]
11309 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
;; Masked V2DI->V2HI truncation into a V8HI register: low 2 words merged
;; under mask operand 3 with operand 2; upper 6 words zeroed.
11311 (define_insn "avx512vl_<code>v2div2hi2_mask"
11312 [(set (match_operand:V8HI 0 "register_operand" "=v")
11316 (match_operand:V2DI 1 "register_operand" "v"))
11318 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11319 (parallel [(const_int 0) (const_int 1)]))
11320 (match_operand:QI 3 "register_operand" "Yk"))
11321 (const_vector:V6HI [(const_int 0) (const_int 0)
11322 (const_int 0) (const_int 0)
11323 (const_int 0) (const_int 0)])))]
11325 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11326 [(set_attr "type" "ssemov")
11327 (set_attr "prefix" "evex")
11328 (set_attr "mode" "TI")])
;; Zero-masking variant (merge source is constant zero, explicit {z}).
11330 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
11331 [(set (match_operand:V8HI 0 "register_operand" "=v")
11335 (match_operand:V2DI 1 "register_operand" "v"))
11336 (const_vector:V2HI [(const_int 0) (const_int 0)])
11337 (match_operand:QI 2 "register_operand" "Yk"))
11338 (const_vector:V6HI [(const_int 0) (const_int 0)
11339 (const_int 0) (const_int 0)
11340 (const_int 0) (const_int 0)])))]
11342 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11343 [(set_attr "type" "ssemov")
11344 (set_attr "prefix" "evex")
11345 (set_attr "mode" "TI")])
;; Masked truncating store to a V2HI memory destination (%g1 prints the
;; source at its 512-bit register view for Intel syntax).
11347 (define_insn "*avx512vl_<code>v2div2hi2_mask_store_1"
11348 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11351 (match_operand:V2DI 1 "register_operand" "v"))
11353 (match_operand:QI 2 "register_operand" "Yk")))]
11355 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
11356 [(set_attr "type" "ssemov")
11357 (set_attr "memory" "store")
11358 (set_attr "prefix" "evex")
11359 (set_attr "mode" "TI")])
;; Pre-reload splitter: masked SImode store form -> masked V2HI store.
11361 (define_insn_and_split "avx512vl_<code>v2div2hi2_mask_store_2"
11362 [(set (match_operand:SI 0 "memory_operand")
11366 (match_operand:V2DI 1 "register_operand"))
11372 (parallel [(const_int 0) (const_int 1)]))
11373 (match_operand:QI 2 "register_operand")) 0))]
11374 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11377 [(set (match_dup 0)
11379 (any_truncate:V2HI (match_dup 1))
11382 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
;; V2DI -> V2SI truncation expander: rewrites the destination as a V4SI
;; subreg and emits the vpmovqd insn with explicit zero padding.
11384 (define_expand "truncv2div2si2"
11385 [(set (match_operand:V2SI 0 "register_operand")
11387 (match_operand:V2DI 1 "register_operand")))]
11390 operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0);
11391 emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0],
11393 CONST0_RTX (V2SImode)));
;; vpmov{,s,us}qd producing a V4SI whose low 2 lanes hold the truncated
;; elements and whose high 2 lanes are zero (operand 2).
11397 (define_insn "avx512vl_<code>v2div2si2"
11398 [(set (match_operand:V4SI 0 "register_operand" "=v")
11401 (match_operand:V2DI 1 "register_operand" "v"))
11402 (match_operand:V2SI 2 "const0_operand")))]
11404 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11405 [(set_attr "type" "ssemov")
11406 (set_attr "prefix" "evex")
11407 (set_attr "mode" "TI")])
;; Direct V2DI -> V2SI truncating store (8-byte store).
11409 (define_insn "*avx512vl_<code>v2div2si2_store_1"
11410 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11412 (match_operand:V2DI 1 "register_operand" "v")))]
11414 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11415 [(set_attr "type" "ssemov")
11416 (set_attr "memory" "store")
11417 (set_attr "prefix" "evex")
11418 (set_attr "mode" "TI")])
;; Pre-reload splitter: DImode store of the truncation -> V2SI store.
11420 (define_insn_and_split "*avx512vl_<code>v2div2si2_store_2"
11421 [(set (match_operand:DI 0 "memory_operand")
11424 (match_operand:V2DI 1 "register_operand")) 0))]
11425 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11428 [(set (match_dup 0)
11429 (any_truncate:V2SI (match_dup 1)))]
11430 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
;; Masked V2DI->V2SI truncation into a V4SI register: low 2 dwords merged
;; under mask operand 3 with operand 2; upper 2 dwords zeroed.
11432 (define_insn "avx512vl_<code>v2div2si2_mask"
11433 [(set (match_operand:V4SI 0 "register_operand" "=v")
11437 (match_operand:V2DI 1 "register_operand" "v"))
11439 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
11440 (parallel [(const_int 0) (const_int 1)]))
11441 (match_operand:QI 3 "register_operand" "Yk"))
11442 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11444 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11445 [(set_attr "type" "ssemov")
11446 (set_attr "prefix" "evex")
11447 (set_attr "mode" "TI")])
;; Zero-masking variant (merge source is constant zero, explicit {z}).
11449 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
11450 [(set (match_operand:V4SI 0 "register_operand" "=v")
11454 (match_operand:V2DI 1 "register_operand" "v"))
11455 (const_vector:V2SI [(const_int 0) (const_int 0)])
11456 (match_operand:QI 2 "register_operand" "Yk"))
11457 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11459 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11460 [(set_attr "type" "ssemov")
11461 (set_attr "prefix" "evex")
11462 (set_attr "mode" "TI")])
;; Masked truncating store to a V2SI memory destination.
11464 (define_insn "*avx512vl_<code>v2div2si2_mask_store_1"
11465 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11468 (match_operand:V2DI 1 "register_operand" "v"))
11470 (match_operand:QI 2 "register_operand" "Yk")))]
11472 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11473 [(set_attr "type" "ssemov")
11474 (set_attr "memory" "store")
11475 (set_attr "prefix" "evex")
11476 (set_attr "mode" "TI")])
;; Pre-reload splitter: masked DImode store form -> masked V2SI store.
11478 (define_insn_and_split "avx512vl_<code>v2div2si2_mask_store_2"
11479 [(set (match_operand:DI 0 "memory_operand")
11483 (match_operand:V2DI 1 "register_operand"))
11489 (parallel [(const_int 0) (const_int 1)]))
11490 (match_operand:QI 2 "register_operand")) 0))]
11491 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11494 [(set (match_dup 0)
11496 (any_truncate:V2SI (match_dup 1))
11499 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
;; V8DI -> V8QI truncation expander (AVX512F vpmovqb): rewrites the
;; destination as a V16QI subreg and calls the v8div16qi2 insn below.
11501 (define_expand "truncv8div8qi2"
11502 [(set (match_operand:V8QI 0 "register_operand")
11504 (match_operand:V8DI 1 "register_operand")))]
11507 operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0);
11508 emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1]));
;; vpmov{,s,us}qb producing a V16QI whose low 8 bytes hold the truncated
;; elements and whose high 8 bytes are zero.
11512 (define_insn "avx512f_<code>v8div16qi2"
11513 [(set (match_operand:V16QI 0 "register_operand" "=v")
11516 (match_operand:V8DI 1 "register_operand" "v"))
11517 (const_vector:V8QI [(const_int 0) (const_int 0)
11518 (const_int 0) (const_int 0)
11519 (const_int 0) (const_int 0)
11520 (const_int 0) (const_int 0)])))]
11522 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11523 [(set_attr "type" "ssemov")
11524 (set_attr "prefix" "evex")
11525 (set_attr "mode" "TI")])
;; Direct V8DI -> V8QI truncating store (8-byte store).
11527 (define_insn "*avx512f_<code>v8div16qi2_store_1"
11528 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11530 (match_operand:V8DI 1 "register_operand" "v")))]
11532 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11533 [(set_attr "type" "ssemov")
11534 (set_attr "memory" "store")
11535 (set_attr "prefix" "evex")
11536 (set_attr "mode" "TI")])
;; Pre-reload splitter: DImode store of the truncation -> V8QI store.
11538 (define_insn_and_split "*avx512f_<code>v8div16qi2_store_2"
11539 [(set (match_operand:DI 0 "memory_operand")
11542 (match_operand:V8DI 1 "register_operand")) 0))]
11543 "TARGET_AVX512F && ix86_pre_reload_split ()"
11546 [(set (match_dup 0)
11547 (any_truncate:V8QI (match_dup 1)))]
11548 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Masked V8DI->V8QI truncation into a V16QI register: low 8 bytes merged
;; under mask operand 3 with operand 2; upper 8 bytes zeroed.
11550 (define_insn "avx512f_<code>v8div16qi2_mask"
11551 [(set (match_operand:V16QI 0 "register_operand" "=v")
11555 (match_operand:V8DI 1 "register_operand" "v"))
11557 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11558 (parallel [(const_int 0) (const_int 1)
11559 (const_int 2) (const_int 3)
11560 (const_int 4) (const_int 5)
11561 (const_int 6) (const_int 7)]))
11562 (match_operand:QI 3 "register_operand" "Yk"))
11563 (const_vector:V8QI [(const_int 0) (const_int 0)
11564 (const_int 0) (const_int 0)
11565 (const_int 0) (const_int 0)
11566 (const_int 0) (const_int 0)])))]
11568 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11569 [(set_attr "type" "ssemov")
11570 (set_attr "prefix" "evex")
11571 (set_attr "mode" "TI")])
;; Zero-masking variant (merge source is constant zero, explicit {z}).
11573 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
11574 [(set (match_operand:V16QI 0 "register_operand" "=v")
11578 (match_operand:V8DI 1 "register_operand" "v"))
11579 (const_vector:V8QI [(const_int 0) (const_int 0)
11580 (const_int 0) (const_int 0)
11581 (const_int 0) (const_int 0)
11582 (const_int 0) (const_int 0)])
11583 (match_operand:QI 2 "register_operand" "Yk"))
11584 (const_vector:V8QI [(const_int 0) (const_int 0)
11585 (const_int 0) (const_int 0)
11586 (const_int 0) (const_int 0)
11587 (const_int 0) (const_int 0)])))]
11589 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11590 [(set_attr "type" "ssemov")
11591 (set_attr "prefix" "evex")
11592 (set_attr "mode" "TI")])
;; Masked truncating store to a V8QI memory destination.
11594 (define_insn "*avx512f_<code>v8div16qi2_mask_store_1"
11595 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11598 (match_operand:V8DI 1 "register_operand" "v"))
11600 (match_operand:QI 2 "register_operand" "Yk")))]
11602 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11603 [(set_attr "type" "ssemov")
11604 (set_attr "memory" "store")
11605 (set_attr "prefix" "evex")
11606 (set_attr "mode" "TI")])
;; Pre-reload splitter: masked DImode store form -> masked V8QI store.
11608 (define_insn_and_split "avx512f_<code>v8div16qi2_mask_store_2"
11609 [(set (match_operand:DI 0 "memory_operand")
11613 (match_operand:V8DI 1 "register_operand"))
11619 (parallel [(const_int 0) (const_int 1)
11620 (const_int 2) (const_int 3)
11621 (const_int 4) (const_int 5)
11622 (const_int 6) (const_int 7)]))
11623 (match_operand:QI 2 "register_operand")) 0))]
11624 "TARGET_AVX512F && ix86_pre_reload_split ()"
11627 [(set (match_dup 0)
11629 (any_truncate:V8QI (match_dup 1))
11632 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11634 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11636 ;; Parallel integral arithmetic
11638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector negation: expanded as 0 - x, with the zero forced into a
;; register (operand 2 created in the preparation statement).
11640 (define_expand "neg<mode>2"
11641 [(set (match_operand:VI_AVX2 0 "register_operand")
11644 (match_operand:VI_AVX2 1 "vector_operand")))]
11646 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Generic vector add/sub expander (<insn> iterates over plus/minus);
;; operand fixup handles commutativity and memory-operand placement.
11648 (define_expand "<insn><mode>3"
11649 [(set (match_operand:VI_AVX2 0 "register_operand")
11651 (match_operand:VI_AVX2 1 "vector_operand")
11652 (match_operand:VI_AVX2 2 "vector_operand")))]
11654 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/sub for 32/64-bit element vectors (AVX512F/VL).
11656 (define_expand "<insn><mode>3_mask"
11657 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11658 (vec_merge:VI48_AVX512VL
11659 (plusminus:VI48_AVX512VL
11660 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11661 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11662 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11663 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11665 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/sub for 8/16-bit element vectors (AVX512BW/VL).
11667 (define_expand "<insn><mode>3_mask"
11668 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11669 (vec_merge:VI12_AVX512VL
11670 (plusminus:VI12_AVX512VL
11671 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11672 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11673 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
11674 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11676 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Unmasked add/sub insn: legacy SSE two-operand (padd/psub) and
;; VEX/EVEX three-operand (vpadd/vpsub) alternatives; the second accepts
;; embedded-broadcast operands (Br).
11678 (define_insn "*<insn><mode>3"
11679 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
11681 (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
11682 (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
11683 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11685 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11686 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11687 [(set_attr "isa" "noavx,avx")
11688 (set_attr "type" "sseiadd")
11689 (set_attr "prefix_data16" "1,*")
11690 (set_attr "prefix" "orig,maybe_evex")
11691 (set_attr "mode" "<sseinsnmode>")])
;; Masked add/sub insn for 32/64-bit elements (merge source operand 3,
;; mask operand 4; %N3 selects merge vs. zero masking).
11693 (define_insn "*<insn><mode>3_mask"
11694 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11695 (vec_merge:VI48_AVX512VL
11696 (plusminus:VI48_AVX512VL
11697 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11698 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11699 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
11700 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11701 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11702 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11703 [(set_attr "type" "sseiadd")
11704 (set_attr "prefix" "evex")
11705 (set_attr "mode" "<sseinsnmode>")])
;; Masked add/sub insn for 8/16-bit elements (requires AVX512BW).
11707 (define_insn "*<insn><mode>3_mask"
11708 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11709 (vec_merge:VI12_AVX512VL
11710 (plusminus:VI12_AVX512VL
11711 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11712 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
11713 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
11714 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11715 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11716 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11717 [(set_attr "type" "sseiadd")
11718 (set_attr "prefix" "evex")
11719 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/sub expander (padds/paddus/psubs/psubus families),
;; optionally masked via <mask_name>.
11721 (define_expand "<sse2_avx2>_<insn><mode>3<mask_name>"
11722 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
11723 (sat_plusminus:VI12_AVX2_AVX512BW
11724 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
11725 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
11726 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11727 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/sub insn: legacy two-operand and VEX/EVEX
;; three-operand (optionally masked) alternatives.
11729 (define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
11730 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
11731 (sat_plusminus:VI12_AVX2_AVX512BW
11732 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
11733 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
11734 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
11735 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11737 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11738 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11739 [(set_attr "isa" "noavx,avx")
11740 (set_attr "type" "sseiadd")
11741 (set_attr "prefix_data16" "1,*")
11742 (set_attr "prefix" "orig,maybe_evex")
11743 (set_attr "mode" "TI")])
;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
;; (us_minus (a, b) == 0  <=>  umin (a, b) == a); V8HI needs SSE4.1 for
;; the unsigned min.  Operand 4 is a fresh scratch register.
11747 [(set (match_operand:VI12_AVX2 0 "register_operand")
11749 (us_minus:VI12_AVX2
11750 (match_operand:VI12_AVX2 1 "vector_operand")
11751 (match_operand:VI12_AVX2 2 "vector_operand"))
11752 (match_operand:VI12_AVX2 3 "const0_operand")))]
11754 && (<MODE>mode != V8HImode || TARGET_SSE4_1)
11755 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
11756 [(set (match_dup 4)
11757 (umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
11759 (eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
11760 "operands[4] = gen_reg_rtx (<MODE>mode);")
;; V8QI multiply: no direct byte-multiply instruction exists, so expansion
;; is delegated entirely to ix86_expand_vecmul_qihi (which widens to HImode).
;; Under AVX512VL+AVX512BW that helper must always succeed, hence the assert.
11762 (define_expand "mulv8qi3"
11763 [(set (match_operand:V8QI 0 "register_operand")
11764 (mult:V8QI (match_operand:V8QI 1 "register_operand")
11765 (match_operand:V8QI 2 "register_operand")))]
11766 "TARGET_AVX512VL && TARGET_AVX512BW"
11768 gcc_assert (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]));
;; General QImode-element multiply: try the qihi widening path first and
;; fall back to the generic unpack/pack lowering in ix86_expand_vecop_qihi.
11772 (define_expand "mul<mode>3"
11773 [(set (match_operand:VI1_AVX512 0 "register_operand")
11774 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
11775 (match_operand:VI1_AVX512 2 "register_operand")))]
11778 if (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]))
11780 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Low-half word multiply (pmullw / vpmullw) for HImode-element vectors.
;; The expander only canonicalizes operands; the insn below matches both the
;; legacy two-operand form and the AVX three-operand, maskable form.
11784 (define_expand "mul<mode>3<mask_name>"
11785 [(set (match_operand:VI2_AVX2 0 "register_operand")
11786 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
11787 (match_operand:VI2_AVX2 2 "vector_operand")))]
11788 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11789 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; "%0" on operand 1 marks the multiply as commutative for reload.
11791 (define_insn "*mul<mode>3<mask_name>"
11792 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11793 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11794 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11795 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11796 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11798 pmullw\t{%2, %0|%0, %2}
11799 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11800 [(set_attr "isa" "noavx,avx")
11801 (set_attr "type" "sseimul")
11802 (set_attr "prefix_data16" "1,*")
11803 (set_attr "prefix" "orig,vex")
11804 (set_attr "mode" "<sseinsnmode>")])
;; High-half word multiply (pmulhw / pmulhuw): the RTL models it as a
;; widening multiply followed by a logical right shift of the double-width
;; product; <s>/<u> select the signed/unsigned extension variant.
11806 (define_expand "<s>mul<mode>3_highpart<mask_name>"
11807 [(set (match_operand:VI2_AVX2 0 "register_operand")
11809 (lshiftrt:<ssedoublemode>
11810 (mult:<ssedoublemode>
11811 (any_extend:<ssedoublemode>
11812 (match_operand:VI2_AVX2 1 "vector_operand"))
11813 (any_extend:<ssedoublemode>
11814 (match_operand:VI2_AVX2 2 "vector_operand")))
11817 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11818 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn; alternative 0 legacy SSE2, alternative 1 AVX/EVEX.
11820 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
11821 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11823 (lshiftrt:<ssedoublemode>
11824 (mult:<ssedoublemode>
11825 (any_extend:<ssedoublemode>
11826 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11827 (any_extend:<ssedoublemode>
11828 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11830 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11831 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11833 pmulh<u>w\t{%2, %0|%0, %2}
11834 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11835 [(set_attr "isa" "noavx,avx")
11836 (set_attr "type" "sseimul")
11837 (set_attr "prefix_data16" "1,*")
11838 (set_attr "prefix" "orig,vex")
11839 (set_attr "mode" "<sseinsnmode>")])
;; Widening unsigned multiply of the EVEN SImode elements (vpmuludq family).
;; Each pattern selects elements 0,2,4,... of both inputs and produces the
;; full double-width products.  Three size variants follow: 512-bit
;; (AVX-512F), 256-bit (AVX2), and 128-bit (SSE2).
11841 (define_expand "vec_widen_umult_even_v16si<mask_name>"
11842 [(set (match_operand:V8DI 0 "register_operand")
11846 (match_operand:V16SI 1 "nonimmediate_operand")
11847 (parallel [(const_int 0) (const_int 2)
11848 (const_int 4) (const_int 6)
11849 (const_int 8) (const_int 10)
11850 (const_int 12) (const_int 14)])))
11853 (match_operand:V16SI 2 "nonimmediate_operand")
11854 (parallel [(const_int 0) (const_int 2)
11855 (const_int 4) (const_int 6)
11856 (const_int 8) (const_int 10)
11857 (const_int 12) (const_int 14)])))))]
11859 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; 512-bit insn: vpmuludq, EVEX-encoded, maskable.
11861 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
11862 [(set (match_operand:V8DI 0 "register_operand" "=v")
11866 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11867 (parallel [(const_int 0) (const_int 2)
11868 (const_int 4) (const_int 6)
11869 (const_int 8) (const_int 10)
11870 (const_int 12) (const_int 14)])))
11873 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11874 (parallel [(const_int 0) (const_int 2)
11875 (const_int 4) (const_int 6)
11876 (const_int 8) (const_int 10)
11877 (const_int 12) (const_int 14)])))))]
11878 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11879 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11880 [(set_attr "type" "sseimul")
11881 (set_attr "prefix_extra" "1")
11882 (set_attr "prefix" "evex")
11883 (set_attr "mode" "XI")])
;; 256-bit variant (AVX2; AVX512VL required when a mask is applied).
11885 (define_expand "vec_widen_umult_even_v8si<mask_name>"
11886 [(set (match_operand:V4DI 0 "register_operand")
11890 (match_operand:V8SI 1 "nonimmediate_operand")
11891 (parallel [(const_int 0) (const_int 2)
11892 (const_int 4) (const_int 6)])))
11895 (match_operand:V8SI 2 "nonimmediate_operand")
11896 (parallel [(const_int 0) (const_int 2)
11897 (const_int 4) (const_int 6)])))))]
11898 "TARGET_AVX2 && <mask_avx512vl_condition>"
11899 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11901 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
11902 [(set (match_operand:V4DI 0 "register_operand" "=v")
11906 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11907 (parallel [(const_int 0) (const_int 2)
11908 (const_int 4) (const_int 6)])))
11911 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11912 (parallel [(const_int 0) (const_int 2)
11913 (const_int 4) (const_int 6)])))))]
11914 "TARGET_AVX2 && <mask_avx512vl_condition>
11915 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11916 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11917 [(set_attr "type" "sseimul")
11918 (set_attr "prefix" "maybe_evex")
11919 (set_attr "mode" "OI")])
;; 128-bit variant (SSE2 pmuludq / AVX vpmuludq).
11921 (define_expand "vec_widen_umult_even_v4si<mask_name>"
11922 [(set (match_operand:V2DI 0 "register_operand")
11926 (match_operand:V4SI 1 "vector_operand")
11927 (parallel [(const_int 0) (const_int 2)])))
11930 (match_operand:V4SI 2 "vector_operand")
11931 (parallel [(const_int 0) (const_int 2)])))))]
11932 "TARGET_SSE2 && <mask_avx512vl_condition>"
11933 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11935 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
11936 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11940 (match_operand:V4SI 1 "vector_operand" "%0,v")
11941 (parallel [(const_int 0) (const_int 2)])))
11944 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11945 (parallel [(const_int 0) (const_int 2)])))))]
11946 "TARGET_SSE2 && <mask_avx512vl_condition>
11947 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11949 pmuludq\t{%2, %0|%0, %2}
11950 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11951 [(set_attr "isa" "noavx,avx")
11952 (set_attr "type" "sseimul")
11953 (set_attr "prefix_data16" "1,*")
11954 (set_attr "prefix" "orig,maybe_evex")
11955 (set_attr "mode" "TI")])
;; Widening SIGNED multiply of even SImode elements (vpmuldq family).
;; Mirrors the unsigned patterns above; the 128-bit form requires SSE4.1
;; since pmuldq was introduced there.
11957 (define_expand "vec_widen_smult_even_v16si<mask_name>"
11958 [(set (match_operand:V8DI 0 "register_operand")
11962 (match_operand:V16SI 1 "nonimmediate_operand")
11963 (parallel [(const_int 0) (const_int 2)
11964 (const_int 4) (const_int 6)
11965 (const_int 8) (const_int 10)
11966 (const_int 12) (const_int 14)])))
11969 (match_operand:V16SI 2 "nonimmediate_operand")
11970 (parallel [(const_int 0) (const_int 2)
11971 (const_int 4) (const_int 6)
11972 (const_int 8) (const_int 10)
11973 (const_int 12) (const_int 14)])))))]
11975 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; 512-bit insn: vpmuldq, EVEX, maskable.
11977 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
11978 [(set (match_operand:V8DI 0 "register_operand" "=v")
11982 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11983 (parallel [(const_int 0) (const_int 2)
11984 (const_int 4) (const_int 6)
11985 (const_int 8) (const_int 10)
11986 (const_int 12) (const_int 14)])))
11989 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11990 (parallel [(const_int 0) (const_int 2)
11991 (const_int 4) (const_int 6)
11992 (const_int 8) (const_int 10)
11993 (const_int 12) (const_int 14)])))))]
11994 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11995 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11996 [(set_attr "type" "sseimul")
11997 (set_attr "prefix_extra" "1")
11998 (set_attr "prefix" "evex")
11999 (set_attr "mode" "XI")])
;; 256-bit variant (AVX2).
12001 (define_expand "vec_widen_smult_even_v8si<mask_name>"
12002 [(set (match_operand:V4DI 0 "register_operand")
12006 (match_operand:V8SI 1 "nonimmediate_operand")
12007 (parallel [(const_int 0) (const_int 2)
12008 (const_int 4) (const_int 6)])))
12011 (match_operand:V8SI 2 "nonimmediate_operand")
12012 (parallel [(const_int 0) (const_int 2)
12013 (const_int 4) (const_int 6)])))))]
12014 "TARGET_AVX2 && <mask_avx512vl_condition>"
12015 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
12017 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
12018 [(set (match_operand:V4DI 0 "register_operand" "=v")
12022 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
12023 (parallel [(const_int 0) (const_int 2)
12024 (const_int 4) (const_int 6)])))
12027 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
12028 (parallel [(const_int 0) (const_int 2)
12029 (const_int 4) (const_int 6)])))))]
12030 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12031 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12032 [(set_attr "type" "sseimul")
12033 (set_attr "prefix_extra" "1")
12034 (set_attr "prefix" "vex")
12035 (set_attr "mode" "OI")])
;; 128-bit signed variant: pmuldq (SSE4.1) / vpmuldq.  "Yr" prefers a
;; register alternative that avoids a longer encoding; see i386 constraints.
12037 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
12038 [(set (match_operand:V2DI 0 "register_operand")
12042 (match_operand:V4SI 1 "vector_operand")
12043 (parallel [(const_int 0) (const_int 2)])))
12046 (match_operand:V4SI 2 "vector_operand")
12047 (parallel [(const_int 0) (const_int 2)])))))]
12048 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
12049 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
12051 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
12052 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
12056 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
12057 (parallel [(const_int 0) (const_int 2)])))
12060 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
12061 (parallel [(const_int 0) (const_int 2)])))))]
12062 "TARGET_SSE4_1 && <mask_avx512vl_condition>
12063 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12065 pmuldq\t{%2, %0|%0, %2}
12066 pmuldq\t{%2, %0|%0, %2}
12067 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12068 [(set_attr "isa" "noavx,noavx,avx")
12069 (set_attr "type" "sseimul")
12070 (set_attr "prefix_data16" "1,1,*")
12071 (set_attr "prefix_extra" "1")
12072 (set_attr "prefix" "orig,orig,vex")
12073 (set_attr "mode" "TI")])
;; 512-bit vpmaddwd, modeled as an unspec (UNSPEC_PMADDWD512) rather than
;; explicit vec_select/mult RTL.  Maskable, EVEX-only.
;; NOTE(review): the stray ";" after the template string is harmless —
;; ";" starts a comment in machine-description files.
12075 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
12076 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
12077 (unspec:<sseunpackmode>
12078 [(match_operand:VI2_AVX2 1 "register_operand" "v")
12079 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
12080 UNSPEC_PMADDWD512))]
12081 "TARGET_AVX512BW && <mask_mode512bit_condition>"
12082 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
12083 [(set_attr "type" "sseiadd")
12084 (set_attr "prefix" "evex")
12085 (set_attr "mode" "XI")])
;; 256-bit pmaddwd: multiply adjacent signed words and add the pairs.
;; The RTL spells this out: even-lane products (indices 0,2,...,14) plus
;; odd-lane products (indices 1,3,...,15), each widened to SImode.
12087 (define_expand "avx2_pmaddwd"
12088 [(set (match_operand:V8SI 0 "register_operand")
12093 (match_operand:V16HI 1 "nonimmediate_operand")
12094 (parallel [(const_int 0) (const_int 2)
12095 (const_int 4) (const_int 6)
12096 (const_int 8) (const_int 10)
12097 (const_int 12) (const_int 14)])))
12100 (match_operand:V16HI 2 "nonimmediate_operand")
12101 (parallel [(const_int 0) (const_int 2)
12102 (const_int 4) (const_int 6)
12103 (const_int 8) (const_int 10)
12104 (const_int 12) (const_int 14)]))))
12107 (vec_select:V8HI (match_dup 1)
12108 (parallel [(const_int 1) (const_int 3)
12109 (const_int 5) (const_int 7)
12110 (const_int 9) (const_int 11)
12111 (const_int 13) (const_int 15)])))
12113 (vec_select:V8HI (match_dup 2)
12114 (parallel [(const_int 1) (const_int 3)
12115 (const_int 5) (const_int 7)
12116 (const_int 9) (const_int 11)
12117 (const_int 13) (const_int 15)]))))))]
12119 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matching insn; alt 1 allows EVEX registers (avx512bw isa attribute).
12121 (define_insn "*avx2_pmaddwd"
12122 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
12127 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
12128 (parallel [(const_int 0) (const_int 2)
12129 (const_int 4) (const_int 6)
12130 (const_int 8) (const_int 10)
12131 (const_int 12) (const_int 14)])))
12134 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
12135 (parallel [(const_int 0) (const_int 2)
12136 (const_int 4) (const_int 6)
12137 (const_int 8) (const_int 10)
12138 (const_int 12) (const_int 14)]))))
12141 (vec_select:V8HI (match_dup 1)
12142 (parallel [(const_int 1) (const_int 3)
12143 (const_int 5) (const_int 7)
12144 (const_int 9) (const_int 11)
12145 (const_int 13) (const_int 15)])))
12147 (vec_select:V8HI (match_dup 2)
12148 (parallel [(const_int 1) (const_int 3)
12149 (const_int 5) (const_int 7)
12150 (const_int 9) (const_int 11)
12151 (const_int 13) (const_int 15)]))))))]
12152 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12153 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
12154 [(set_attr "type" "sseiadd")
12155 (set_attr "isa" "*,avx512bw")
12156 (set_attr "prefix" "vex,evex")
12157 (set_attr "mode" "OI")])
;; 128-bit pmaddwd: same even/odd widening-multiply-and-add structure as
;; the 256-bit pattern above, on V8HI inputs producing V4SI.
12159 (define_expand "sse2_pmaddwd"
12160 [(set (match_operand:V4SI 0 "register_operand")
12165 (match_operand:V8HI 1 "vector_operand")
12166 (parallel [(const_int 0) (const_int 2)
12167 (const_int 4) (const_int 6)])))
12170 (match_operand:V8HI 2 "vector_operand")
12171 (parallel [(const_int 0) (const_int 2)
12172 (const_int 4) (const_int 6)]))))
12175 (vec_select:V4HI (match_dup 1)
12176 (parallel [(const_int 1) (const_int 3)
12177 (const_int 5) (const_int 7)])))
12179 (vec_select:V4HI (match_dup 2)
12180 (parallel [(const_int 1) (const_int 3)
12181 (const_int 5) (const_int 7)]))))))]
12183 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Three alternatives: legacy SSE2, VEX, and EVEX register files.
12185 (define_insn "*sse2_pmaddwd"
12186 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
12191 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
12192 (parallel [(const_int 0) (const_int 2)
12193 (const_int 4) (const_int 6)])))
12196 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
12197 (parallel [(const_int 0) (const_int 2)
12198 (const_int 4) (const_int 6)]))))
12201 (vec_select:V4HI (match_dup 1)
12202 (parallel [(const_int 1) (const_int 3)
12203 (const_int 5) (const_int 7)])))
12205 (vec_select:V4HI (match_dup 2)
12206 (parallel [(const_int 1) (const_int 3)
12207 (const_int 5) (const_int 7)]))))))]
12208 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12210 pmaddwd\t{%2, %0|%0, %2}
12211 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
12212 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
12213 [(set_attr "isa" "noavx,avx,avx512bw")
12214 (set_attr "type" "sseiadd")
12215 (set_attr "atom_unit" "simul")
12216 (set_attr "prefix_data16" "1,*,*")
12217 (set_attr "prefix" "orig,vex,evex")
12218 (set_attr "mode" "TI")])
;; 64-bit element low multiply (vpmullq), AVX512DQ only.  "Br" in the
;; operand-2 constraint additionally allows an embedded-broadcast memory
;; operand (bcst_vector_operand predicate).
12220 (define_insn "avx512dq_mul<mode>3<mask_name>"
12221 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
12223 (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v")
12224 (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
12225 "TARGET_AVX512DQ && <mask_mode512bit_condition>
12226 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
12227 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12228 [(set_attr "type" "sseimul")
12229 (set_attr "prefix" "evex")
12230 (set_attr "mode" "<sseinsnmode>")])
;; 32-bit element multiply.  With SSE4.1+ the pmulld insn below matches
;; directly; plain SSE2 has no 32-bit low multiply, so the expander falls
;; back to ix86_expand_sse2_mulv4si3 (built from pmuludq shuffles).
12232 (define_expand "mul<mode>3<mask_name>"
12233 [(set (match_operand:VI4_AVX512F 0 "register_operand")
12235 (match_operand:VI4_AVX512F 1 "general_vector_operand")
12236 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
12237 "TARGET_SSE2 && <mask_mode512bit_condition>"
12241 if (!vector_operand (operands[1], <MODE>mode))
12242 operands[1] = force_reg (<MODE>mode, operands[1]);
12243 if (!vector_operand (operands[2], <MODE>mode))
12244 operands[2] = force_reg (<MODE>mode, operands[2]);
12245 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
12249 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; pmulld / vpmulld insn, with embedded-broadcast support on the EVEX
;; alternative ("Br") and optional masking.
12254 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
12255 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
12257 (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v")
12258 (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))]
12259 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
12260 && <mask_mode512bit_condition>"
12262 pmulld\t{%2, %0|%0, %2}
12263 pmulld\t{%2, %0|%0, %2}
12264 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12265 [(set_attr "isa" "noavx,noavx,avx")
12266 (set_attr "type" "sseimul")
12267 (set_attr "prefix_extra" "1")
12268 (set_attr "prefix" "<bcst_mask_prefix4>")
12269 (set_attr "btver2_decode" "vector,vector,vector")
12270 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit element multiply without AVX512DQ: always synthesized by
;; ix86_expand_sse2_mulvxdi3 (32x32 partial products via pmuludq).
12272 (define_expand "mul<mode>3"
12273 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12274 (mult:VI8_AVX2_AVX512F
12275 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12276 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12279 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening-multiply standard names.  These are pure expanders: the RTL
;; templates only declare operands/modes, and all code generation is done
;; by the ix86_expand_mul_widen_* helpers.
12283 (define_expand "vec_widen_<s>mult_hi_<mode>"
12284 [(match_operand:<sseunpackmode> 0 "register_operand")
12285 (any_extend:<sseunpackmode>
12286 (match_operand:VI124_AVX2 1 "register_operand"))
12287 (match_operand:VI124_AVX2 2 "register_operand")]
12290 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
12295 (define_expand "vec_widen_<s>mult_lo_<mode>"
12296 [(match_operand:<sseunpackmode> 0 "register_operand")
12297 (any_extend:<sseunpackmode>
12298 (match_operand:VI124_AVX2 1 "register_operand"))
12299 (match_operand:VI124_AVX2 2 "register_operand")]
12302 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
12307 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
12308 ;; named patterns, but signed V4SI needs special help for plain SSE2.
12309 (define_expand "vec_widen_smult_even_v4si"
12310 [(match_operand:V2DI 0 "register_operand")
12311 (match_operand:V4SI 1 "vector_operand")
12312 (match_operand:V4SI 2 "vector_operand")]
12315 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Odd-element widening multiply, signed or unsigned.
12320 (define_expand "vec_widen_<s>mult_odd_<mode>"
12321 [(match_operand:<sseunpackmode> 0 "register_operand")
12322 (any_extend:<sseunpackmode>
12323 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
12324 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
12327 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix used to pick the right pmaddwd generator for each vector width
;; (512-bit needs the explicit "512v32hi" name; 128/256-bit use the bare one).
12332 (define_mode_attr SDOT_PMADD_SUF
12333 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; Signed dot product of HImode vectors: pmaddwd produces the pairwise
;; products-and-sums, then the accumulator (operand 3) is added in.
12335 (define_expand "sdot_prod<mode>"
12336 [(match_operand:<sseunpackmode> 0 "register_operand")
12337 (match_operand:VI2_AVX2 1 "register_operand")
12338 (match_operand:VI2_AVX2 2 "register_operand")
12339 (match_operand:<sseunpackmode> 3 "register_operand")]
12342 rtx t = gen_reg_rtx (<sseunpackmode>mode);
12343 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
12344 emit_insn (gen_rtx_SET (operands[0],
12345 gen_rtx_PLUS (<sseunpackmode>mode,
12350 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
12351 ;; back together when madd is available.
;; XOP-only V4SI dot product: pmacsdqh handles the high halves into a
;; temporary, pmacsdql accumulates the low halves on top of it.
12352 (define_expand "sdot_prodv4si"
12353 [(match_operand:V2DI 0 "register_operand")
12354 (match_operand:V4SI 1 "register_operand")
12355 (match_operand:V4SI 2 "register_operand")
12356 (match_operand:V2DI 3 "register_operand")]
12359 rtx t = gen_reg_rtx (V2DImode);
12360 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
12361 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Rounding unsigned average (pavgb/pavgw semantics): (a + b + 1) >> 1
;; computed in the double-width mode, then truncated back.  Operand 3 is
;; the all-ones rounding constant, created below.
12365 (define_expand "uavg<mode>3_ceil"
12366 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
12367 (truncate:VI12_AVX2_AVX512BW
12368 (lshiftrt:<ssedoublemode>
12369 (plus:<ssedoublemode>
12370 (plus:<ssedoublemode>
12371 (zero_extend:<ssedoublemode>
12372 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
12373 (zero_extend:<ssedoublemode>
12374 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
12379 operands[3] = CONST1_RTX(<ssedoublemode>mode);
12380 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Unsigned sum-of-absolute-differences accumulation (usad standard name):
;; psadbw yields per-quadword partial SADs; convert_move repacks those into
;; SImode lanes, then the accumulator (operand 3) is added in.
12383 (define_expand "usadv16qi"
12384 [(match_operand:V4SI 0 "register_operand")
12385 (match_operand:V16QI 1 "register_operand")
12386 (match_operand:V16QI 2 "vector_operand")
12387 (match_operand:V4SI 3 "vector_operand")]
12390 rtx t1 = gen_reg_rtx (V2DImode);
12391 rtx t2 = gen_reg_rtx (V4SImode);
12392 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
12393 convert_move (t2, t1, 0);
12394 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; 256-bit variant (vpsadbw, AVX2).
12398 (define_expand "usadv32qi"
12399 [(match_operand:V8SI 0 "register_operand")
12400 (match_operand:V32QI 1 "register_operand")
12401 (match_operand:V32QI 2 "nonimmediate_operand")
12402 (match_operand:V8SI 3 "nonimmediate_operand")]
12405 rtx t1 = gen_reg_rtx (V4DImode);
12406 rtx t2 = gen_reg_rtx (V8SImode);
12407 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
12408 convert_move (t2, t1, 0);
12409 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; 512-bit variant (vpsadbw, AVX512BW via avx512f_psadbw generator).
12413 (define_expand "usadv64qi"
12414 [(match_operand:V16SI 0 "register_operand")
12415 (match_operand:V64QI 1 "register_operand")
12416 (match_operand:V64QI 2 "nonimmediate_operand")
12417 (match_operand:V16SI 3 "nonimmediate_operand")]
12420 rtx t1 = gen_reg_rtx (V8DImode);
12421 rtx t2 = gen_reg_rtx (V16SImode);
12422 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
12423 convert_move (t2, t1, 0);
12424 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
;; Arithmetic right shifts (psra*).  Operand 2 is a DImode shift count:
;; either a register/xmm count or an immediate ("N"); length_immediate is
;; set conditionally on whether the count is a const_int.
12428 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
12429 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
12430 (ashiftrt:VI248_AVX512BW_1
12431 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
12432 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12434 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12435 [(set_attr "type" "sseishft")
12436 (set (attr "length_immediate")
12437 (if_then_else (match_operand 2 "const_int_operand")
12439 (const_string "0")))
12440 (set_attr "mode" "<sseinsnmode>")])
;; Legacy SSE2/AVX form for word/dword elements.
12442 (define_insn "ashr<mode>3"
12443 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
12444 (ashiftrt:VI24_AVX2
12445 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
12446 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12449 psra<ssemodesuffix>\t{%2, %0|%0, %2}
12450 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12451 [(set_attr "isa" "noavx,avx")
12452 (set_attr "type" "sseishft")
12453 (set (attr "length_immediate")
12454 (if_then_else (match_operand 2 "const_int_operand")
12456 (const_string "0")))
12457 (set_attr "prefix_data16" "1,*")
12458 (set_attr "prefix" "orig,vex")
12459 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 (BW/VL) maskable form.
12461 (define_insn "ashr<mode>3<mask_name>"
12462 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
12463 (ashiftrt:VI248_AVX512BW_AVX512VL
12464 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
12465 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12467 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12468 [(set_attr "type" "sseishft")
12469 (set (attr "length_immediate")
12470 (if_then_else (match_operand 2 "const_int_operand")
12472 (const_string "0")))
12473 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (psll*/psrl*, via the any_lshift iterator and <vshift>
;; mnemonic attribute).  Same count-operand scheme as the psra patterns.
12475 (define_insn "<mask_codefor><insn><mode>3<mask_name>"
12476 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
12477 (any_lshift:VI248_AVX512BW_2
12478 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
12479 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12481 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12482 [(set_attr "type" "sseishft")
12483 (set (attr "length_immediate")
12484 (if_then_else (match_operand 2 "const_int_operand")
12486 (const_string "0")))
12487 (set_attr "mode" "<sseinsnmode>")])
;; Legacy SSE2/AVX form.
12489 (define_insn "<insn><mode>3"
12490 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
12491 (any_lshift:VI248_AVX2
12492 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
12493 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12496 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
12497 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12498 [(set_attr "isa" "noavx,avx")
12499 (set_attr "type" "sseishft")
12500 (set (attr "length_immediate")
12501 (if_then_else (match_operand 2 "const_int_operand")
12503 (const_string "0")))
12504 (set_attr "prefix_data16" "1,*")
12505 (set_attr "prefix" "orig,vex")
12506 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 maskable form; memory source only with an immediate count.
12508 (define_insn "<insn><mode>3<mask_name>"
12509 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
12510 (any_lshift:VI248_AVX512BW
12511 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
12512 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
12514 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12515 [(set_attr "type" "sseishft")
12516 (set (attr "length_immediate")
12517 (if_then_else (match_operand 2 "const_int_operand")
12519 (const_string "0")))
12520 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shifts by a byte multiple (pslldq/psrldq semantics).  The
;; 128-bit vector is viewed as V1TI, shifted, and the result viewed back in
;; the original mode via the lowparts created in the preparation code.
12523 (define_expand "vec_shl_<mode>"
12524 [(set (match_dup 3)
12526 (match_operand:V_128 1 "register_operand")
12527 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12528 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12531 operands[1] = gen_lowpart (V1TImode, operands[1]);
12532 operands[3] = gen_reg_rtx (V1TImode);
12533 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Right-shift counterpart; same V1TI round trip.
12536 (define_expand "vec_shr_<mode>"
12537 [(set (match_dup 3)
12539 (match_operand:V_128 1 "register_operand")
12540 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12541 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12544 operands[1] = gen_lowpart (V1TImode, operands[1]);
12545 operands[3] = gen_reg_rtx (V1TImode);
12546 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Byte-granular whole-register shifts.  The RTL count is in bits; it is
;; divided by 8 in the output code because pslldq/psrldq take a byte count.
12549 (define_insn "avx512bw_<insn><mode>3"
12550 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
12551 (any_lshift:VIMAX_AVX512VL
12552 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
12553 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
12556 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12557 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12559 [(set_attr "type" "sseishft")
12560 (set_attr "length_immediate" "1")
12561 (set_attr "prefix" "maybe_evex")
12562 (set_attr "mode" "<sseinsnmode>")])
;; SSE2/AVX2 form; alternatives select the legacy vs. VEX template.
12564 (define_insn "<sse2_avx2>_<insn><mode>3"
12565 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
12566 (any_lshift:VIMAX_AVX2
12567 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
12568 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
12571 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12573 switch (which_alternative)
12576 return "p<vshift>dq\t{%2, %0|%0, %2}";
12578 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12580 gcc_unreachable ();
12583 [(set_attr "isa" "noavx,avx")
12584 (set_attr "type" "sseishft")
12585 (set_attr "length_immediate" "1")
12586 (set_attr "atom_unit" "sishuf")
12587 (set_attr "prefix_data16" "1,*")
12588 (set_attr "prefix" "orig,vex")
12589 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 rotates (vprolv/vprorv with per-element variable counts, and
;; vprol/vpror with an immediate count); both maskable, EVEX-only.
12591 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
12592 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12593 (any_rotate:VI48_AVX512VL
12594 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
12595 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12597 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12598 [(set_attr "prefix" "evex")
12599 (set_attr "mode" "<sseinsnmode>")])
;; Immediate-count form.
12601 (define_insn "<avx512>_<rotate><mode><mask_name>"
12602 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12603 (any_rotate:VI48_AVX512VL
12604 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
12605 (match_operand:SI 2 "const_0_to_255_operand")))]
12607 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12608 [(set_attr "prefix" "evex")
12609 (set_attr "mode" "<sseinsnmode>")])
;; Integer max/min for 256-bit (and wider, per iterator) vectors; the
;; expander just canonicalizes, the AVX2 insn emits vpmax*/vpmin*.
12611 (define_expand "<code><mode>3"
12612 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
12613 (maxmin:VI124_256_AVX512F_AVX512BW
12614 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
12615 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
12617 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12619 (define_insn "*avx2_<code><mode>3"
12620 [(set (match_operand:VI124_256 0 "register_operand" "=v")
12622 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
12623 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
12624 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12625 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12626 [(set_attr "type" "sseiadd")
12627 (set_attr "prefix_extra" "1")
12628 (set_attr "prefix" "vex")
12629 (set_attr "mode" "OI")])
;; Masked max/min expander: vec_merge wraps the maxmin with the mask
;; (operand 4) selecting between the result and the pass-through (operand 3).
12631 (define_expand "<code><mode>3_mask"
12632 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12633 (vec_merge:VI48_AVX512VL
12634 (maxmin:VI48_AVX512VL
12635 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12636 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12637 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12638 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12640 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX-512F dword/qword max/min insn, maskable.
12642 (define_insn "*avx512f_<code><mode>3<mask_name>"
12643 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12644 (maxmin:VI48_AVX512VL
12645 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
12646 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12647 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12648 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12649 [(set_attr "type" "sseiadd")
12650 (set_attr "prefix_extra" "1")
12651 (set_attr "prefix" "maybe_evex")
12652 (set_attr "mode" "<sseinsnmode>")])
;; Byte/word max/min for AVX512BW+VL register files.
12654 (define_insn "<mask_codefor><code><mode>3<mask_name>"
12655 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
12656 (maxmin:VI12_AVX512VL
12657 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
12658 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
12660 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12661 [(set_attr "type" "sseiadd")
12662 (set_attr "prefix" "evex")
12663 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit element max/min.  With native support (V8DI, or VL for narrower)
;; the direct pattern is used; otherwise the operation is lowered to a
;; compare + blend through ix86_expand_int_vcond: for max, select op1 when
;; op1 > op2, for min swap the select arms; GT vs GTU picks signedness.
12665 (define_expand "<code><mode>3"
12666 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12667 (maxmin:VI8_AVX2_AVX512F
12668 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12669 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12673 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
12674 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12677 enum rtx_code code;
12682 xops[0] = operands[0];
12684 if (<CODE> == SMAX || <CODE> == UMAX)
12686 xops[1] = operands[1];
12687 xops[2] = operands[2];
12691 xops[1] = operands[2];
12692 xops[2] = operands[1];
12695 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
12697 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
12698 xops[4] = operands[1];
12699 xops[5] = operands[2];
12701 ok = ix86_expand_int_vcond (xops);
;; 128-bit signed max/min.  SSE4.1 (or V8HI, which has pmaxsw/pminsw since
;; SSE2) matches a direct insn; otherwise lower to compare + blend via
;; ix86_expand_int_vcond, swapping the select arms for min.
12707 (define_expand "<code><mode>3"
12708 [(set (match_operand:VI124_128 0 "register_operand")
12710 (match_operand:VI124_128 1 "vector_operand")
12711 (match_operand:VI124_128 2 "vector_operand")))]
12714 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
12715 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12721 xops[0] = operands[0];
12722 operands[1] = force_reg (<MODE>mode, operands[1]);
12723 operands[2] = force_reg (<MODE>mode, operands[2]);
12725 if (<CODE> == SMAX)
12727 xops[1] = operands[1];
12728 xops[2] = operands[2];
12732 xops[1] = operands[2];
12733 xops[2] = operands[1];
12736 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
12737 xops[4] = operands[1];
12738 xops[5] = operands[2];
12740 ok = ix86_expand_int_vcond (xops);
;; SSE4.1/AVX512 integer max/min insn for byte/dword 128-bit modes
;; (VI14_128), optionally masked.  Alternatives: two legacy two-operand
;; SSE4.1 forms (Yr/x with matching operand 0) and a VEX/EVEX
;; three-operand form.  Memory is allowed in at most one input operand.
12746 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12747 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
12749 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
12750 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12752 && <mask_mode512bit_condition>
12753 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12755 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12756 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12757 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12758 [(set_attr "isa" "noavx,noavx,avx")
12759 (set_attr "type" "sseiadd")
12760 (set_attr "prefix_extra" "1,1,*")
12761 (set_attr "prefix" "orig,orig,vex")
12762 (set_attr "mode" "TI")])
;; V8HImode max/min insn (pmaxsw/pminsw family, per <maxmin_int>):
;; legacy SSE2 two-operand form, AVX three-operand VEX form, and an
;; AVX512BW EVEX form.  Operand 1 is commutative ("%").
12764 (define_insn "*<code>v8hi3"
12765 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
12767 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
12768 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
12769 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12771 p<maxmin_int>w\t{%2, %0|%0, %2}
12772 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
12773 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
12774 [(set_attr "isa" "noavx,avx,avx512bw")
12775 (set_attr "type" "sseiadd")
12776 (set_attr "prefix_data16" "1,*,*")
12777 (set_attr "prefix_extra" "*,1,1")
12778 (set_attr "prefix" "orig,vex,evex")
12779 (set_attr "mode" "TI")])
;; Expander for 128-bit unsigned max/min.  SSE4.1 (or V16QImode, where SSE2
;; has pmaxub/pminub) uses the native insn.  UMAX on V8HImode is synthesized
;; as x + ussub(y, x) == max(x, y) using saturating subtract plus add.
;; Everything else falls back to a GTU compare-and-blend via
;; ix86_expand_int_vcond.  NOTE(review): some source lines are elided here.
12781 (define_expand "<code><mode>3"
12782 [(set (match_operand:VI124_128 0 "register_operand")
12784 (match_operand:VI124_128 1 "vector_operand")
12785 (match_operand:VI124_128 2 "vector_operand")))]
12788 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
12789 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12790 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
12792 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
12793 operands[1] = force_reg (<MODE>mode, operands[1]);
/* Need a scratch when the destination aliases operand 2, since op2 is
   still read by the final add.  */
12794 if (rtx_equal_p (op3, op2))
12795 op3 = gen_reg_rtx (V8HImode);
12796 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
12797 emit_insn (gen_addv8hi3 (op0, op3, op2));
12805 operands[1] = force_reg (<MODE>mode, operands[1]);
12806 operands[2] = force_reg (<MODE>mode, operands[2]);
12808 xops[0] = operands[0];
12810 if (<CODE> == UMAX)
12812 xops[1] = operands[1];
12813 xops[2] = operands[2];
12817 xops[1] = operands[2];
12818 xops[2] = operands[1];
12821 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
12822 xops[4] = operands[1];
12823 xops[5] = operands[2];
12825 ok = ix86_expand_int_vcond (xops);
;; SSE4.1/AVX512 max/min insn for word/dword 128-bit modes (VI24_128),
;; optionally masked; same alternative layout as the VI14_128 variant
;; above (two legacy two-operand forms plus a VEX/EVEX three-operand form).
12831 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12832 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
12834 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
12835 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12837 && <mask_mode512bit_condition>
12838 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12840 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12841 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12842 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12843 [(set_attr "isa" "noavx,noavx,avx")
12844 (set_attr "type" "sseiadd")
12845 (set_attr "prefix_extra" "1,1,*")
12846 (set_attr "prefix" "orig,orig,vex")
12847 (set_attr "mode" "TI")])
;; V16QImode max/min insn (pmaxub/pminub family): legacy SSE2 form,
;; AVX VEX form, and AVX512BW EVEX form.  Operand 1 is commutative.
12849 (define_insn "*<code>v16qi3"
12850 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
12852 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
12853 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
12854 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12856 p<maxmin_int>b\t{%2, %0|%0, %2}
12857 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
12858 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
12859 [(set_attr "isa" "noavx,avx,avx512bw")
12860 (set_attr "type" "sseiadd")
12861 (set_attr "prefix_data16" "1,*,*")
12862 (set_attr "prefix_extra" "*,1,1")
12863 (set_attr "prefix" "orig,vex,evex")
12864 (set_attr "mode" "TI")])
12866 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12868 ;; Parallel integral comparisons
12870 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX2 256-bit integer equality compare: expander plus vpcmpeq insn.
12872 (define_expand "avx2_eq<mode>3"
12873 [(set (match_operand:VI_256 0 "register_operand")
12875 (match_operand:VI_256 1 "nonimmediate_operand")
12876 (match_operand:VI_256 2 "nonimmediate_operand")))]
12878 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12880 (define_insn "*avx2_eq<mode>3"
12881 [(set (match_operand:VI_256 0 "register_operand" "=x")
12883 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
12884 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12885 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12886 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12887 [(set_attr "type" "ssecmp")
12888 (set_attr "prefix_extra" "1")
12889 (set_attr "prefix" "vex")
12890 (set_attr "mode" "OI")])
;; Pre-reload split: an AVX512VL masked-EQ unspec whose result is merged
;; as all-ones/zero (i.e. a full-vector boolean) is rewritten back into a
;; plain vector EQ so the non-masked vpcmpeq form can be used.
12892 (define_insn_and_split "*avx2_eq<mode>3"
12893 [(set (match_operand:VI_128_256 0 "register_operand")
12894 (vec_merge:VI_128_256
12895 (match_operand:VI_128_256 1 "vector_all_ones_operand")
12896 (match_operand:VI_128_256 2 "const0_operand")
12897 (unspec:<avx512fmaskmode>
12898 [(match_operand:VI_128_256 3 "nonimmediate_operand")
12899 (match_operand:VI_128_256 4 "nonimmediate_operand")]
12900 UNSPEC_MASKED_EQ)))]
12901 "TARGET_AVX512VL && ix86_pre_reload_split ()
12902 && !(MEM_P (operands[3]) && MEM_P (operands[4]))"
12905 [(set (match_dup 0)
;; Pre-reload split for the general masked-compare unspec (immediate
;; operand 5 selects the predicate) merged to all-ones/zero.  Only the
;; predicates expressible as EQ (0), GT/NLE (6) or LT (1, with swapped
;; operands) are handled; the condition enforces the per-case memory
;; operand restrictions stated in the inline comments.
12910 (define_insn_and_split "*avx2_pcmp<mode>3_1"
12911 [(set (match_operand:VI_128_256 0 "register_operand")
12912 (vec_merge:VI_128_256
12913 (match_operand:VI_128_256 1 "vector_all_ones_operand")
12914 (match_operand:VI_128_256 2 "const0_operand")
12915 (unspec:<avx512fmaskmode>
12916 [(match_operand:VI_128_256 3 "nonimmediate_operand")
12917 (match_operand:VI_128_256 4 "nonimmediate_operand")
12918 (match_operand:SI 5 "const_0_to_7_operand")]
12920 "TARGET_AVX512VL && ix86_pre_reload_split ()
12921 /* EQ is commutative. */
12922 && ((INTVAL (operands[5]) == 0
12923 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
12924 /* NLE aka GT, 3 must be register. */
12925 || (INTVAL (operands[5]) == 6
12926 && !MEM_P (operands[3]))
12927 /* LT, 4 must be register and we swap operands. */
12928 || (INTVAL (operands[5]) == 1
12929 && !MEM_P (operands[4])))"
12934 if (INTVAL (operands[5]) == 1)
12935 std::swap (operands[3], operands[4]);
12936 enum rtx_code code = INTVAL (operands[5]) ? GT : EQ;
12937 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
12938 operands[3], operands[4]));
;; As above but for the negated mask (not:...unspec), covering NE (4),
;; LE (2) and NLT/GE (5, with swapped operands); the negation makes the
;; inverse predicates expressible as plain EQ/GT.
12942 (define_insn_and_split "*avx2_pcmp<mode>3_2"
12943 [(set (match_operand:VI_128_256 0 "register_operand")
12944 (vec_merge:VI_128_256
12945 (match_operand:VI_128_256 1 "vector_all_ones_operand")
12946 (match_operand:VI_128_256 2 "const0_operand")
12947 (not:<avx512fmaskmode>
12948 (unspec:<avx512fmaskmode>
12949 [(match_operand:VI_128_256 3 "nonimmediate_operand")
12950 (match_operand:VI_128_256 4 "nonimmediate_operand")
12951 (match_operand:SI 5 "const_0_to_7_operand")]
12953 "TARGET_AVX512VL && ix86_pre_reload_split ()
12954 /* NE is commutative. */
12955 && ((INTVAL (operands[5]) == 4
12956 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
12957 /* LE, 3 must be register. */
12958 || (INTVAL (operands[5]) == 2
12959 && !MEM_P (operands[3]))
12960 /* NLT aka GE, 4 must be register and we swap operands. */
12961 || (INTVAL (operands[5]) == 5
12962 && !MEM_P (operands[4])))"
12967 if (INTVAL (operands[5]) == 5)
12968 std::swap (operands[3], operands[4]);
12969 enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ;
12970 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
12971 operands[3], operands[4]));
;; AVX512 compare-into-mask equality: expanders for byte/word (VI12,
;; needs AVX512BW) and dword/qword (VI48) element modes, producing a
;; mask register result via UNSPEC_MASKED_EQ.
12975 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12976 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12977 (unspec:<avx512fmaskmode>
12978 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12979 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
12980 UNSPEC_MASKED_EQ))]
12982 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12984 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12985 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12986 (unspec:<avx512fmaskmode>
12987 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12988 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
12989 UNSPEC_MASKED_EQ))]
12991 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Insn forms: alternative 0 is the general vpcmpeq; alternative 1
;; matches a compare against the zero vector (constraint "C") and emits
;; vptestnm reg,reg instead (mask bit set where the element is zero).
12993 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12994 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12995 (unspec:<avx512fmaskmode>
12996 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12997 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12998 UNSPEC_MASKED_EQ))]
12999 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13001 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
13002 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
13003 [(set_attr "type" "ssecmp")
13004 (set_attr "prefix_extra" "1")
13005 (set_attr "prefix" "evex")
13006 (set_attr "mode" "<sseinsnmode>")])
13008 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
13009 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
13010 (unspec:<avx512fmaskmode>
13011 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
13012 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
13013 UNSPEC_MASKED_EQ))]
13014 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13016 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
13017 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
13018 [(set_attr "type" "ssecmp")
13019 (set_attr "prefix_extra" "1")
13020 (set_attr "prefix" "evex")
13021 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 pcmpeqq / AVX vpcmpeqq for V2DImode equality.
13023 (define_insn "*sse4_1_eqv2di3"
13024 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
13026 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
13027 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
13028 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13030 pcmpeqq\t{%2, %0|%0, %2}
13031 pcmpeqq\t{%2, %0|%0, %2}
13032 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
13033 [(set_attr "isa" "noavx,noavx,avx")
13034 (set_attr "type" "ssecmp")
13035 (set_attr "prefix_extra" "1")
13036 (set_attr "prefix" "orig,orig,vex")
13037 (set_attr "mode" "TI")])
;; SSE2 pcmpeq for byte/word/dword 128-bit modes; disabled under
;; TARGET_XOP (XOP has its own comparison patterns elsewhere).
13039 (define_insn "*sse2_eq<mode>3"
13040 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
13042 (match_operand:VI124_128 1 "vector_operand" "%0,x")
13043 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
13044 "TARGET_SSE2 && !TARGET_XOP
13045 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13047 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
13048 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13049 [(set_attr "isa" "noavx,avx")
13050 (set_attr "type" "ssecmp")
13051 (set_attr "prefix_data16" "1,*")
13052 (set_attr "prefix" "orig,vex")
13053 (set_attr "mode" "TI")])
;; Expander for the SSE2 integer equality compare; fixes up operands and
;; falls through to the *sse2_eq<mode>3 insn above.  Fix: drop the stray
;; trailing space inside the condition string so it matches the insn's
;; "TARGET_SSE2 && !TARGET_XOP" condition exactly (the space was harmless
;; in the generated C, but inconsistent).
13055 (define_expand "sse2_eq<mode>3"
13056 [(set (match_operand:VI124_128 0 "register_operand")
13058 (match_operand:VI124_128 1 "vector_operand")
13059 (match_operand:VI124_128 2 "vector_operand")))]
13060 "TARGET_SSE2 && !TARGET_XOP"
13061 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for V2DI equality (SSE4.1 pcmpeqq path).
13063 (define_expand "sse4_1_eqv2di3"
13064 [(set (match_operand:V2DI 0 "register_operand")
13066 (match_operand:V2DI 1 "vector_operand")
13067 (match_operand:V2DI 2 "vector_operand")))]
13069 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.2 pcmpgtq: signed 64-bit greater-than compare.  Unlike the
;; equality patterns, operand 1 is not commutative, so it must be a
;; register and carries no "%" modifier.
13071 (define_insn "sse4_2_gtv2di3"
13072 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
13074 (match_operand:V2DI 1 "register_operand" "0,0,x")
13075 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
13078 pcmpgtq\t{%2, %0|%0, %2}
13079 pcmpgtq\t{%2, %0|%0, %2}
13080 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
13081 [(set_attr "isa" "noavx,noavx,avx")
13082 (set_attr "type" "ssecmp")
13083 (set_attr "prefix_extra" "1")
13084 (set_attr "prefix" "orig,orig,vex")
13085 (set_attr "mode" "TI")])
;; AVX2 256-bit signed greater-than compare (vpcmpgt).
13087 (define_insn "avx2_gt<mode>3"
13088 [(set (match_operand:VI_256 0 "register_operand" "=x")
13090 (match_operand:VI_256 1 "register_operand" "x")
13091 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
13093 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13094 [(set_attr "type" "ssecmp")
13095 (set_attr "prefix_extra" "1")
13096 (set_attr "prefix" "vex")
13097 (set_attr "mode" "OI")])
;; Pre-reload split: a masked-GT unspec merged as all-ones/zero is
;; rewritten back to a plain vector GT (mirrors the *avx2_eq split above).
13099 (define_insn_and_split "*avx2_gt<mode>3"
13100 [(set (match_operand:VI_128_256 0 "register_operand")
13101 (vec_merge:VI_128_256
13102 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13103 (match_operand:VI_128_256 2 "const0_operand")
13104 (unspec:<avx512fmaskmode>
13105 [(match_operand:VI_128_256 3 "register_operand")
13106 (match_operand:VI_128_256 4 "nonimmediate_operand")]
13107 UNSPEC_MASKED_GT)))]
13109 && ix86_pre_reload_split ()"
13112 [(set (match_dup 0)
;; AVX512 compare-into-mask greater-than: dword/qword element modes...
13117 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
13118 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13119 (unspec:<avx512fmaskmode>
13120 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
13121 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
13123 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13124 [(set_attr "type" "ssecmp")
13125 (set_attr "prefix_extra" "1")
13126 (set_attr "prefix" "evex")
13127 (set_attr "mode" "<sseinsnmode>")])
;; ...and byte/word element modes.
13129 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
13130 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13131 (unspec:<avx512fmaskmode>
13132 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
13133 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
13135 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13136 [(set_attr "type" "ssecmp")
13137 (set_attr "prefix_extra" "1")
13138 (set_attr "prefix" "evex")
13139 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 pcmpgt for byte/word/dword 128-bit modes; disabled under
;; TARGET_XOP.
13141 (define_insn "sse2_gt<mode>3"
13142 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
13144 (match_operand:VI124_128 1 "register_operand" "0,x")
13145 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
13146 "TARGET_SSE2 && !TARGET_XOP"
13148 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
13149 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13150 [(set_attr "isa" "noavx,avx")
13151 (set_attr "type" "ssecmp")
13152 (set_attr "prefix_data16" "1,*")
13153 (set_attr "prefix" "orig,vex")
13154 (set_attr "mode" "TI")])
;; vcond/vcondu standard-name expanders: vector conditional select
;; (operands 1/2) controlled by an integer comparison (operator 3 on
;; operands 4/5).  Separate templates per vector width; the NUNITS check
;; requires the data and comparison modes to have the same element count.
;; All of them delegate to ix86_expand_int_vcond.
;; NOTE(review): some source lines (conditions, gcc_assert/DONE bodies)
;; are elided in this excerpt.
;; Signed comparisons, 512-bit.
13157 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
13158 [(set (match_operand:V_512 0 "register_operand")
13159 (if_then_else:V_512
13160 (match_operator 3 ""
13161 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
13162 (match_operand:VI_AVX512BW 5 "general_operand")])
13163 (match_operand:V_512 1)
13164 (match_operand:V_512 2)))]
13165 && (GET_MODE_NUNITS (<V_512:MODE>mode)
13166 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
13168 bool ok = ix86_expand_int_vcond (operands);
;; Signed comparisons, 256-bit.
13173 (define_expand "vcond<V_256:mode><VI_256:mode>"
13174 [(set (match_operand:V_256 0 "register_operand")
13175 (if_then_else:V_256
13176 (match_operator 3 ""
13177 [(match_operand:VI_256 4 "nonimmediate_operand")
13178 (match_operand:VI_256 5 "general_operand")])
13179 (match_operand:V_256 1)
13180 (match_operand:V_256 2)))]
13182 && (GET_MODE_NUNITS (<V_256:MODE>mode)
13183 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
13185 bool ok = ix86_expand_int_vcond (operands);
;; Signed comparisons, 128-bit, byte/word/dword elements.
13190 (define_expand "vcond<V_128:mode><VI124_128:mode>"
13191 [(set (match_operand:V_128 0 "register_operand")
13192 (if_then_else:V_128
13193 (match_operator 3 ""
13194 [(match_operand:VI124_128 4 "vector_operand")
13195 (match_operand:VI124_128 5 "general_operand")])
13196 (match_operand:V_128 1)
13197 (match_operand:V_128 2)))]
13199 && (GET_MODE_NUNITS (<V_128:MODE>mode)
13200 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
13202 bool ok = ix86_expand_int_vcond (operands);
;; Signed comparisons, 128-bit with V2DI compare operands.
13207 (define_expand "vcond<VI8F_128:mode>v2di"
13208 [(set (match_operand:VI8F_128 0 "register_operand")
13209 (if_then_else:VI8F_128
13210 (match_operator 3 ""
13211 [(match_operand:V2DI 4 "vector_operand")
13212 (match_operand:V2DI 5 "general_operand")])
13213 (match_operand:VI8F_128 1)
13214 (match_operand:VI8F_128 2)))]
13217 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned comparisons, 512-bit.
13222 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
13223 [(set (match_operand:V_512 0 "register_operand")
13224 (if_then_else:V_512
13225 (match_operator 3 ""
13226 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
13227 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
13228 (match_operand:V_512 1 "general_operand")
13229 (match_operand:V_512 2 "general_operand")))]
13231 && (GET_MODE_NUNITS (<V_512:MODE>mode)
13232 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
13234 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned comparisons, 256-bit.
13239 (define_expand "vcondu<V_256:mode><VI_256:mode>"
13240 [(set (match_operand:V_256 0 "register_operand")
13241 (if_then_else:V_256
13242 (match_operator 3 ""
13243 [(match_operand:VI_256 4 "nonimmediate_operand")
13244 (match_operand:VI_256 5 "nonimmediate_operand")])
13245 (match_operand:V_256 1 "general_operand")
13246 (match_operand:V_256 2 "general_operand")))]
13248 && (GET_MODE_NUNITS (<V_256:MODE>mode)
13249 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
13251 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned comparisons, 128-bit, byte/word/dword elements.
13256 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
13257 [(set (match_operand:V_128 0 "register_operand")
13258 (if_then_else:V_128
13259 (match_operator 3 ""
13260 [(match_operand:VI124_128 4 "vector_operand")
13261 (match_operand:VI124_128 5 "vector_operand")])
13262 (match_operand:V_128 1 "general_operand")
13263 (match_operand:V_128 2 "general_operand")))]
13265 && (GET_MODE_NUNITS (<V_128:MODE>mode)
13266 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
13268 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned comparisons, 128-bit with V2DI compare operands.
13273 (define_expand "vcondu<VI8F_128:mode>v2di"
13274 [(set (match_operand:VI8F_128 0 "register_operand")
13275 (if_then_else:VI8F_128
13276 (match_operator 3 ""
13277 [(match_operand:V2DI 4 "vector_operand")
13278 (match_operand:V2DI 5 "vector_operand")])
13279 (match_operand:VI8F_128 1 "general_operand")
13280 (match_operand:VI8F_128 2 "general_operand")))]
13283 bool ok = ix86_expand_int_vcond (operands);
;; Equality-only variant for V2DI compare operands.
13288 (define_expand "vcondeq<VI8F_128:mode>v2di"
13289 [(set (match_operand:VI8F_128 0 "register_operand")
13290 (if_then_else:VI8F_128
13291 (match_operator 3 ""
13292 [(match_operand:V2DI 4 "vector_operand")
13293 (match_operand:V2DI 5 "general_operand")])
13294 (match_operand:VI8F_128 1)
13295 (match_operand:VI8F_128 2)))]
13298 bool ok = ix86_expand_int_vcond (operands);
;; Modes supporting the vec_perm standard name: baseline 128-bit modes,
;; 256-bit under AVX2, 512-bit under AVX512F (V32HI needs AVX512BW,
;; V64QI needs AVX512VBMI).
13303 (define_mode_iterator VEC_PERM_AVX2
13304 [V16QI V8HI V4SI V2DI V4SF V2DF
13305 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
13306 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
13307 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
13308 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
13309 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
13310 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; Variable permute of two input vectors (operands 1, 2) by an integer
;; selector vector (operand 3); lowered by ix86_expand_vec_perm.
13312 (define_expand "vec_perm<mode>"
13313 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
13314 (match_operand:VEC_PERM_AVX2 1 "register_operand")
13315 (match_operand:VEC_PERM_AVX2 2 "register_operand")
13316 (match_operand:<sseintvecmode> 3 "register_operand")]
13317 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
13319 ix86_expand_vec_perm (operands);
13323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13325 ;; Parallel bitwise logical operations
13327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector one's complement, expressed as XOR with all-ones.  Without
;; AVX512F the constant must live in a register; with AVX512F it can stay
;; a constant so the vpternlog insn below can match.
13329 (define_expand "one_cmpl<mode>2"
13330 [(set (match_operand:VI 0 "register_operand")
13331 (xor:VI (match_operand:VI 1 "vector_operand")
13335 if (!TARGET_AVX512F)
13336 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
13338 operands[2] = CONSTM1_RTX (<MODE>mode);
;; NOT via vpternlog with immediate 0x55 (truth table of ~a).  Without
;; AVX512VL the operands are promoted to the 512-bit register (%g
;; modifiers) since vpternlog is only available in 512-bit form then;
;; alternative 1 (memory input) is enabled only for 64-byte modes or
;; with AVX512VL.
13341 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
13342 [(set (match_operand:VI 0 "register_operand" "=v,v")
13343 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
13344 (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
13346 && (!<mask_applied>
13347 || <ssescalarmode>mode == SImode
13348 || <ssescalarmode>mode == DImode)"
13350 if (TARGET_AVX512VL)
13351 return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
13353 return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
13355 [(set_attr "type" "sselog")
13356 (set_attr "prefix" "evex")
13358 (if_then_else (match_test "TARGET_AVX512VL")
13359 (const_string "<sseinsnmode>")
13360 (const_string "XI")))
13361 (set (attr "enabled")
13362 (if_then_else (eq_attr "alternative" "1")
13363 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
;; ANDN expanders: (~op1) & op2, plain and masked (VI48 / VI12 element
;; modes for the masked forms).  Conditions and closing parens of these
;; expanders are elided in this excerpt.
13366 (define_expand "<sse2_avx2>_andnot<mode>3"
13367 [(set (match_operand:VI_AVX2 0 "register_operand")
13369 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
13370 (match_operand:VI_AVX2 2 "vector_operand")))]
13373 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13374 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
13375 (vec_merge:VI48_AVX512VL
13378 (match_operand:VI48_AVX512VL 1 "register_operand"))
13379 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
13380 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
13381 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13384 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13385 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
13386 (vec_merge:VI12_AVX512VL
13389 (match_operand:VI12_AVX512VL 1 "register_operand"))
13390 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
13391 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
13392 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; ANDN insn.  The C output routine picks the mnemonic root (pandn /
;; vpandn / andnps family, per the elided V*mode cases) from
;; get_attr_mode and appends an element-size suffix; there is no
;; vpandnb/vpandnw, so byte/word modes use the "q" suffix under EVEX
;; (see the inline comment at 13422).  Alternatives: legacy two-operand,
;; VEX three-operand, EVEX (with broadcast "Br" allowed on operand 2).
13395 (define_insn "*andnot<mode>3"
13396 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
13398 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
13399 (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13405 const char *ssesuffix;
13407 switch (get_attr_mode (insn))
13410 gcc_assert (TARGET_AVX512F);
13413 gcc_assert (TARGET_AVX2);
13416 gcc_assert (TARGET_SSE2);
13418 switch (<MODE>mode)
13422 /* There is no vpandnb or vpandnw instruction, nor vpandn for
13423 512-bit vectors. Use vpandnq instead. */
13428 ssesuffix = "<ssemodesuffix>";
13434 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
13435 ? "<ssemodesuffix>" : "");
13438 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13443 gcc_assert (TARGET_AVX512F);
13446 gcc_assert (TARGET_AVX);
13449 gcc_assert (TARGET_SSE);
13455 gcc_unreachable ();
13458 switch (which_alternative)
13461 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13465 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13468 gcc_unreachable ();
13471 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13472 output_asm_insn (buf, operands);
13475 [(set_attr "isa" "noavx,avx,avx")
13476 (set_attr "type" "sselog")
13477 (set (attr "prefix_data16")
13479 (and (eq_attr "alternative" "0")
13480 (eq_attr "mode" "TI"))
13482 (const_string "*")))
13483 (set_attr "prefix" "orig,vex,evex")
13485 (cond [(match_test "TARGET_AVX2")
13486 (const_string "<sseinsnmode>")
13487 (match_test "TARGET_AVX")
13489 (match_test "<MODE_SIZE> > 16")
13490 (const_string "V8SF")
13491 (const_string "<sseinsnmode>"))
13492 (ior (not (match_test "TARGET_SSE2"))
13493 (match_test "optimize_function_for_size_p (cfun)"))
13494 (const_string "V4SF")
13496 (const_string "<sseinsnmode>")))])
;; Masked ANDN insn: vpandn with merge-masking ("0C" on operand 3 means
;; merge into the destination or zero-mask).  NOTE(review): the template
;; line below ends in a stray ';' after the closing quote; the md reader
;; treats ';' to end-of-line as a comment, so it appears harmless —
;; confirm before removing.
13498 (define_insn "*andnot<mode>3_mask"
13499 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13500 (vec_merge:VI48_AVX512VL
13503 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
13504 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
13505 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
13506 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
13508 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
13509 [(set_attr "type" "sselog")
13510 (set_attr "prefix" "evex")
13511 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vector AND/IOR/XOR (any_logic); constants are allowed and
;; handled by ix86_expand_vector_logical_operator.
13513 (define_expand "<code><mode>3"
13514 [(set (match_operand:VI 0 "register_operand")
13516 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
13517 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
13520 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; AND/IOR/XOR insn for dword/qword element modes, optionally masked,
;; with embedded broadcast ("Br") on operand 2.  The C output routine
;; chooses the mnemonic root from get_attr_mode and appends the element
;; suffix; when masking applies the EVEX suffixed form is forced.
13524 (define_insn "<mask_codefor><code><mode>3<mask_name>"
13525 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
13526 (any_logic:VI48_AVX_AVX512F
13527 (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v")
13528 (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13529 "TARGET_SSE && <mask_mode512bit_condition>
13530 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
13535 const char *ssesuffix;
13537 switch (get_attr_mode (insn))
13540 gcc_assert (TARGET_AVX512F);
13543 gcc_assert (TARGET_AVX2);
13546 gcc_assert (TARGET_SSE2);
13548 switch (<MODE>mode)
13552 ssesuffix = "<ssemodesuffix>";
13558 ssesuffix = (TARGET_AVX512VL
13559 && (<mask_applied> || which_alternative == 2)
13560 ? "<ssemodesuffix>" : "");
13563 gcc_unreachable ();
13568 gcc_assert (TARGET_AVX);
13571 gcc_assert (TARGET_SSE);
13577 gcc_unreachable ();
13580 switch (which_alternative)
13583 if (<mask_applied>)
13584 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
13586 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13590 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
13593 gcc_unreachable ();
13596 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13597 output_asm_insn (buf, operands);
13600 [(set_attr "isa" "noavx,avx,avx")
13601 (set_attr "type" "sselog")
13602 (set (attr "prefix_data16")
13604 (and (eq_attr "alternative" "0")
13605 (eq_attr "mode" "TI"))
13607 (const_string "*")))
13608 (set_attr "prefix" "<mask_prefix3>,evex")
13610 (cond [(match_test "TARGET_AVX2")
13611 (const_string "<sseinsnmode>")
13612 (match_test "TARGET_AVX")
13614 (match_test "<MODE_SIZE> > 16")
13615 (const_string "V8SF")
13616 (const_string "<sseinsnmode>"))
13617 (ior (not (match_test "TARGET_SSE2"))
13618 (match_test "optimize_function_for_size_p (cfun)"))
13619 (const_string "V4SF")
13621 (const_string "<sseinsnmode>")))])
;; AND/IOR/XOR insn for byte/word element modes.  There are no
;; vp<logic>b/w forms, so under EVEX (alternative 2 with AVX512VL) the
;; "q" suffix is used (see the suffix selection at 13656).
13623 (define_insn "*<code><mode>3"
13624 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
13625 (any_logic:VI12_AVX_AVX512F
13626 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
13627 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
13628 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13633 const char *ssesuffix;
13635 switch (get_attr_mode (insn))
13638 gcc_assert (TARGET_AVX512F);
13641 gcc_assert (TARGET_AVX2);
13644 gcc_assert (TARGET_SSE2);
13646 switch (<MODE>mode)
13656 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13659 gcc_unreachable ();
13664 gcc_assert (TARGET_AVX);
13667 gcc_assert (TARGET_SSE);
13673 gcc_unreachable ();
13676 switch (which_alternative)
13679 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13683 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13686 gcc_unreachable ();
13689 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13690 output_asm_insn (buf, operands);
13693 [(set_attr "isa" "noavx,avx,avx")
13694 (set_attr "type" "sselog")
13695 (set (attr "prefix_data16")
13697 (and (eq_attr "alternative" "0")
13698 (eq_attr "mode" "TI"))
13700 (const_string "*")))
13701 (set_attr "prefix" "orig,vex,evex")
13703 (cond [(match_test "TARGET_AVX2")
13704 (const_string "<sseinsnmode>")
13705 (match_test "TARGET_AVX")
13707 (match_test "<MODE_SIZE> > 16")
13708 (const_string "V8SF")
13709 (const_string "<sseinsnmode>"))
13710 (ior (not (match_test "TARGET_SSE2"))
13711 (match_test "optimize_function_for_size_p (cfun)"))
13712 (const_string "V4SF")
13714 (const_string "<sseinsnmode>")))])
;; Element modes for vptestm/vptestnm: byte/word need AVX512BW (plus
;; AVX512VL for sub-512-bit), dword/qword need only AVX512F/AVX512VL.
13716 (define_mode_iterator VI1248_AVX512VLBW
13717 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
13718 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
13719 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
13720 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
13721 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
13722 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Wider integer modes a mask result may be zero-extended into; SI/DI
;; mask sizes require AVX512BW.
13724 (define_mode_iterator AVX512ZEXTMASK
13725 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
;; vptestm: mask = (op1 & op2) element-wise != 0, optionally merge-masked.
13727 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
13728 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13729 (unspec:<avx512fmaskmode>
13730 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13731 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13734 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13735 [(set_attr "prefix" "evex")
13736 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm: mask = (op1 & op2) element-wise == 0.
13738 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
13739 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13740 (unspec:<avx512fmaskmode>
13741 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13742 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13745 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13746 [(set_attr "prefix" "evex")
13747 (set_attr "mode" "<sseinsnmode>")])
;; vptestm result zero-extended into a wider mask mode: legal because
;; the instruction writes the full mask register; the condition requires
;; the destination to be strictly wider than the natural mask mode.
13749 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
13750 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13751 (zero_extend:AVX512ZEXTMASK
13752 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13753 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13754 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13757 && (<AVX512ZEXTMASK:MODE_SIZE>
13758 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13759 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13760 [(set_attr "prefix" "evex")
13761 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; Same, with an AND against a mask register folded into the zero-extend
;; (emitted as the %{%3%} write-mask).
13763 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
13764 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13765 (zero_extend:AVX512ZEXTMASK
13766 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13767 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13768 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13769 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13771 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13773 && (<AVX512ZEXTMASK:MODE_SIZE>
13774 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13775 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13776 [(set_attr "prefix" "evex")
13777 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; vptestnm zero-extend variants, mirroring the two patterns above.
13779 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
13780 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13781 (zero_extend:AVX512ZEXTMASK
13782 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13783 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13784 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13787 && (<AVX512ZEXTMASK:MODE_SIZE>
13788 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13789 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13790 [(set_attr "prefix" "evex")
13791 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13793 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
13794 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13795 (zero_extend:AVX512ZEXTMASK
13796 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13797 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13798 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13799 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13801 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13803 && (<AVX512ZEXTMASK:MODE_SIZE>
13804 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13805 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13806 [(set_attr "prefix" "evex")
13807 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13809 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13811 ;; Parallel integral element swizzling
13813 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two wide-element vectors into one vector of
;; half-width elements: reinterpret both operands in the packed mode via
;; gen_lowpart, then select the even elements of the concatenation.
13815 (define_expand "vec_pack_trunc_<mode>"
13816 [(match_operand:<ssepackmode> 0 "register_operand")
13817 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
13818 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
13821 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
13822 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
13823 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two QImode mask values into one HImode mask: operand 2 is
;; zero-extended and shifted into the high half, then IORed with the
;; zero-extended operand 1 in the low half.
13827 (define_expand "vec_pack_trunc_qi"
13828 [(set (match_operand:HI 0 "register_operand")
13829 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
13831 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
;; Pack two HImode/SImode mask values into one double-width mask; the
;; shift amount (operand 3) is set to the source mode's bit size so
;; operand 2 lands in the upper half.
13834 (define_expand "vec_pack_trunc_<mode>"
13835 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
13836 (ior:<DOUBLEMASKMODE>
13837 (ashift:<DOUBLEMASKMODE>
13838 (zero_extend:<DOUBLEMASKMODE>
13839 (match_operand:SWI24 2 "register_operand"))
13841 (zero_extend:<DOUBLEMASKMODE>
13842 (match_operand:SWI24 1 "register_operand"))))]
13845 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
;; Pack two subword boolean (QImode) masks of nunits total elements:
;; keep the low nunits/2 bits of operand 1 (kand with a constant mask),
;; shift operand 2 left by nunits/2, and IOR the halves together with
;; mask-register instructions.
13848 (define_expand "vec_pack_sbool_trunc_qi"
13849 [(match_operand:QI 0 "register_operand")
13850 (match_operand:QI 1 "register_operand")
13851 (match_operand:QI 2 "register_operand")
13852 (match_operand:QI 3 "const_int_operand")]
13855 HOST_WIDE_INT nunits = INTVAL (operands[3]);
13856 rtx mask, tem1, tem2;
;; Only 8- and 4-element packs are handled here.
13857 if (nunits != 8 && nunits != 4)
13859 mask = gen_reg_rtx (QImode);
13860 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
13861 tem1 = gen_reg_rtx (QImode);
13862 emit_insn (gen_kandqi (tem1, operands[1], mask));
;; kshiftb (QImode kashift) needs AVX512DQ; otherwise shift in HImode
;; via subregs and truncate back to QImode.
13863 if (TARGET_AVX512DQ)
13865 tem2 = gen_reg_rtx (QImode);
13866 emit_insn (gen_kashiftqi (tem2, operands[2],
13867 GEN_INT (nunits / 2)));
13871 tem2 = gen_reg_rtx (HImode);
13872 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
13874 GEN_INT (nunits / 2)));
13875 tem2 = lowpart_subreg (QImode, tem2, HImode);
13877 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
;; packsswb: concatenate the signed-saturating truncations of two
;; word-element vectors into one byte-element vector.  Alternatives:
;; SSE2 two-operand, AVX three-operand, AVX512BW EVEX (optionally masked).
13881 (define_insn "<sse2_avx2>_packsswb<mask_name>"
13882 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13883 (vec_concat:VI1_AVX512
13884 (ss_truncate:<ssehalfvecmode>
13885 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13886 (ss_truncate:<ssehalfvecmode>
13887 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13888 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13890 packsswb\t{%2, %0|%0, %2}
13891 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13892 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13893 [(set_attr "isa" "noavx,avx,avx512bw")
13894 (set_attr "type" "sselog")
13895 (set_attr "prefix_data16" "1,*,*")
13896 (set_attr "prefix" "orig,<mask_prefix>,evex")
13897 (set_attr "mode" "<sseinsnmode>")])
;; packssdw: concatenate the signed-saturating truncations of two
;; dword-element vectors into one word-element vector; same alternative
;; layout as packsswb above.
13899 (define_insn "<sse2_avx2>_packssdw<mask_name>"
13900 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13901 (vec_concat:VI2_AVX2
13902 (ss_truncate:<ssehalfvecmode>
13903 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13904 (ss_truncate:<ssehalfvecmode>
13905 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13906 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13908 packssdw\t{%2, %0|%0, %2}
13909 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13910 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13911 [(set_attr "isa" "noavx,avx,avx512bw")
13912 (set_attr "type" "sselog")
13913 (set_attr "prefix_data16" "1,*,*")
13914 (set_attr "prefix" "orig,<mask_prefix>,evex")
13915 (set_attr "mode" "<sseinsnmode>")])
;; packuswb: like packsswb but with unsigned saturation (us_truncate).
13917 (define_insn "<sse2_avx2>_packuswb<mask_name>"
13918 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13919 (vec_concat:VI1_AVX512
13920 (us_truncate:<ssehalfvecmode>
13921 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13922 (us_truncate:<ssehalfvecmode>
13923 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13924 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13926 packuswb\t{%2, %0|%0, %2}
13927 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13928 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13929 [(set_attr "isa" "noavx,avx,avx512bw")
13930 (set_attr "type" "sselog")
13931 (set_attr "prefix_data16" "1,*,*")
13932 (set_attr "prefix" "orig,<mask_prefix>,evex")
13933 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on 512-bit vectors: within each 128-bit lane, interleave
;; the high eight bytes of operand 1 with the high eight bytes of
;; operand 2 (selector pairs index i of op1 with index 64+i of op2).
13935 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
13936 [(set (match_operand:V64QI 0 "register_operand" "=v")
13939 (match_operand:V64QI 1 "register_operand" "v")
13940 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13941 (parallel [(const_int 8) (const_int 72)
13942 (const_int 9) (const_int 73)
13943 (const_int 10) (const_int 74)
13944 (const_int 11) (const_int 75)
13945 (const_int 12) (const_int 76)
13946 (const_int 13) (const_int 77)
13947 (const_int 14) (const_int 78)
13948 (const_int 15) (const_int 79)
13949 (const_int 24) (const_int 88)
13950 (const_int 25) (const_int 89)
13951 (const_int 26) (const_int 90)
13952 (const_int 27) (const_int 91)
13953 (const_int 28) (const_int 92)
13954 (const_int 29) (const_int 93)
13955 (const_int 30) (const_int 94)
13956 (const_int 31) (const_int 95)
13957 (const_int 40) (const_int 104)
13958 (const_int 41) (const_int 105)
13959 (const_int 42) (const_int 106)
13960 (const_int 43) (const_int 107)
13961 (const_int 44) (const_int 108)
13962 (const_int 45) (const_int 109)
13963 (const_int 46) (const_int 110)
13964 (const_int 47) (const_int 111)
13965 (const_int 56) (const_int 120)
13966 (const_int 57) (const_int 121)
13967 (const_int 58) (const_int 122)
13968 (const_int 59) (const_int 123)
13969 (const_int 60) (const_int 124)
13970 (const_int 61) (const_int 125)
13971 (const_int 62) (const_int 126)
13972 (const_int 63) (const_int 127)])))]
13974 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13975 [(set_attr "type" "sselog")
13976 (set_attr "prefix" "evex")
13977 (set_attr "mode" "XI")])
;; vpunpckhbw on 256-bit vectors: per-128-bit-lane interleave of the
;; high bytes of operands 1 and 2 (op2 element i appears as index 32+i).
13979 (define_insn "avx2_interleave_highv32qi<mask_name>"
13980 [(set (match_operand:V32QI 0 "register_operand" "=v")
13983 (match_operand:V32QI 1 "register_operand" "v")
13984 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13985 (parallel [(const_int 8) (const_int 40)
13986 (const_int 9) (const_int 41)
13987 (const_int 10) (const_int 42)
13988 (const_int 11) (const_int 43)
13989 (const_int 12) (const_int 44)
13990 (const_int 13) (const_int 45)
13991 (const_int 14) (const_int 46)
13992 (const_int 15) (const_int 47)
13993 (const_int 24) (const_int 56)
13994 (const_int 25) (const_int 57)
13995 (const_int 26) (const_int 58)
13996 (const_int 27) (const_int 59)
13997 (const_int 28) (const_int 60)
13998 (const_int 29) (const_int 61)
13999 (const_int 30) (const_int 62)
14000 (const_int 31) (const_int 63)])))]
14001 "TARGET_AVX2 && <mask_avx512vl_condition>"
14002 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14003 [(set_attr "type" "sselog")
14004 (set_attr "prefix" "<mask_prefix>")
14005 (set_attr "mode" "OI")])
;; punpckhbw/vpunpckhbw on 128-bit vectors: interleave the high eight
;; bytes of operands 1 and 2.  Alternatives: SSE2 two-operand form and
;; AVX three-operand form.
14007 (define_insn "vec_interleave_highv16qi<mask_name>"
14008 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
14011 (match_operand:V16QI 1 "register_operand" "0,v")
14012 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
14013 (parallel [(const_int 8) (const_int 24)
14014 (const_int 9) (const_int 25)
14015 (const_int 10) (const_int 26)
14016 (const_int 11) (const_int 27)
14017 (const_int 12) (const_int 28)
14018 (const_int 13) (const_int 29)
14019 (const_int 14) (const_int 30)
14020 (const_int 15) (const_int 31)])))]
14021 "TARGET_SSE2 && <mask_avx512vl_condition>"
14023 punpckhbw\t{%2, %0|%0, %2}
14024 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14025 [(set_attr "isa" "noavx,avx")
14026 (set_attr "type" "sselog")
14027 (set_attr "prefix_data16" "1,*")
14028 (set_attr "prefix" "orig,<mask_prefix>")
14029 (set_attr "mode" "TI")])
;; vpunpcklbw on 512-bit vectors: within each 128-bit lane, interleave
;; the low eight bytes of operand 1 with the low eight bytes of operand 2.
14031 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
14032 [(set (match_operand:V64QI 0 "register_operand" "=v")
14035 (match_operand:V64QI 1 "register_operand" "v")
14036 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
14037 (parallel [(const_int 0) (const_int 64)
14038 (const_int 1) (const_int 65)
14039 (const_int 2) (const_int 66)
14040 (const_int 3) (const_int 67)
14041 (const_int 4) (const_int 68)
14042 (const_int 5) (const_int 69)
14043 (const_int 6) (const_int 70)
14044 (const_int 7) (const_int 71)
14045 (const_int 16) (const_int 80)
14046 (const_int 17) (const_int 81)
14047 (const_int 18) (const_int 82)
14048 (const_int 19) (const_int 83)
14049 (const_int 20) (const_int 84)
14050 (const_int 21) (const_int 85)
14051 (const_int 22) (const_int 86)
14052 (const_int 23) (const_int 87)
14053 (const_int 32) (const_int 96)
14054 (const_int 33) (const_int 97)
14055 (const_int 34) (const_int 98)
14056 (const_int 35) (const_int 99)
14057 (const_int 36) (const_int 100)
14058 (const_int 37) (const_int 101)
14059 (const_int 38) (const_int 102)
14060 (const_int 39) (const_int 103)
14061 (const_int 48) (const_int 112)
14062 (const_int 49) (const_int 113)
14063 (const_int 50) (const_int 114)
14064 (const_int 51) (const_int 115)
14065 (const_int 52) (const_int 116)
14066 (const_int 53) (const_int 117)
14067 (const_int 54) (const_int 118)
14068 (const_int 55) (const_int 119)])))]
14070 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14071 [(set_attr "type" "sselog")
14072 (set_attr "prefix" "evex")
14073 (set_attr "mode" "XI")])
;; vpunpcklbw on 256-bit vectors: per-128-bit-lane interleave of the
;; low bytes of operands 1 and 2.
14075 (define_insn "avx2_interleave_lowv32qi<mask_name>"
14076 [(set (match_operand:V32QI 0 "register_operand" "=v")
14079 (match_operand:V32QI 1 "register_operand" "v")
14080 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
14081 (parallel [(const_int 0) (const_int 32)
14082 (const_int 1) (const_int 33)
14083 (const_int 2) (const_int 34)
14084 (const_int 3) (const_int 35)
14085 (const_int 4) (const_int 36)
14086 (const_int 5) (const_int 37)
14087 (const_int 6) (const_int 38)
14088 (const_int 7) (const_int 39)
14089 (const_int 16) (const_int 48)
14090 (const_int 17) (const_int 49)
14091 (const_int 18) (const_int 50)
14092 (const_int 19) (const_int 51)
14093 (const_int 20) (const_int 52)
14094 (const_int 21) (const_int 53)
14095 (const_int 22) (const_int 54)
14096 (const_int 23) (const_int 55)])))]
14097 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14098 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14099 [(set_attr "type" "sselog")
14100 (set_attr "prefix" "maybe_vex")
14101 (set_attr "mode" "OI")])
;; punpcklbw/vpunpcklbw on 128-bit vectors: interleave the low eight
;; bytes of operands 1 and 2.
14103 (define_insn "vec_interleave_lowv16qi<mask_name>"
14104 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
14107 (match_operand:V16QI 1 "register_operand" "0,v")
14108 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
14109 (parallel [(const_int 0) (const_int 16)
14110 (const_int 1) (const_int 17)
14111 (const_int 2) (const_int 18)
14112 (const_int 3) (const_int 19)
14113 (const_int 4) (const_int 20)
14114 (const_int 5) (const_int 21)
14115 (const_int 6) (const_int 22)
14116 (const_int 7) (const_int 23)])))]
14117 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14119 punpcklbw\t{%2, %0|%0, %2}
14120 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14121 [(set_attr "isa" "noavx,avx")
14122 (set_attr "type" "sselog")
14123 (set_attr "prefix_data16" "1,*")
14124 (set_attr "prefix" "orig,vex")
14125 (set_attr "mode" "TI")])
;; vpunpckhwd on 512-bit vectors: within each 128-bit lane, interleave
;; the high four words of operand 1 with those of operand 2.
14127 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
14128 [(set (match_operand:V32HI 0 "register_operand" "=v")
14131 (match_operand:V32HI 1 "register_operand" "v")
14132 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
14133 (parallel [(const_int 4) (const_int 36)
14134 (const_int 5) (const_int 37)
14135 (const_int 6) (const_int 38)
14136 (const_int 7) (const_int 39)
14137 (const_int 12) (const_int 44)
14138 (const_int 13) (const_int 45)
14139 (const_int 14) (const_int 46)
14140 (const_int 15) (const_int 47)
14141 (const_int 20) (const_int 52)
14142 (const_int 21) (const_int 53)
14143 (const_int 22) (const_int 54)
14144 (const_int 23) (const_int 55)
14145 (const_int 28) (const_int 60)
14146 (const_int 29) (const_int 61)
14147 (const_int 30) (const_int 62)
14148 (const_int 31) (const_int 63)])))]
14150 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14151 [(set_attr "type" "sselog")
14152 (set_attr "prefix" "evex")
14153 (set_attr "mode" "XI")])
;; vpunpckhwd on 256-bit vectors: per-128-bit-lane interleave of the
;; high words of operands 1 and 2.
14155 (define_insn "avx2_interleave_highv16hi<mask_name>"
14156 [(set (match_operand:V16HI 0 "register_operand" "=v")
14159 (match_operand:V16HI 1 "register_operand" "v")
14160 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
14161 (parallel [(const_int 4) (const_int 20)
14162 (const_int 5) (const_int 21)
14163 (const_int 6) (const_int 22)
14164 (const_int 7) (const_int 23)
14165 (const_int 12) (const_int 28)
14166 (const_int 13) (const_int 29)
14167 (const_int 14) (const_int 30)
14168 (const_int 15) (const_int 31)])))]
14169 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14170 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14171 [(set_attr "type" "sselog")
14172 (set_attr "prefix" "maybe_evex")
14173 (set_attr "mode" "OI")])
;; punpckhwd/vpunpckhwd on 128-bit vectors: interleave the high four
;; words of operands 1 and 2.
14175 (define_insn "vec_interleave_highv8hi<mask_name>"
14176 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
14179 (match_operand:V8HI 1 "register_operand" "0,v")
14180 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
14181 (parallel [(const_int 4) (const_int 12)
14182 (const_int 5) (const_int 13)
14183 (const_int 6) (const_int 14)
14184 (const_int 7) (const_int 15)])))]
14185 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14187 punpckhwd\t{%2, %0|%0, %2}
14188 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14189 [(set_attr "isa" "noavx,avx")
14190 (set_attr "type" "sselog")
14191 (set_attr "prefix_data16" "1,*")
14192 (set_attr "prefix" "orig,maybe_vex")
14193 (set_attr "mode" "TI")])
;; vpunpcklwd on 512-bit vectors: within each 128-bit lane, interleave
;; the low four words of operand 1 with those of operand 2.
14195 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
14196 [(set (match_operand:V32HI 0 "register_operand" "=v")
14199 (match_operand:V32HI 1 "register_operand" "v")
14200 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
14201 (parallel [(const_int 0) (const_int 32)
14202 (const_int 1) (const_int 33)
14203 (const_int 2) (const_int 34)
14204 (const_int 3) (const_int 35)
14205 (const_int 8) (const_int 40)
14206 (const_int 9) (const_int 41)
14207 (const_int 10) (const_int 42)
14208 (const_int 11) (const_int 43)
14209 (const_int 16) (const_int 48)
14210 (const_int 17) (const_int 49)
14211 (const_int 18) (const_int 50)
14212 (const_int 19) (const_int 51)
14213 (const_int 24) (const_int 56)
14214 (const_int 25) (const_int 57)
14215 (const_int 26) (const_int 58)
14216 (const_int 27) (const_int 59)])))]
14218 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14219 [(set_attr "type" "sselog")
14220 (set_attr "prefix" "evex")
14221 (set_attr "mode" "XI")])
;; vpunpcklwd on 256-bit vectors: per-128-bit-lane interleave of the
;; low words of operands 1 and 2.
14223 (define_insn "avx2_interleave_lowv16hi<mask_name>"
14224 [(set (match_operand:V16HI 0 "register_operand" "=v")
14227 (match_operand:V16HI 1 "register_operand" "v")
14228 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
14229 (parallel [(const_int 0) (const_int 16)
14230 (const_int 1) (const_int 17)
14231 (const_int 2) (const_int 18)
14232 (const_int 3) (const_int 19)
14233 (const_int 8) (const_int 24)
14234 (const_int 9) (const_int 25)
14235 (const_int 10) (const_int 26)
14236 (const_int 11) (const_int 27)])))]
14237 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14238 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14239 [(set_attr "type" "sselog")
14240 (set_attr "prefix" "maybe_evex")
14241 (set_attr "mode" "OI")])
;; punpcklwd/vpunpcklwd on 128-bit vectors: interleave the low four
;; words of operands 1 and 2.
14243 (define_insn "vec_interleave_lowv8hi<mask_name>"
14244 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
14247 (match_operand:V8HI 1 "register_operand" "0,v")
14248 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
14249 (parallel [(const_int 0) (const_int 8)
14250 (const_int 1) (const_int 9)
14251 (const_int 2) (const_int 10)
14252 (const_int 3) (const_int 11)])))]
14253 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14255 punpcklwd\t{%2, %0|%0, %2}
14256 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14257 [(set_attr "isa" "noavx,avx")
14258 (set_attr "type" "sselog")
14259 (set_attr "prefix_data16" "1,*")
14260 (set_attr "prefix" "orig,maybe_evex")
14261 (set_attr "mode" "TI")])
;; vpunpckhdq on 256-bit vectors: per-128-bit-lane interleave of the
;; high dwords of operands 1 and 2.
14263 (define_insn "avx2_interleave_highv8si<mask_name>"
14264 [(set (match_operand:V8SI 0 "register_operand" "=v")
14267 (match_operand:V8SI 1 "register_operand" "v")
14268 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
14269 (parallel [(const_int 2) (const_int 10)
14270 (const_int 3) (const_int 11)
14271 (const_int 6) (const_int 14)
14272 (const_int 7) (const_int 15)])))]
14273 "TARGET_AVX2 && <mask_avx512vl_condition>"
14274 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14275 [(set_attr "type" "sselog")
14276 (set_attr "prefix" "maybe_evex")
14277 (set_attr "mode" "OI")])
;; vpunpckhdq on 512-bit vectors: per-128-bit-lane interleave of the
;; high dwords of operands 1 and 2.
14279 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
14280 [(set (match_operand:V16SI 0 "register_operand" "=v")
14283 (match_operand:V16SI 1 "register_operand" "v")
14284 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
14285 (parallel [(const_int 2) (const_int 18)
14286 (const_int 3) (const_int 19)
14287 (const_int 6) (const_int 22)
14288 (const_int 7) (const_int 23)
14289 (const_int 10) (const_int 26)
14290 (const_int 11) (const_int 27)
14291 (const_int 14) (const_int 30)
14292 (const_int 15) (const_int 31)])))]
14294 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14295 [(set_attr "type" "sselog")
14296 (set_attr "prefix" "evex")
14297 (set_attr "mode" "XI")])
;; punpckhdq/vpunpckhdq on 128-bit vectors: interleave the high two
;; dwords of operands 1 and 2.
14300 (define_insn "vec_interleave_highv4si<mask_name>"
14301 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
14304 (match_operand:V4SI 1 "register_operand" "0,v")
14305 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
14306 (parallel [(const_int 2) (const_int 6)
14307 (const_int 3) (const_int 7)])))]
14308 "TARGET_SSE2 && <mask_avx512vl_condition>"
14310 punpckhdq\t{%2, %0|%0, %2}
14311 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14312 [(set_attr "isa" "noavx,avx")
14313 (set_attr "type" "sselog")
14314 (set_attr "prefix_data16" "1,*")
14315 (set_attr "prefix" "orig,maybe_vex")
14316 (set_attr "mode" "TI")])
;; vpunpckldq on 256-bit vectors: per-128-bit-lane interleave of the
;; low dwords of operands 1 and 2.
14318 (define_insn "avx2_interleave_lowv8si<mask_name>"
14319 [(set (match_operand:V8SI 0 "register_operand" "=v")
14322 (match_operand:V8SI 1 "register_operand" "v")
14323 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
14324 (parallel [(const_int 0) (const_int 8)
14325 (const_int 1) (const_int 9)
14326 (const_int 4) (const_int 12)
14327 (const_int 5) (const_int 13)])))]
14328 "TARGET_AVX2 && <mask_avx512vl_condition>"
14329 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14330 [(set_attr "type" "sselog")
14331 (set_attr "prefix" "maybe_evex")
14332 (set_attr "mode" "OI")])
;; vpunpckldq on 512-bit vectors: per-128-bit-lane interleave of the
;; low dwords of operands 1 and 2.
14334 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
14335 [(set (match_operand:V16SI 0 "register_operand" "=v")
14338 (match_operand:V16SI 1 "register_operand" "v")
14339 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
14340 (parallel [(const_int 0) (const_int 16)
14341 (const_int 1) (const_int 17)
14342 (const_int 4) (const_int 20)
14343 (const_int 5) (const_int 21)
14344 (const_int 8) (const_int 24)
14345 (const_int 9) (const_int 25)
14346 (const_int 12) (const_int 28)
14347 (const_int 13) (const_int 29)])))]
14349 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14350 [(set_attr "type" "sselog")
14351 (set_attr "prefix" "evex")
14352 (set_attr "mode" "XI")])
;; punpckldq/vpunpckldq on 128-bit vectors: interleave the low two
;; dwords of operands 1 and 2.
14354 (define_insn "vec_interleave_lowv4si<mask_name>"
14355 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
14358 (match_operand:V4SI 1 "register_operand" "0,v")
14359 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
14360 (parallel [(const_int 0) (const_int 4)
14361 (const_int 1) (const_int 5)])))]
14362 "TARGET_SSE2 && <mask_avx512vl_condition>"
14364 punpckldq\t{%2, %0|%0, %2}
14365 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14366 [(set_attr "isa" "noavx,avx")
14367 (set_attr "type" "sselog")
14368 (set_attr "prefix_data16" "1,*")
14369 (set_attr "prefix" "orig,vex")
14370 (set_attr "mode" "TI")])
;; True 256-bit high interleave: the AVX2 punpck insns work per
;; 128-bit lane, so combine the lane-local low and high interleaves
;; and select the two high lanes with vperm2i128 (imm 0x31 = 1 + 3<<4).
14372 (define_expand "vec_interleave_high<mode>"
14373 [(match_operand:VI_256 0 "register_operand")
14374 (match_operand:VI_256 1 "register_operand")
14375 (match_operand:VI_256 2 "nonimmediate_operand")]
14378 rtx t1 = gen_reg_rtx (<MODE>mode);
14379 rtx t2 = gen_reg_rtx (<MODE>mode);
14380 rtx t3 = gen_reg_rtx (V4DImode);
14381 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
14382 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
14383 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
14384 gen_lowpart (V4DImode, t2),
14385 GEN_INT (1 + (3 << 4))));
14386 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; True 256-bit low interleave: same lane-fixup technique as above, but
;; vperm2i128 selects the two low lanes (imm 0x20 = 0 + 2<<4).
14390 (define_expand "vec_interleave_low<mode>"
14391 [(match_operand:VI_256 0 "register_operand")
14392 (match_operand:VI_256 1 "register_operand")
14393 (match_operand:VI_256 2 "nonimmediate_operand")]
14396 rtx t1 = gen_reg_rtx (<MODE>mode);
14397 rtx t2 = gen_reg_rtx (<MODE>mode);
14398 rtx t3 = gen_reg_rtx (V4DImode);
14399 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
14400 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
14401 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
14402 gen_lowpart (V4DImode, t2),
14403 GEN_INT (0 + (2 << 4))));
14404 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
14408 ;; Modes handled by pinsr patterns.
;; V8HI needs only SSE2 (pinsrw); byte/dword need SSE4.1 (pinsrb/pinsrd)
;; and qword additionally needs 64-bit mode (pinsrq).
14409 (define_mode_iterator PINSR_MODE
14410 [(V16QI "TARGET_SSE4_1") V8HI
14411 (V4SI "TARGET_SSE4_1")
14412 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix per mode: pinsrw is an SSE2 instruction, the
;; rest are SSE4.1.
14414 (define_mode_attr sse2p4_1
14415 [(V16QI "sse4_1") (V8HI "sse2")
14416 (V4SI "sse4_1") (V2DI "sse4_1")])
;; ISA required for the EVEX-encoded pinsr alternatives: byte/word
;; forms come with AVX512BW, dword/qword forms with AVX512DQ.
14418 (define_mode_attr pinsr_evex_isa
14419 [(V16QI "avx512bw") (V8HI "avx512bw")
14420 (V4SI "avx512dq") (V2DI "avx512dq")])
14422 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; pinsr: insert scalar operand 2 into one element of vector operand 1,
;; element selected by the single set bit of operand 3 (a vec_merge
;; one-hot selector, converted below to an element index immediate).
;; Alternatives: 0-1 non-AVX (reg/mem src), 2-3 AVX, 4-5 EVEX.
14423 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
14424 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
14425 (vec_merge:PINSR_MODE
14426 (vec_duplicate:PINSR_MODE
14427 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
14428 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
14429 (match_operand:SI 3 "const_int_operand")))]
;; Selector must be one-hot with the bit inside the vector's range.
14431 && ((unsigned) exact_log2 (INTVAL (operands[3]))
14432 < GET_MODE_NUNITS (<MODE>mode))"
;; Convert the one-hot vec_merge mask to the element-index immediate
;; the hardware instruction expects.
14434 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
14436 switch (which_alternative)
;; %k2 forces the 32-bit register name when the element is narrower
;; than SImode (pinsrb/pinsrw take a 32-bit GPR source).
14439 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
14440 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
14443 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
14446 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
14447 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
14451 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14453 gcc_unreachable ();
14456 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
14457 (set_attr "type" "sselog")
14458 (set (attr "prefix_rex")
14460 (and (not (match_test "TARGET_AVX"))
14461 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
14463 (const_string "*")))
14464 (set (attr "prefix_data16")
14466 (and (not (match_test "TARGET_AVX"))
14467 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14469 (const_string "*")))
14470 (set (attr "prefix_extra")
14472 (and (not (match_test "TARGET_AVX"))
14473 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14475 (const_string "1")))
14476 (set_attr "length_immediate" "1")
14477 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
14478 (set_attr "mode" "TI")])
;; Masked quarter-vector insert builtin expander: translate the
;; quarter index (operand 3, 0..3) into the one-hot vec_merge selector
;; expected by the _1_mask insn — clear the 4 (dword) or 2 (qword)
;; element-mask bits covering the chosen quarter.
14480 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
14481 [(match_operand:AVX512_VEC 0 "register_operand")
14482 (match_operand:AVX512_VEC 1 "register_operand")
14483 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
14484 (match_operand:SI 3 "const_0_to_3_operand")
14485 (match_operand:AVX512_VEC 4 "register_operand")
14486 (match_operand:<avx512fmaskmode> 5 "register_operand")]
14489 int mask, selector;
14490 mask = INTVAL (operands[3]);
14491 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
14492 ? 0xFFFF ^ (0x000F << mask * 4)
14493 : 0xFF ^ (0x03 << mask * 2));
14494 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
14495 (operands[0], operands[1], operands[2], GEN_INT (selector),
14496 operands[4], operands[5]));
;; Insert into quarter 0.  Alternative 0 (operand 1 live) emits a real
;; vinsert with immediate 0; alternatives 1-2 (operand 1 is zero) emit
;; just a move of operand 2 into the low part, picking an aligned or
;; unaligned vmov variant by mode and operand alignment.
14500 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
14501 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
14502 (vec_merge:AVX512_VEC
14503 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
14504 (vec_duplicate:AVX512_VEC
14505 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
14506 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
;; Selector must be the quarter-0 mask (all element bits set except
;; the lowest quarter's).
14508 && (INTVAL (operands[3])
14509 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
14511 if (which_alternative == 0)
14512 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
14513 switch (<MODE>mode)
14516 if (misaligned_operand (operands[2], <ssequartermode>mode))
14517 return "vmovupd\t{%2, %x0|%x0, %2}";
14519 return "vmovapd\t{%2, %x0|%x0, %2}";
14521 if (misaligned_operand (operands[2], <ssequartermode>mode))
14522 return "vmovups\t{%2, %x0|%x0, %2}";
14524 return "vmovaps\t{%2, %x0|%x0, %2}";
;; Alternative 2 may use extended (EVEX-only) registers, so use the
;; vmovdqu64/vmovdqa64 spellings there.
14526 if (misaligned_operand (operands[2], <ssequartermode>mode))
14527 return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
14528 : "vmovdqu\t{%2, %x0|%x0, %2}";
14530 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
14531 : "vmovdqa\t{%2, %x0|%x0, %2}";
14533 if (misaligned_operand (operands[2], <ssequartermode>mode))
14534 return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
14535 : "vmovdqu\t{%2, %x0|%x0, %2}";
14537 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
14538 : "vmovdqa\t{%2, %x0|%x0, %2}";
14540 gcc_unreachable ();
14543 [(set_attr "type" "sselog,ssemov,ssemov")
14544 (set_attr "length_immediate" "1,0,0")
14545 (set_attr "prefix" "evex,vex,evex")
14546 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
;; Quarter-vector insert: recover the 2-bit quarter index from the
;; vec_merge selector (one quarter's element bits cleared) and emit
;; vinsert with that index as the immediate.
14548 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
14549 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
14550 (vec_merge:AVX512_VEC
14551 (match_operand:AVX512_VEC 1 "register_operand" "v")
14552 (vec_duplicate:AVX512_VEC
14553 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
14554 (match_operand:SI 3 "const_int_operand" "n")))]
14558 int selector = INTVAL (operands[3]);
;; Map each recognized selector back to its quarter index 0..3.
14560 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
14562 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
14564 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
14566 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
14569 gcc_unreachable ();
14571 operands[3] = GEN_INT (mask);
14573 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
14575 [(set_attr "type" "sselog")
14576 (set_attr "length_immediate" "1")
14577 (set_attr "prefix" "evex")
14578 (set_attr "mode" "<sseinsnmode>")])
;; Masked half-vector insert builtin expander: dispatch on the half
;; index (operand 3, 0 or 1) to the vec_set_lo / vec_set_hi insns.
14580 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
14581 [(match_operand:AVX512_VEC_2 0 "register_operand")
14582 (match_operand:AVX512_VEC_2 1 "register_operand")
14583 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14584 (match_operand:SI 3 "const_0_to_1_operand")
14585 (match_operand:AVX512_VEC_2 4 "register_operand")
14586 (match_operand:<avx512fmaskmode> 5 "register_operand")]
14589 int mask = INTVAL (operands[3]);
14591 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
14592 operands[2], operands[4],
14595 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
14596 operands[2], operands[4],
;; Replace the low eight elements of a 512-bit V16FI vector with
;; operand 2, keeping elements 8..15 of operand 1: vinsert*32x8 imm 0.
14601 (define_insn "vec_set_lo_<mode><mask_name>"
14602 [(set (match_operand:V16FI 0 "register_operand" "=v")
14604 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14605 (vec_select:<ssehalfvecmode>
14606 (match_operand:V16FI 1 "register_operand" "v")
14607 (parallel [(const_int 8) (const_int 9)
14608 (const_int 10) (const_int 11)
14609 (const_int 12) (const_int 13)
14610 (const_int 14) (const_int 15)]))))]
14612 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14613 [(set_attr "type" "sselog")
14614 (set_attr "length_immediate" "1")
14615 (set_attr "prefix" "evex")
14616 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high eight elements of a 512-bit V16FI vector with
;; operand 2, keeping elements 0..7 of operand 1: vinsert*32x8 imm 1.
14618 (define_insn "vec_set_hi_<mode><mask_name>"
14619 [(set (match_operand:V16FI 0 "register_operand" "=v")
14621 (vec_select:<ssehalfvecmode>
14622 (match_operand:V16FI 1 "register_operand" "v")
14623 (parallel [(const_int 0) (const_int 1)
14624 (const_int 2) (const_int 3)
14625 (const_int 4) (const_int 5)
14626 (const_int 6) (const_int 7)]))
14627 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14629 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14630 [(set_attr "type" "sselog")
14631 (set_attr "length_immediate" "1")
14632 (set_attr "prefix" "evex")
14633 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low four elements of a 512-bit V8FI vector with
;; operand 2, keeping elements 4..7 of operand 1: vinsert*64x4 imm 0.
14635 (define_insn "vec_set_lo_<mode><mask_name>"
14636 [(set (match_operand:V8FI 0 "register_operand" "=v")
14638 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14639 (vec_select:<ssehalfvecmode>
14640 (match_operand:V8FI 1 "register_operand" "v")
14641 (parallel [(const_int 4) (const_int 5)
14642 (const_int 6) (const_int 7)]))))]
14644 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14645 [(set_attr "type" "sselog")
14646 (set_attr "length_immediate" "1")
14647 (set_attr "prefix" "evex")
14648 (set_attr "mode" "XI")])
;; Replace the high four elements of a 512-bit V8FI vector with
;; operand 2, keeping elements 0..3 of operand 1: vinsert*64x4 imm 1.
14650 (define_insn "vec_set_hi_<mode><mask_name>"
14651 [(set (match_operand:V8FI 0 "register_operand" "=v")
14653 (vec_select:<ssehalfvecmode>
14654 (match_operand:V8FI 1 "register_operand" "v")
14655 (parallel [(const_int 0) (const_int 1)
14656 (const_int 2) (const_int 3)]))
14657 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14659 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14660 [(set_attr "type" "sselog")
14661 (set_attr "length_immediate" "1")
14662 (set_attr "prefix" "evex")
14663 (set_attr "mode" "XI")])
;; Masked 256-bit vshuf*64x2 builtin expander: decode the 2-bit
;; immediate (operand 3) into explicit element indices — bit 0 picks
;; which 128-bit pair of operand 1, bit 1 which pair of operand 2.
14665 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
14666 [(match_operand:VI8F_256 0 "register_operand")
14667 (match_operand:VI8F_256 1 "register_operand")
14668 (match_operand:VI8F_256 2 "nonimmediate_operand")
14669 (match_operand:SI 3 "const_0_to_3_operand")
14670 (match_operand:VI8F_256 4 "register_operand")
14671 (match_operand:QI 5 "register_operand")]
14674 int mask = INTVAL (operands[3]);
14675 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
14676 (operands[0], operands[1], operands[2],
14677 GEN_INT (((mask >> 0) & 1) * 2 + 0),
14678 GEN_INT (((mask >> 0) & 1) * 2 + 1),
14679 GEN_INT (((mask >> 1) & 1) * 2 + 4),
14680 GEN_INT (((mask >> 1) & 1) * 2 + 5),
14681 operands[4], operands[5]))
;; 256-bit vshuf{f,i}64x2: select one 128-bit (2x64-bit) lane from each
;; source of the concatenated 512-bit value.  The insn condition (partly
;; elided here) requires each index pair to be an even/odd lane pair, so
;; the 2-bit immediate can be reconstructed in the output template.
14685 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
14686 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
14687 (vec_select:VI8F_256
14688 (vec_concat:<ssedoublemode>
14689 (match_operand:VI8F_256 1 "register_operand" "v")
14690 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
14691 (parallel [(match_operand 3 "const_0_to_3_operand")
14692 (match_operand 4 "const_0_to_3_operand")
14693 (match_operand 5 "const_4_to_7_operand")
14694 (match_operand 6 "const_4_to_7_operand")])))]
14696 && (INTVAL (operands[3]) & 1) == 0
14697 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14698 && (INTVAL (operands[5]) & 1) == 0
14699 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
14702 mask = INTVAL (operands[3]) / 2;
14703 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
14704 operands[3] = GEN_INT (mask);
14705 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
14707 [(set_attr "type" "sselog")
14708 (set_attr "length_immediate" "1")
14709 (set_attr "prefix" "evex")
14710 (set_attr "mode" "XI")])
;; Expander for the masked 512-bit vshuf{f,i}64x2 builtin.  Each 2-bit
;; field of the 8-bit immediate selects one 128-bit lane; fields 0-1
;; index operand 1 (indices 0..7), fields 2-3 index operand 2 (indices
;; 8..15 in the concatenated 1024-bit value).
14712 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
14713 [(match_operand:V8FI 0 "register_operand")
14714 (match_operand:V8FI 1 "register_operand")
14715 (match_operand:V8FI 2 "nonimmediate_operand")
14716 (match_operand:SI 3 "const_0_to_255_operand")
14717 (match_operand:V8FI 4 "register_operand")
14718 (match_operand:QI 5 "register_operand")]
14721 int mask = INTVAL (operands[3]);
14722 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
14723 (operands[0], operands[1], operands[2],
14724 GEN_INT (((mask >> 0) & 3) * 2),
14725 GEN_INT (((mask >> 0) & 3) * 2 + 1),
14726 GEN_INT (((mask >> 2) & 3) * 2),
14727 GEN_INT (((mask >> 2) & 3) * 2 + 1),
14728 GEN_INT (((mask >> 4) & 3) * 2 + 8),
14729 GEN_INT (((mask >> 4) & 3) * 2 + 9),
14730 GEN_INT (((mask >> 6) & 3) * 2 + 8),
14731 GEN_INT (((mask >> 6) & 3) * 2 + 9),
14732 operands[4], operands[5]));
;; 512-bit vshuf{f,i}64x2: pick four 128-bit lanes (two from each source)
;; out of the 1024-bit concatenation.  The condition forces each index
;; pair to be a consecutive even/odd pair so the lane numbers can be
;; folded back into the 8-bit immediate in the output template.
14736 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
14737 [(set (match_operand:V8FI 0 "register_operand" "=v")
14739 (vec_concat:<ssedoublemode>
14740 (match_operand:V8FI 1 "register_operand" "v")
14741 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
14742 (parallel [(match_operand 3 "const_0_to_7_operand")
14743 (match_operand 4 "const_0_to_7_operand")
14744 (match_operand 5 "const_0_to_7_operand")
14745 (match_operand 6 "const_0_to_7_operand")
14746 (match_operand 7 "const_8_to_15_operand")
14747 (match_operand 8 "const_8_to_15_operand")
14748 (match_operand 9 "const_8_to_15_operand")
14749 (match_operand 10 "const_8_to_15_operand")])))]
14751 && (INTVAL (operands[3]) & 1) == 0
14752 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14753 && (INTVAL (operands[5]) & 1) == 0
14754 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
14755 && (INTVAL (operands[7]) & 1) == 0
14756 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14757 && (INTVAL (operands[9]) & 1) == 0
14758 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
14761 mask = INTVAL (operands[3]) / 2;
14762 mask |= INTVAL (operands[5]) / 2 << 2;
14763 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
14764 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
14765 operands[3] = GEN_INT (mask);
14767 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14769 [(set_attr "type" "sselog")
14770 (set_attr "length_immediate" "1")
14771 (set_attr "prefix" "evex")
14772 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 256-bit vshuf{f,i}32x4 builtin (AVX512VL).
;; Bit 0 of the immediate selects the 128-bit (4x32-bit) lane of
;; operand 1 (indices 0..7), bit 1 the lane of operand 2 (indices 8..15
;; in the concatenated vector).
14774 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
14775 [(match_operand:VI4F_256 0 "register_operand")
14776 (match_operand:VI4F_256 1 "register_operand")
14777 (match_operand:VI4F_256 2 "nonimmediate_operand")
14778 (match_operand:SI 3 "const_0_to_3_operand")
14779 (match_operand:VI4F_256 4 "register_operand")
14780 (match_operand:QI 5 "register_operand")]
14783 int mask = INTVAL (operands[3]);
14784 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
14785 (operands[0], operands[1], operands[2],
14786 GEN_INT (((mask >> 0) & 1) * 4 + 0),
14787 GEN_INT (((mask >> 0) & 1) * 4 + 1),
14788 GEN_INT (((mask >> 0) & 1) * 4 + 2),
14789 GEN_INT (((mask >> 0) & 1) * 4 + 3),
14790 GEN_INT (((mask >> 1) & 1) * 4 + 8),
14791 GEN_INT (((mask >> 1) & 1) * 4 + 9),
14792 GEN_INT (((mask >> 1) & 1) * 4 + 10),
14793 GEN_INT (((mask >> 1) & 1) * 4 + 11),
14794 operands[4], operands[5]));
;; 256-bit vshuf{f,i}32x4: select one aligned group of four consecutive
;; 32-bit elements from each source.  The condition requires each index
;; quadruple to be 4-aligned and consecutive, so the immediate can be
;; rebuilt from operands 3 and 7.
14798 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
14799 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
14800 (vec_select:VI4F_256
14801 (vec_concat:<ssedoublemode>
14802 (match_operand:VI4F_256 1 "register_operand" "v")
14803 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
14804 (parallel [(match_operand 3 "const_0_to_7_operand")
14805 (match_operand 4 "const_0_to_7_operand")
14806 (match_operand 5 "const_0_to_7_operand")
14807 (match_operand 6 "const_0_to_7_operand")
14808 (match_operand 7 "const_8_to_15_operand")
14809 (match_operand 8 "const_8_to_15_operand")
14810 (match_operand 9 "const_8_to_15_operand")
14811 (match_operand 10 "const_8_to_15_operand")])))]
14813 && (INTVAL (operands[3]) & 3) == 0
14814 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14815 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14816 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14817 && (INTVAL (operands[7]) & 3) == 0
14818 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14819 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14820 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
14823 mask = INTVAL (operands[3]) / 4;
14824 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
14825 operands[3] = GEN_INT (mask);
14827 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14829 [(set_attr "type" "sselog")
14830 (set_attr "length_immediate" "1")
14831 (set_attr "prefix" "evex")
14832 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 512-bit vshuf{f,i}32x4 builtin.  Each 2-bit
;; immediate field selects a 128-bit (4x32-bit) lane; fields 0-1 index
;; operand 1 (0..15), fields 2-3 index operand 2 (16..31 in the
;; concatenated 1024-bit value).
14834 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
14835 [(match_operand:V16FI 0 "register_operand")
14836 (match_operand:V16FI 1 "register_operand")
14837 (match_operand:V16FI 2 "nonimmediate_operand")
14838 (match_operand:SI 3 "const_0_to_255_operand")
14839 (match_operand:V16FI 4 "register_operand")
14840 (match_operand:HI 5 "register_operand")]
14843 int mask = INTVAL (operands[3]);
14844 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
14845 (operands[0], operands[1], operands[2],
14846 GEN_INT (((mask >> 0) & 3) * 4),
14847 GEN_INT (((mask >> 0) & 3) * 4 + 1),
14848 GEN_INT (((mask >> 0) & 3) * 4 + 2),
14849 GEN_INT (((mask >> 0) & 3) * 4 + 3),
14850 GEN_INT (((mask >> 2) & 3) * 4),
14851 GEN_INT (((mask >> 2) & 3) * 4 + 1),
14852 GEN_INT (((mask >> 2) & 3) * 4 + 2),
14853 GEN_INT (((mask >> 2) & 3) * 4 + 3),
14854 GEN_INT (((mask >> 4) & 3) * 4 + 16),
14855 GEN_INT (((mask >> 4) & 3) * 4 + 17),
14856 GEN_INT (((mask >> 4) & 3) * 4 + 18),
14857 GEN_INT (((mask >> 4) & 3) * 4 + 19),
14858 GEN_INT (((mask >> 6) & 3) * 4 + 16),
14859 GEN_INT (((mask >> 6) & 3) * 4 + 17),
14860 GEN_INT (((mask >> 6) & 3) * 4 + 18),
14861 GEN_INT (((mask >> 6) & 3) * 4 + 19),
14862 operands[4], operands[5]));
;; 512-bit vshuf{f,i}32x4: pick four 4-element lanes (two per source)
;; out of the 1024-bit concatenation.  Each quadruple of indices must
;; be 4-aligned and consecutive; the 8-bit immediate is reconstructed
;; from operands 3, 7, 11 and 15.
14866 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
14867 [(set (match_operand:V16FI 0 "register_operand" "=v")
14869 (vec_concat:<ssedoublemode>
14870 (match_operand:V16FI 1 "register_operand" "v")
14871 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
14872 (parallel [(match_operand 3 "const_0_to_15_operand")
14873 (match_operand 4 "const_0_to_15_operand")
14874 (match_operand 5 "const_0_to_15_operand")
14875 (match_operand 6 "const_0_to_15_operand")
14876 (match_operand 7 "const_0_to_15_operand")
14877 (match_operand 8 "const_0_to_15_operand")
14878 (match_operand 9 "const_0_to_15_operand")
14879 (match_operand 10 "const_0_to_15_operand")
14880 (match_operand 11 "const_16_to_31_operand")
14881 (match_operand 12 "const_16_to_31_operand")
14882 (match_operand 13 "const_16_to_31_operand")
14883 (match_operand 14 "const_16_to_31_operand")
14884 (match_operand 15 "const_16_to_31_operand")
14885 (match_operand 16 "const_16_to_31_operand")
14886 (match_operand 17 "const_16_to_31_operand")
14887 (match_operand 18 "const_16_to_31_operand")])))]
14889 && (INTVAL (operands[3]) & 3) == 0
14890 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14891 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14892 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14893 && (INTVAL (operands[7]) & 3) == 0
14894 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14895 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14896 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
14897 && (INTVAL (operands[11]) & 3) == 0
14898 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
14899 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
14900 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
14901 && (INTVAL (operands[15]) & 3) == 0
14902 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
14903 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
14904 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
14907 mask = INTVAL (operands[3]) / 4;
14908 mask |= INTVAL (operands[7]) / 4 << 2;
14909 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
14910 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
14911 operands[3] = GEN_INT (mask);
14913 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
14915 [(set_attr "type" "sselog")
14916 (set_attr "length_immediate" "1")
14917 (set_attr "prefix" "evex")
14918 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 512-bit vpshufd builtin: the same four 2-bit
;; selectors are replicated across all four 128-bit lanes (offsets
;; +0, +4, +8, +12), then the canonical vec_select insn is emitted.
14920 (define_expand "avx512f_pshufdv3_mask"
14921 [(match_operand:V16SI 0 "register_operand")
14922 (match_operand:V16SI 1 "nonimmediate_operand")
14923 (match_operand:SI 2 "const_0_to_255_operand")
14924 (match_operand:V16SI 3 "register_operand")
14925 (match_operand:HI 4 "register_operand")]
14928 int mask = INTVAL (operands[2]);
14929 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
14930 GEN_INT ((mask >> 0) & 3),
14931 GEN_INT ((mask >> 2) & 3),
14932 GEN_INT ((mask >> 4) & 3),
14933 GEN_INT ((mask >> 6) & 3),
14934 GEN_INT (((mask >> 0) & 3) + 4),
14935 GEN_INT (((mask >> 2) & 3) + 4),
14936 GEN_INT (((mask >> 4) & 3) + 4),
14937 GEN_INT (((mask >> 6) & 3) + 4),
14938 GEN_INT (((mask >> 0) & 3) + 8),
14939 GEN_INT (((mask >> 2) & 3) + 8),
14940 GEN_INT (((mask >> 4) & 3) + 8),
14941 GEN_INT (((mask >> 6) & 3) + 8),
14942 GEN_INT (((mask >> 0) & 3) + 12),
14943 GEN_INT (((mask >> 2) & 3) + 12),
14944 GEN_INT (((mask >> 4) & 3) + 12),
14945 GEN_INT (((mask >> 6) & 3) + 12),
14946 operands[3], operands[4]));
;; 512-bit vpshufd as an explicit 16-element vec_select.  The condition
;; verifies the per-lane selectors repeat at +4/+8/+12, i.e. the shuffle
;; is the same in every 128-bit lane, so it is encodable as a single
;; 8-bit immediate (rebuilt from operands 2..5 in the output block).
14950 (define_insn "avx512f_pshufd_1<mask_name>"
14951 [(set (match_operand:V16SI 0 "register_operand" "=v")
14953 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
14954 (parallel [(match_operand 2 "const_0_to_3_operand")
14955 (match_operand 3 "const_0_to_3_operand")
14956 (match_operand 4 "const_0_to_3_operand")
14957 (match_operand 5 "const_0_to_3_operand")
14958 (match_operand 6 "const_4_to_7_operand")
14959 (match_operand 7 "const_4_to_7_operand")
14960 (match_operand 8 "const_4_to_7_operand")
14961 (match_operand 9 "const_4_to_7_operand")
14962 (match_operand 10 "const_8_to_11_operand")
14963 (match_operand 11 "const_8_to_11_operand")
14964 (match_operand 12 "const_8_to_11_operand")
14965 (match_operand 13 "const_8_to_11_operand")
14966 (match_operand 14 "const_12_to_15_operand")
14967 (match_operand 15 "const_12_to_15_operand")
14968 (match_operand 16 "const_12_to_15_operand")
14969 (match_operand 17 "const_12_to_15_operand")])))]
14971 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14972 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14973 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14974 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
14975 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
14976 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
14977 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
14978 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
14979 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
14980 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
14981 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
14982 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
14985 mask |= INTVAL (operands[2]) << 0;
14986 mask |= INTVAL (operands[3]) << 2;
14987 mask |= INTVAL (operands[4]) << 4;
14988 mask |= INTVAL (operands[5]) << 6;
14989 operands[2] = GEN_INT (mask);
14991 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
14993 [(set_attr "type" "sselog1")
14994 (set_attr "prefix" "evex")
14995 (set_attr "length_immediate" "1")
14996 (set_attr "mode" "XI")])
;; Expander for the masked 256-bit vpshufd builtin (AVX512VL): decodes
;; the immediate and reuses the AVX2 pattern with selectors replicated
;; into the high lane (+4).
14998 (define_expand "avx512vl_pshufdv3_mask"
14999 [(match_operand:V8SI 0 "register_operand")
15000 (match_operand:V8SI 1 "nonimmediate_operand")
15001 (match_operand:SI 2 "const_0_to_255_operand")
15002 (match_operand:V8SI 3 "register_operand")
15003 (match_operand:QI 4 "register_operand")]
15006 int mask = INTVAL (operands[2]);
15007 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
15008 GEN_INT ((mask >> 0) & 3),
15009 GEN_INT ((mask >> 2) & 3),
15010 GEN_INT ((mask >> 4) & 3),
15011 GEN_INT ((mask >> 6) & 3),
15012 GEN_INT (((mask >> 0) & 3) + 4),
15013 GEN_INT (((mask >> 2) & 3) + 4),
15014 GEN_INT (((mask >> 4) & 3) + 4),
15015 GEN_INT (((mask >> 6) & 3) + 4),
15016 operands[3], operands[4]));
;; Expander for the unmasked 256-bit vpshufd builtin: same immediate
;; decoding as the masked variant above, without mask operands.
15020 (define_expand "avx2_pshufdv3"
15021 [(match_operand:V8SI 0 "register_operand")
15022 (match_operand:V8SI 1 "nonimmediate_operand")
15023 (match_operand:SI 2 "const_0_to_255_operand")]
15026 int mask = INTVAL (operands[2]);
15027 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
15028 GEN_INT ((mask >> 0) & 3),
15029 GEN_INT ((mask >> 2) & 3),
15030 GEN_INT ((mask >> 4) & 3),
15031 GEN_INT ((mask >> 6) & 3),
15032 GEN_INT (((mask >> 0) & 3) + 4),
15033 GEN_INT (((mask >> 2) & 3) + 4),
15034 GEN_INT (((mask >> 4) & 3) + 4),
15035 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit vpshufd as an explicit 8-element vec_select; requires the
;; high-lane selectors (6..9) to be the low-lane ones (2..5) plus 4,
;; i.e. a lane-identical shuffle encodable in one 8-bit immediate.
15039 (define_insn "avx2_pshufd_1<mask_name>"
15040 [(set (match_operand:V8SI 0 "register_operand" "=v")
15042 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
15043 (parallel [(match_operand 2 "const_0_to_3_operand")
15044 (match_operand 3 "const_0_to_3_operand")
15045 (match_operand 4 "const_0_to_3_operand")
15046 (match_operand 5 "const_0_to_3_operand")
15047 (match_operand 6 "const_4_to_7_operand")
15048 (match_operand 7 "const_4_to_7_operand")
15049 (match_operand 8 "const_4_to_7_operand")
15050 (match_operand 9 "const_4_to_7_operand")])))]
15052 && <mask_avx512vl_condition>
15053 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
15054 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
15055 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
15056 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
15059 mask |= INTVAL (operands[2]) << 0;
15060 mask |= INTVAL (operands[3]) << 2;
15061 mask |= INTVAL (operands[4]) << 4;
15062 mask |= INTVAL (operands[5]) << 6;
15063 operands[2] = GEN_INT (mask);
15065 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15067 [(set_attr "type" "sselog1")
15068 (set_attr "prefix" "maybe_evex")
15069 (set_attr "length_immediate" "1")
15070 (set_attr "mode" "OI")])
;; Expander for the masked 128-bit pshufd builtin (AVX512VL): decodes
;; the four 2-bit selectors and reuses the SSE2 pattern.
15072 (define_expand "avx512vl_pshufd_mask"
15073 [(match_operand:V4SI 0 "register_operand")
15074 (match_operand:V4SI 1 "nonimmediate_operand")
15075 (match_operand:SI 2 "const_0_to_255_operand")
15076 (match_operand:V4SI 3 "register_operand")
15077 (match_operand:QI 4 "register_operand")]
15080 int mask = INTVAL (operands[2]);
15081 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
15082 GEN_INT ((mask >> 0) & 3),
15083 GEN_INT ((mask >> 2) & 3),
15084 GEN_INT ((mask >> 4) & 3),
15085 GEN_INT ((mask >> 6) & 3),
15086 operands[3], operands[4]));
;; Expander for the 128-bit pshufd builtin: split the 8-bit immediate
;; into four 2-bit element selectors for the canonical insn.
15090 (define_expand "sse2_pshufd"
15091 [(match_operand:V4SI 0 "register_operand")
15092 (match_operand:V4SI 1 "vector_operand")
15093 (match_operand:SI 2 "const_int_operand")]
15096 int mask = INTVAL (operands[2]);
15097 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
15098 GEN_INT ((mask >> 0) & 3),
15099 GEN_INT ((mask >> 2) & 3),
15100 GEN_INT ((mask >> 4) & 3),
15101 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshufd as a 4-element vec_select; the four 2-bit selectors
;; are packed back into the immediate in the output block.  %v emits
;; the VEX "v" prefix when AVX is enabled.
15105 (define_insn "sse2_pshufd_1<mask_name>"
15106 [(set (match_operand:V4SI 0 "register_operand" "=v")
15108 (match_operand:V4SI 1 "vector_operand" "vBm")
15109 (parallel [(match_operand 2 "const_0_to_3_operand")
15110 (match_operand 3 "const_0_to_3_operand")
15111 (match_operand 4 "const_0_to_3_operand")
15112 (match_operand 5 "const_0_to_3_operand")])))]
15113 "TARGET_SSE2 && <mask_avx512vl_condition>"
15116 mask |= INTVAL (operands[2]) << 0;
15117 mask |= INTVAL (operands[3]) << 2;
15118 mask |= INTVAL (operands[4]) << 4;
15119 mask |= INTVAL (operands[5]) << 6;
15120 operands[2] = GEN_INT (mask);
15122 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15124 [(set_attr "type" "sselog1")
15125 (set_attr "prefix_data16" "1")
15126 (set_attr "prefix" "<mask_prefix2>")
15127 (set_attr "length_immediate" "1")
15128 (set_attr "mode" "TI")])
;; 512-bit vpshuflw kept as an unspec (the immediate is passed through
;; unchanged rather than decomposed into a vec_select).
15130 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
15131 [(set (match_operand:V32HI 0 "register_operand" "=v")
15133 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
15134 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15137 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15138 [(set_attr "type" "sselog")
15139 (set_attr "prefix" "evex")
15140 (set_attr "mode" "XI")])
;; Expander for the masked 256-bit vpshuflw builtin: low-quadword word
;; selectors are replicated into the high 128-bit lane (+8).
15142 (define_expand "avx512vl_pshuflwv3_mask"
15143 [(match_operand:V16HI 0 "register_operand")
15144 (match_operand:V16HI 1 "nonimmediate_operand")
15145 (match_operand:SI 2 "const_0_to_255_operand")
15146 (match_operand:V16HI 3 "register_operand")
15147 (match_operand:HI 4 "register_operand")]
15148 "TARGET_AVX512VL && TARGET_AVX512BW"
15150 int mask = INTVAL (operands[2]);
15151 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
15152 GEN_INT ((mask >> 0) & 3),
15153 GEN_INT ((mask >> 2) & 3),
15154 GEN_INT ((mask >> 4) & 3),
15155 GEN_INT ((mask >> 6) & 3),
15156 GEN_INT (((mask >> 0) & 3) + 8),
15157 GEN_INT (((mask >> 2) & 3) + 8),
15158 GEN_INT (((mask >> 4) & 3) + 8),
15159 GEN_INT (((mask >> 6) & 3) + 8),
15160 operands[3], operands[4]));
;; Expander for the unmasked 256-bit vpshuflw builtin (same decoding as
;; the masked variant above, without mask operands).
15164 (define_expand "avx2_pshuflwv3"
15165 [(match_operand:V16HI 0 "register_operand")
15166 (match_operand:V16HI 1 "nonimmediate_operand")
15167 (match_operand:SI 2 "const_0_to_255_operand")]
15170 int mask = INTVAL (operands[2]);
15171 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
15172 GEN_INT ((mask >> 0) & 3),
15173 GEN_INT ((mask >> 2) & 3),
15174 GEN_INT ((mask >> 4) & 3),
15175 GEN_INT ((mask >> 6) & 3),
15176 GEN_INT (((mask >> 0) & 3) + 8),
15177 GEN_INT (((mask >> 2) & 3) + 8),
15178 GEN_INT (((mask >> 4) & 3) + 8),
15179 GEN_INT (((mask >> 6) & 3) + 8)));
;; 256-bit vpshuflw: shuffles the low 4 words of each 128-bit lane;
;; the high words pass through unchanged (the fixed const_int selectors
;; for those positions are elided in this excerpt).  The condition
;; checks the high-lane selectors equal the low-lane ones plus 8.
15183 (define_insn "avx2_pshuflw_1<mask_name>"
15184 [(set (match_operand:V16HI 0 "register_operand" "=v")
15186 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
15187 (parallel [(match_operand 2 "const_0_to_3_operand")
15188 (match_operand 3 "const_0_to_3_operand")
15189 (match_operand 4 "const_0_to_3_operand")
15190 (match_operand 5 "const_0_to_3_operand")
15195 (match_operand 6 "const_8_to_11_operand")
15196 (match_operand 7 "const_8_to_11_operand")
15197 (match_operand 8 "const_8_to_11_operand")
15198 (match_operand 9 "const_8_to_11_operand")
15202 (const_int 15)])))]
15204 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
15205 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
15206 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
15207 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
15208 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
15211 mask |= INTVAL (operands[2]) << 0;
15212 mask |= INTVAL (operands[3]) << 2;
15213 mask |= INTVAL (operands[4]) << 4;
15214 mask |= INTVAL (operands[5]) << 6;
15215 operands[2] = GEN_INT (mask);
15217 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15219 [(set_attr "type" "sselog")
15220 (set_attr "prefix" "maybe_evex")
15221 (set_attr "length_immediate" "1")
15222 (set_attr "mode" "OI")])
;; Expander for the masked 128-bit pshuflw builtin (AVX512VL+BW):
;; decodes the four 2-bit word selectors for the SSE2 pattern.
15224 (define_expand "avx512vl_pshuflw_mask"
15225 [(match_operand:V8HI 0 "register_operand")
15226 (match_operand:V8HI 1 "nonimmediate_operand")
15227 (match_operand:SI 2 "const_0_to_255_operand")
15228 (match_operand:V8HI 3 "register_operand")
15229 (match_operand:QI 4 "register_operand")]
15230 "TARGET_AVX512VL && TARGET_AVX512BW"
15232 int mask = INTVAL (operands[2]);
15233 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
15234 GEN_INT ((mask >> 0) & 3),
15235 GEN_INT ((mask >> 2) & 3),
15236 GEN_INT ((mask >> 4) & 3),
15237 GEN_INT ((mask >> 6) & 3),
15238 operands[3], operands[4]));
;; Expander for the 128-bit pshuflw builtin: split the immediate into
;; four 2-bit selectors for the low quadword.
15242 (define_expand "sse2_pshuflw"
15243 [(match_operand:V8HI 0 "register_operand")
15244 (match_operand:V8HI 1 "vector_operand")
15245 (match_operand:SI 2 "const_int_operand")]
15248 int mask = INTVAL (operands[2]);
15249 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
15250 GEN_INT ((mask >> 0) & 3),
15251 GEN_INT ((mask >> 2) & 3),
15252 GEN_INT ((mask >> 4) & 3),
15253 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshuflw: shuffles the low four words; words 4..7 pass
;; through (their fixed selectors are elided in this excerpt).  The
;; immediate is rebuilt from operands 2..5 in the output block.
15257 (define_insn "sse2_pshuflw_1<mask_name>"
15258 [(set (match_operand:V8HI 0 "register_operand" "=v")
15260 (match_operand:V8HI 1 "vector_operand" "vBm")
15261 (parallel [(match_operand 2 "const_0_to_3_operand")
15262 (match_operand 3 "const_0_to_3_operand")
15263 (match_operand 4 "const_0_to_3_operand")
15264 (match_operand 5 "const_0_to_3_operand")
15269 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15272 mask |= INTVAL (operands[2]) << 0;
15273 mask |= INTVAL (operands[3]) << 2;
15274 mask |= INTVAL (operands[4]) << 4;
15275 mask |= INTVAL (operands[5]) << 6;
15276 operands[2] = GEN_INT (mask);
15278 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15280 [(set_attr "type" "sselog")
15281 (set_attr "prefix_data16" "0")
15282 (set_attr "prefix_rep" "1")
15283 (set_attr "prefix" "maybe_vex")
15284 (set_attr "length_immediate" "1")
15285 (set_attr "mode" "TI")])
;; Expander for the unmasked 256-bit vpshufhw builtin: selectors are
;; biased by +4 (high quadword of lane 0) and +12 (high quadword of
;; lane 1).
15287 (define_expand "avx2_pshufhwv3"
15288 [(match_operand:V16HI 0 "register_operand")
15289 (match_operand:V16HI 1 "nonimmediate_operand")
15290 (match_operand:SI 2 "const_0_to_255_operand")]
15293 int mask = INTVAL (operands[2]);
15294 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
15295 GEN_INT (((mask >> 0) & 3) + 4),
15296 GEN_INT (((mask >> 2) & 3) + 4),
15297 GEN_INT (((mask >> 4) & 3) + 4),
15298 GEN_INT (((mask >> 6) & 3) + 4),
15299 GEN_INT (((mask >> 0) & 3) + 12),
15300 GEN_INT (((mask >> 2) & 3) + 12),
15301 GEN_INT (((mask >> 4) & 3) + 12),
15302 GEN_INT (((mask >> 6) & 3) + 12)));
;; 512-bit vpshufhw kept as an unspec (immediate passed through
;; unchanged), mirroring avx512bw_pshuflwv32hi above.
15306 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
15307 [(set (match_operand:V32HI 0 "register_operand" "=v")
15309 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
15310 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15313 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15314 [(set_attr "type" "sselog")
15315 (set_attr "prefix" "evex")
15316 (set_attr "mode" "XI")])
;; Expander for the masked 256-bit vpshufhw builtin: same +4/+12 selector
;; biasing as avx2_pshufhwv3, plus merge/mask operands 3 and 4.
15318 (define_expand "avx512vl_pshufhwv3_mask"
15319 [(match_operand:V16HI 0 "register_operand")
15320 (match_operand:V16HI 1 "nonimmediate_operand")
15321 (match_operand:SI 2 "const_0_to_255_operand")
15322 (match_operand:V16HI 3 "register_operand")
15323 (match_operand:HI 4 "register_operand")]
15324 "TARGET_AVX512VL && TARGET_AVX512BW"
15326 int mask = INTVAL (operands[2]);
15327 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
15328 GEN_INT (((mask >> 0) & 3) + 4),
15329 GEN_INT (((mask >> 2) & 3) + 4),
15330 GEN_INT (((mask >> 4) & 3) + 4),
15331 GEN_INT (((mask >> 6) & 3) + 4),
15332 GEN_INT (((mask >> 0) & 3) + 12),
15333 GEN_INT (((mask >> 2) & 3) + 12),
15334 GEN_INT (((mask >> 4) & 3) + 12),
15335 GEN_INT (((mask >> 6) & 3) + 12),
15336 operands[3], operands[4]));
;; 256-bit vpshufhw: shuffles the high 4 words of each 128-bit lane
;; (selectors 4..7 and 12..15); low words pass through via fixed
;; const_int selectors, partly elided here.  Condition checks the
;; high-lane selectors equal the low-lane ones plus 8; the immediate is
;; rebuilt by subtracting the +4 bias from operands 2..5.
15340 (define_insn "avx2_pshufhw_1<mask_name>"
15341 [(set (match_operand:V16HI 0 "register_operand" "=v")
15343 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
15344 (parallel [(const_int 0)
15348 (match_operand 2 "const_4_to_7_operand")
15349 (match_operand 3 "const_4_to_7_operand")
15350 (match_operand 4 "const_4_to_7_operand")
15351 (match_operand 5 "const_4_to_7_operand")
15356 (match_operand 6 "const_12_to_15_operand")
15357 (match_operand 7 "const_12_to_15_operand")
15358 (match_operand 8 "const_12_to_15_operand")
15359 (match_operand 9 "const_12_to_15_operand")])))]
15361 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
15362 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
15363 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
15364 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
15365 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
15368 mask |= (INTVAL (operands[2]) - 4) << 0;
15369 mask |= (INTVAL (operands[3]) - 4) << 2;
15370 mask |= (INTVAL (operands[4]) - 4) << 4;
15371 mask |= (INTVAL (operands[5]) - 4) << 6;
15372 operands[2] = GEN_INT (mask);
15374 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15376 [(set_attr "type" "sselog")
15377 (set_attr "prefix" "maybe_evex")
15378 (set_attr "length_immediate" "1")
15379 (set_attr "mode" "OI")])
;; Expander for the masked 128-bit pshufhw builtin (AVX512VL+BW):
;; selectors biased by +4 into the high quadword for the SSE2 pattern.
15381 (define_expand "avx512vl_pshufhw_mask"
15382 [(match_operand:V8HI 0 "register_operand")
15383 (match_operand:V8HI 1 "nonimmediate_operand")
15384 (match_operand:SI 2 "const_0_to_255_operand")
15385 (match_operand:V8HI 3 "register_operand")
15386 (match_operand:QI 4 "register_operand")]
15387 "TARGET_AVX512VL && TARGET_AVX512BW"
15389 int mask = INTVAL (operands[2]);
15390 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
15391 GEN_INT (((mask >> 0) & 3) + 4),
15392 GEN_INT (((mask >> 2) & 3) + 4),
15393 GEN_INT (((mask >> 4) & 3) + 4),
15394 GEN_INT (((mask >> 6) & 3) + 4),
15395 operands[3], operands[4]));
;; Expander for the 128-bit pshufhw builtin: four 2-bit selectors,
;; biased by +4 to address the high quadword.
15399 (define_expand "sse2_pshufhw"
15400 [(match_operand:V8HI 0 "register_operand")
15401 (match_operand:V8HI 1 "vector_operand")
15402 (match_operand:SI 2 "const_int_operand")]
15405 int mask = INTVAL (operands[2]);
15406 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
15407 GEN_INT (((mask >> 0) & 3) + 4),
15408 GEN_INT (((mask >> 2) & 3) + 4),
15409 GEN_INT (((mask >> 4) & 3) + 4),
15410 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit pshufhw: shuffles words 4..7; words 0..3 pass through via
;; fixed const_int selectors (partly elided in this excerpt).  The
;; immediate is rebuilt by removing the +4 bias from operands 2..5.
15414 (define_insn "sse2_pshufhw_1<mask_name>"
15415 [(set (match_operand:V8HI 0 "register_operand" "=v")
15417 (match_operand:V8HI 1 "vector_operand" "vBm")
15418 (parallel [(const_int 0)
15422 (match_operand 2 "const_4_to_7_operand")
15423 (match_operand 3 "const_4_to_7_operand")
15424 (match_operand 4 "const_4_to_7_operand")
15425 (match_operand 5 "const_4_to_7_operand")])))]
15426 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15429 mask |= (INTVAL (operands[2]) - 4) << 0;
15430 mask |= (INTVAL (operands[3]) - 4) << 2;
15431 mask |= (INTVAL (operands[4]) - 4) << 4;
15432 mask |= (INTVAL (operands[5]) - 4) << 6;
15433 operands[2] = GEN_INT (mask);
15435 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15437 [(set_attr "type" "sselog")
15438 (set_attr "prefix_rep" "1")
15439 (set_attr "prefix_data16" "0")
15440 (set_attr "prefix" "maybe_vex")
15441 (set_attr "length_immediate" "1")
15442 (set_attr "mode" "TI")])
;; Load a scalar SI into element 0 of a V4SI, zeroing the rest:
;; operand 2 is materialized as the V4SI zero vector in the preparation
;; statement.  NOTE(review): the vec_merge wrapper and condition are
;; elided in this excerpt.
15444 (define_expand "sse2_loadd"
15445 [(set (match_operand:V4SI 0 "register_operand")
15447 (vec_duplicate:V4SI
15448 (match_operand:SI 1 "nonimmediate_operand"))
15452 "operands[2] = CONST0_RTX (V4SImode);")
;; Insert scalar operand 2 into element 0 of operand 1 (merge mask,
;; elided here, selects only element 0).  Alternatives: movd from
;; mem/GPR (SSE2), movss from mem or reg (no-AVX), and 3-operand
;; vmovss (AVX).  GPR->vector movd (alternative 1) is gated on
;; TARGET_INTER_UNIT_MOVES_TO_VEC for speed.
15454 (define_insn "sse2_loadld"
15455 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
15457 (vec_duplicate:V4SI
15458 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
15459 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
15463 %vmovd\t{%2, %0|%0, %2}
15464 %vmovd\t{%2, %0|%0, %2}
15465 movss\t{%2, %0|%0, %2}
15466 movss\t{%2, %0|%0, %2}
15467 vmovss\t{%2, %1, %0|%0, %1, %2}"
15468 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
15469 (set_attr "type" "ssemov")
15470 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
15471 (set_attr "mode" "TI,TI,V4SF,SF,SF")
15472 (set (attr "preferred_for_speed")
15473 (cond [(eq_attr "alternative" "1")
15474 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15476 (symbol_ref "true")))])
15478 ;; QI and HI modes handled by pextr patterns.
;; Byte/word element vectors for pextrb/pextrw; V16QI needs SSE4.1.
15479 (define_mode_iterator PEXTR_MODE12
15480 [(V16QI "TARGET_SSE4_1") V8HI])
;; Extract one byte/word element to a GPR (%k0 = 32-bit view) or to
;; memory via pextrb/pextrw.  Alternatives 2-3 use EVEX-only registers
;; (AVX512BW).  prefix_extra covers pextrb and mem-destination pextrw,
;; which need the 0x3A opcode map (plain reg-destination pextrw does
;; not).
15482 (define_insn "*vec_extract<mode>"
15483 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
15484 (vec_select:<ssescalarmode>
15485 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
15487 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
15490 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15491 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
15492 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15493 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15494 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
15495 (set_attr "type" "sselog1")
15496 (set_attr "prefix_data16" "1")
15497 (set (attr "prefix_extra")
15499 (and (eq_attr "alternative" "0,2")
15500 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
15502 (const_string "1")))
15503 (set_attr "length_immediate" "1")
15504 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
15505 (set_attr "mode" "TI")])
;; Zero-extending byte/word extract into a 32/64-bit GPR.  pextrb/w to
;; a GPR already zero-extends, so the zero_extend (wrapper partly
;; elided here) is folded into a single instruction.
15507 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
15508 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
15510 (vec_select:<PEXTR_MODE12:ssescalarmode>
15511 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
15513 [(match_operand:SI 2
15514 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
15517 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15518 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
15519 [(set_attr "isa" "*,avx512bw")
15520 (set_attr "type" "sselog1")
15521 (set_attr "prefix_data16" "1")
15522 (set (attr "prefix_extra")
15524 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
15526 (const_string "1")))
15527 (set_attr "length_immediate" "1")
15528 (set_attr "prefix" "maybe_vex")
15529 (set_attr "mode" "TI")])
;; Byte extract zero-extended into an HI register; one vpextrb, since
;; pextrb to a GPR zero-extends the byte.
15531 (define_insn "*vec_extractv16qi_zext"
15532 [(set (match_operand:HI 0 "register_operand" "=r,r")
15535 (match_operand:V16QI 1 "register_operand" "x,v")
15537 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
15540 %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
15541 vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
15542 [(set_attr "isa" "*,avx512bw")
15543 (set_attr "type" "sselog1")
15544 (set_attr "prefix_data16" "1")
15545 (set_attr "prefix_extra" "1")
15546 (set_attr "length_immediate" "1")
15547 (set_attr "prefix" "maybe_vex")
15548 (set_attr "mode" "TI")])
;; Extract a byte/word element when the vector lives in memory: becomes
;; a plain scalar load from the element's address.  NOTE(review): the
;; condition and output template are elided in this excerpt.
15550 (define_insn "*vec_extract<mode>_mem"
15551 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
15552 (vec_select:<ssescalarmode>
15553 (match_operand:VI12_128 1 "memory_operand" "o")
15555 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of a SI/DI vector: a simple scalar move (load,
;; vector->GPR, vector->vector, or store).  The vector->GPR alternative
;; is speed-gated on TARGET_INTER_UNIT_MOVES_FROM_VEC.
15559 (define_insn "*vec_extract<ssevecmodelower>_0"
15560 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
15562 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
15563 (parallel [(const_int 0)])))]
15564 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15566 [(set_attr "isa" "*,sse2,*,*")
15567 (set (attr "preferred_for_speed")
15568 (cond [(eq_attr "alternative" "1")
15569 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15571 (symbol_ref "true")))])
;; Extract the low DImode element of a V2DI on 32-bit targets (!64BIT),
;; where DI does not fit a single GPR; the GPR alternative needs SSE4
;; and is split into two 32-bit extracts by the define_split below.
15573 (define_insn "*vec_extractv2di_0_sse"
15574 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
15576 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
15577 (parallel [(const_int 0)])))]
15578 "TARGET_SSE && !TARGET_64BIT
15579 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15581 [(set_attr "isa" "sse4,*,*")
15582 (set (attr "preferred_for_speed")
15583 (cond [(eq_attr "alternative" "0")
15584 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15586 (symbol_ref "true")))])
15589 [(set (match_operand:DI 0 "general_reg_operand")
15591 (match_operand:V2DI 1 "register_operand")
15592 (parallel [(const_int 0)])))]
15593 "TARGET_SSE4_1 && !TARGET_64BIT
15594 && reload_completed"
15595 [(set (match_dup 2) (match_dup 4))
15599 (parallel [(const_int 1)])))]
15601 operands[4] = gen_lowpart (SImode, operands[1]);
15602 operands[5] = gen_lowpart (V4SImode, operands[1]);
15603 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
;; Post-reload split: rewrite "extract element 0" as a plain scalar move by
;; taking the scalar-mode lowpart of the vector register.  (The define_split
;; keyword line itself is missing from this extract — TODO confirm.)
15607 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15609 (match_operand:<ssevecmode> 1 "register_operand")
15610 (parallel [(const_int 0)])))]
15611 "TARGET_SSE && reload_completed"
15612 [(set (match_dup 0) (match_dup 1))]
15613 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
;; Extract low SI element of a V4SI, presumably zero-extended to DImode
;; (the zero_extend wrapper line is in the numbering gap — TODO confirm).
;; Alt 0 goes to a GPR (x64 only, speed-gated on inter-unit moves).
15615 (define_insn "*vec_extractv4si_0_zext_sse4"
15616 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
15619 (match_operand:V4SI 1 "register_operand" "v,x,v")
15620 (parallel [(const_int 0)]))))]
15623 [(set_attr "isa" "x64,*,avx512f")
15624 (set (attr "preferred_for_speed")
15625 (cond [(eq_attr "alternative" "0")
15626 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15628 (symbol_ref "true")))])
;; Same extract+extend but pre-SSE4: GPR destination only, and only when
;; inter-unit vector->GPR moves are acceptable.
15630 (define_insn "*vec_extractv4si_0_zext"
15631 [(set (match_operand:DI 0 "register_operand" "=r")
15634 (match_operand:V4SI 1 "register_operand" "x")
15635 (parallel [(const_int 0)]))))]
15636 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
;; Post-reload split for the extract+zero_extend: becomes zero_extend of the
;; SImode lowpart of the vector register.
15640 [(set (match_operand:DI 0 "register_operand")
15643 (match_operand:V4SI 1 "register_operand")
15644 (parallel [(const_int 0)]))))]
15645 "TARGET_SSE2 && reload_completed"
15646 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15647 "operands[1] = gen_lowpart (SImode, operands[1]);")
;; General V4SI element extract.  Alternatives 0/1 use pextrd; the remaining
;; alternatives synthesize the extract with a byte shift: the element index
;; is scaled by 4 (bytes) and psrldq/vpsrldq shifts the wanted element into
;; lane 0.  (Some case labels of the switch are in the numbering gaps.)
15649 (define_insn "*vec_extractv4si"
15650 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
15652 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
15653 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
15656 switch (which_alternative)
15660 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
15664 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15665 return "psrldq\t{%2, %0|%0, %2}";
15669 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15670 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
15673 gcc_unreachable ();
15676 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
15677 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
15678 (set (attr "prefix_extra")
15679 (if_then_else (eq_attr "alternative" "0,1")
15681 (const_string "*")))
15682 (set_attr "length_immediate" "1")
15683 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
15684 (set_attr "mode" "TI")])
;; pextrd of an arbitrary V4SI element into the low half of a DI GPR;
;; presumably wrapped in zero_extend (missing line, %k0 template) — TODO confirm.
15686 (define_insn "*vec_extractv4si_zext"
15687 [(set (match_operand:DI 0 "register_operand" "=r,r")
15690 (match_operand:V4SI 1 "register_operand" "x,v")
15691 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15692 "TARGET_64BIT && TARGET_SSE4_1"
15693 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
15694 [(set_attr "isa" "*,avx512dq")
15695 (set_attr "type" "sselog1")
15696 (set_attr "prefix_extra" "1")
15697 (set_attr "length_immediate" "1")
15698 (set_attr "prefix" "maybe_vex")
15699 (set_attr "mode" "TI")])
;; Extract any element of a V4SI that lives in (offsettable) memory — just a
;; scalar load at the right offset.  Condition/template lines are missing here.
15701 (define_insn "*vec_extractv4si_mem"
15702 [(set (match_operand:SI 0 "register_operand" "=x,r")
15704 (match_operand:V4SI 1 "memory_operand" "o,o")
15705 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Memory-source extract + zero-extend, split after reload into a plain
;; zero-extending SI load from the element's address (index * 4 bytes).
15709 (define_insn_and_split "*vec_extractv4si_zext_mem"
15710 [(set (match_operand:DI 0 "register_operand" "=x,r")
15713 (match_operand:V4SI 1 "memory_operand" "o,o")
15714 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15715 "TARGET_64BIT && TARGET_SSE"
15717 "&& reload_completed"
15718 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15720 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract the HIGH element (index 1) of a V2DI.  Many strategies:
;; pextrq to GPR/mem, movhps store of the high half, psrldq/vpsrldq by 8 to
;; shuffle it into lane 0, movhlps, or (memory source) a plain load.
15723 (define_insn "*vec_extractv2di_1"
15724 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
15726 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
15727 (parallel [(const_int 1)])))]
15728 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15730 %vpextrq\t{$1, %1, %0|%0, %1, 1}
15731 vpextrq\t{$1, %1, %0|%0, %1, 1}
15732 %vmovhps\t{%1, %0|%0, %1}
15733 psrldq\t{$8, %0|%0, 8}
15734 vpsrldq\t{$8, %1, %0|%0, %1, 8}
15735 vpsrldq\t{$8, %1, %0|%0, %1, 8}
15736 movhlps\t{%1, %0|%0, %1}
;; Per-alternative ISA gating (pextrq needs x64+SSE4 / AVX512DQ, etc.).
15740 (cond [(eq_attr "alternative" "0")
15741 (const_string "x64_sse4")
15742 (eq_attr "alternative" "1")
15743 (const_string "x64_avx512dq")
15744 (eq_attr "alternative" "3")
15745 (const_string "sse2_noavx")
15746 (eq_attr "alternative" "4")
15747 (const_string "avx")
15748 (eq_attr "alternative" "5")
15749 (const_string "avx512bw")
15750 (eq_attr "alternative" "6")
15751 (const_string "noavx")
15752 (eq_attr "alternative" "8")
15753 (const_string "x64")
15755 (const_string "*")))
15757 (cond [(eq_attr "alternative" "2,6,7")
15758 (const_string "ssemov")
15759 (eq_attr "alternative" "3,4,5")
15760 (const_string "sseishft1")
15761 (eq_attr "alternative" "8")
15762 (const_string "imov")
15764 (const_string "sselog1")))
15765 (set (attr "length_immediate")
15766 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
15768 (const_string "*")))
15769 (set (attr "prefix_rex")
15770 (if_then_else (eq_attr "alternative" "0,1")
15772 (const_string "*")))
15773 (set (attr "prefix_extra")
15774 (if_then_else (eq_attr "alternative" "0,1")
15776 (const_string "*")))
15777 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
15778 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
;; Post-reload split: any-element extract of an in-memory VI_128 vector
;; becomes a scalar load at offset index * element-size.  (The define_split
;; keyword line is in the numbering gap.)
15781 [(set (match_operand:<ssescalarmode> 0 "register_operand")
15782 (vec_select:<ssescalarmode>
15783 (match_operand:VI_128 1 "memory_operand")
15785 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
15786 "TARGET_SSE && reload_completed"
15787 [(set (match_dup 0) (match_dup 1))]
15789 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
15791 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Extract a 128-bit lane of a 256-bit V2TI (vextracti128 / AVX512VL form
;; uses vextracti32x4 on the 512-bit view %g1).
15794 (define_insn "*vec_extractv2ti"
15795 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
15797 (match_operand:V2TI 1 "register_operand" "x,v")
15799 [(match_operand:SI 2 "const_0_to_1_operand")])))]
15802 vextract%~128\t{%2, %1, %0|%0, %1, %2}
15803 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
15804 [(set_attr "type" "sselog")
15805 (set_attr "prefix_extra" "1")
15806 (set_attr "length_immediate" "1")
15807 (set_attr "prefix" "vex,evex")
15808 (set_attr "mode" "OI")])
;; Extract a 128-bit lane of a 512-bit V4TI (vextracti32x4, EVEX-only).
15810 (define_insn "*vec_extractv4ti"
15811 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
15813 (match_operand:V4TI 1 "register_operand" "v")
15815 [(match_operand:SI 2 "const_0_to_3_operand")])))]
15817 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
15818 [(set_attr "type" "sselog")
15819 (set_attr "prefix_extra" "1")
15820 (set_attr "length_immediate" "1")
15821 (set_attr "prefix" "evex")
15822 (set_attr "mode" "XI")])
;; Wide TI-element vector modes for the lane-0 split below.
15824 (define_mode_iterator VEXTRACTI128_MODE
15825 [(V4TI "TARGET_AVX512F") V2TI])
;; Post-reload split: lane 0 of a V2TI/V4TI is just the TImode lowpart —
;; but only when the source isn't an ext (xmm16+) reg without AVX512VL.
15828 [(set (match_operand:TI 0 "nonimmediate_operand")
15830 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
15831 (parallel [(const_int 0)])))]
15833 && reload_completed
15834 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
15835 [(set (match_dup 0) (match_dup 1))]
15836 "operands[1] = gen_lowpart (TImode, operands[1]);")
15838 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
15839 ;; vector modes into vec_extract*.
;; Pre-RA splitter: a scalar subreg at offset 0 of a 16/32/64-byte vector reg
;; becomes an explicit vec_select of element 0.  For 32/64-byte sources the
;; preparation code first extracts the low 128-bit (via lo_v8si/lo_v4di or,
;; for 64-byte, lo_v16si/lo_v8di) so the final pattern works on a 128-bit view.
15841 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15842 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
15843 "can_create_pseudo_p ()
15844 && REG_P (operands[1])
15845 && VECTOR_MODE_P (GET_MODE (operands[1]))
15846 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
15847 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
15848 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
15849 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
15850 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
15851 (parallel [(const_int 0)])))]
;; 64-byte source: reduce to a 256-bit temporary first.
15855 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
15858 if (<MODE>mode == SImode)
15860 tmp = gen_reg_rtx (V8SImode);
15861 emit_insn (gen_vec_extract_lo_v16si (tmp,
15862 gen_lowpart (V16SImode,
15867 tmp = gen_reg_rtx (V4DImode);
15868 emit_insn (gen_vec_extract_lo_v8di (tmp,
15869 gen_lowpart (V8DImode,
;; 32-byte source: reduce to a 128-bit temporary.
15875 tmp = gen_reg_rtx (<ssevecmode>mode);
15876 if (<MODE>mode == SImode)
15877 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
15880 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
;; 16-byte source: operate on it directly via a lowpart view.
15885 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI from two SImode values (SSE4.1 path): pinsrd inserts op2 into
;; lane 1, punpckldq interleaves two regs, movd loads op1 with zeroed upper
;; lane (op2 == const0), and the *y alternatives are the MMX fallbacks.
15890 (define_insn "*vec_concatv2si_sse4_1"
15891 [(set (match_operand:V2SI 0 "register_operand"
15892 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
15894 (match_operand:SI 1 "nonimmediate_operand"
15895 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
15896 (match_operand:SI 2 "nonimm_or_0_operand"
15897 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
15898 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15900 pinsrd\t{$1, %2, %0|%0, %2, 1}
15901 pinsrd\t{$1, %2, %0|%0, %2, 1}
15902 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15903 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15904 punpckldq\t{%2, %0|%0, %2}
15905 punpckldq\t{%2, %0|%0, %2}
15906 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
15907 %vmovd\t{%1, %0|%0, %1}
15908 punpckldq\t{%2, %0|%0, %2}
15909 movd\t{%1, %0|%0, %1}"
15910 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
15911 (set (attr "mmx_isa")
15912 (if_then_else (eq_attr "alternative" "8,9")
15913 (const_string "native")
15914 (const_string "*")))
15916 (cond [(eq_attr "alternative" "7")
15917 (const_string "ssemov")
15918 (eq_attr "alternative" "8")
15919 (const_string "mmxcvt")
15920 (eq_attr "alternative" "9")
15921 (const_string "mmxmov")
15923 (const_string "sselog")))
15924 (set (attr "prefix_extra")
15925 (if_then_else (eq_attr "alternative" "0,1,2,3")
15927 (const_string "*")))
15928 (set (attr "length_immediate")
15929 (if_then_else (eq_attr "alternative" "0,1,2,3")
15931 (const_string "*")))
15932 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
15933 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15935 ;; ??? In theory we can match memory for the MMX alternative, but allowing
15936 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
15937 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SI concat: punpckldq/unpcklps for two-register inputs,
;; movd/movss when op2 is zero; last two alternatives are MMX.
15938 (define_insn "*vec_concatv2si"
15939 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
15941 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
15942 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
15943 "TARGET_SSE && !TARGET_SSE4_1"
15945 punpckldq\t{%2, %0|%0, %2}
15946 movd\t{%1, %0|%0, %1}
15947 unpcklps\t{%2, %0|%0, %2}
15948 movss\t{%1, %0|%0, %1}
15949 punpckldq\t{%2, %0|%0, %2}
15950 movd\t{%1, %0|%0, %1}"
15951 [(set_attr "isa" "sse2,sse2,*,*,*,*")
15952 (set_attr "mmx_isa" "*,*,*,*,native,native")
15953 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
15954 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
;; Concatenate two V2SI halves into a V4SI: punpcklqdq/movlhps for register
;; sources, movhps to load the high half straight from memory.
15956 (define_insn "*vec_concatv4si"
15957 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
15959 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
15960 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
15963 punpcklqdq\t{%2, %0|%0, %2}
15964 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15965 movlhps\t{%2, %0|%0, %2}
15966 movhps\t{%2, %0|%0, %q2}
15967 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
15968 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
15969 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
15970 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
15971 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; V2SI low half with a zero high half: a simple movq (zero-extending 64-bit
;; load/move); movq2dq handles an MMX-register source.
15973 (define_insn "*vec_concatv4si_0"
15974 [(set (match_operand:V4SI 0 "register_operand" "=v,x")
15976 (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
15977 (match_operand:V2SI 2 "const0_operand" " C,C")))]
15980 %vmovq\t{%1, %0|%0, %1}
15981 movq2dq\t{%1, %0|%0, %1}"
15982 [(set_attr "mmx_isa" "*,native")
15983 (set_attr "type" "ssemov")
15984 (set_attr "prefix" "maybe_vex,orig")
15985 (set_attr "mode" "TI")])
;; Build a V2DI from two DImode scalars: pinsrq (x64 SSE4 / AVX512DQ) to
;; insert op2 into lane 1, punpcklqdq/movlhps for two registers, movhps when
;; op2 comes from memory.
15987 (define_insn "vec_concatv2di"
15988 [(set (match_operand:V2DI 0 "register_operand"
15989 "=Yr,*x,x ,v ,x,v ,x,x,v")
15991 (match_operand:DI 1 "register_operand"
15992 " 0, 0,x ,Yv,0,Yv,0,0,v")
15993 (match_operand:DI 2 "nonimmediate_operand"
15994 " rm,rm,rm,rm,x,Yv,x,m,m")))]
15997 pinsrq\t{$1, %2, %0|%0, %2, 1}
15998 pinsrq\t{$1, %2, %0|%0, %2, 1}
15999 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
16000 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
16001 punpcklqdq\t{%2, %0|%0, %2}
16002 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
16003 movlhps\t{%2, %0|%0, %2}
16004 movhps\t{%2, %0|%0, %2}
16005 vmovhps\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative ISA / type / prefix attribute plumbing.
16007 (cond [(eq_attr "alternative" "0,1")
16008 (const_string "x64_sse4_noavx")
16009 (eq_attr "alternative" "2")
16010 (const_string "x64_avx")
16011 (eq_attr "alternative" "3")
16012 (const_string "x64_avx512dq")
16013 (eq_attr "alternative" "4")
16014 (const_string "sse2_noavx")
16015 (eq_attr "alternative" "5,8")
16016 (const_string "avx")
16018 (const_string "noavx")))
16021 (eq_attr "alternative" "0,1,2,3,4,5")
16022 (const_string "sselog")
16023 (const_string "ssemov")))
16024 (set (attr "prefix_rex")
16025 (if_then_else (eq_attr "alternative" "0,1,2,3")
16027 (const_string "*")))
16028 (set (attr "prefix_extra")
16029 (if_then_else (eq_attr "alternative" "0,1,2,3")
16031 (const_string "*")))
16032 (set (attr "length_immediate")
16033 (if_then_else (eq_attr "alternative" "0,1,2,3")
16035 (const_string "*")))
16036 (set (attr "prefix")
16037 (cond [(eq_attr "alternative" "2")
16038 (const_string "vex")
16039 (eq_attr "alternative" "3")
16040 (const_string "evex")
16041 (eq_attr "alternative" "5,8")
16042 (const_string "maybe_evex")
16044 (const_string "orig")))
16045 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; DImode low half with a zero high half: movq zero-extends (alternative 0
;; moves from a GPR, guarded by HAVE_AS_IX86_INTERUNIT_MOVQ assembler support);
;; movq2dq covers an MMX source.  GPR alternative is speed-gated.
16047 (define_insn "*vec_concatv2di_0"
16048 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
16050 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
16051 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
16054 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
16055 %vmovq\t{%1, %0|%0, %1}
16056 movq2dq\t{%1, %0|%0, %1}"
16057 [(set_attr "isa" "x64,*,*")
16058 (set_attr "mmx_isa" "*,*,native")
16059 (set_attr "type" "ssemov")
16060 (set_attr "prefix_rex" "1,*,*")
16061 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
16062 (set_attr "mode" "TI")
16063 (set (attr "preferred_for_speed")
16064 (cond [(eq_attr "alternative" "0")
16065 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
16067 (symbol_ref "true")))])
16069 ;; vmovq clears also the higher bits.
;; Set element 0 of a zeroed VI8 vector: vec_merge of a duplicated scalar
;; with a const0 vector emits a single vmovq (which zeroes the upper lanes).
;; Alt 0 (scalar from GPR) needs x64 and is gated on inter-unit move cost.
16070 (define_insn "vec_set<mode>_0"
16071 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
16072 (vec_merge:VI8_AVX_AVX512F
16073 (vec_duplicate:VI8_AVX_AVX512F
16074 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
16075 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
16078 "vmovq\t{%2, %x0|%x0, %2}"
16079 [(set_attr "isa" "x64,*")
16080 (set_attr "type" "ssemov")
16081 (set_attr "prefix_rex" "1,*")
16082 (set_attr "prefix" "maybe_evex")
16083 (set_attr "mode" "TI")
16084 (set (attr "preferred_for_speed")
16085 (cond [(eq_attr "alternative" "0")
16086 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
16088 (symbol_ref "true")))])
;; Standard-named vector unpack expanders.  The signed/unsigned lo/hi
;; variants all delegate to ix86_expand_sse_unpack (args: unsigned_p, high_p).
16090 (define_expand "vec_unpacks_lo_<mode>"
16091 [(match_operand:<sseunpackmode> 0 "register_operand")
16092 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16094 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
16096 (define_expand "vec_unpacks_hi_<mode>"
16097 [(match_operand:<sseunpackmode> 0 "register_operand")
16098 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16100 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
16102 (define_expand "vec_unpacku_lo_<mode>"
16103 [(match_operand:<sseunpackmode> 0 "register_operand")
16104 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16106 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Mask-register (k-reg) unpack variants: the low half of a mask is the mask
;; itself, so lo is a plain move; op2 carries the subvector element count.
16108 (define_expand "vec_unpacks_sbool_lo_qi"
16109 [(match_operand:QI 0 "register_operand")
16110 (match_operand:QI 1 "register_operand")
16111 (match_operand:QI 2 "const_int_operand")]
16114 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
16116 emit_move_insn (operands[0], operands[1]);
16120 (define_expand "vec_unpacks_lo_hi"
16121 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
16122 (match_operand:HI 1 "register_operand"))]
16125 (define_expand "vec_unpacks_lo_si"
16126 [(set (match_operand:HI 0 "register_operand")
16127 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
16130 (define_expand "vec_unpacks_lo_di"
16131 [(set (match_operand:SI 0 "register_operand")
16132 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
16135 (define_expand "vec_unpacku_hi_<mode>"
16136 [(match_operand:<sseunpackmode> 0 "register_operand")
16137 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16139 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
;; High half of a mask: shift the k-reg right by nunits/2.  Without AVX512DQ
;; there is no QImode k-shift, so widen to HImode, shift, and truncate back.
16141 (define_expand "vec_unpacks_sbool_hi_qi"
16142 [(match_operand:QI 0 "register_operand")
16143 (match_operand:QI 1 "register_operand")
16144 (match_operand:QI 2 "const_int_operand")]
16147 HOST_WIDE_INT nunits = INTVAL (operands[2]);
16148 if (nunits != 8 && nunits != 4)
16150 if (TARGET_AVX512DQ)
16151 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
16152 GEN_INT (nunits / 2)));
16155 rtx tem = gen_reg_rtx (HImode);
16156 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
16158 GEN_INT (nunits / 2)));
16159 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
;; hi variants for scalar mask modes: logical right shift of the k-register
;; by half its width (UNSPEC_MASKOP marks it as a mask-unit operation).
16164 (define_expand "vec_unpacks_hi_hi"
16166 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
16167 (lshiftrt:HI (match_operand:HI 1 "register_operand")
16169 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
16172 (define_expand "vec_unpacks_hi_<mode>"
16174 [(set (subreg:SWI48x
16175 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
16176 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
16178 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
16180 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
16182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16186 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unsigned average with rounding (pavgb/pavgw), modeled exactly as
;; trunc((zext(a) + zext(b) + 1) >> 1) in the double-width mode.
;; The expander materializes the +1 constant (possibly displaced by a mask
;; operand via <mask_expand_op3>) and canonicalizes the commutative operands.
16188 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
16189 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
16190 (truncate:VI12_AVX2_AVX512BW
16191 (lshiftrt:<ssedoublemode>
16192 (plus:<ssedoublemode>
16193 (plus:<ssedoublemode>
16194 (zero_extend:<ssedoublemode>
16195 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
16196 (zero_extend:<ssedoublemode>
16197 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
16198 (match_dup <mask_expand_op3>))
16200 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16202 operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
16203 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn: emits pavg<b|w> (legacy) or vpavg with optional masking.
16206 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
16207 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
16208 (truncate:VI12_AVX2_AVX512BW
16209 (lshiftrt:<ssedoublemode>
16210 (plus:<ssedoublemode>
16211 (plus:<ssedoublemode>
16212 (zero_extend:<ssedoublemode>
16213 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
16214 (zero_extend:<ssedoublemode>
16215 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
16216 (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
16218 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16219 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16221 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
16222 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16223 [(set_attr "isa" "noavx,avx")
16224 (set_attr "type" "sseiadd")
16225 (set_attr "prefix_data16" "1,*")
16226 (set_attr "prefix" "orig,<mask_prefix>")
16227 (set_attr "mode" "<sseinsnmode>")])
16229 ;; The correct representation for this is absolutely enormous, and
16230 ;; surely not generally useful.
;; Sum of absolute byte differences (psadbw), kept as an opaque unspec
;; because the exact RTL would be impractically large (see note above).
16231 (define_insn "<sse2_avx2>_psadbw"
16232 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
16233 (unspec:VI8_AVX2_AVX512BW
16234 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
16235 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
16239 psadbw\t{%2, %0|%0, %2}
16240 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
16241 [(set_attr "isa" "noavx,avx")
16242 (set_attr "type" "sseiadd")
16243 (set_attr "atom_unit" "simul")
16244 (set_attr "prefix_data16" "1,*")
16245 (set_attr "prefix" "orig,maybe_evex")
16246 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd: collect the sign bits of each FP element into a GPR.
16248 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
16249 [(set (match_operand:SI 0 "register_operand" "=r")
16251 [(match_operand:VF_128_256 1 "register_operand" "x")]
16254 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
16255 [(set_attr "type" "ssemov")
16256 (set_attr "prefix" "maybe_vex")
16257 (set_attr "mode" "<MODE>")])
;; Same, with the SI result sign/zero-extended (<u>ext) into a DI register.
16259 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
16260 [(set (match_operand:DI 0 "register_operand" "=r")
16263 [(match_operand:VF_128_256 1 "register_operand" "x")]
16265 "TARGET_64BIT && TARGET_SSE"
16266 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
16267 [(set_attr "type" "ssemov")
16268 (set_attr "prefix" "maybe_vex")
16269 (set_attr "mode" "<MODE>")])
;; movmsk of (v < 0): the comparison result's sign bits equal the operand's
;; sign bits, so split to a plain movmsk on the FP-mode lowpart view.
16271 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
16272 [(set (match_operand:SI 0 "register_operand" "=r")
16275 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16276 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
16280 "&& reload_completed"
16281 [(set (match_dup 0)
16282 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16283 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16284 [(set_attr "type" "ssemov")
16285 (set_attr "prefix" "maybe_vex")
16286 (set_attr "mode" "<MODE>")])
;; (v < 0) variant with DI extension of the result.
16288 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
16289 [(set (match_operand:DI 0 "register_operand" "=r")
16293 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16294 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
16296 "TARGET_64BIT && TARGET_SSE"
16298 "&& reload_completed"
16299 [(set (match_dup 0)
16300 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16301 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16302 [(set_attr "type" "ssemov")
16303 (set_attr "prefix" "maybe_vex")
16304 (set_attr "mode" "<MODE>")])
;; movmsk of an arithmetic-right-shifted vector: the shift only propagates
;; the sign bit, so the movmsk result is unchanged — drop the shift on split.
16306 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
16307 [(set (match_operand:SI 0 "register_operand" "=r")
16309 [(subreg:VF_128_256
16310 (ashiftrt:<sseintvecmode>
16311 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16312 (match_operand:QI 2 "const_int_operand" "n")) 0)]
16316 "&& reload_completed"
16317 [(set (match_dup 0)
16318 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16319 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16320 [(set_attr "type" "ssemov")
16321 (set_attr "prefix" "maybe_vex")
16322 (set_attr "mode" "<MODE>")])
;; Shift variant with DI extension of the result.
16324 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
16325 [(set (match_operand:DI 0 "register_operand" "=r")
16328 [(subreg:VF_128_256
16329 (ashiftrt:<sseintvecmode>
16330 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16331 (match_operand:QI 2 "const_int_operand" "n")) 0)]
16333 "TARGET_64BIT && TARGET_SSE"
16335 "&& reload_completed"
16336 [(set (match_dup 0)
16337 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16338 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16339 [(set_attr "type" "ssemov")
16340 (set_attr "prefix" "maybe_vex")
16341 (set_attr "mode" "<MODE>")])
;; pmovmskb: byte sign-bit mask into a GPR.  prefix_data16 is only needed for
;; the legacy (non-AVX) encoding.
16343 (define_insn "<sse2_avx2>_pmovmskb"
16344 [(set (match_operand:SI 0 "register_operand" "=r")
16346 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
16349 "%vpmovmskb\t{%1, %0|%0, %1}"
16350 [(set_attr "type" "ssemov")
16351 (set (attr "prefix_data16")
16353 (match_test "TARGET_AVX")
16355 (const_string "1")))
16356 (set_attr "prefix" "maybe_vex")
16357 (set_attr "mode" "SI")])
;; pmovmskb with the SI result zero-extended into a DI register.
16359 (define_insn "*<sse2_avx2>_pmovmskb_zext"
16360 [(set (match_operand:DI 0 "register_operand" "=r")
16363 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
16365 "TARGET_64BIT && TARGET_SSE2"
16366 "%vpmovmskb\t{%1, %k0|%k0, %1}"
16367 [(set_attr "type" "ssemov")
16368 (set (attr "prefix_data16")
16370 (match_test "TARGET_AVX")
16372 (const_string "1")))
16373 (set_attr "prefix" "maybe_vex")
16374 (set_attr "mode" "SI")])
;; V16QI pmovmskb with (presumably sign-) extension to DI; the 16-bit mask's
;; top bit is 0, so the same %k0 template works — TODO confirm against gaps.
16376 (define_insn "*sse2_pmovmskb_ext"
16377 [(set (match_operand:DI 0 "register_operand" "=r")
16380 [(match_operand:V16QI 1 "register_operand" "x")]
16382 "TARGET_64BIT && TARGET_SSE2"
16383 "%vpmovmskb\t{%1, %k0|%k0, %1}"
16384 [(set_attr "type" "ssemov")
16385 (set (attr "prefix_data16")
16387 (match_test "TARGET_AVX")
16389 (const_string "1")))
16390 (set_attr "prefix" "maybe_vex")
16391 (set_attr "mode" "SI")])
;; Pre-RA split: HI->SI zero-extension of a pmovmskb result is redundant
;; (the upper 16 bits are already zero) — emit the bare movmsk.
16393 (define_insn_and_split "*sse2_pmovskb_zexthisi"
16394 [(set (match_operand:SI 0 "register_operand")
16398 [(match_operand:V16QI 1 "register_operand")]
16399 UNSPEC_MOVMSK) 0)))]
16400 "TARGET_SSE2 && ix86_pre_reload_split ()"
16403 [(set (match_dup 0)
16404 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))])
;; Split: a negated/inverted use of the 16-bit mask becomes movmsk followed
;; by xor with 0xffff in a fresh pseudo.
16407 [(set (match_operand:SI 0 "register_operand")
16412 [(match_operand:V16QI 1 "register_operand")]
16413 UNSPEC_MOVMSK) 0))))]
16415 [(set (match_dup 2)
16416 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16418 (xor:SI (match_dup 2) (const_int 65535)))]
16419 "operands[2] = gen_reg_rtx (SImode);")
;; Split: pmovmskb of a bitwise-NOT vector == complement of pmovmskb of the
;; original.  32-element masks complement with NOT; narrower ones XOR with
;; the (1<<nunits)-1 mask so only the live bits flip.
16422 [(set (match_operand:SI 0 "register_operand")
16424 [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
16427 [(set (match_dup 2)
16428 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16429 (set (match_dup 0) (match_dup 3))]
16431 operands[2] = gen_reg_rtx (SImode);
16432 if (GET_MODE_NUNITS (<MODE>mode) == 32)
16433 operands[3] = gen_rtx_NOT (SImode, operands[2]);
16437 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
16439 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
;; Same NOT-folding split when the NOT is hidden under a same-size
;; integer-vector subreg; rewrites the operand to the matching lowpart mode.
16444 [(set (match_operand:SI 0 "register_operand")
16446 [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
16449 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
16450 && GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
16451 [(set (match_dup 2)
16452 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16453 (set (match_dup 0) (match_dup 3))]
16455 operands[2] = gen_reg_rtx (SImode);
16456 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
16457 if (GET_MODE_NUNITS (<MODE>mode) == 32)
16458 operands[3] = gen_rtx_NOT (SImode, operands[2]);
16462 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
16464 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
;; pmovmskb of (v < 0): comparison-against-zero sign bits equal the source's
;; sign bits, so split to the plain pmovmskb pattern.
16468 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
16469 [(set (match_operand:SI 0 "register_operand" "=r")
16471 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
16472 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
16477 [(set (match_dup 0)
16478 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16480 [(set_attr "type" "ssemov")
16481 (set (attr "prefix_data16")
16483 (match_test "TARGET_AVX")
16485 (const_string "1")))
16486 (set_attr "prefix" "maybe_vex")
16487 (set_attr "mode" "SI")])
;; (v < 0) variant with zero-extension of the mask into DI.
16489 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
16490 [(set (match_operand:DI 0 "register_operand" "=r")
16493 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
16494 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
16496 "TARGET_64BIT && TARGET_SSE2"
16499 [(set (match_dup 0)
16500 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16502 [(set_attr "type" "ssemov")
16503 (set (attr "prefix_data16")
16505 (match_test "TARGET_AVX")
16507 (const_string "1")))
16508 (set_attr "prefix" "maybe_vex")
16509 (set_attr "mode" "SI")])
;; (v < 0) variant with sign-extension (V16QI: mask fits 16 bits, so
;; sign-extension equals zero-extension of the value).
16511 (define_insn_and_split "*sse2_pmovmskb_ext_lt"
16512 [(set (match_operand:DI 0 "register_operand" "=r")
16515 [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
16516 (match_operand:V16QI 2 "const0_operand" "C"))]
16518 "TARGET_64BIT && TARGET_SSE2"
16521 [(set (match_dup 0)
16522 (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16524 [(set_attr "type" "ssemov")
16525 (set (attr "prefix_data16")
16527 (match_test "TARGET_AVX")
16529 (const_string "1")))
16530 (set_attr "prefix" "maybe_vex")
16531 (set_attr "mode" "SI")])
;; maskmovdqu: byte-masked store of op1 to [%(r)di] under mask op2.
16533 (define_expand "sse2_maskmovdqu"
16534 [(set (match_operand:V16QI 0 "memory_operand")
16535 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
16536 (match_operand:V16QI 2 "register_operand")
;; The store address is implicitly the "D" (rdi/edi) register; an addr32
;; prefix is emitted by hand when Pmode is narrower than word_mode, since
;; ASM_OUTPUT_OPCODE needs %v first in the template (see comment below).
16541 (define_insn "*sse2_maskmovdqu"
16542 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
16543 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
16544 (match_operand:V16QI 2 "register_operand" "x")
16545 (mem:V16QI (match_dup 0))]
16549 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
16550 that requires %v to be at the beginning of the opcode name. */
16551 if (Pmode != word_mode)
16552 fputs ("\taddr32", asm_out_file);
16553 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
16555 [(set_attr "type" "ssemov")
16556 (set_attr "prefix_data16" "1")
16557 (set (attr "length_address")
16558 (symbol_ref ("Pmode != word_mode")))
16559 ;; The implicit %rdi operand confuses default length_vex computation.
16560 (set (attr "length_vex")
16561 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
16562 (set_attr "prefix" "maybe_vex")
16563 (set_attr "znver1_decode" "vector")
16564 (set_attr "mode" "TI")])
;; Load the MXCSR control/status register from memory (volatile: changes
;; FP environment state).
16566 (define_insn "sse_ldmxcsr"
16567 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
16571 [(set_attr "type" "sse")
16572 (set_attr "atom_sse_attr" "mxcsr")
16573 (set_attr "prefix" "maybe_vex")
16574 (set_attr "memory" "load")])
;; Store MXCSR to memory.
16576 (define_insn "sse_stmxcsr"
16577 [(set (match_operand:SI 0 "memory_operand" "=m")
16578 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
16581 [(set_attr "type" "sse")
16582 (set_attr "atom_sse_attr" "mxcsr")
16583 (set_attr "prefix" "maybe_vex")
16584 (set_attr "memory" "store")])
;; clflush: flush the cache line containing the given address.
16586 (define_insn "sse2_clflush"
16587 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
16591 [(set_attr "type" "sse")
16592 (set_attr "atom_sse_attr" "fence")
16593 (set_attr "memory" "unknown")])
16595 ;; As per AMD and Intel ISA manuals, the first operand is extensions
16596 ;; and it goes to %ecx. The second operand received is hints and it goes
;; mwait: waits with extensions in %ecx and hints in %eax (hard-coded
;; "c"/"a" register constraints).
16598 (define_insn "sse3_mwait"
16599 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
16600 (match_operand:SI 1 "register_operand" "a")]
16603 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
16604 ;; Since 32bit register operands are implicitly zero extended to 64bit,
16605 ;; we only need to set up 32bit registers.
16607 [(set_attr "length" "3")])
;; monitor: address in %(r)ax (Pmode-sized), extensions %ecx, hints %edx.
;; Length grows by one when an address-size prefix is needed (Pmode != word).
16609 (define_insn "@sse3_monitor_<mode>"
16610 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
16611 (match_operand:SI 1 "register_operand" "c")
16612 (match_operand:SI 2 "register_operand" "d")]
16615 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
16616 ;; RCX and RDX are used. Since 32bit register operands are implicitly
16617 ;; zero extended to 64bit, we only need to set up 32bit registers.
16619 [(set (attr "length")
16620 (symbol_ref ("(Pmode != word_mode) + 3")))])
16622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16624 ;; SSSE3 instructions
16626 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the wrapping and signed-saturating add/sub RTL codes.
;; It lets one pattern template below expand into all four SSSE3 horizontal
;; operations: phaddw/phaddsw/phsubw/phsubsw (via <plusminus_mnemonic>).
16628 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
16630 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
16631 [(set (match_operand:V16HI 0 "register_operand" "=x")
16632 (ssse3_plusminus:V16HI
16635 (match_operand:V16HI 1 "register_operand" "x")
16636 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
16638 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
16639 (const_int 16) (const_int 18) (const_int 20) (const_int 22)
16640 (const_int 8) (const_int 10) (const_int 12) (const_int 14)
16641 (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
16643 (vec_concat:V32HI (match_dup 1) (match_dup 2))
16645 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
16646 (const_int 17) (const_int 19) (const_int 21) (const_int 23)
16647 (const_int 9) (const_int 11) (const_int 13) (const_int 15)
16648 (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
16650 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16651 [(set_attr "type" "sseiadd")
16652 (set_attr "prefix_extra" "1")
16653 (set_attr "prefix" "vex")
16654 (set_attr "mode" "OI")])
16656 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
16657 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16658 (ssse3_plusminus:V8HI
16661 (match_operand:V8HI 1 "register_operand" "0,x")
16662 (match_operand:V8HI 2 "vector_operand" "xBm,xm"))
16664 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
16665 (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))
16667 (vec_concat:V16HI (match_dup 1) (match_dup 2))
16669 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
16670 (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))]
16673 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16674 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16675 [(set_attr "isa" "noavx,avx")
16676 (set_attr "type" "sseiadd")
16677 (set_attr "atom_unit" "complex")
16678 (set_attr "prefix_data16" "1,*")
16679 (set_attr "prefix_extra" "1")
16680 (set_attr "prefix" "orig,vex")
16681 (set_attr "mode" "TI")])
16683 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
16684 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16685 (ssse3_plusminus:V4HI
16688 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
16689 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
16691 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
16693 (vec_concat:V8HI (match_dup 1) (match_dup 2))
16695 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
16696 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16698 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16701 "TARGET_SSSE3 && reload_completed
16702 && SSE_REGNO_P (REGNO (operands[0]))"
16705 /* Generate SSE version of the operation. */
16706 rtx op0 = lowpart_subreg (V8HImode, operands[0],
16707 GET_MODE (operands[0]));
16708 rtx op1 = lowpart_subreg (V8HImode, operands[1],
16709 GET_MODE (operands[1]));
16710 rtx op2 = lowpart_subreg (V8HImode, operands[2],
16711 GET_MODE (operands[2]));
16712 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
16713 ix86_move_vector_high_sse_to_mmx (op0);
16716 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16717 (set_attr "type" "sseiadd")
16718 (set_attr "atom_unit" "complex")
16719 (set_attr "prefix_extra" "1")
16720 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16721 (set_attr "mode" "DI,TI,TI")])
16723 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
16724 [(set (match_operand:V8SI 0 "register_operand" "=x")
16728 (match_operand:V8SI 1 "register_operand" "x")
16729 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
16731 [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
16732 (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
16734 (vec_concat:V16SI (match_dup 1) (match_dup 2))
16736 [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
16737 (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
16739 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16740 [(set_attr "type" "sseiadd")
16741 (set_attr "prefix_extra" "1")
16742 (set_attr "prefix" "vex")
16743 (set_attr "mode" "OI")])
16745 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
16746 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16750 (match_operand:V4SI 1 "register_operand" "0,x")
16751 (match_operand:V4SI 2 "vector_operand" "xBm,xm"))
16753 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
16755 (vec_concat:V8SI (match_dup 1) (match_dup 2))
16757 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
16760 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16761 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16762 [(set_attr "isa" "noavx,avx")
16763 (set_attr "type" "sseiadd")
16764 (set_attr "atom_unit" "complex")
16765 (set_attr "prefix_data16" "1,*")
16766 (set_attr "prefix_extra" "1")
16767 (set_attr "prefix" "orig,vex")
16768 (set_attr "mode" "TI")])
16770 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
16771 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
16775 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
16776 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
16777 (parallel [(const_int 0) (const_int 2)]))
16779 (vec_concat:V4SI (match_dup 1) (match_dup 2))
16780 (parallel [(const_int 1) (const_int 3)]))))]
16781 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16783 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16786 "TARGET_SSSE3 && reload_completed
16787 && SSE_REGNO_P (REGNO (operands[0]))"
16790 /* Generate SSE version of the operation. */
16791 rtx op0 = lowpart_subreg (V4SImode, operands[0],
16792 GET_MODE (operands[0]));
16793 rtx op1 = lowpart_subreg (V4SImode, operands[1],
16794 GET_MODE (operands[1]));
16795 rtx op2 = lowpart_subreg (V4SImode, operands[2],
16796 GET_MODE (operands[2]));
16797 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
16798 ix86_move_vector_high_sse_to_mmx (op0);
16801 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16802 (set_attr "type" "sseiadd")
16803 (set_attr "atom_unit" "complex")
16804 (set_attr "prefix_extra" "1")
16805 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16806 (set_attr "mode" "DI,TI,TI")])
16808 (define_insn "avx2_pmaddubsw256"
16809 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
16814 (match_operand:V32QI 1 "register_operand" "x,v")
16815 (parallel [(const_int 0) (const_int 2)
16816 (const_int 4) (const_int 6)
16817 (const_int 8) (const_int 10)
16818 (const_int 12) (const_int 14)
16819 (const_int 16) (const_int 18)
16820 (const_int 20) (const_int 22)
16821 (const_int 24) (const_int 26)
16822 (const_int 28) (const_int 30)])))
16825 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
16826 (parallel [(const_int 0) (const_int 2)
16827 (const_int 4) (const_int 6)
16828 (const_int 8) (const_int 10)
16829 (const_int 12) (const_int 14)
16830 (const_int 16) (const_int 18)
16831 (const_int 20) (const_int 22)
16832 (const_int 24) (const_int 26)
16833 (const_int 28) (const_int 30)]))))
16836 (vec_select:V16QI (match_dup 1)
16837 (parallel [(const_int 1) (const_int 3)
16838 (const_int 5) (const_int 7)
16839 (const_int 9) (const_int 11)
16840 (const_int 13) (const_int 15)
16841 (const_int 17) (const_int 19)
16842 (const_int 21) (const_int 23)
16843 (const_int 25) (const_int 27)
16844 (const_int 29) (const_int 31)])))
16846 (vec_select:V16QI (match_dup 2)
16847 (parallel [(const_int 1) (const_int 3)
16848 (const_int 5) (const_int 7)
16849 (const_int 9) (const_int 11)
16850 (const_int 13) (const_int 15)
16851 (const_int 17) (const_int 19)
16852 (const_int 21) (const_int 23)
16853 (const_int 25) (const_int 27)
16854 (const_int 29) (const_int 31)]))))))]
16856 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16857 [(set_attr "isa" "*,avx512bw")
16858 (set_attr "type" "sseiadd")
16859 (set_attr "prefix_extra" "1")
16860 (set_attr "prefix" "vex,evex")
16861 (set_attr "mode" "OI")])
16863 ;; The correct representation for this is absolutely enormous, and
16864 ;; surely not generally useful.
16865 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
16866 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16867 (unspec:VI2_AVX512VL
16868 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
16869 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
16870 UNSPEC_PMADDUBSW512))]
16872 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
16873 [(set_attr "type" "sseiadd")
16874 (set_attr "prefix" "evex")
16875 (set_attr "mode" "XI")])
16877 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
16878 [(set (match_operand:V32HI 0 "register_operand" "=v")
16885 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
16887 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
16889 (const_vector:V32HI [(const_int 1) (const_int 1)
16890 (const_int 1) (const_int 1)
16891 (const_int 1) (const_int 1)
16892 (const_int 1) (const_int 1)
16893 (const_int 1) (const_int 1)
16894 (const_int 1) (const_int 1)
16895 (const_int 1) (const_int 1)
16896 (const_int 1) (const_int 1)
16897 (const_int 1) (const_int 1)
16898 (const_int 1) (const_int 1)
16899 (const_int 1) (const_int 1)
16900 (const_int 1) (const_int 1)
16901 (const_int 1) (const_int 1)
16902 (const_int 1) (const_int 1)
16903 (const_int 1) (const_int 1)
16904 (const_int 1) (const_int 1)]))
16907 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16908 [(set_attr "type" "sseimul")
16909 (set_attr "prefix" "evex")
16910 (set_attr "mode" "XI")])
16912 (define_insn "ssse3_pmaddubsw128"
16913 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
16918 (match_operand:V16QI 1 "register_operand" "0,x,v")
16919 (parallel [(const_int 0) (const_int 2)
16920 (const_int 4) (const_int 6)
16921 (const_int 8) (const_int 10)
16922 (const_int 12) (const_int 14)])))
16925 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
16926 (parallel [(const_int 0) (const_int 2)
16927 (const_int 4) (const_int 6)
16928 (const_int 8) (const_int 10)
16929 (const_int 12) (const_int 14)]))))
16932 (vec_select:V8QI (match_dup 1)
16933 (parallel [(const_int 1) (const_int 3)
16934 (const_int 5) (const_int 7)
16935 (const_int 9) (const_int 11)
16936 (const_int 13) (const_int 15)])))
16938 (vec_select:V8QI (match_dup 2)
16939 (parallel [(const_int 1) (const_int 3)
16940 (const_int 5) (const_int 7)
16941 (const_int 9) (const_int 11)
16942 (const_int 13) (const_int 15)]))))))]
16945 pmaddubsw\t{%2, %0|%0, %2}
16946 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
16947 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16948 [(set_attr "isa" "noavx,avx,avx512bw")
16949 (set_attr "type" "sseiadd")
16950 (set_attr "atom_unit" "simul")
16951 (set_attr "prefix_data16" "1,*,*")
16952 (set_attr "prefix_extra" "1")
16953 (set_attr "prefix" "orig,vex,evex")
16954 (set_attr "mode" "TI")])
16956 (define_insn "ssse3_pmaddubsw"
16957 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16962 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
16963 (parallel [(const_int 0) (const_int 2)
16964 (const_int 4) (const_int 6)])))
16967 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
16968 (parallel [(const_int 0) (const_int 2)
16969 (const_int 4) (const_int 6)]))))
16972 (vec_select:V4QI (match_dup 1)
16973 (parallel [(const_int 1) (const_int 3)
16974 (const_int 5) (const_int 7)])))
16976 (vec_select:V4QI (match_dup 2)
16977 (parallel [(const_int 1) (const_int 3)
16978 (const_int 5) (const_int 7)]))))))]
16979 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16981 pmaddubsw\t{%2, %0|%0, %2}
16982 pmaddubsw\t{%2, %0|%0, %2}
16983 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16984 [(set_attr "isa" "*,noavx,avx")
16985 (set_attr "mmx_isa" "native,*,*")
16986 (set_attr "type" "sseiadd")
16987 (set_attr "atom_unit" "simul")
16988 (set_attr "prefix_extra" "1")
16989 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16990 (set_attr "mode" "DI,TI,TI")])
;; Modes handled by the pmulhrsw (multiply high with round and scale)
;; expanders: 128-bit V8HI always, 256-bit V16HI only when AVX2 is enabled.
16992 (define_mode_iterator PMULHRSW
16993 [V8HI (V16HI "TARGET_AVX2")])
16995 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
16996 [(set (match_operand:PMULHRSW 0 "register_operand")
16997 (vec_merge:PMULHRSW
16999 (lshiftrt:<ssedoublemode>
17000 (plus:<ssedoublemode>
17001 (lshiftrt:<ssedoublemode>
17002 (mult:<ssedoublemode>
17003 (sign_extend:<ssedoublemode>
17004 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
17005 (sign_extend:<ssedoublemode>
17006 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
17010 (match_operand:PMULHRSW 3 "register_operand")
17011 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
17012 "TARGET_AVX512BW && TARGET_AVX512VL"
17014 operands[5] = CONST1_RTX(<MODE>mode);
17015 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
17018 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
17019 [(set (match_operand:PMULHRSW 0 "register_operand")
17021 (lshiftrt:<ssedoublemode>
17022 (plus:<ssedoublemode>
17023 (lshiftrt:<ssedoublemode>
17024 (mult:<ssedoublemode>
17025 (sign_extend:<ssedoublemode>
17026 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
17027 (sign_extend:<ssedoublemode>
17028 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
17034 operands[3] = CONST1_RTX(<MODE>mode);
17035 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
17038 (define_expand "smulhrs<mode>3"
17039 [(set (match_operand:VI2_AVX2 0 "register_operand")
17041 (lshiftrt:<ssedoublemode>
17042 (plus:<ssedoublemode>
17043 (lshiftrt:<ssedoublemode>
17044 (mult:<ssedoublemode>
17045 (sign_extend:<ssedoublemode>
17046 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
17047 (sign_extend:<ssedoublemode>
17048 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
17054 operands[3] = CONST1_RTX(<MODE>mode);
17055 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
17058 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
17059 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
17061 (lshiftrt:<ssedoublemode>
17062 (plus:<ssedoublemode>
17063 (lshiftrt:<ssedoublemode>
17064 (mult:<ssedoublemode>
17065 (sign_extend:<ssedoublemode>
17066 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
17067 (sign_extend:<ssedoublemode>
17068 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
17070 (match_operand:VI2_AVX2 3 "const1_operand"))
17072 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
17073 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17075 pmulhrsw\t{%2, %0|%0, %2}
17076 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
17077 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17078 [(set_attr "isa" "noavx,avx,avx512bw")
17079 (set_attr "type" "sseimul")
17080 (set_attr "prefix_data16" "1,*,*")
17081 (set_attr "prefix_extra" "1")
17082 (set_attr "prefix" "orig,maybe_evex,evex")
17083 (set_attr "mode" "<sseinsnmode>")])
17085 (define_expand "smulhrsv4hi3"
17086 [(set (match_operand:V4HI 0 "register_operand")
17093 (match_operand:V4HI 1 "register_operand"))
17095 (match_operand:V4HI 2 "register_operand")))
17099 "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
17101 operands[3] = CONST1_RTX(V4HImode);
17102 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
17105 (define_expand "ssse3_pmulhrswv4hi3"
17106 [(set (match_operand:V4HI 0 "register_operand")
17113 (match_operand:V4HI 1 "register_mmxmem_operand"))
17115 (match_operand:V4HI 2 "register_mmxmem_operand")))
17119 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17121 operands[3] = CONST1_RTX(V4HImode);
17122 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
17125 (define_insn "*ssse3_pmulhrswv4hi3"
17126 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
17133 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
17135 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
17137 (match_operand:V4HI 3 "const1_operand"))
17139 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
17141 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17143 pmulhrsw\t{%2, %0|%0, %2}
17144 pmulhrsw\t{%2, %0|%0, %2}
17145 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
17146 [(set_attr "isa" "*,noavx,avx")
17147 (set_attr "mmx_isa" "native,*,*")
17148 (set_attr "type" "sseimul")
17149 (set_attr "prefix_extra" "1")
17150 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17151 (set_attr "mode" "DI,TI,TI")])
17153 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
17154 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
17156 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
17157 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
17159 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17161 pshufb\t{%2, %0|%0, %2}
17162 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
17163 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17164 [(set_attr "isa" "noavx,avx,avx512bw")
17165 (set_attr "type" "sselog1")
17166 (set_attr "prefix_data16" "1,*,*")
17167 (set_attr "prefix_extra" "1")
17168 (set_attr "prefix" "orig,maybe_evex,evex")
17169 (set_attr "btver2_decode" "vector")
17170 (set_attr "mode" "<sseinsnmode>")])
17172 (define_insn_and_split "ssse3_pshufbv8qi3"
17173 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
17174 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
17175 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
17177 (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
17178 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17180 pshufb\t{%2, %0|%0, %2}
17183 "TARGET_SSSE3 && reload_completed
17184 && SSE_REGNO_P (REGNO (operands[0]))"
17185 [(set (match_dup 3) (match_dup 5))
17187 (and:V4SI (match_dup 3) (match_dup 2)))
17189 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
17191 /* Emulate MMX version of pshufb with SSE version by masking out the
17192 bit 3 of the shuffle control byte. */
17193 operands[0] = lowpart_subreg (V16QImode, operands[0],
17194 GET_MODE (operands[0]));
17195 operands[1] = lowpart_subreg (V16QImode, operands[1],
17196 GET_MODE (operands[1]));
17197 operands[2] = lowpart_subreg (V4SImode, operands[2],
17198 GET_MODE (operands[2]));
17199 operands[4] = lowpart_subreg (V16QImode, operands[3],
17200 GET_MODE (operands[3]));
17201 rtx vec_const = ix86_build_const_vector (V4SImode, true,
17202 gen_int_mode (0xf7f7f7f7, SImode));
17203 operands[5] = force_const_mem (V4SImode, vec_const);
17205 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17206 (set_attr "prefix_extra" "1")
17207 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17208 (set_attr "mode" "DI,TI,TI")])
17210 (define_insn "<ssse3_avx2>_psign<mode>3"
17211 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
17213 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
17214 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
17218 psign<ssemodesuffix>\t{%2, %0|%0, %2}
17219 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17220 [(set_attr "isa" "noavx,avx")
17221 (set_attr "type" "sselog1")
17222 (set_attr "prefix_data16" "1,*")
17223 (set_attr "prefix_extra" "1")
17224 (set_attr "prefix" "orig,vex")
17225 (set_attr "mode" "<sseinsnmode>")])
17227 (define_insn "ssse3_psign<mode>3"
17228 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
17230 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
17231 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
17233 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17235 psign<mmxvecsize>\t{%2, %0|%0, %2}
17236 psign<mmxvecsize>\t{%2, %0|%0, %2}
17237 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
17238 [(set_attr "isa" "*,noavx,avx")
17239 (set_attr "mmx_isa" "native,*,*")
17240 (set_attr "type" "sselog1")
17241 (set_attr "prefix_extra" "1")
17242 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17243 (set_attr "mode" "DI,TI,TI")])
17245 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
17246 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
17247 (vec_merge:VI1_AVX512
17249 [(match_operand:VI1_AVX512 1 "register_operand" "v")
17250 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
17251 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
17253 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
17254 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
17255 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
17257 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17258 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
17260 [(set_attr "type" "sseishft")
17261 (set_attr "atom_unit" "sishuf")
17262 (set_attr "prefix_extra" "1")
17263 (set_attr "length_immediate" "1")
17264 (set_attr "prefix" "evex")
17265 (set_attr "mode" "<sseinsnmode>")])
17267 (define_insn "<ssse3_avx2>_palignr<mode>"
17268 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
17269 (unspec:SSESCALARMODE
17270 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
17271 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
17272 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
17276 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17278 switch (which_alternative)
17281 return "palignr\t{%3, %2, %0|%0, %2, %3}";
17284 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17286 gcc_unreachable ();
17289 [(set_attr "isa" "noavx,avx,avx512bw")
17290 (set_attr "type" "sseishft")
17291 (set_attr "atom_unit" "sishuf")
17292 (set_attr "prefix_data16" "1,*,*")
17293 (set_attr "prefix_extra" "1")
17294 (set_attr "length_immediate" "1")
17295 (set_attr "prefix" "orig,vex,evex")
17296 (set_attr "mode" "<sseinsnmode>")])
17298 (define_insn_and_split "ssse3_palignrdi"
17299 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
17300 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
17301 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
17302 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
17304 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17306 switch (which_alternative)
17309 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17310 return "palignr\t{%3, %2, %0|%0, %2, %3}";
17315 gcc_unreachable ();
17318 "TARGET_SSSE3 && reload_completed
17319 && SSE_REGNO_P (REGNO (operands[0]))"
17320 [(set (match_dup 0)
17321 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
17323 /* Emulate MMX palignrdi with SSE psrldq. */
17324 rtx op0 = lowpart_subreg (V2DImode, operands[0],
17325 GET_MODE (operands[0]));
17327 emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
17330 /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
17331 emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
17332 /* Swap bits 0:63 with bits 64:127. */
17333 rtx mask = gen_rtx_PARALLEL (VOIDmode,
17334 gen_rtvec (4, GEN_INT (2),
17338 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
17339 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
17340 emit_insn (gen_rtx_SET (op1, op2));
17342 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
17344 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17345 (set_attr "type" "sseishft")
17346 (set_attr "atom_unit" "sishuf")
17347 (set_attr "prefix_extra" "1")
17348 (set_attr "length_immediate" "1")
17349 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17350 (set_attr "mode" "DI,TI,TI")])
17352 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
17353 ;; modes for abs instruction on pre AVX-512 targets.
;; Integer vector modes for which a packed-abs instruction exists.  Each mode
;; is gated on the ISA that introduced it: 512-bit byte/word forms need
;; AVX512BW, 512-bit dword/qword forms need AVX512F, 256-bit forms need AVX2,
;; and the V2DI/V4DI (qword) forms only exist with AVX512VL since pre-AVX512
;; targets have no vpabsq.
17354 (define_mode_iterator VI1248_AVX512VL_AVX512BW
17355 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
17356 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
17357 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
17358 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
17360 (define_insn "*abs<mode>2"
17361 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
17362 (abs:VI1248_AVX512VL_AVX512BW
17363 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
17365 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
17366 [(set_attr "type" "sselog1")
17367 (set_attr "prefix_data16" "1")
17368 (set_attr "prefix_extra" "1")
17369 (set_attr "prefix" "maybe_vex")
17370 (set_attr "mode" "<sseinsnmode>")])
17372 (define_insn "abs<mode>2_mask"
17373 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17374 (vec_merge:VI48_AVX512VL
17376 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
17377 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
17378 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
17380 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17381 [(set_attr "type" "sselog1")
17382 (set_attr "prefix" "evex")
17383 (set_attr "mode" "<sseinsnmode>")])
17385 (define_insn "abs<mode>2_mask"
17386 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17387 (vec_merge:VI12_AVX512VL
17389 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
17390 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
17391 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
17393 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17394 [(set_attr "type" "sselog1")
17395 (set_attr "prefix" "evex")
17396 (set_attr "mode" "<sseinsnmode>")])
17398 (define_expand "abs<mode>2"
17399 [(set (match_operand:VI_AVX2 0 "register_operand")
17401 (match_operand:VI_AVX2 1 "vector_operand")))]
17405 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
17406 && !TARGET_AVX512VL))
17408 ix86_expand_sse2_abs (operands[0], operands[1]);
17413 (define_insn "ssse3_abs<mode>2"
17414 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
17416 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
17417 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17419 pabs<mmxvecsize>\t{%1, %0|%0, %1}
17420 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
17421 [(set_attr "mmx_isa" "native,*")
17422 (set_attr "type" "sselog1")
17423 (set_attr "prefix_rep" "0")
17424 (set_attr "prefix_extra" "1")
17425 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17426 (set_attr "mode" "DI,TI")])
17428 (define_insn "abs<mode>2"
17429 [(set (match_operand:MMXMODEI 0 "register_operand")
17431 (match_operand:MMXMODEI 1 "register_operand")))]
17432 "TARGET_MMX_WITH_SSE && TARGET_SSSE3")
17434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17436 ;; AMD SSE4A instructions
17438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17440 (define_insn "sse4a_movnt<mode>"
17441 [(set (match_operand:MODEF 0 "memory_operand" "=m")
17443 [(match_operand:MODEF 1 "register_operand" "x")]
17446 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
17447 [(set_attr "type" "ssemov")
17448 (set_attr "mode" "<MODE>")])
17450 (define_insn "sse4a_vmmovnt<mode>"
17451 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
17452 (unspec:<ssescalarmode>
17453 [(vec_select:<ssescalarmode>
17454 (match_operand:VF_128 1 "register_operand" "x")
17455 (parallel [(const_int 0)]))]
17458 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
17459 [(set_attr "type" "ssemov")
17460 (set_attr "mode" "<ssescalarmode>")])
17462 (define_insn "sse4a_extrqi"
17463 [(set (match_operand:V2DI 0 "register_operand" "=x")
17464 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17465 (match_operand 2 "const_0_to_255_operand")
17466 (match_operand 3 "const_0_to_255_operand")]
17469 "extrq\t{%3, %2, %0|%0, %2, %3}"
17470 [(set_attr "type" "sse")
17471 (set_attr "prefix_data16" "1")
17472 (set_attr "length_immediate" "2")
17473 (set_attr "mode" "TI")])
17475 (define_insn "sse4a_extrq"
17476 [(set (match_operand:V2DI 0 "register_operand" "=x")
17477 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17478 (match_operand:V16QI 2 "register_operand" "x")]
17481 "extrq\t{%2, %0|%0, %2}"
17482 [(set_attr "type" "sse")
17483 (set_attr "prefix_data16" "1")
17484 (set_attr "mode" "TI")])
17486 (define_insn "sse4a_insertqi"
17487 [(set (match_operand:V2DI 0 "register_operand" "=x")
17488 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17489 (match_operand:V2DI 2 "register_operand" "x")
17490 (match_operand 3 "const_0_to_255_operand")
17491 (match_operand 4 "const_0_to_255_operand")]
17494 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
17495 [(set_attr "type" "sseins")
17496 (set_attr "prefix_data16" "0")
17497 (set_attr "prefix_rep" "1")
17498 (set_attr "length_immediate" "2")
17499 (set_attr "mode" "TI")])
17501 (define_insn "sse4a_insertq"
17502 [(set (match_operand:V2DI 0 "register_operand" "=x")
17503 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17504 (match_operand:V2DI 2 "register_operand" "x")]
17507 "insertq\t{%2, %0|%0, %2}"
17508 [(set_attr "type" "sseins")
17509 (set_attr "prefix_data16" "0")
17510 (set_attr "prefix_rep" "1")
17511 (set_attr "mode" "TI")])
17513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17515 ;; Intel SSE4.1 instructions
17517 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17519 ;; Mapping of immediate bits for blend instructions
;; Largest valid blend immediate per mode: one select bit per vector element,
;; so 8 elements -> 255 (0xff), 4 elements -> 15 (0xf), 2 elements -> 3 (0x3).
17520 (define_mode_attr blendbits
17521 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
17523 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
17524 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17525 (vec_merge:VF_128_256
17526 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17527 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
17528 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
17531 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17532 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17533 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17534 [(set_attr "isa" "noavx,noavx,avx")
17535 (set_attr "type" "ssemov")
17536 (set_attr "length_immediate" "1")
17537 (set_attr "prefix_data16" "1,1,*")
17538 (set_attr "prefix_extra" "1")
17539 (set_attr "prefix" "orig,orig,vex")
17540 (set_attr "mode" "<MODE>")])
17542 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
17543 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17545 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17546 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17547 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
17551 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17552 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17553 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17554 [(set_attr "isa" "noavx,noavx,avx")
17555 (set_attr "type" "ssemov")
17556 (set_attr "length_immediate" "1")
17557 (set_attr "prefix_data16" "1,1,*")
17558 (set_attr "prefix_extra" "1")
17559 (set_attr "prefix" "orig,orig,vex")
17560 (set_attr "btver2_decode" "vector,vector,vector")
17561 (set_attr "mode" "<MODE>")])
17563 ;; Also define scalar versions. These are used for conditional move.
17564 ;; Using subregs into vector modes causes register allocation lossage.
17565 ;; These patterns do not allow memory operands because the native
17566 ;; instructions read the full 128-bits.
;; Scalar (SF/DF) blendv: emits the packed-single form when the chosen
;; attribute mode is V4SF, otherwise the mode-suffixed packed form.
;; NOTE(review): some interior lines missing (17570, 17574-17576, 17581,
;; 17585, 17593, 17600 absent from the extract); code kept byte-identical.
17568 (define_insn "sse4_1_blendv<ssemodesuffix>"
17569 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
17571 [(match_operand:MODEF 1 "register_operand" "0,0,x")
17572 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
17573 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
17577 if (get_attr_mode (insn) == MODE_V4SF)
17578 return (which_alternative == 2
17579 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17580 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
17582 return (which_alternative == 2
17583 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17584 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
17586 [(set_attr "isa" "noavx,noavx,avx")
17587 (set_attr "type" "ssemov")
17588 (set_attr "length_immediate" "1")
17589 (set_attr "prefix_data16" "1,1,*")
17590 (set_attr "prefix_extra" "1")
17591 (set_attr "prefix" "orig,orig,vex")
17592 (set_attr "btver2_decode" "vector,vector,vector")
;; Mode attribute: prefer the V4SF (ps) encoding when optimizing for size
;; or when packed-single is the faster encoding on the target.
17594 (cond [(match_test "TARGET_AVX")
17595 (const_string "<ssevecmode>")
17596 (match_test "optimize_function_for_size_p (cfun)")
17597 (const_string "V4SF")
17598 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
17599 (const_string "V4SF")
17601 (const_string "<ssevecmode>")))])
;; Combiner pattern: a blendv whose mask is a "(lt x 0)" comparison against
;; zero is rewritten after reload into a plain UNSPEC_BLENDV, since blendv
;; only inspects the sign bit anyway (operand 3 re-viewed in <MODE>mode).
;; NOTE(review): extraction dropped some interior lines (17605, 17608,
;; 17611-17613, 17616); code kept byte-identical.
17603 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
17604 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17606 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17607 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17609 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
17610 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
17614 "&& reload_completed"
17615 [(set (match_dup 0)
17617 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17618 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
17619 [(set_attr "isa" "noavx,noavx,avx")
17620 (set_attr "type" "ssemov")
17621 (set_attr "length_immediate" "1")
17622 (set_attr "prefix_data16" "1,1,*")
17623 (set_attr "prefix_extra" "1")
17624 (set_attr "prefix" "orig,orig,vex")
17625 (set_attr "btver2_decode" "vector,vector,vector")
17626 (set_attr "mode" "<MODE>")])
;; Map integer vector modes to the float blendv suffix / float vector mode
;; of the same element width (DI->pd/DF, SI->ps/SF).
17628 (define_mode_attr ssefltmodesuffix
17629 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
17631 (define_mode_attr ssefltvecmode
17632 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer-masked variant of the _lt combiner pattern above: the byte-mode
;; blend with a sign-compare mask is re-expressed after reload as a float
;; UNSPEC_BLENDV, with all operands re-viewed in <ssefltvecmode>.
;; NOTE(review): interior lines 17640, 17643-17645, 17650, 17655 missing
;; from the extract; code kept byte-identical.
17634 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
17635 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
17636 (unspec:<ssebytemode>
17637 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
17638 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
17639 (subreg:<ssebytemode>
17641 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
17642 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
17646 "&& reload_completed"
17647 [(set (match_dup 0)
17648 (unspec:<ssefltvecmode>
17649 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17651 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
17652 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
17653 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
17654 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
17656 [(set_attr "isa" "noavx,noavx,avx")
17657 (set_attr "type" "ssemov")
17658 (set_attr "length_immediate" "1")
17659 (set_attr "prefix_data16" "1,1,*")
17660 (set_attr "prefix_extra" "1")
17661 (set_attr "prefix" "orig,orig,vex")
17662 (set_attr "btver2_decode" "vector,vector,vector")
17663 (set_attr "mode" "<ssefltvecmode>")])
;; SSE4.1 DPPS/DPPD dot-product; operand 3 is the 8-bit immediate selecting
;; input lanes and broadcast mask. Operand 1 is commutative ("%0").
17665 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
17666 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17668 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
17669 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17670 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17674 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17675 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17676 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17677 [(set_attr "isa" "noavx,noavx,avx")
17678 (set_attr "type" "ssemul")
17679 (set_attr "length_immediate" "1")
17680 (set_attr "prefix_data16" "1,1,*")
17681 (set_attr "prefix_extra" "1")
17682 (set_attr "prefix" "orig,orig,vex")
17683 (set_attr "btver2_decode" "vector,vector,vector")
17684 (set_attr "znver1_decode" "vector,vector,vector")
17685 (set_attr "mode" "<MODE>")])
17687 ;; Mode attribute used by `vmovntdqa' pattern
17688 (define_mode_attr vi8_sse4_1_avx2_avx512
17689 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; Non-temporal aligned load (MOVNTDQA) for 128/256/512-bit integer vectors.
17691 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
17692 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
17693 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
17696 "%vmovntdqa\t{%1, %0|%0, %1}"
17697 [(set_attr "isa" "noavx,noavx,avx")
17698 (set_attr "type" "ssemov")
17699 (set_attr "prefix_extra" "1,1,*")
17700 (set_attr "prefix" "orig,orig,maybe_evex")
17701 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1/AVX2 MPSADBW multiple sum-of-absolute-differences; operand 3 is
;; the 8-bit immediate selecting the source sub-blocks.
17703 (define_insn "<sse4_1_avx2>_mpsadbw"
17704 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17706 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17707 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17708 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17712 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17713 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17714 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17715 [(set_attr "isa" "noavx,noavx,avx")
17716 (set_attr "type" "sselog1")
17717 (set_attr "length_immediate" "1")
17718 (set_attr "prefix_extra" "1")
17719 (set_attr "prefix" "orig,orig,vex")
17720 (set_attr "btver2_decode" "vector,vector,vector")
17721 (set_attr "znver1_decode" "vector,vector,vector")
17722 (set_attr "mode" "<sseinsnmode>")])
;; PACKUSDW: unsigned-saturating pack of two dword vectors into one word
;; vector (concat of two us_truncate halves); masked under AVX512.
17724 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
17725 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
17726 (vec_concat:VI2_AVX2
17727 (us_truncate:<ssehalfvecmode>
17728 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
17729 (us_truncate:<ssehalfvecmode>
17730 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
17731 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17733 packusdw\t{%2, %0|%0, %2}
17734 packusdw\t{%2, %0|%0, %2}
17735 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
17736 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17737 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
17738 (set_attr "type" "sselog")
17739 (set_attr "prefix_extra" "1")
17740 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
17741 (set_attr "mode" "<sseinsnmode>")])
;; PBLENDVB: byte-wise variable blend; operand 3's byte sign bits select
;; between operands 1 and 2 ("Yz" = implicit XMM0 in the legacy encoding).
17743 (define_insn "<sse4_1_avx2>_pblendvb"
17744 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17746 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17747 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17748 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
17752 pblendvb\t{%3, %2, %0|%0, %2, %3}
17753 pblendvb\t{%3, %2, %0|%0, %2, %3}
17754 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17755 [(set_attr "isa" "noavx,noavx,avx")
17756 (set_attr "type" "ssemov")
17757 (set_attr "prefix_extra" "1")
17758 (set_attr "length_immediate" "*,*,1")
17759 (set_attr "prefix" "orig,orig,vex")
17760 (set_attr "btver2_decode" "vector,vector,vector")
17761 (set_attr "mode" "<sseinsnmode>")])
;; Combiner pattern: pblendvb whose mask is "(lt x 0)" collapses to the
;; plain UNSPEC_BLENDV form, since only the sign bit of each byte matters.
;; NOTE(review): interior lines missing (17745, 17749-17751, 17765,
;; 17770-17773, 17775, 17777); code kept byte-identical.
17763 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
17764 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17766 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17767 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17768 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
17769 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
17774 [(set (match_dup 0)
17776 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17778 [(set_attr "isa" "noavx,noavx,avx")
17779 (set_attr "type" "ssemov")
17780 (set_attr "prefix_extra" "1")
17781 (set_attr "length_immediate" "*,*,1")
17782 (set_attr "prefix" "orig,orig,vex")
17783 (set_attr "btver2_decode" "vector,vector,vector")
17784 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 PBLENDW: word-wise immediate blend; bit i of operand 3 selects
;; word i from operand 2 (else from operand 1).
17786 (define_insn "sse4_1_pblendw"
17787 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17789 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
17790 (match_operand:V8HI 1 "register_operand" "0,0,x")
17791 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
17794 pblendw\t{%3, %2, %0|%0, %2, %3}
17795 pblendw\t{%3, %2, %0|%0, %2, %3}
17796 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17797 [(set_attr "isa" "noavx,noavx,avx")
17798 (set_attr "type" "ssemov")
17799 (set_attr "prefix_extra" "1")
17800 (set_attr "length_immediate" "1")
17801 (set_attr "prefix" "orig,orig,vex")
17802 (set_attr "mode" "TI")])
17804 ;; The builtin uses an 8-bit immediate.  Expand that.
;; VPBLENDW on 256 bits applies the same 8-bit selector to both 128-bit
;; lanes, so the expander duplicates the byte into a 16-bit mask.
17805 (define_expand "avx2_pblendw"
17806 [(set (match_operand:V16HI 0 "register_operand")
17808 (match_operand:V16HI 2 "nonimmediate_operand")
17809 (match_operand:V16HI 1 "register_operand")
17810 (match_operand:SI 3 "const_0_to_255_operand")))]
17813 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
17814 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn: re-truncates the duplicated mask back to 8 bits for
;; the assembly template.
17817 (define_insn "*avx2_pblendw"
17818 [(set (match_operand:V16HI 0 "register_operand" "=x")
17820 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
17821 (match_operand:V16HI 1 "register_operand" "x")
17822 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
17825 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
17826 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17828 [(set_attr "type" "ssemov")
17829 (set_attr "prefix_extra" "1")
17830 (set_attr "length_immediate" "1")
17831 (set_attr "prefix" "vex")
17832 (set_attr "mode" "OI")])
;; AVX2 VPBLENDD: dword-wise immediate blend (one mask bit per dword).
17834 (define_insn "avx2_pblendd<mode>"
17835 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
17836 (vec_merge:VI4_AVX2
17837 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
17838 (match_operand:VI4_AVX2 1 "register_operand" "x")
17839 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
17841 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17842 [(set_attr "type" "ssemov")
17843 (set_attr "prefix_extra" "1")
17844 (set_attr "length_immediate" "1")
17845 (set_attr "prefix" "vex")
17846 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 PHMINPOSUW: horizontal minimum + index of unsigned words.
17848 (define_insn "sse4_1_phminposuw"
17849 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17850 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
17851 UNSPEC_PHMINPOSUW))]
17853 "%vphminposuw\t{%1, %0|%0, %1}"
17854 [(set_attr "isa" "noavx,noavx,avx")
17855 (set_attr "type" "sselog1")
17856 (set_attr "prefix_extra" "1")
17857 (set_attr "prefix" "orig,orig,vex")
17858 (set_attr "mode" "TI")])
;; VPMOVSXBW/VPMOVZXBW, 128->256 bit: sign/zero-extend 16 bytes to 16 words.
17860 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
17861 [(set (match_operand:V16HI 0 "register_operand" "=v")
17863 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17864 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17865 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17866 [(set_attr "type" "ssemov")
17867 (set_attr "prefix_extra" "1")
17868 (set_attr "prefix" "maybe_evex")
17869 (set_attr "mode" "OI")])
;; Recognizes a shuffle that interleaves with zero (pmovzx_parallel) and
;; turns it into a real zero_extend after reload, via lowpart subregs.
17871 (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1"
17872 [(set (match_operand:V32QI 0 "register_operand" "=v")
17875 (match_operand:V32QI 1 "nonimmediate_operand" "vm")
17876 (match_operand:V32QI 2 "const0_operand" "C"))
17877 (match_parallel 3 "pmovzx_parallel"
17878 [(match_operand 4 "const_int_operand" "n")])))]
17881 "&& reload_completed"
17882 [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
17884 operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
17885 operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode);
;; Standard-name expander; condition line not visible in this extract.
17888 (define_expand "<insn>v16qiv16hi2"
17889 [(set (match_operand:V16HI 0 "register_operand")
17891 (match_operand:V16QI 1 "nonimmediate_operand")))]
;; 256->512 bit byte-to-word extension (AVX512BW), optionally masked.
17894 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
17895 [(set (match_operand:V32HI 0 "register_operand" "=v")
17897 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
17899 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17900 [(set_attr "type" "ssemov")
17901 (set_attr "prefix_extra" "1")
17902 (set_attr "prefix" "evex")
17903 (set_attr "mode" "XI")])
;; Same shuffle-with-zero -> zero_extend rewrite for the 512-bit case.
17905 (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_1"
17906 [(set (match_operand:V64QI 0 "register_operand" "=v")
17909 (match_operand:V64QI 1 "nonimmediate_operand" "vm")
17910 (match_operand:V64QI 2 "const0_operand" "C"))
17911 (match_parallel 3 "pmovzx_parallel"
17912 [(match_operand 4 "const_int_operand" "n")])))]
17915 "&& reload_completed"
17916 [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
17918 operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
17919 operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode);
;; Standard-name expander; condition line not visible in this extract.
17922 (define_expand "<insn>v32qiv32hi2"
17923 [(set (match_operand:V32HI 0 "register_operand")
17925 (match_operand:V32QI 1 "nonimmediate_operand")))]
;; PMOVSXBW/PMOVZXBW, 128-bit: extend the low 8 bytes of a V16QI register
;; (vec_select of lanes 0..7) to 8 words.
17928 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
17929 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17932 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17933 (parallel [(const_int 0) (const_int 1)
17934 (const_int 2) (const_int 3)
17935 (const_int 4) (const_int 5)
17936 (const_int 6) (const_int 7)]))))]
17937 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17938 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17939 [(set_attr "isa" "noavx,noavx,avx")
17940 (set_attr "type" "ssemov")
17941 (set_attr "prefix_extra" "1")
17942 (set_attr "prefix" "orig,orig,maybe_evex")
17943 (set_attr "mode" "TI")])
;; Memory-source form: extend 8 bytes loaded directly from memory.
17945 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
17946 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17948 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
17949 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17950 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17951 [(set_attr "isa" "noavx,noavx,avx")
17952 (set_attr "type" "ssemov")
17953 (set_attr "prefix_extra" "1")
17954 (set_attr "prefix" "orig,orig,maybe_evex")
17955 (set_attr "mode" "TI")])
;; Pre-reload split: a DI memory load wrapped in vec_select is narrowed to
;; a direct V8QI memory extension (adjust_address_nv re-addresses it).
17957 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
17958 [(set (match_operand:V8HI 0 "register_operand")
17963 (match_operand:DI 1 "memory_operand")
17965 (parallel [(const_int 0) (const_int 1)
17966 (const_int 2) (const_int 3)
17967 (const_int 4) (const_int 5)
17968 (const_int 6) (const_int 7)]))))]
17969 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
17970 && ix86_pre_reload_split ()"
17973 [(set (match_dup 0)
17974 (any_extend:V8HI (match_dup 1)))]
17975 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);"
;; Shuffle-with-zero recognized as PMOVZXBW; for register sources the
;; split re-emits the vec_select form, for memory a plain zero_extend.
17977 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
17978 [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,v")
17981 (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,vm")
17982 (match_operand:V16QI 2 "const0_operand" "C,C,C"))
17983 (match_parallel 3 "pmovzx_parallel"
17984 [(match_operand 4 "const_int_operand" "n,n,n")])))]
17987 "&& reload_completed"
17988 [(set (match_dup 0)
17992 (parallel [(const_int 0) (const_int 1)
17993 (const_int 2) (const_int 3)
17994 (const_int 4) (const_int 5)
17995 (const_int 6) (const_int 7)]))))]
17997 operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
17998 if (MEM_P (operands[1]))
18000 operands[1] = lowpart_subreg (V8QImode, operands[1], V16QImode);
18001 operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
18002 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18006 [(set_attr "isa" "noavx,noavx,avx")])
;; Standard-name expander: register sources are widened to V16QI so the
;; vec_select pattern above can match.
18008 (define_expand "<insn>v8qiv8hi2"
18009 [(set (match_operand:V8HI 0 "register_operand")
18011 (match_operand:V8QI 1 "nonimmediate_operand")))]
18014 if (!MEM_P (operands[1]))
18016 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18017 emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], operands[1]));
;; VPMOVSXBD/VPMOVZXBD, 128->512: extend 16 bytes to 16 dwords (AVX512F).
18022 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
18023 [(set (match_operand:V16SI 0 "register_operand" "=v")
18025 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
18027 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18028 [(set_attr "type" "ssemov")
18029 (set_attr "prefix" "evex")
18030 (set_attr "mode" "XI")])
;; Standard-name expander; condition line not visible in this extract.
18032 (define_expand "<insn>v16qiv16si2"
18033 [(set (match_operand:V16SI 0 "register_operand")
18035 (match_operand:V16QI 1 "nonimmediate_operand")))]
;; AVX2: extend the low 8 bytes of a V16QI register to 8 dwords.
18038 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
18039 [(set (match_operand:V8SI 0 "register_operand" "=v")
18042 (match_operand:V16QI 1 "register_operand" "v")
18043 (parallel [(const_int 0) (const_int 1)
18044 (const_int 2) (const_int 3)
18045 (const_int 4) (const_int 5)
18046 (const_int 6) (const_int 7)]))))]
18047 "TARGET_AVX2 && <mask_avx512vl_condition>"
18048 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18049 [(set_attr "type" "ssemov")
18050 (set_attr "prefix_extra" "1")
18051 (set_attr "prefix" "maybe_evex")
18052 (set_attr "mode" "OI")])
;; Memory-source form.
18054 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
18055 [(set (match_operand:V8SI 0 "register_operand" "=v")
18057 (match_operand:V8QI 1 "memory_operand" "m")))]
18058 "TARGET_AVX2 && <mask_avx512vl_condition>"
18059 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18060 [(set_attr "type" "ssemov")
18061 (set_attr "prefix_extra" "1")
18062 (set_attr "prefix" "maybe_evex")
18063 (set_attr "mode" "OI")])
;; Pre-reload split: DI memory load + vec_select narrowed to a direct
;; V8QI memory extension.
18065 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
18066 [(set (match_operand:V8SI 0 "register_operand")
18071 (match_operand:DI 1 "memory_operand")
18073 (parallel [(const_int 0) (const_int 1)
18074 (const_int 2) (const_int 3)
18075 (const_int 4) (const_int 5)
18076 (const_int 6) (const_int 7)]))))]
18077 "TARGET_AVX2 && <mask_avx512vl_condition>
18078 && ix86_pre_reload_split ()"
18081 [(set (match_dup 0)
18082 (any_extend:V8SI (match_dup 1)))]
18083 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);"
;; Standard-name expander: widen register sources to V16QI for the
;; vec_select pattern.
18085 (define_expand "<insn>v8qiv8si2"
18086 [(set (match_operand:V8SI 0 "register_operand")
18088 (match_operand:V8QI 1 "nonimmediate_operand")))]
18091 if (!MEM_P (operands[1]))
18093 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18094 emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], operands[1]));
;; PMOVSXBD/PMOVZXBD, 128-bit: extend the low 4 bytes of a V16QI register
;; to 4 dwords.
18099 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
18100 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18103 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
18104 (parallel [(const_int 0) (const_int 1)
18105 (const_int 2) (const_int 3)]))))]
18106 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18107 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18108 [(set_attr "isa" "noavx,noavx,avx")
18109 (set_attr "type" "ssemov")
18110 (set_attr "prefix_extra" "1")
18111 (set_attr "prefix" "orig,orig,maybe_evex")
18112 (set_attr "mode" "TI")])
;; Memory-source form.
18114 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
18115 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18117 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
18118 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18119 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18120 [(set_attr "isa" "noavx,noavx,avx")
18121 (set_attr "type" "ssemov")
18122 (set_attr "prefix_extra" "1")
18123 (set_attr "prefix" "orig,orig,maybe_evex")
18124 (set_attr "mode" "TI")])
;; Pre-reload split: SI-memory broadcast (vec_duplicate) + vec_select is
;; narrowed to a direct V4QI memory extension.
18126 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
18127 [(set (match_operand:V4SI 0 "register_operand")
18132 (vec_duplicate:V4SI
18133 (match_operand:SI 1 "memory_operand"))
18135 [(const_int 0) (const_int 0)
18136 (const_int 0) (const_int 0)])
18138 (parallel [(const_int 0) (const_int 1)
18139 (const_int 2) (const_int 3)]))))]
18140 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18141 && ix86_pre_reload_split ()"
18144 [(set (match_dup 0)
18145 (any_extend:V4SI (match_dup 1)))]
18146 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);"
;; Standard-name expander: widen register sources to V16QI for the
;; vec_select pattern.
18148 (define_expand "<insn>v4qiv4si2"
18149 [(set (match_operand:V4SI 0 "register_operand")
18151 (match_operand:V4QI 1 "nonimmediate_operand")))]
18154 if (!MEM_P (operands[1]))
18156 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
18157 emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], operands[1]));
;; VPMOVSXWD/VPMOVZXWD, 256->512: extend 16 words to 16 dwords (AVX512F).
18162 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
18163 [(set (match_operand:V16SI 0 "register_operand" "=v")
18165 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
18167 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18168 [(set_attr "type" "ssemov")
18169 (set_attr "prefix" "evex")
18170 (set_attr "mode" "XI")])
;; Standard-name expander; condition line not visible in this extract.
18172 (define_expand "<insn>v16hiv16si2"
18173 [(set (match_operand:V16SI 0 "register_operand")
18175 (match_operand:V16HI 1 "nonimmediate_operand")))]
;; Shuffle-with-zero recognized as VPMOVZXWD (512-bit), rewritten after
;; reload into a real zero_extend via lowpart subregs.
18178 (define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
18179 [(set (match_operand:V32HI 0 "register_operand" "=v")
18182 (match_operand:V32HI 1 "nonimmediate_operand" "vm")
18183 (match_operand:V32HI 2 "const0_operand" "C"))
18184 (match_parallel 3 "pmovzx_parallel"
18185 [(match_operand 4 "const_int_operand" "n")])))]
18188 "&& reload_completed"
18189 [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
18191 operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
18192 operands[1] = lowpart_subreg (V16HImode, operands[1], V32HImode);
;; AVX2: extend 8 words to 8 dwords.
18195 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
18196 [(set (match_operand:V8SI 0 "register_operand" "=v")
18198 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
18199 "TARGET_AVX2 && <mask_avx512vl_condition>"
18200 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18201 [(set_attr "type" "ssemov")
18202 (set_attr "prefix_extra" "1")
18203 (set_attr "prefix" "maybe_evex")
18204 (set_attr "mode" "OI")])
;; Standard-name expander; condition line not visible in this extract.
18206 (define_expand "<insn>v8hiv8si2"
18207 [(set (match_operand:V8SI 0 "register_operand")
18209 (match_operand:V8HI 1 "nonimmediate_operand")))]
;; Same shuffle-with-zero -> zero_extend rewrite for the 256-bit case.
18212 (define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
18213 [(set (match_operand:V16HI 0 "register_operand" "=v")
18216 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
18217 (match_operand:V16HI 2 "const0_operand" "C"))
18218 (match_parallel 3 "pmovzx_parallel"
18219 [(match_operand 4 "const_int_operand" "n")])))]
18222 "&& reload_completed"
18223 [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
18225 operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
18226 operands[1] = lowpart_subreg (V8HImode, operands[1], V16HImode);
;; PMOVSXWD/PMOVZXWD, 128-bit: extend the low 4 words of a V8HI register
;; to 4 dwords.
18229 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
18230 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18233 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
18234 (parallel [(const_int 0) (const_int 1)
18235 (const_int 2) (const_int 3)]))))]
18236 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18237 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18238 [(set_attr "isa" "noavx,noavx,avx")
18239 (set_attr "type" "ssemov")
18240 (set_attr "prefix_extra" "1")
18241 (set_attr "prefix" "orig,orig,maybe_evex")
18242 (set_attr "mode" "TI")])
;; Memory-source form.
18244 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
18245 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18247 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
18248 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18249 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18250 [(set_attr "isa" "noavx,noavx,avx")
18251 (set_attr "type" "ssemov")
18252 (set_attr "prefix_extra" "1")
18253 (set_attr "prefix" "orig,orig,maybe_evex")
18254 (set_attr "mode" "TI")])
;; Pre-reload split: DI memory load + vec_select narrowed to a direct
;; V4HI memory extension.
18256 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
18257 [(set (match_operand:V4SI 0 "register_operand")
18262 (match_operand:DI 1 "memory_operand")
18264 (parallel [(const_int 0) (const_int 1)
18265 (const_int 2) (const_int 3)]))))]
18266 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18267 && ix86_pre_reload_split ()"
18270 [(set (match_dup 0)
18271 (any_extend:V4SI (match_dup 1)))]
18272 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);"
;; Standard-name expander: widen register sources to V8HI for the
;; vec_select pattern.
18274 (define_expand "<insn>v4hiv4si2"
18275 [(set (match_operand:V4SI 0 "register_operand")
18277 (match_operand:V4HI 1 "nonimmediate_operand")))]
18280 if (!MEM_P (operands[1]))
18282 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
18283 emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], operands[1]));
;; Shuffle-with-zero recognized as PMOVZXWD; for register sources the
;; split re-emits the vec_select form, for memory a plain zero_extend.
18288 (define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_3"
18289 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
18292 (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
18293 (match_operand:V8HI 2 "const0_operand" "C,C,C"))
18294 (match_parallel 3 "pmovzx_parallel"
18295 [(match_operand 4 "const_int_operand" "n,n,n")])))]
18298 "&& reload_completed"
18299 [(set (match_dup 0)
18303 (parallel [(const_int 0) (const_int 1)
18304 (const_int 2) (const_int 3)]))))]
18306 operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
18307 if (MEM_P (operands[1]))
18309 operands[1] = lowpart_subreg (V4HImode, operands[1], V8HImode);
18310 operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
18311 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18315 [(set_attr "isa" "noavx,noavx,avx")])
;; VPMOVSXBQ/VPMOVZXBQ, 512-bit: extend the low 8 bytes of a V16QI register
;; to 8 qwords (AVX512F).
18317 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
18318 [(set (match_operand:V8DI 0 "register_operand" "=v")
18321 (match_operand:V16QI 1 "register_operand" "v")
18322 (parallel [(const_int 0) (const_int 1)
18323 (const_int 2) (const_int 3)
18324 (const_int 4) (const_int 5)
18325 (const_int 6) (const_int 7)]))))]
18327 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18328 [(set_attr "type" "ssemov")
18329 (set_attr "prefix" "evex")
18330 (set_attr "mode" "XI")])
;; Memory-source form.
18332 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
18333 [(set (match_operand:V8DI 0 "register_operand" "=v")
18335 (match_operand:V8QI 1 "memory_operand" "m")))]
18337 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18338 [(set_attr "type" "ssemov")
18339 (set_attr "prefix" "evex")
18340 (set_attr "mode" "XI")])
;; Pre-reload split: DI memory load + vec_select narrowed to a direct
;; V8QI memory extension.
18342 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
18343 [(set (match_operand:V8DI 0 "register_operand")
18348 (match_operand:DI 1 "memory_operand")
18350 (parallel [(const_int 0) (const_int 1)
18351 (const_int 2) (const_int 3)
18352 (const_int 4) (const_int 5)
18353 (const_int 6) (const_int 7)]))))]
18354 "TARGET_AVX512F && ix86_pre_reload_split ()"
18357 [(set (match_dup 0)
18358 (any_extend:V8DI (match_dup 1)))]
18359 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);"
;; Standard-name expander: widen register sources to V16QI for the
;; vec_select pattern (inner mode V8QI matches operand 1's mode).
18361 (define_expand "<insn>v8qiv8di2"
18362 [(set (match_operand:V8DI 0 "register_operand")
18364 (match_operand:V8QI 1 "nonimmediate_operand")))]
18367 if (!MEM_P (operands[1]))
18369 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18370 emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], operands[1]));
;; VPMOVSXBQ/VPMOVZXBQ, 256-bit: extend the low 4 bytes of a V16QI register
;; to 4 qwords.
18375 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
18376 [(set (match_operand:V4DI 0 "register_operand" "=v")
18379 (match_operand:V16QI 1 "register_operand" "v")
18380 (parallel [(const_int 0) (const_int 1)
18381 (const_int 2) (const_int 3)]))))]
18382 "TARGET_AVX2 && <mask_avx512vl_condition>"
18383 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18384 [(set_attr "type" "ssemov")
18385 (set_attr "prefix_extra" "1")
18386 (set_attr "prefix" "maybe_evex")
18387 (set_attr "mode" "OI")])
;; Memory-source form.
18389 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
18390 [(set (match_operand:V4DI 0 "register_operand" "=v")
18392 (match_operand:V4QI 1 "memory_operand" "m")))]
18393 "TARGET_AVX2 && <mask_avx512vl_condition>"
18394 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18395 [(set_attr "type" "ssemov")
18396 (set_attr "prefix_extra" "1")
18397 (set_attr "prefix" "maybe_evex")
18398 (set_attr "mode" "OI")])
;; Pre-reload split: SI-memory broadcast + vec_select narrowed to a direct
;; V4QI memory extension.
18400 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
18401 [(set (match_operand:V4DI 0 "register_operand")
18406 (vec_duplicate:V4SI
18407 (match_operand:SI 1 "memory_operand"))
18409 [(const_int 0) (const_int 0)
18410 (const_int 0) (const_int 0)])
18412 (parallel [(const_int 0) (const_int 1)
18413 (const_int 2) (const_int 3)]))))]
18414 "TARGET_AVX2 && <mask_avx512vl_condition>
18415 && ix86_pre_reload_split ()"
18418 [(set (match_dup 0)
18419 (any_extend:V4DI (match_dup 1)))]
18420 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);"
;; Standard-name expander for byte -> qword extension of 4 elements:
;; a non-memory V4QI source is re-viewed as the low part of a V16QI
;; register so the AVX2 vec_select pattern above can match.
;; Fixed: simplify_gen_subreg's inner mode must be the mode operand 1
;; actually has (V4QImode, per the pattern above) — V8QImode was a
;; mode mismatch, unlike the sibling expanders (<insn>v4qiv4si2 uses
;; V4QImode, <insn>v2qiv2di2 uses V2QImode, <insn>v8qiv8di2 uses
;; V8QImode for its genuinely-V8QI operand).
18422 (define_expand "<insn>v4qiv4di2"
18423 [(set (match_operand:V4DI 0 "register_operand")
18425 (match_operand:V4QI 1 "nonimmediate_operand")))]
18428 if (!MEM_P (operands[1]))
18430 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
18431 emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], operands[1]));
;; PMOVSXBQ/PMOVZXBQ, 128-bit: extend the low 2 bytes of a V16QI register
;; to 2 qwords.
18436 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
18437 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18440 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
18441 (parallel [(const_int 0) (const_int 1)]))))]
18442 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18443 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18444 [(set_attr "isa" "noavx,noavx,avx")
18445 (set_attr "type" "ssemov")
18446 (set_attr "prefix_extra" "1")
18447 (set_attr "prefix" "orig,orig,maybe_evex")
18448 (set_attr "mode" "TI")])
;; Standard-name expander: the V2QI source (register-only here) is
;; re-viewed as the low part of V16QI so the pattern above matches.
18450 (define_expand "<insn>v2qiv2di2"
18451 [(set (match_operand:V2DI 0 "register_operand")
18453 (match_operand:V2QI 1 "register_operand")))]
18456 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V2QImode, 0);
18457 emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], operands[1]));
;; VPMOVSXWQ/VPMOVZXWQ, 128->512: extend 8 words to 8 qwords (AVX512F).
18461 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
18462 [(set (match_operand:V8DI 0 "register_operand" "=v")
18464 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
18466 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18467 [(set_attr "type" "ssemov")
18468 (set_attr "prefix" "evex")
18469 (set_attr "mode" "XI")])
;; Standard-name expander; condition line not visible in this extract.
18471 (define_expand "<insn>v8hiv8di2"
18472 [(set (match_operand:V8DI 0 "register_operand")
18474 (match_operand:V8HI 1 "nonimmediate_operand")))]
;; VPMOVSXWQ/VPMOVZXWQ, 256-bit: extend the low 4 words of a V8HI register
;; to 4 qwords.
18477 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
18478 [(set (match_operand:V4DI 0 "register_operand" "=v")
18481 (match_operand:V8HI 1 "register_operand" "v")
18482 (parallel [(const_int 0) (const_int 1)
18483 (const_int 2) (const_int 3)]))))]
18484 "TARGET_AVX2 && <mask_avx512vl_condition>"
18485 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18486 [(set_attr "type" "ssemov")
18487 (set_attr "prefix_extra" "1")
18488 (set_attr "prefix" "maybe_evex")
18489 (set_attr "mode" "OI")])
;; Memory-source form.
18491 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
18492 [(set (match_operand:V4DI 0 "register_operand" "=v")
18494 (match_operand:V4HI 1 "memory_operand" "m")))]
18495 "TARGET_AVX2 && <mask_avx512vl_condition>"
18496 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18497 [(set_attr "type" "ssemov")
18498 (set_attr "prefix_extra" "1")
18499 (set_attr "prefix" "maybe_evex")
18500 (set_attr "mode" "OI")])
;; Pre-reload split: DI memory load + vec_select narrowed to a direct
;; V4HI memory extension.
18502 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
18503 [(set (match_operand:V4DI 0 "register_operand")
18508 (match_operand:DI 1 "memory_operand")
18510 (parallel [(const_int 0) (const_int 1)
18511 (const_int 2) (const_int 3)]))))]
18512 "TARGET_AVX2 && <mask_avx512vl_condition>
18513 && ix86_pre_reload_split ()"
18516 [(set (match_dup 0)
18517 (any_extend:V4DI (match_dup 1)))]
18518 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);"
;; Standard-name expander: widen register sources to V8HI for the
;; vec_select pattern.
18520 (define_expand "<insn>v4hiv4di2"
18521 [(set (match_operand:V4DI 0 "register_operand")
18523 (match_operand:V4HI 1 "nonimmediate_operand")))]
18526 if (!MEM_P (operands[1]))
18528 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
18529 emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], operands[1]));
18534 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
18535 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18538 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
18539 (parallel [(const_int 0) (const_int 1)]))))]
18540 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18541 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18542 [(set_attr "isa" "noavx,noavx,avx")
18543 (set_attr "type" "ssemov")
18544 (set_attr "prefix_extra" "1")
18545 (set_attr "prefix" "orig,orig,maybe_evex")
18546 (set_attr "mode" "TI")])
;; Same V2HI -> V2DI extension, but loading directly from memory.
18548 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
18549 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18551 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
18552 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18553 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18554 [(set_attr "isa" "noavx,noavx,avx")
18555 (set_attr "type" "ssemov")
18556 (set_attr "prefix_extra" "1")
18557 (set_attr "prefix" "orig,orig,maybe_evex")
18558 (set_attr "mode" "TI")])
;; Pre-reload splitter: an SI load broadcast via vec_duplicate and then
;; lane-selected is rewritten as a plain V2HI -> V2DI extension from
;; memory (operand 1 is re-viewed as V2HImode).
18560 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
18561 [(set (match_operand:V2DI 0 "register_operand")
18566 (vec_duplicate:V4SI
18567 (match_operand:SI 1 "memory_operand"))
18569 [(const_int 0) (const_int 0)
18570 (const_int 0) (const_int 0)])
18572 (parallel [(const_int 0) (const_int 1)]))))]
18573 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18574 && ix86_pre_reload_split ()"
18577 [(set (match_dup 0)
18578 (any_extend:V2DI (match_dup 1)))]
18579 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
;; Generic expander for the V2HI -> V2DI extension; register sources are
;; re-viewed as V8HImode for the SSE4.1 pattern above.
18581 (define_expand "<insn>v2hiv2di2"
18582 [(set (match_operand:V2DI 0 "register_operand")
18584 (match_operand:V2HI 1 "nonimmediate_operand")))]
18587 if (!MEM_P (operands[1]))
18589 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V2HImode, 0);
18590 emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], operands[1]));
;; V8SI -> V8DI element extension (vpmov{sx,zx}dq, AVX512F, EVEX).
18595 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
18596 [(set (match_operand:V8DI 0 "register_operand" "=v")
18598 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
18600 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18601 [(set_attr "type" "ssemov")
18602 (set_attr "prefix" "evex")
18603 (set_attr "mode" "XI")])
;; Recognize a V16SI interleave-with-zero shuffle (the parallel is
;; checked by the pmovzx_parallel predicate) and, after reload, rewrite
;; it as a V8SI -> V8DI zero_extend using low-part subregs.
18605 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_1"
18606 [(set (match_operand:V16SI 0 "register_operand" "=v")
18609 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
18610 (match_operand:V16SI 2 "const0_operand" "C"))
18611 (match_parallel 3 "pmovzx_parallel"
18612 [(match_operand 4 "const_int_operand" "n")])))]
18615 "&& reload_completed"
18616 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
18618 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
18619 operands[1] = lowpart_subreg (V8SImode, operands[1], V16SImode);
;; Standard-name expander for the V8SI -> V8DI extension; matches the
;; AVX512F insn pattern directly.
18622 (define_expand "<insn>v8siv8di2"
18623 [(set (match_operand:V8DI 0 "register_operand" "=v")
18625 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
;; V4SI -> V4DI element extension (vpmov{sx,zx}dq, AVX2; EVEX masking
;; available under AVX512VL).
18628 (define_insn "avx2_<code>v4siv4di2<mask_name>"
18629 [(set (match_operand:V4DI 0 "register_operand" "=v")
18631 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
18632 "TARGET_AVX2 && <mask_avx512vl_condition>"
18633 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18634 [(set_attr "type" "ssemov")
18635 (set_attr "prefix" "maybe_evex")
18636 (set_attr "prefix_extra" "1")
18637 (set_attr "mode" "OI")])
;; 256-bit analogue of the V16SI splitter above: a V8SI
;; interleave-with-zero shuffle becomes a V4SI -> V4DI zero_extend
;; after reload.
18639 (define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
18640 [(set (match_operand:V8SI 0 "register_operand" "=v")
18643 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
18644 (match_operand:V8SI 2 "const0_operand" "C"))
18645 (match_parallel 3 "pmovzx_parallel"
18646 [(match_operand 4 "const_int_operand" "n")])))]
18649 "&& reload_completed"
18650 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
18652 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
18653 operands[1] = lowpart_subreg (V4SImode, operands[1], V8SImode);
;; Standard-name expander for the V4SI -> V4DI extension.
18656 (define_expand "<insn>v4siv4di2"
18657 [(set (match_operand:V4DI 0 "register_operand" "=v")
18659 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
;; Extend the two low SI lanes of a V4SI register to V2DI
;; (%vpmov{sx,zx}dq, SSE4.1 and later; masked form under AVX512VL).
18662 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
18663 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18666 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
18667 (parallel [(const_int 0) (const_int 1)]))))]
18668 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18669 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18670 [(set_attr "isa" "noavx,noavx,avx")
18671 (set_attr "type" "ssemov")
18672 (set_attr "prefix_extra" "1")
18673 (set_attr "prefix" "orig,orig,maybe_evex")
18674 (set_attr "mode" "TI")])
;; Same V2SI -> V2DI extension, but loading directly from memory.
18676 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
18677 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18679 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
18680 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18681 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18682 [(set_attr "isa" "noavx,noavx,avx")
18683 (set_attr "type" "ssemov")
18684 (set_attr "prefix_extra" "1")
18685 (set_attr "prefix" "orig,orig,maybe_evex")
18686 (set_attr "mode" "TI")])
;; Pre-reload splitter: a DI memory load selected as two SI lanes is
;; rewritten as a plain V2SI -> V2DI extension from memory.
18688 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
18689 [(set (match_operand:V2DI 0 "register_operand")
18694 (match_operand:DI 1 "memory_operand")
18696 (parallel [(const_int 0) (const_int 1)]))))]
18697 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18698 && ix86_pre_reload_split ()"
18701 [(set (match_dup 0)
18702 (any_extend:V2DI (match_dup 1)))]
18703 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
;; Recognize a pmovzxdq-shaped V4SI shuffle with a zero vector and
;; rewrite it, after reload, as a zero_extend of the low V2SI half.
;; For a memory source the extension is emitted directly; otherwise the
;; target pattern selecting lanes 0 and 1 is used.
18705 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_3"
18706 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18709 (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
18710 (match_operand:V4SI 2 "const0_operand" "C,C,C"))
18711 (match_parallel 3 "pmovzx_parallel"
18712 [(match_operand 4 "const_int_operand" "n,n,n")])))]
18715 "&& reload_completed"
18716 [(set (match_dup 0)
18718 (vec_select:V2SI (match_dup 1)
18719 (parallel [(const_int 0) (const_int 1)]))))]
18721 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
18722 if (MEM_P (operands[1]))
18724 operands[1] = lowpart_subreg (V2SImode, operands[1], V4SImode);
18725 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
18726 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18730 [(set_attr "isa" "noavx,noavx,avx")])
;; Generic expander for the V2SI -> V2DI extension; register sources are
;; re-viewed as V4SImode for the SSE4.1 pattern.
18732 (define_expand "<insn>v2siv2di2"
18733 [(set (match_operand:V2DI 0 "register_operand")
18735 (match_operand:V2SI 1 "nonimmediate_operand")))]
18738 if (!MEM_P (operands[1]))
18740 operands[1] = simplify_gen_subreg (V4SImode, operands[1], V2SImode, 0);
18741 emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], operands[1]));
18746 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
18747 ;; setting FLAGS_REG.  But they are not really compare instructions.
;; AVX vtestps/vtestpd: set FLAGS_REG from two packed-float operands.
18748 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
18749 [(set (reg:CC FLAGS_REG)
18750 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
18751 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
18754 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
18755 [(set_attr "type" "ssecomi")
18756 (set_attr "prefix_extra" "1")
18757 (set_attr "prefix" "vex")
18758 (set_attr "mode" "<MODE>")])
18760 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
18761 ;; But it is not really a compare instruction.
;; ptest: set FLAGS_REG from two integer-vector operands.  On btver2
;; the 256-bit (OImode) form decodes as a vector op.
18762 (define_insn "<sse4_1>_ptest<mode>"
18763 [(set (reg:CC FLAGS_REG)
18764 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
18765 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
18768 "%vptest\t{%1, %0|%0, %1}"
18769 [(set_attr "isa" "noavx,noavx,avx")
18770 (set_attr "type" "ssecomi")
18771 (set_attr "prefix_extra" "1")
18772 (set_attr "prefix" "orig,orig,vex")
18773 (set (attr "btver2_decode")
18775 (match_test "<sseinsnmode>mode==OImode")
18776 (const_string "vector")
18777 (const_string "*")))
18778 (set_attr "mode" "<sseinsnmode>")])
;; ptest applied to TFmode (128-bit scalar viewed as a vector).
18780 (define_insn "ptesttf2"
18781 [(set (reg:CC FLAGS_REG)
18782 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
18783 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
18786 "%vptest\t{%1, %0|%0, %1}"
18787 [(set_attr "isa" "noavx,noavx,avx")
18788 (set_attr "type" "ssecomi")
18789 (set_attr "prefix_extra" "1")
18790 (set_attr "prefix" "orig,orig,vex")
18791 (set_attr "mode" "TI")])
;; nearbyint: round using the current MXCSR rounding mode with
;; precision exceptions suppressed (ROUND_MXCSR | ROUND_NO_EXC).
18793 (define_expand "nearbyint<mode>2"
18794 [(set (match_operand:VF 0 "register_operand")
18796 [(match_operand:VF 1 "vector_operand")
18800 "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
;; rint: round using the current MXCSR rounding mode (exceptions
;; allowed, unlike nearbyint above).
18802 (define_expand "rint<mode>2"
18803 [(set (match_operand:VF 0 "register_operand")
18805 [(match_operand:VF 1 "vector_operand")
18809 "operands[2] = GEN_INT (ROUND_MXCSR);")
;; SSE4.1 roundps/roundpd with an explicit 0..15 rounding-control
;; immediate in operand 2.
18811 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
18812 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18814 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
18815 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
18818 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18819 [(set_attr "isa" "noavx,noavx,avx")
18820 (set_attr "type" "ssecvt")
18821 (set_attr "prefix_data16" "1,1,*")
18822 (set_attr "prefix_extra" "1")
18823 (set_attr "length_immediate" "1")
18824 (set_attr "prefix" "orig,orig,vex")
18825 (set_attr "mode" "<MODE>")])
;; Round to integral value, then truncate-convert the result to a
;; signed integer vector, via a temporary of the float mode.
18827 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
18828 [(match_operand:<sseintvecmode> 0 "register_operand")
18829 (match_operand:VF1_128_256 1 "vector_operand")
18830 (match_operand:SI 2 "const_0_to_15_operand")]
18833 rtx tmp = gen_reg_rtx (<MODE>mode);
18836 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
18839 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit round: implemented with vrndscale, which subsumes the
;; SSE4.1 round immediates for AVX512F.
18843 (define_expand "avx512f_round<castmode>512"
18844 [(match_operand:VF_512 0 "register_operand")
18845 (match_operand:VF_512 1 "nonimmediate_operand")
18846 (match_operand:SI 2 "const_0_to_15_operand")]
18849 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
;; Round V16SF with vrndscale, then truncate-convert into V16SI.
18853 (define_expand "avx512f_roundps512_sfix"
18854 [(match_operand:V16SI 0 "register_operand")
18855 (match_operand:V16SF 1 "nonimmediate_operand")
18856 (match_operand:SI 2 "const_0_to_15_operand")]
18859 rtx tmp = gen_reg_rtx (V16SFmode);
18860 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
18861 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
;; Round two VF2 vectors and pack-truncate them into one signed integer
;; vector.  For V2DF with AVX (and not preferring 128-bit), the two
;; inputs are concatenated into one V4DF so a single 256-bit round and
;; convert suffice; otherwise each half is rounded separately and packed.
18865 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
18866 [(match_operand:<ssepackfltmode> 0 "register_operand")
18867 (match_operand:VF2 1 "vector_operand")
18868 (match_operand:VF2 2 "vector_operand")
18869 (match_operand:SI 3 "const_0_to_15_operand")]
18874 if (<MODE>mode == V2DFmode
18875 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
18877 rtx tmp2 = gen_reg_rtx (V4DFmode);
18879 tmp0 = gen_reg_rtx (V4DFmode);
18880 tmp1 = force_reg (V2DFmode, operands[1]);
18882 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
18883 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
18884 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
18888 tmp0 = gen_reg_rtx (<MODE>mode);
18889 tmp1 = gen_reg_rtx (<MODE>mode);
18892 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
18895 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
18898 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar roundss/roundsd (vrndscale for the AVX512F alternative):
;; round the low element of operand 2, merging the remaining elements
;; from operand 1.
18903 (define_insn "sse4_1_round<ssescalarmodesuffix>"
18904 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18907 [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18908 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18910 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18914 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18915 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18916 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
18917 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
18918 [(set_attr "isa" "noavx,noavx,avx,avx512f")
18919 (set_attr "type" "ssecvt")
18920 (set_attr "length_immediate" "1")
18921 (set_attr "prefix_data16" "1,1,*,*")
18922 (set_attr "prefix_extra" "1")
18923 (set_attr "prefix" "orig,orig,vex,evex")
18924 (set_attr "mode" "<MODE>")])
;; As above, but matching the form where the rounded scalar is
;; broadcast via vec_duplicate before the merge.
18926 (define_insn "*sse4_1_round<ssescalarmodesuffix>"
18927 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18929 (vec_duplicate:VF_128
18930 (unspec:<ssescalarmode>
18931 [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18932 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18934 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18938 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18939 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18940 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
18941 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18942 [(set_attr "isa" "noavx,noavx,avx,avx512f")
18943 (set_attr "type" "ssecvt")
18944 (set_attr "length_immediate" "1")
18945 (set_attr "prefix_data16" "1,1,*,*")
18946 (set_attr "prefix_extra" "1")
18947 (set_attr "prefix" "orig,orig,vex,evex")
18948 (set_attr "mode" "<MODE>")])
;; round (half away from zero): add copysign (nextafter (0.5, 0.0), x)
;; to x, then truncate.  Using the predecessor of 0.5 avoids rounding
;; values like 0.49999999 up.  Requires !flag_trapping_math because the
;; addition may raise inexact.
18950 (define_expand "round<mode>2"
18951 [(set (match_dup 3)
18953 (match_operand:VF 1 "register_operand")
18955 (set (match_operand:VF 0 "register_operand")
18957 [(match_dup 3) (match_dup 4)]
18959 "TARGET_SSE4_1 && !flag_trapping_math"
18961 machine_mode scalar_mode;
18962 const struct real_format *fmt;
18963 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
18964 rtx half, vec_half;
18966 scalar_mode = GET_MODE_INNER (<MODE>mode);
18968 /* load nextafter (0.5, 0.0) */
18969 fmt = REAL_MODE_FORMAT (scalar_mode);
18970 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
18971 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
18972 half = const_double_from_real_value (pred_half, scalar_mode);
18974 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
18975 vec_half = force_reg (<MODE>mode, vec_half);
18977 operands[2] = gen_reg_rtx (<MODE>mode);
18978 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
18980 operands[3] = gen_reg_rtx (<MODE>mode);
18981 operands[4] = GEN_INT (ROUND_TRUNC);
;; round, then truncate-convert the result to a signed integer vector.
18984 (define_expand "round<mode>2_sfix"
18985 [(match_operand:<sseintvecmode> 0 "register_operand")
18986 (match_operand:VF1 1 "register_operand")]
18987 "TARGET_SSE4_1 && !flag_trapping_math"
18989 rtx tmp = gen_reg_rtx (<MODE>mode);
18991 emit_insn (gen_round<mode>2 (tmp, operands[1]));
18994 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; round two VF2 vectors and pack-truncate into one signed integer
;; vector; same 256-bit V2DF fast path as the _vec_pack_sfix expander
;; above, but using the round<mode>2 (half-away-from-zero) expansion.
18998 (define_expand "round<mode>2_vec_pack_sfix"
18999 [(match_operand:<ssepackfltmode> 0 "register_operand")
19000 (match_operand:VF2 1 "register_operand")
19001 (match_operand:VF2 2 "register_operand")]
19002 "TARGET_SSE4_1 && !flag_trapping_math"
19006 if (<MODE>mode == V2DFmode
19007 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
19009 rtx tmp2 = gen_reg_rtx (V4DFmode);
19011 tmp0 = gen_reg_rtx (V4DFmode);
19012 tmp1 = force_reg (V2DFmode, operands[1]);
19014 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
19015 emit_insn (gen_roundv4df2 (tmp2, tmp0));
19016 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
19020 tmp0 = gen_reg_rtx (<MODE>mode);
19021 tmp1 = gen_reg_rtx (<MODE>mode);
19023 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
19024 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
19027 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
19032 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19034 ;; Intel SSE4.2 string/text processing instructions
19036 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing index (ecx), mask (xmm0) and
;; flags.  Before reload it is split into only the variants whose
;; results are live (checked via REG_UNUSED notes), falling back to a
;; flags-only insn, or deleting the insn entirely when nothing is used.
19038 (define_insn_and_split "sse4_2_pcmpestr"
19039 [(set (match_operand:SI 0 "register_operand" "=c,c")
19041 [(match_operand:V16QI 2 "register_operand" "x,x")
19042 (match_operand:SI 3 "register_operand" "a,a")
19043 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
19044 (match_operand:SI 5 "register_operand" "d,d")
19045 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
19047 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
19055 (set (reg:CC FLAGS_REG)
19064 && ix86_pre_reload_split ()"
19069 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
19070 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
19071 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
19074 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
19075 operands[3], operands[4],
19076 operands[5], operands[6]));
19078 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
19079 operands[3], operands[4],
19080 operands[5], operands[6]));
19081 if (flags && !(ecx || xmm0))
19082 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
19083 operands[2], operands[3],
19084 operands[4], operands[5],
19086 if (!(flags || ecx || xmm0))
19087 emit_note (NOTE_INSN_DELETED);
19091 [(set_attr "type" "sselog")
19092 (set_attr "prefix_data16" "1")
19093 (set_attr "prefix_extra" "1")
19094 (set_attr "length_immediate" "1")
19095 (set_attr "memory" "none,load")
19096 (set_attr "mode" "TI")])
;; pcmpestri: string compare with explicit lengths (eax/edx), result
;; index in ecx, flags set.
19098 (define_insn "sse4_2_pcmpestri"
19099 [(set (match_operand:SI 0 "register_operand" "=c,c")
19101 [(match_operand:V16QI 1 "register_operand" "x,x")
19102 (match_operand:SI 2 "register_operand" "a,a")
19103 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
19104 (match_operand:SI 4 "register_operand" "d,d")
19105 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
19107 (set (reg:CC FLAGS_REG)
19116 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
19117 [(set_attr "type" "sselog")
19118 (set_attr "prefix_data16" "1")
19119 (set_attr "prefix_extra" "1")
19120 (set_attr "prefix" "maybe_vex")
19121 (set_attr "length_immediate" "1")
19122 (set_attr "btver2_decode" "vector")
19123 (set_attr "memory" "none,load")
19124 (set_attr "mode" "TI")])
;; pcmpestrm: as pcmpestri but the result is a mask in xmm0 (the "Yz"
;; constraint), flags set.
19126 (define_insn "sse4_2_pcmpestrm"
19127 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
19129 [(match_operand:V16QI 1 "register_operand" "x,x")
19130 (match_operand:SI 2 "register_operand" "a,a")
19131 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
19132 (match_operand:SI 4 "register_operand" "d,d")
19133 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
19135 (set (reg:CC FLAGS_REG)
19144 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
19145 [(set_attr "type" "sselog")
19146 (set_attr "prefix_data16" "1")
19147 (set_attr "prefix_extra" "1")
19148 (set_attr "length_immediate" "1")
19149 (set_attr "prefix" "maybe_vex")
19150 (set_attr "btver2_decode" "vector")
19151 (set_attr "memory" "none,load")
19152 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: index/mask results are dead, so either the
;; i-form or the m-form may be emitted depending on which scratch
;; (xmm0 vs ecx) register allocation leaves free.
19154 (define_insn "sse4_2_pcmpestr_cconly"
19155 [(set (reg:CC FLAGS_REG)
19157 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
19158 (match_operand:SI 3 "register_operand" "a,a,a,a")
19159 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
19160 (match_operand:SI 5 "register_operand" "d,d,d,d")
19161 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
19163 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
19164 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
19167 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
19168 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
19169 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
19170 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
19171 [(set_attr "type" "sselog")
19172 (set_attr "prefix_data16" "1")
19173 (set_attr "prefix_extra" "1")
19174 (set_attr "length_immediate" "1")
19175 (set_attr "memory" "none,load,none,load")
19176 (set_attr "btver2_decode" "vector,vector,vector,vector")
19177 (set_attr "prefix" "maybe_vex")
19178 (set_attr "mode" "TI")])
;; Combined pcmpistr (implicit-length string compare) pattern; split
;; before reload into only the variants whose results are live, exactly
;; like sse4_2_pcmpestr above.
19180 (define_insn_and_split "sse4_2_pcmpistr"
19181 [(set (match_operand:SI 0 "register_operand" "=c,c")
19183 [(match_operand:V16QI 2 "register_operand" "x,x")
19184 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
19185 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
19187 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
19193 (set (reg:CC FLAGS_REG)
19200 && ix86_pre_reload_split ()"
19205 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
19206 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
19207 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
19210 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
19211 operands[3], operands[4]));
19213 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
19214 operands[3], operands[4]));
19215 if (flags && !(ecx || xmm0))
19216 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
19217 operands[2], operands[3],
19219 if (!(flags || ecx || xmm0))
19220 emit_note (NOTE_INSN_DELETED);
19224 [(set_attr "type" "sselog")
19225 (set_attr "prefix_data16" "1")
19226 (set_attr "prefix_extra" "1")
19227 (set_attr "length_immediate" "1")
19228 (set_attr "memory" "none,load")
19229 (set_attr "mode" "TI")])
;; pcmpistri: implicit-length string compare, index result in ecx.
19231 (define_insn "sse4_2_pcmpistri"
19232 [(set (match_operand:SI 0 "register_operand" "=c,c")
19234 [(match_operand:V16QI 1 "register_operand" "x,x")
19235 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19236 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19238 (set (reg:CC FLAGS_REG)
19245 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
19246 [(set_attr "type" "sselog")
19247 (set_attr "prefix_data16" "1")
19248 (set_attr "prefix_extra" "1")
19249 (set_attr "length_immediate" "1")
19250 (set_attr "prefix" "maybe_vex")
19251 (set_attr "memory" "none,load")
19252 (set_attr "btver2_decode" "vector")
19253 (set_attr "mode" "TI")])
;; pcmpistrm: implicit-length string compare, mask result in xmm0.
19255 (define_insn "sse4_2_pcmpistrm"
19256 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
19258 [(match_operand:V16QI 1 "register_operand" "x,x")
19259 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19260 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19262 (set (reg:CC FLAGS_REG)
19269 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
19270 [(set_attr "type" "sselog")
19271 (set_attr "prefix_data16" "1")
19272 (set_attr "prefix_extra" "1")
19273 (set_attr "length_immediate" "1")
19274 (set_attr "prefix" "maybe_vex")
19275 (set_attr "memory" "none,load")
19276 (set_attr "btver2_decode" "vector")
19277 (set_attr "mode" "TI")])
;; Flags-only pcmpistr; the scratch alternatives pick either the
;; m-form (xmm0 free) or the i-form (ecx free).
19279 (define_insn "sse4_2_pcmpistr_cconly"
19280 [(set (reg:CC FLAGS_REG)
19282 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
19283 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
19284 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
19286 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
19287 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
19290 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
19291 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
19292 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
19293 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
19294 [(set_attr "type" "sselog")
19295 (set_attr "prefix_data16" "1")
19296 (set_attr "prefix_extra" "1")
19297 (set_attr "length_immediate" "1")
19298 (set_attr "memory" "none,load,none,load")
19299 (set_attr "prefix" "maybe_vex")
19300 (set_attr "btver2_decode" "vector,vector,vector,vector")
19301 (set_attr "mode" "TI")])
19303 ;; Packed float variants
;; Memory mode used by the SF gather/scatter-prefetch patterns for each
;; index-vector mode.
19304 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
19305 [(V8DI "V8SF") (V16SI "V16SF")])
;; AVX512PF masked gather-prefetch (SF memory) expander: rewrap the
;; address operands into an UNSPEC_VSIBADDR so the insn pattern below
;; can emit VSIB addressing.
19307 (define_expand "avx512pf_gatherpf<mode>sf"
19309 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19310 (mem:<GATHER_SCATTER_SF_MEM_MODE>
19312 [(match_operand 2 "vsib_address_operand")
19313 (match_operand:VI48_512 1 "register_operand")
19314 (match_operand:SI 3 "const1248_operand")]))
19315 (match_operand:SI 4 "const_2_to_3_operand")]
19316 UNSPEC_GATHER_PREFETCH)]
19320 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19321 operands[3]), UNSPEC_VSIBADDR);
;; vgatherpf{0,1}{d,q}ps: operand 4 (2 or 3) selects the prefetch hint
;; level (T0 vs T1).
19324 (define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
19326 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19327 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
19329 [(match_operand:P 2 "vsib_address_operand" "Tv")
19330 (match_operand:VI48_512 1 "register_operand" "v")
19331 (match_operand:SI 3 "const1248_operand" "n")]
19333 (match_operand:SI 4 "const_2_to_3_operand" "n")]
19334 UNSPEC_GATHER_PREFETCH)]
19337 switch (INTVAL (operands[4]))
19340 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19341 gas changed what it requires incompatibly. */
19342 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19344 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19346 gcc_unreachable ();
19349 [(set_attr "type" "sse")
19350 (set_attr "prefix" "evex")
19351 (set_attr "mode" "XI")])
19353 ;; Packed double variants
;; DF-memory gather-prefetch expander; same VSIB address rewrapping as
;; the SF variant above.
19354 (define_expand "avx512pf_gatherpf<mode>df"
19356 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19359 [(match_operand 2 "vsib_address_operand")
19360 (match_operand:VI4_256_8_512 1 "register_operand")
19361 (match_operand:SI 3 "const1248_operand")]))
19362 (match_operand:SI 4 "const_2_to_3_operand")]
19363 UNSPEC_GATHER_PREFETCH)]
19367 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19368 operands[3]), UNSPEC_VSIBADDR);
;; vgatherpf{0,1}{d,q}pd: operand 4 (2 or 3) selects the prefetch hint
;; level (T0 vs T1).
19371 (define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
19373 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19374 (match_operator:V8DF 5 "vsib_mem_operator"
19376 [(match_operand:P 2 "vsib_address_operand" "Tv")
19377 (match_operand:VI4_256_8_512 1 "register_operand" "v")
19378 (match_operand:SI 3 "const1248_operand" "n")]
19380 (match_operand:SI 4 "const_2_to_3_operand" "n")]
19381 UNSPEC_GATHER_PREFETCH)]
19384 switch (INTVAL (operands[4]))
19387 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19388 gas changed what it requires incompatibly. */
19389 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19391 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19393 gcc_unreachable ();
19396 [(set_attr "type" "sse")
19397 (set_attr "prefix" "evex")
19398 (set_attr "mode" "XI")])
19400 ;; Packed float variants
;; SF-memory scatter-prefetch expander; VSIB address rewrapping as for
;; the gather variants.  Operand 4 accepts hints 2, 3, 6 or 7.
19401 (define_expand "avx512pf_scatterpf<mode>sf"
19403 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19404 (mem:<GATHER_SCATTER_SF_MEM_MODE>
19406 [(match_operand 2 "vsib_address_operand")
19407 (match_operand:VI48_512 1 "register_operand")
19408 (match_operand:SI 3 "const1248_operand")]))
19409 (match_operand:SI 4 "const2367_operand")]
19410 UNSPEC_SCATTER_PREFETCH)]
19414 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19415 operands[3]), UNSPEC_VSIBADDR);
;; vscatterpf{0,1}{d,q}ps: operand 4 selects hint level 0 or 1.
19418 (define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
19420 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19421 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
19423 [(match_operand:P 2 "vsib_address_operand" "Tv")
19424 (match_operand:VI48_512 1 "register_operand" "v")
19425 (match_operand:SI 3 "const1248_operand" "n")]
19427 (match_operand:SI 4 "const2367_operand" "n")]
19428 UNSPEC_SCATTER_PREFETCH)]
19431 switch (INTVAL (operands[4]))
19435 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19436 gas changed what it requires incompatibly. */
19437 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19440 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19442 gcc_unreachable ();
19445 [(set_attr "type" "sse")
19446 (set_attr "prefix" "evex")
19447 (set_attr "mode" "XI")])
19449 ;; Packed double variants
;; DF-memory scatter-prefetch expander; VSIB address rewrapping as
;; above.
19450 (define_expand "avx512pf_scatterpf<mode>df"
19452 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19455 [(match_operand 2 "vsib_address_operand")
19456 (match_operand:VI4_256_8_512 1 "register_operand")
19457 (match_operand:SI 3 "const1248_operand")]))
19458 (match_operand:SI 4 "const2367_operand")]
19459 UNSPEC_SCATTER_PREFETCH)]
19463 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19464 operands[3]), UNSPEC_VSIBADDR);
;; vscatterpf{0,1}{d,q}pd: operand 4 selects hint level 0 or 1.
19467 (define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
19469 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19470 (match_operator:V8DF 5 "vsib_mem_operator"
19472 [(match_operand:P 2 "vsib_address_operand" "Tv")
19473 (match_operand:VI4_256_8_512 1 "register_operand" "v")
19474 (match_operand:SI 3 "const1248_operand" "n")]
19476 (match_operand:SI 4 "const2367_operand" "n")]
19477 UNSPEC_SCATTER_PREFETCH)]
19480 switch (INTVAL (operands[4]))
19484 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19485 gas changed what it requires incompatibly. */
19486 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19489 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19491 gcc_unreachable ();
19494 [(set_attr "type" "sse")
19495 (set_attr "prefix" "evex")
19496 (set_attr "mode" "XI")])
;; AVX512ER vexp2ps/vexp2pd approximation, with optional masking and
;; SAE ("suppress all exceptions") rounding control.
19498 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
19499 [(set (match_operand:VF_512 0 "register_operand" "=v")
19501 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19504 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19505 [(set_attr "prefix" "evex")
19506 (set_attr "type" "sse")
19507 (set_attr "mode" "<MODE>")])
;; AVX512ER vrcp28ps/vrcp28pd packed reciprocal approximation.
19509 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
19510 [(set (match_operand:VF_512 0 "register_operand" "=v")
19512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19515 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19516 [(set_attr "prefix" "evex")
19517 (set_attr "type" "sse")
19518 (set_attr "mode" "<MODE>")])
;; AVX512ER vrcp28ss/vrcp28sd: reciprocal approximation of the low
;; element of operand 1, merging the upper elements from operand 2.
;; Fix: the Intel-syntax side of the template used the misspelled
;; substitution "<mask_opernad3>", which genattrtab/subst would never
;; expand; the correct spelling "<mask_operand3>" matches the
;; AT&T-syntax side and the vmrsqrt28 pattern below.
19520 (define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
19521 [(set (match_operand:VF_128 0 "register_operand" "=v")
19524 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
19526 (match_operand:VF_128 2 "register_operand" "v")
19529 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
19530 [(set_attr "length_immediate" "1")
19531 (set_attr "prefix" "evex")
19532 (set_attr "type" "sse")
19533 (set_attr "mode" "<MODE>")])
;; AVX512ER vrsqrt28ps/vrsqrt28pd packed reciprocal-sqrt approximation.
19535 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
19536 [(set (match_operand:VF_512 0 "register_operand" "=v")
19538 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19541 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19542 [(set_attr "prefix" "evex")
19543 (set_attr "type" "sse")
19544 (set_attr "mode" "<MODE>")])
;; AVX512ER vrsqrt28ss/vrsqrt28sd: low-element reciprocal-sqrt
;; approximation, upper elements merged from operand 2.
19546 (define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
19547 [(set (match_operand:VF_128 0 "register_operand" "=v")
19550 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
19552 (match_operand:VF_128 2 "register_operand" "v")
19555 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
19556 [(set_attr "length_immediate" "1")
19557 (set_attr "type" "sse")
19558 (set_attr "prefix" "evex")
19559 (set_attr "mode" "<MODE>")])
19561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19563 ;; XOP instructions
19565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Iterator over wrapping vs. saturating add, with the matching XOP
;; mnemonic fragments, shared by the multiply-accumulate patterns below.
19567 (define_code_iterator xop_plus [plus ss_plus])
19569 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
19570 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
19572 ;; XOP parallel integer multiply/add instructions.
;; vpmacs / vpmacss: multiply operands 1 and 2 elementwise and add
;; (optionally with signed saturation) the accumulator in operand 3.
19574 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
19575 [(set (match_operand:VI24_128 0 "register_operand" "=x")
19578 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
19579 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
19580 (match_operand:VI24_128 3 "register_operand" "x")))]
19582 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19583 [(set_attr "type" "ssemuladd")
19584 (set_attr "mode" "TI")])
;; vpmacsdql / vpmacssdql: multiply the even SI lanes (0, 2) widened to
;; DImode, then add the V2DI accumulator in operand 3.
19586 (define_insn "xop_p<macs>dql"
19587 [(set (match_operand:V2DI 0 "register_operand" "=x")
19592 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
19593 (parallel [(const_int 0) (const_int 2)])))
19596 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19597 (parallel [(const_int 0) (const_int 2)]))))
19598 (match_operand:V2DI 3 "register_operand" "x")))]
19600 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19601 [(set_attr "type" "ssemuladd")
19602 (set_attr "mode" "TI")])
;; vpmacsdqh / vpmacssdqh: as dql above, but using the odd SI lanes
;; (1, 3).
19604 (define_insn "xop_p<macs>dqh"
19605 [(set (match_operand:V2DI 0 "register_operand" "=x")
19610 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
19611 (parallel [(const_int 1) (const_int 3)])))
19614 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19615 (parallel [(const_int 1) (const_int 3)]))))
19616 (match_operand:V2DI 3 "register_operand" "x")))]
19618 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19619 [(set_attr "type" "ssemuladd")
19620 (set_attr "mode" "TI")])
19622 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacswd/vpmacsswd: widening 16x16->32 multiply of the odd HI elements
;; (1,3,5,7) plus the V4SI accumulator in operand 3.
19623 (define_insn "xop_p<macs>wd"
19624 [(set (match_operand:V4SI 0 "register_operand" "=x")
19629 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
19630 (parallel [(const_int 1) (const_int 3)
19631 (const_int 5) (const_int 7)])))
19634 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
19635 (parallel [(const_int 1) (const_int 3)
19636 (const_int 5) (const_int 7)]))))
19637 (match_operand:V4SI 3 "register_operand" "x")))]
19639 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19640 [(set_attr "type" "ssemuladd")
19641 (set_attr "mode" "TI")])
;; vpmadcswd/vpmadcsswd: multiply-add-accumulate — products of the even
;; HI pairs (0,2,4,6) are added to products of the odd pairs (1,3,5,7),
;; then accumulated into operand 3.
;; NOTE(review): the operand-1/2 references inside the second vec_select
;; group are elided here (presumably match_dups of operands 1 and 2).
19643 (define_insn "xop_p<madcs>wd"
19644 [(set (match_operand:V4SI 0 "register_operand" "=x")
19650 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
19651 (parallel [(const_int 0) (const_int 2)
19652 (const_int 4) (const_int 6)])))
19655 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
19656 (parallel [(const_int 0) (const_int 2)
19657 (const_int 4) (const_int 6)]))))
19662 (parallel [(const_int 1) (const_int 3)
19663 (const_int 5) (const_int 7)])))
19667 (parallel [(const_int 1) (const_int 3)
19668 (const_int 5) (const_int 7)])))))
19669 (match_operand:V4SI 3 "register_operand" "x")))]
19671 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19672 [(set_attr "type" "ssemuladd")
19673 (set_attr "mode" "TI")])
19675 ;; XOP parallel XMM conditional moves
;; vpcmov: bitwise select — each result bit comes from operand 1 where the
;; corresponding bit of operand 3 (the selector) is set, else from operand 2.
;; Two alternatives allow the memory operand in either position 2 or 3.
19676 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
19677 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
19678 (if_then_else:V_128_256
19679 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
19680 (match_operand:V_128_256 1 "register_operand" "x,x")
19681 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
19683 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19684 [(set_attr "type" "sse4arg")])
19686 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: widening horizontal add of adjacent byte pairs — even
;; elements (0,2,...,14) plus odd elements (1,3,...,15) -> 8 HI results.
;; <u> selects the signed/unsigned (sign- vs zero-extending) variant.
19687 (define_insn "xop_phadd<u>bw"
19688 [(set (match_operand:V8HI 0 "register_operand" "=x")
19692 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19693 (parallel [(const_int 0) (const_int 2)
19694 (const_int 4) (const_int 6)
19695 (const_int 8) (const_int 10)
19696 (const_int 12) (const_int 14)])))
19700 (parallel [(const_int 1) (const_int 3)
19701 (const_int 5) (const_int 7)
19702 (const_int 9) (const_int 11)
19703 (const_int 13) (const_int 15)])))))]
19705 "vphadd<u>bw\t{%1, %0|%0, %1}"
19706 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: horizontal add of groups of 4 bytes -> 4 SI results
;; (elements k, k+4, k+8, k+12 style selections for k = 0..3).
19708 (define_insn "xop_phadd<u>bd"
19709 [(set (match_operand:V4SI 0 "register_operand" "=x")
19714 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19715 (parallel [(const_int 0) (const_int 4)
19716 (const_int 8) (const_int 12)])))
19720 (parallel [(const_int 1) (const_int 5)
19721 (const_int 9) (const_int 13)]))))
19726 (parallel [(const_int 2) (const_int 6)
19727 (const_int 10) (const_int 14)])))
19731 (parallel [(const_int 3) (const_int 7)
19732 (const_int 11) (const_int 15)]))))))]
19734 "vphadd<u>bd\t{%1, %0|%0, %1}"
19735 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: horizontal add of groups of 8 bytes -> 2 DI results.
19737 (define_insn "xop_phadd<u>bq"
19738 [(set (match_operand:V2DI 0 "register_operand" "=x")
19744 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19745 (parallel [(const_int 0) (const_int 8)])))
19749 (parallel [(const_int 1) (const_int 9)]))))
19754 (parallel [(const_int 2) (const_int 10)])))
19758 (parallel [(const_int 3) (const_int 11)])))))
19764 (parallel [(const_int 4) (const_int 12)])))
19768 (parallel [(const_int 5) (const_int 13)]))))
19773 (parallel [(const_int 6) (const_int 14)])))
19777 (parallel [(const_int 7) (const_int 15)])))))))]
19779 "vphadd<u>bq\t{%1, %0|%0, %1}"
19780 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: adjacent HI pairs (even + odd) -> 4 SI results.
19782 (define_insn "xop_phadd<u>wd"
19783 [(set (match_operand:V4SI 0 "register_operand" "=x")
19787 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19788 (parallel [(const_int 0) (const_int 2)
19789 (const_int 4) (const_int 6)])))
19793 (parallel [(const_int 1) (const_int 3)
19794 (const_int 5) (const_int 7)])))))]
19796 "vphadd<u>wd\t{%1, %0|%0, %1}"
19797 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: groups of 4 HI elements -> 2 DI results.
19799 (define_insn "xop_phadd<u>wq"
19800 [(set (match_operand:V2DI 0 "register_operand" "=x")
19805 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19806 (parallel [(const_int 0) (const_int 4)])))
19810 (parallel [(const_int 1) (const_int 5)]))))
19815 (parallel [(const_int 2) (const_int 6)])))
19819 (parallel [(const_int 3) (const_int 7)]))))))]
19821 "vphadd<u>wq\t{%1, %0|%0, %1}"
19822 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: adjacent SI pairs -> 2 DI results.
19824 (define_insn "xop_phadd<u>dq"
19825 [(set (match_operand:V2DI 0 "register_operand" "=x")
19829 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19830 (parallel [(const_int 0) (const_int 2)])))
19834 (parallel [(const_int 1) (const_int 3)])))))]
19836 "vphadd<u>dq\t{%1, %0|%0, %1}"
19837 [(set_attr "type" "sseiadd1")])
;; vphsubbw: widening horizontal subtract of adjacent byte pairs
;; (even element minus odd element); signed only — no <u> variant exists.
19839 (define_insn "xop_phsubbw"
19840 [(set (match_operand:V8HI 0 "register_operand" "=x")
19844 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19845 (parallel [(const_int 0) (const_int 2)
19846 (const_int 4) (const_int 6)
19847 (const_int 8) (const_int 10)
19848 (const_int 12) (const_int 14)])))
19852 (parallel [(const_int 1) (const_int 3)
19853 (const_int 5) (const_int 7)
19854 (const_int 9) (const_int 11)
19855 (const_int 13) (const_int 15)])))))]
19857 "vphsubbw\t{%1, %0|%0, %1}"
19858 [(set_attr "type" "sseiadd1")])
;; vphsubwd: widening horizontal subtract of adjacent HI pairs.
19860 (define_insn "xop_phsubwd"
19861 [(set (match_operand:V4SI 0 "register_operand" "=x")
19865 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19866 (parallel [(const_int 0) (const_int 2)
19867 (const_int 4) (const_int 6)])))
19871 (parallel [(const_int 1) (const_int 3)
19872 (const_int 5) (const_int 7)])))))]
19874 "vphsubwd\t{%1, %0|%0, %1}"
19875 [(set_attr "type" "sseiadd1")])
;; vphsubdq: widening horizontal subtract of adjacent SI pairs.
19877 (define_insn "xop_phsubdq"
19878 [(set (match_operand:V2DI 0 "register_operand" "=x")
19882 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19883 (parallel [(const_int 0) (const_int 2)])))
19887 (parallel [(const_int 1) (const_int 3)])))))]
19889 "vphsubdq\t{%1, %0|%0, %1}"
19890 [(set_attr "type" "sseiadd1")])
19892 ;; XOP permute instructions
;; vpperm: byte-wise permute of the 32 bytes of operands 1 and 2, controlled
;; by the selector bytes in operand 3.  At most one of operands 2/3 may be a
;; memory reference, enforced by the insn condition.
19893 (define_insn "xop_pperm"
19894 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19896 [(match_operand:V16QI 1 "register_operand" "x,x")
19897 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19898 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
19899 UNSPEC_XOP_PERMUTE))]
19900 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19901 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19902 [(set_attr "type" "sse4arg")
19903 (set_attr "mode" "TI")])
19905 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Truncate each V2DI source to SI and concatenate, implemented with vpperm
;; driven by the selector in operand 3 (hence the USE rather than an operand
;; of the computation).
19906 (define_insn "xop_pperm_pack_v2di_v4si"
19907 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
19910 (match_operand:V2DI 1 "register_operand" "x,x"))
19912 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
19913 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19914 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19915 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19916 [(set_attr "type" "sse4arg")
19917 (set_attr "mode" "TI")])
;; As above: truncate two V4SI vectors to HI and concatenate via vpperm.
19919 (define_insn "xop_pperm_pack_v4si_v8hi"
19920 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
19923 (match_operand:V4SI 1 "register_operand" "x,x"))
19925 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
19926 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19927 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19928 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19929 [(set_attr "type" "sse4arg")
19930 (set_attr "mode" "TI")])
;; As above: truncate two V8HI vectors to QI and concatenate via vpperm.
19932 (define_insn "xop_pperm_pack_v8hi_v16qi"
19933 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19936 (match_operand:V8HI 1 "register_operand" "x,x"))
19938 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
19939 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19940 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19941 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19942 [(set_attr "type" "sse4arg")
19943 (set_attr "mode" "TI")])
19945 ;; XOP packed rotate instructions
;; rotl<mode>3: if the rotate count is not an in-range immediate, broadcast
;; the scalar count into a vector register and emit the variable-count
;; xop_vrotl pattern instead.
19946 (define_expand "rotl<mode>3"
19947 [(set (match_operand:VI_128 0 "register_operand")
19949 (match_operand:VI_128 1 "nonimmediate_operand")
19950 (match_operand:SI 2 "general_operand")))]
19953 /* If we were given a scalar, convert it to parallel */
19954 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19956 rtvec vs = rtvec_alloc (<ssescalarnum>);
19957 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19958 rtx reg = gen_reg_rtx (<MODE>mode);
19959 rtx op2 = operands[2];
;; Widen/narrow the count to the element mode before broadcasting.
19962 if (GET_MODE (op2) != <ssescalarmode>mode)
19964 op2 = gen_reg_rtx (<ssescalarmode>mode);
19965 convert_move (op2, operands[2], false);
19968 for (i = 0; i < <ssescalarnum>; i++)
19969 RTVEC_ELT (vs, i) = op2;
19971 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
19972 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; rotr<mode>3: same as rotl, but the broadcast count is negated, since
;; XOP only has a variable rotate-left (negative counts rotate right).
19977 (define_expand "rotr<mode>3"
19978 [(set (match_operand:VI_128 0 "register_operand")
19980 (match_operand:VI_128 1 "nonimmediate_operand")
19981 (match_operand:SI 2 "general_operand")))]
19984 /* If we were given a scalar, convert it to parallel */
19985 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19987 rtvec vs = rtvec_alloc (<ssescalarnum>);
19988 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19989 rtx neg = gen_reg_rtx (<MODE>mode);
19990 rtx reg = gen_reg_rtx (<MODE>mode);
19991 rtx op2 = operands[2];
19994 if (GET_MODE (op2) != <ssescalarmode>mode)
19996 op2 = gen_reg_rtx (<ssescalarmode>mode);
19997 convert_move (op2, operands[2], false);
20000 for (i = 0; i < <ssescalarnum>; i++)
20001 RTVEC_ELT (vs, i) = op2;
20003 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
20004 emit_insn (gen_neg<mode>2 (neg, reg));
20005 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; vprot with an immediate count: rotate each element left by operand 2.
20010 (define_insn "xop_rotl<mode>3"
20011 [(set (match_operand:VI_128 0 "register_operand" "=x")
20013 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
20014 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
20016 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20017 [(set_attr "type" "sseishft")
20018 (set_attr "length_immediate" "1")
20019 (set_attr "mode" "TI")])
;; Immediate rotate-right: emitted as vprot-left with the complementary
;; count (element-bits - imm), materialized into operands[3] at output time.
20021 (define_insn "xop_rotr<mode>3"
20022 [(set (match_operand:VI_128 0 "register_operand" "=x")
20024 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
20025 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
20029 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
20030 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
20032 [(set_attr "type" "sseishft")
20033 (set_attr "length_immediate" "1")
20034 (set_attr "mode" "TI")])
;; Variable rotate-right by a vector of counts: negate the counts and use
;; the XOP rotate-left pattern.
20036 (define_expand "vrotr<mode>3"
20037 [(match_operand:VI_128 0 "register_operand")
20038 (match_operand:VI_128 1 "register_operand")
20039 (match_operand:VI_128 2 "register_operand")]
20042 rtx reg = gen_reg_rtx (<MODE>mode);
20043 emit_insn (gen_neg<mode>2 (reg, operands[2]));
20044 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Variable rotate-left: maps directly onto xop_vrotl<mode>3.
20048 (define_expand "vrotl<mode>3"
20049 [(match_operand:VI_128 0 "register_operand")
20050 (match_operand:VI_128 1 "register_operand")
20051 (match_operand:VI_128 2 "register_operand")]
20054 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot with per-element variable counts.  The if_then_else models the
;; hardware semantics: a non-negative count rotates left, a negative count
;; rotates right by the negated amount.
;; NOTE(review): the comparison and rotate/rotatert wrapper lines are
;; elided in this chunk.
20058 (define_insn "xop_vrotl<mode>3"
20059 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
20060 (if_then_else:VI_128
20062 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
20065 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
20069 (neg:VI_128 (match_dup 2)))))]
20070 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20071 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20072 [(set_attr "type" "sseishft")
20073 (set_attr "prefix_data16" "0")
20074 (set_attr "prefix_extra" "2")
20075 (set_attr "mode" "TI")])
20077 ;; XOP packed shift instructions.
;; Variable logical right shift for 8/16-bit elements: XOP only has a
;; signed-count shift-left (vpshl), so negate the counts and use xop_shl.
20078 (define_expand "vlshr<mode>3"
20079 [(set (match_operand:VI12_128 0 "register_operand")
20081 (match_operand:VI12_128 1 "register_operand")
20082 (match_operand:VI12_128 2 "nonimmediate_operand")))]
20085 rtx neg = gen_reg_rtx (<MODE>mode);
20086 emit_insn (gen_neg<mode>2 (neg, operands[2]));
20087 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; 32/64-bit variable logical right shift: AVX2 has native vpsrlv; the
;; visible body is the XOP fallback via negated count.
20091 (define_expand "vlshr<mode>3"
20092 [(set (match_operand:VI48_128 0 "register_operand")
20094 (match_operand:VI48_128 1 "register_operand")
20095 (match_operand:VI48_128 2 "nonimmediate_operand")))]
20096 "TARGET_AVX2 || TARGET_XOP"
20100 rtx neg = gen_reg_rtx (<MODE>mode);
20101 emit_insn (gen_neg<mode>2 (neg, operands[2]))
20102 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; 512-bit and 256-bit variable logical right shifts; conditions elided
;; in this chunk (the standard pattern is matched directly).
20107 (define_expand "vlshr<mode>3"
20108 [(set (match_operand:VI48_512 0 "register_operand")
20110 (match_operand:VI48_512 1 "register_operand")
20111 (match_operand:VI48_512 2 "nonimmediate_operand")))]
20114 (define_expand "vlshr<mode>3"
20115 [(set (match_operand:VI48_256 0 "register_operand")
20117 (match_operand:VI48_256 1 "register_operand")
20118 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; V8HI variable arithmetic right shift: AVX512BW+VL has native vpsravw;
;; the visible body is the XOP path via negated count and xop_sha.
20121 (define_expand "vashrv8hi3<mask_name>"
20122 [(set (match_operand:V8HI 0 "register_operand")
20124 (match_operand:V8HI 1 "register_operand")
20125 (match_operand:V8HI 2 "nonimmediate_operand")))]
20126 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
20130 rtx neg = gen_reg_rtx (V8HImode);
20131 emit_insn (gen_negv8hi2 (neg, operands[2]));
20132 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
;; V16QI variable arithmetic right shift (XOP only).
20137 (define_expand "vashrv16qi3"
20138 [(set (match_operand:V16QI 0 "register_operand")
20140 (match_operand:V16QI 1 "register_operand")
20141 (match_operand:V16QI 2 "nonimmediate_operand")))]
20144 rtx neg = gen_reg_rtx (V16QImode);
20145 emit_insn (gen_negv16qi2 (neg, operands[2]));
20146 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
;; V2DI variable arithmetic right shift: AVX512VL has native vpsravq;
;; otherwise the XOP path via negated count.
20150 (define_expand "vashrv2di3<mask_name>"
20151 [(set (match_operand:V2DI 0 "register_operand")
20153 (match_operand:V2DI 1 "register_operand")
20154 (match_operand:V2DI 2 "nonimmediate_operand")))]
20155 "TARGET_XOP || TARGET_AVX512VL"
20159 rtx neg = gen_reg_rtx (V2DImode);
20160 emit_insn (gen_negv2di2 (neg, operands[2]));
20161 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
;; V4SI variable arithmetic right shift: native on AVX2 (vpsravd), XOP
;; fallback otherwise.
20166 (define_expand "vashrv4si3"
20167 [(set (match_operand:V4SI 0 "register_operand")
20168 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
20169 (match_operand:V4SI 2 "nonimmediate_operand")))]
20170 "TARGET_AVX2 || TARGET_XOP"
20174 rtx neg = gen_reg_rtx (V4SImode);
20175 emit_insn (gen_negv4si2 (neg, operands[2]));
20176 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; 512-bit / 256-bit variable arithmetic right shifts; conditions elided.
20181 (define_expand "vashrv16si3"
20182 [(set (match_operand:V16SI 0 "register_operand")
20183 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
20184 (match_operand:V16SI 2 "nonimmediate_operand")))]
20187 (define_expand "vashrv8si3"
20188 [(set (match_operand:V8SI 0 "register_operand")
20189 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
20190 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Variable left shift for 8/16-bit elements: positive counts shift left
;; in xop_sha, so no negation is needed here.
20193 (define_expand "vashl<mode>3"
20194 [(set (match_operand:VI12_128 0 "register_operand")
20196 (match_operand:VI12_128 1 "register_operand")
20197 (match_operand:VI12_128 2 "nonimmediate_operand")))]
20200 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; 32/64-bit variable left shift: native on AVX2 (vpsllv); XOP path forces
;; the count into a register and uses xop_sha.
20204 (define_expand "vashl<mode>3"
20205 [(set (match_operand:VI48_128 0 "register_operand")
20207 (match_operand:VI48_128 1 "register_operand")
20208 (match_operand:VI48_128 2 "nonimmediate_operand")))]
20209 "TARGET_AVX2 || TARGET_XOP"
20213 operands[2] = force_reg (<MODE>mode, operands[2]);
20214 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; 512-bit / 256-bit variable left shifts; conditions elided.
20219 (define_expand "vashl<mode>3"
20220 [(set (match_operand:VI48_512 0 "register_operand")
20222 (match_operand:VI48_512 1 "register_operand")
20223 (match_operand:VI48_512 2 "nonimmediate_operand")))]
20226 (define_expand "vashl<mode>3"
20227 [(set (match_operand:VI48_256 0 "register_operand")
20229 (match_operand:VI48_256 1 "register_operand")
20230 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vpsha: per-element variable shift with signed counts — non-negative
;; counts shift left, negative counts arithmetic-shift right by the negated
;; amount (modeled by the if_then_else over operand 2's sign).
;; NOTE(review): the comparison and ashift/ashiftrt wrapper lines are
;; elided in this chunk.
20233 (define_insn "xop_sha<mode>3"
20234 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
20235 (if_then_else:VI_128
20237 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
20240 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
20244 (neg:VI_128 (match_dup 2)))))]
20245 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20246 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20247 [(set_attr "type" "sseishft")
20248 (set_attr "prefix_data16" "0")
20249 (set_attr "prefix_extra" "2")
20250 (set_attr "mode" "TI")])
;; vpshl: same shape as xop_sha, but negative counts perform a logical
;; (zero-filling) right shift.
20252 (define_insn "xop_shl<mode>3"
20253 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
20254 (if_then_else:VI_128
20256 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
20259 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
20263 (neg:VI_128 (match_dup 2)))))]
20264 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20265 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20266 [(set_attr "type" "sseishft")
20267 (set_attr "prefix_data16" "0")
20268 (set_attr "prefix_extra" "2")
20269 (set_attr "mode" "TI")])
;; Scalar-count shifts of byte vectors.  On XOP, broadcast the (possibly
;; negated) count into a V16QI register and use vpshl/vpsha; otherwise
;; fall back to the generic QImode shift lowering helpers.
20271 (define_expand "<insn><mode>3"
20272 [(set (match_operand:VI1_AVX512 0 "register_operand")
20273 (any_shift:VI1_AVX512
20274 (match_operand:VI1_AVX512 1 "register_operand")
20275 (match_operand:SI 2 "nonmemory_operand")))]
20278 if (TARGET_XOP && <MODE>mode == V16QImode)
20280 bool negate = false;
20281 rtx (*gen) (rtx, rtx, rtx);
;; Right shifts need a negative count for the XOP signed-count insns;
;; constant counts are negated directly, variable ones after broadcast.
20285 if (<CODE> != ASHIFT)
20287 if (CONST_INT_P (operands[2]))
20288 operands[2] = GEN_INT (-INTVAL (operands[2]));
20292 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
20293 for (i = 0; i < 16; i++)
20294 XVECEXP (par, 0, i) = operands[2];
20296 tmp = gen_reg_rtx (V16QImode);
20297 emit_insn (gen_vec_initv16qiqi (tmp, par));
20300 emit_insn (gen_negv16qi2 (tmp, tmp));
;; Logical right shift -> vpshl, arithmetic right shift -> vpsha.
20302 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
20303 emit_insn (gen (operands[0], operands[1], tmp));
20305 else if (!ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0],
20306 operands[1], operands[2]))
20307 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; V2DI arithmetic right shift by a scalar count: AVX512VL matches the
;; pattern natively (vpsraq); without it, broadcast the negated count and
;; use the XOP variable-shift path.
20311 (define_expand "ashrv2di3"
20312 [(set (match_operand:V2DI 0 "register_operand")
20314 (match_operand:V2DI 1 "register_operand")
20315 (match_operand:DI 2 "nonmemory_operand")))]
20316 "TARGET_XOP || TARGET_AVX512VL"
20318 if (!TARGET_AVX512VL)
20320 rtx reg = gen_reg_rtx (V2DImode);
20322 bool negate = false;
20325 if (CONST_INT_P (operands[2]))
20326 operands[2] = GEN_INT (-INTVAL (operands[2]));
20330 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
20331 for (i = 0; i < 2; i++)
20332 XVECEXP (par, 0, i) = operands[2];
20334 emit_insn (gen_vec_initv2didi (reg, par));
20337 emit_insn (gen_negv2di2 (reg, reg));
20339 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
20344 ;; XOP FRCZ support
;; vfrcz: extract the fractional part of each FP element.
20345 (define_insn "xop_frcz<mode>2"
20346 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
20348 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
20351 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
20352 [(set_attr "type" "ssecvt1")
20353 (set_attr "mode" "<MODE>")])
;; Scalar vfrczs[sd] expander: supplies a zero vector for the merge
;; operand expected by the *xop_vmfrcz pattern below.
20355 (define_expand "xop_vmfrcz<mode>2"
20356 [(set (match_operand:VF_128 0 "register_operand")
20359 [(match_operand:VF_128 1 "nonimmediate_operand")]
20364 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Scalar vfrcz: fractional part of the low element; upper elements come
;; from the zero constant in operand 2 (vec_merge lines elided here).
20366 (define_insn "*xop_vmfrcz<mode>2"
20367 [(set (match_operand:VF_128 0 "register_operand" "=x")
20370 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
20372 (match_operand:VF_128 2 "const0_operand")
20375 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
20376 [(set_attr "type" "ssecvt1")
20377 (set_attr "mode" "<MODE>")])
;; XOP vpcom: integer compare producing an all-ones/all-zeros mask.  The
;; %Y1 output modifier prints the condition-code suffix taken from the
;; comparison operator in operand 1.
20379 (define_insn "xop_maskcmp<mode>3"
20380 [(set (match_operand:VI_128 0 "register_operand" "=x")
20381 (match_operator:VI_128 1 "ix86_comparison_int_operator"
20382 [(match_operand:VI_128 2 "register_operand" "x")
20383 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
20385 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20386 [(set_attr "type" "sse4arg")
20387 (set_attr "prefix_data16" "0")
20388 (set_attr "prefix_rep" "0")
20389 (set_attr "prefix_extra" "2")
20390 (set_attr "length_immediate" "1")
20391 (set_attr "mode" "TI")])
;; Unsigned variant: vpcom<cc>u<size>.
20393 (define_insn "xop_maskcmp_uns<mode>3"
20394 [(set (match_operand:VI_128 0 "register_operand" "=x")
20395 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
20396 [(match_operand:VI_128 2 "register_operand" "x")
20397 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
20399 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20400 [(set_attr "type" "ssecmp")
20401 (set_attr "prefix_data16" "0")
20402 (set_attr "prefix_rep" "0")
20403 (set_attr "prefix_extra" "2")
20404 (set_attr "length_immediate" "1")
20405 (set_attr "mode" "TI")])
20407 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
20408 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
20409 ;; the exact instruction generated for the intrinsic.
;; The unspec wrapper prevents the combiner from canonicalizing the
;; unsigned equality compares into their signed equivalents.
20410 (define_insn "xop_maskcmp_uns2<mode>3"
20411 [(set (match_operand:VI_128 0 "register_operand" "=x")
20413 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
20414 [(match_operand:VI_128 2 "register_operand" "x")
20415 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
20416 UNSPEC_XOP_UNSIGNED_CMP))]
20418 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20419 [(set_attr "type" "ssecmp")
20420 (set_attr "prefix_data16" "0")
20421 (set_attr "prefix_extra" "2")
20422 (set_attr "length_immediate" "1")
20423 (set_attr "mode" "TI")])
20425 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
20426 ;; being added here to be complete.
;; Operand 3 selects the mnemonic: nonzero -> vpcomtrue (all-ones result),
;; zero -> vpcomfalse (all-zeros result).
20427 (define_insn "xop_pcom_tf<mode>3"
20428 [(set (match_operand:VI_128 0 "register_operand" "=x")
20430 [(match_operand:VI_128 1 "register_operand" "x")
20431 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
20432 (match_operand:SI 3 "const_int_operand" "n")]
20433 UNSPEC_XOP_TRUEFALSE))]
20436 return ((INTVAL (operands[3]) != 0)
20437 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20438 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
20440 [(set_attr "type" "ssecmp")
20441 (set_attr "prefix_data16" "0")
20442 (set_attr "prefix_extra" "2")
20443 (set_attr "length_immediate" "1")
20444 (set_attr "mode" "TI")])
;; vpermil2ps/pd: two-source permute — selector vector in operand 3 picks
;; elements from operands 1/2, the 2-bit immediate in operand 4 controls
;; the zeroing behavior.
20446 (define_insn "xop_vpermil2<mode>3"
20447 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
20449 [(match_operand:VF_128_256 1 "register_operand" "x,x")
20450 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
20451 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
20452 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
20455 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
20456 [(set_attr "type" "sse4arg")
20457 (set_attr "length_immediate" "1")
20458 (set_attr "mode" "<MODE>")])
20460 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES-NI.  Each insn has a legacy-SSE (destructive, operand 0 = 1) and a
;; VEX-encoded three-operand alternative, selected by the "isa" attribute.
;; aesenc: one full AES encryption round (state in op 1, round key in op 2).
20462 (define_insn "aesenc"
20463 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20464 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20465 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20469 aesenc\t{%2, %0|%0, %2}
20470 vaesenc\t{%2, %1, %0|%0, %1, %2}"
20471 [(set_attr "isa" "noavx,avx")
20472 (set_attr "type" "sselog1")
20473 (set_attr "prefix_extra" "1")
20474 (set_attr "prefix" "orig,vex")
20475 (set_attr "btver2_decode" "double,double")
20476 (set_attr "mode" "TI")])
;; aesenclast: final AES encryption round (no MixColumns).
20478 (define_insn "aesenclast"
20479 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20480 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20481 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20482 UNSPEC_AESENCLAST))]
20485 aesenclast\t{%2, %0|%0, %2}
20486 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
20487 [(set_attr "isa" "noavx,avx")
20488 (set_attr "type" "sselog1")
20489 (set_attr "prefix_extra" "1")
20490 (set_attr "prefix" "orig,vex")
20491 (set_attr "btver2_decode" "double,double")
20492 (set_attr "mode" "TI")])
;; aesdec: one full AES decryption round.
20494 (define_insn "aesdec"
20495 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20496 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20497 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20501 aesdec\t{%2, %0|%0, %2}
20502 vaesdec\t{%2, %1, %0|%0, %1, %2}"
20503 [(set_attr "isa" "noavx,avx")
20504 (set_attr "type" "sselog1")
20505 (set_attr "prefix_extra" "1")
20506 (set_attr "prefix" "orig,vex")
20507 (set_attr "btver2_decode" "double,double")
20508 (set_attr "mode" "TI")])
;; aesdeclast: final AES decryption round.
20510 (define_insn "aesdeclast"
20511 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20512 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20513 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20514 UNSPEC_AESDECLAST))]
20517 aesdeclast\t{%2, %0|%0, %2}
20518 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
20519 [(set_attr "isa" "noavx,avx")
20520 (set_attr "type" "sselog1")
20521 (set_attr "prefix_extra" "1")
20522 (set_attr "prefix" "orig,vex")
20523 (set_attr "btver2_decode" "double,double")
20524 (set_attr "mode" "TI")])
;; aesimc: InvMixColumns transform; %v emits the v-prefix when AVX is on.
20526 (define_insn "aesimc"
20527 [(set (match_operand:V2DI 0 "register_operand" "=x")
20528 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
20531 "%vaesimc\t{%1, %0|%0, %1}"
20532 [(set_attr "type" "sselog1")
20533 (set_attr "prefix_extra" "1")
20534 (set_attr "prefix" "maybe_vex")
20535 (set_attr "mode" "TI")])
;; aeskeygenassist: round-key generation helper; operand 2 is the RCON byte.
20537 (define_insn "aeskeygenassist"
20538 [(set (match_operand:V2DI 0 "register_operand" "=x")
20539 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
20540 (match_operand:SI 2 "const_0_to_255_operand" "n")]
20541 UNSPEC_AESKEYGENASSIST))]
20543 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
20544 [(set_attr "type" "sselog1")
20545 (set_attr "prefix_extra" "1")
20546 (set_attr "length_immediate" "1")
20547 (set_attr "prefix" "maybe_vex")
20548 (set_attr "mode" "TI")])
;; pclmulqdq: carry-less 64x64->128 multiply; the immediate in operand 3
;; selects which qword of each source participates.
20550 (define_insn "pclmulqdq"
20551 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20552 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20553 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
20554 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
20558 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
20559 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20560 [(set_attr "isa" "noavx,avx")
20561 (set_attr "type" "sselog1")
20562 (set_attr "prefix_extra" "1")
20563 (set_attr "length_immediate" "1")
20564 (set_attr "prefix" "orig,vex")
20565 (set_attr "mode" "TI")])
;; vzeroall: builds a PARALLEL containing the volatile unspec plus one SET
;; to zero for each of the 16 (64-bit) or 8 (32-bit) SSE registers, so the
;; register allocator sees every register clobbered.
20567 (define_expand "avx_vzeroall"
20568 [(match_par_dup 0 [(const_int 0)])]
20571 int nregs = TARGET_64BIT ? 16 : 8;
20574 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
20576 XVECEXP (operands[0], 0, 0)
20577 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
20580 for (regno = 0; regno < nregs; regno++)
20581 XVECEXP (operands[0], 0, regno + 1)
20582 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
20583 CONST0_RTX (V8SImode));
;; Matching insn for the PARALLEL built above (output template elided).
20586 (define_insn "*avx_vzeroall"
20587 [(match_parallel 0 "vzeroall_operation"
20588 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
20591 [(set_attr "type" "sse")
20592 (set_attr "modrm" "0")
20593 (set_attr "memory" "none")
20594 (set_attr "prefix" "vex")
20595 (set_attr "btver2_decode" "vector")
20596 (set_attr "mode" "OI")])
20598 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
20599 ;; if the upper 128bits are unused. Initially we expand the instructions
20600 ;; as though they had no effect on the SSE registers, but later add SETs and
20601 ;; CLOBBERs to the PARALLEL to model the real effect.
20602 (define_expand "avx_vzeroupper"
20603 [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
;; Fully-populated form: recognized only once the PARALLEL carries one
;; entry per SSE register (nregs + 1 including the unspec).
20606 (define_insn "*avx_vzeroupper"
20607 [(match_parallel 0 "vzeroupper_pattern"
20608 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
20609 "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
20611 [(set_attr "type" "sse")
20612 (set_attr "modrm" "0")
20613 (set_attr "memory" "none")
20614 (set_attr "prefix" "vex")
20615 (set_attr "btver2_decode" "vector")
20616 (set_attr "mode" "OI")])
;; Partially-populated vzeroupper: after the epilogue is laid out, split it
;; into the full form, keeping existing per-register SETs and adding
;; explicit CLOBBERs for every SSE register not already mentioned.
20618 (define_insn_and_split "*avx_vzeroupper_1"
20619 [(match_parallel 0 "vzeroupper_pattern"
20620 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
20621 "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
20623 "&& epilogue_completed"
20626 /* For IPA-RA purposes, make it clear the instruction clobbers
20627 even XMM registers not mentioned explicitly in the pattern. */
20628 unsigned int nregs = TARGET_64BIT ? 16 : 8;
20629 unsigned int npats = XVECLEN (operands[0], 0);
20630 rtvec vec = rtvec_alloc (nregs + 1);
20631 RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
20632 for (unsigned int i = 0, j = 1; i < nregs; ++i)
;; Keep the existing SET if the j-th PARALLEL entry targets this register;
;; otherwise synthesize a CLOBBER for it.
20634 unsigned int regno = GET_SSE_REGNO (i);
20636 && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
20638 RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
20643 rtx reg = gen_rtx_REG (V2DImode, regno);
20644 RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
20647 operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
20649 [(set_attr "type" "sse")
20650 (set_attr "modrm" "0")
20651 (set_attr "memory" "none")
20652 (set_attr "prefix" "vex")
20653 (set_attr "btver2_decode" "vector")
20654 (set_attr "mode" "OI")])
;; ISA required for the EVEX-encoded form of vpbroadcast per element mode:
;; byte/word need AVX512BW, dword/qword only AVX512F.
20656 (define_mode_attr pbroadcast_evex_isa
20657 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
20658 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
20659 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
20660 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
;; vpbroadcast: replicate element 0 of an XMM-sized source across the
;; whole destination vector.
20662 (define_insn "avx2_pbroadcast<mode>"
20663 [(set (match_operand:VI 0 "register_operand" "=x,v")
20665 (vec_select:<ssescalarmode>
20666 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
20667 (parallel [(const_int 0)]))))]
20669 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
20670 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
20671 (set_attr "type" "ssemov")
20672 (set_attr "prefix_extra" "1")
20673 (set_attr "prefix" "vex,evex")
20674 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast element 0 of a 256-bit source: from memory use the scalar
;; memory form; from a register broadcast its low XMM half (%x1).
20676 (define_insn "avx2_pbroadcast<mode>_1"
20677 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
20678 (vec_duplicate:VI_256
20679 (vec_select:<ssescalarmode>
20680 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
20681 (parallel [(const_int 0)]))))]
20684 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
20685 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
20686 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
20687 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
20688 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
20689 (set_attr "type" "ssemov")
20690 (set_attr "prefix_extra" "1")
20691 (set_attr "prefix" "vex")
20692 (set_attr "mode" "<sseinsnmode>")])
;; vperm{d,q,ps,pd}: full-vector variable permute of 256/512-bit vectors,
;; indices supplied in operand 2; optional AVX512 masking via <mask_name>.
20694 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
20695 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
20696 (unspec:VI48F_256_512
20697 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
20698 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20700 "TARGET_AVX2 && <mask_mode512bit_condition>"
20701 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20702 [(set_attr "type" "sselog")
20703 (set_attr "prefix" "<mask_prefix2>")
20704 (set_attr "mode" "<sseinsnmode>")])
;; vpermb: byte-granular variable permute (AVX512VBMI).
20706 (define_insn "<avx512>_permvar<mode><mask_name>"
20707 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
20708 (unspec:VI1_AVX512VL
20709 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
20710 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20712 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
20713 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20714 [(set_attr "type" "sselog")
20715 (set_attr "prefix" "<mask_prefix2>")
20716 (set_attr "mode" "<sseinsnmode>")])
;; vpermw: word-granular variable permute (AVX512BW).
20718 (define_insn "<avx512>_permvar<mode><mask_name>"
20719 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20720 (unspec:VI2_AVX512VL
20721 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
20722 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20724 "TARGET_AVX512BW && <mask_mode512bit_condition>"
20725 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20726 [(set_attr "type" "sselog")
20727 (set_attr "prefix" "<mask_prefix2>")
20728 (set_attr "mode" "<sseinsnmode>")])
20730 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
20731 ;; If it so happens that the input is in memory, use vbroadcast.
20732 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case: memory input -> VBROADCASTSS of the selected element (the
;; address is offset by elt * 4 bytes); register input -> VPERMILPS with
;; immediate elt replicated into all four 2-bit fields (elt * 0x55).
;; NOTE(review): extract is missing some original lines (numbering gaps,
;; e.g. the vec_duplicate wrapper and case labels) — verify upstream.
20733 (define_insn "*avx_vperm_broadcast_v4sf"
20734 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
20736 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
20737 (match_parallel 2 "avx_vbroadcast_operand"
20738 [(match_operand 3 "const_int_operand" "C,n,n")])))]
20741 int elt = INTVAL (operands[3]);
20742 switch (which_alternative)
20746 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
20747 return "vbroadcastss\t{%1, %0|%0, %k1}";
20749 operands[2] = GEN_INT (elt * 0x55);
20750 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
20752 gcc_unreachable ();
20755 [(set_attr "type" "ssemov,ssemov,sselog1")
20756 (set_attr "prefix_extra" "1")
20757 (set_attr "length_immediate" "0,0,1")
20758 (set_attr "prefix" "maybe_evex")
20759 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast-of-element, split after reload:
;;  - memory input: rewritten to a plain vec_duplicate of the addressed
;;    element (address offset by elt * element size);
;;  - register input: in-lane shuffle (vpermilpd/ps immediate) followed by
;;    a cross-lane duplicate (vperm2f128, or for EVEX regs where no EVEX
;;    VPERM2F128 exists, vbroadcastss/vshuff32x4 — V8SF only, see assert).
;; NOTE(review): extract is missing some original lines (numbering gaps:
;; condition, braces, else-arms) — verify against upstream sse.md.
20761 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
20762 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
20764 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
20765 (match_parallel 2 "avx_vbroadcast_operand"
20766 [(match_operand 3 "const_int_operand" "C,n,n")])))]
20768 && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
20770 "&& reload_completed"
20771 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
20773 rtx op0 = operands[0], op1 = operands[1];
20774 int elt = INTVAL (operands[3]);
;; AVX2 + element 0: a direct register broadcast handles it.
20780 if (TARGET_AVX2 && elt == 0)
20782 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
20787 /* Shuffle element we care about into all elements of the 128-bit lane.
20788 The other lane gets shuffled too, but we don't care. */
20789 if (<MODE>mode == V4DFmode)
20790 mask = (elt & 1 ? 15 : 0);
20792 mask = (elt & 3) * 0x55;
20793 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
20795 /* Shuffle the lane we care about into both lanes of the dest. */
20796 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
20797 if (EXT_REX_SSE_REG_P (op0))
20799 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
20801 gcc_assert (<MODE>mode == V8SFmode);
20802 if ((mask & 1) == 0)
20803 emit_insn (gen_avx2_vec_dupv8sf (op0,
20804 gen_lowpart (V4SFmode, op0)));
20806 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
20807 GEN_INT (4), GEN_INT (5),
20808 GEN_INT (6), GEN_INT (7),
20809 GEN_INT (12), GEN_INT (13),
20810 GEN_INT (14), GEN_INT (15)));
20814 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
;; Memory case: narrow the MEM to the selected scalar element.
20818 operands[1] = adjust_address (op1, <ssescalarmode>mode,
20819 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Immediate-form VPERMILPD expander: decode the 8-bit immediate into an
;; explicit vec_select parallel.  For doubles, each element uses 1 mask
;; bit and stays within its 2-element (128-bit) lane (+ i rebases).
;; NOTE(review): extract is missing some original lines (numbering gaps)
;; throughout this region — verify against upstream sse.md.
20822 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20823 [(set (match_operand:VF2 0 "register_operand")
20825 (match_operand:VF2 1 "nonimmediate_operand")
20826 (match_operand:SI 2 "const_0_to_255_operand")))]
20827 "TARGET_AVX && <mask_mode512bit_condition>"
20829 int mask = INTVAL (operands[2]);
20830 rtx perm[<ssescalarnum>];
20833 for (i = 0; i < <ssescalarnum>; i = i + 2)
20835 perm[i] = GEN_INT (((mask >> i) & 1) + i);
20836 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
20840 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form VPERMILPS expander: 2 mask bits per element, the same
;; 8-bit pattern applied to every 4-element (128-bit) lane.
20843 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20844 [(set (match_operand:VF1 0 "register_operand")
20846 (match_operand:VF1 1 "nonimmediate_operand")
20847 (match_operand:SI 2 "const_0_to_255_operand")))]
20848 "TARGET_AVX && <mask_mode512bit_condition>"
20850 int mask = INTVAL (operands[2]);
20851 rtx perm[<ssescalarnum>];
20854 for (i = 0; i < <ssescalarnum>; i = i + 4)
20856 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
20857 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
20858 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
20859 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
20863 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
20866 ;; This pattern needs to come before the avx2_perm*/avx512f_perm*
20867 ;; patterns, as they have the same RTL representation (vpermilp*
20868 ;; being a subset of what vpermp* can do), but vpermilp* has shorter
20869 ;; latency as it never crosses lanes.
;; Matcher for the vec_select form: avx_vpermilp_parallel returns the
;; encodable immediate + 1 on success (0 means "not representable").
20870 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
20871 [(set (match_operand:VF 0 "register_operand" "=v")
20873 (match_operand:VF 1 "nonimmediate_operand" "vm")
20874 (match_parallel 2 ""
20875 [(match_operand 3 "const_int_operand")])))]
20876 "TARGET_AVX && <mask_mode512bit_condition>
20877 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
20879 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
20880 operands[2] = GEN_INT (mask);
20881 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
20883 [(set_attr "type" "sselog")
20884 (set_attr "prefix_extra" "1")
20885 (set_attr "length_immediate" "1")
20886 (set_attr "prefix" "<mask_prefix>")
20887 (set_attr "mode" "<sseinsnmode>")])
;; VPERMQ/VPERMPD immediate expander for 256-bit 8-byte-element modes:
;; the 8-bit immediate is split into four 2-bit element selectors.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
20889 (define_expand "avx2_perm<mode>"
20890 [(match_operand:VI8F_256 0 "register_operand")
20891 (match_operand:VI8F_256 1 "nonimmediate_operand")
20892 (match_operand:SI 2 "const_0_to_255_operand")]
20895 int mask = INTVAL (operands[2]);
20896 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
20897 GEN_INT ((mask >> 0) & 3),
20898 GEN_INT ((mask >> 2) & 3),
20899 GEN_INT ((mask >> 4) & 3),
20900 GEN_INT ((mask >> 6) & 3)));
;; Masked (AVX512VL) variant of the same expansion; operands 3/4 are the
;; merge source and the mask register.
20904 (define_expand "avx512vl_perm<mode>_mask"
20905 [(match_operand:VI8F_256 0 "register_operand")
20906 (match_operand:VI8F_256 1 "nonimmediate_operand")
20907 (match_operand:SI 2 "const_0_to_255_operand")
20908 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
20909 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20912 int mask = INTVAL (operands[2]);
20913 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
20914 GEN_INT ((mask >> 0) & 3),
20915 GEN_INT ((mask >> 2) & 3),
20916 GEN_INT ((mask >> 4) & 3),
20917 GEN_INT ((mask >> 6) & 3),
20918 operands[3], operands[4]));
;; Matching insn: reassembles the immediate from the four selectors.
20922 (define_insn "avx2_perm<mode>_1<mask_name>"
20923 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20924 (vec_select:VI8F_256
20925 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
20926 (parallel [(match_operand 2 "const_0_to_3_operand")
20927 (match_operand 3 "const_0_to_3_operand")
20928 (match_operand 4 "const_0_to_3_operand")
20929 (match_operand 5 "const_0_to_3_operand")])))]
20930 "TARGET_AVX2 && <mask_mode512bit_condition>"
20933 mask |= INTVAL (operands[2]) << 0;
20934 mask |= INTVAL (operands[3]) << 2;
20935 mask |= INTVAL (operands[4]) << 4;
20936 mask |= INTVAL (operands[5]) << 6;
20937 operands[2] = GEN_INT (mask);
20938 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
20940 [(set_attr "type" "sselog")
20941 (set_attr "prefix" "<mask_prefix2>")
20942 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit VPERMQ/VPERMPD immediate expander: the same 2-bit selector
;; pattern is applied to both 256-bit halves (indices 0-3 and 4-7),
;; hence the "+ 4" duplicates for the upper half.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
20944 (define_expand "avx512f_perm<mode>"
20945 [(match_operand:V8FI 0 "register_operand")
20946 (match_operand:V8FI 1 "nonimmediate_operand")
20947 (match_operand:SI 2 "const_0_to_255_operand")]
20950 int mask = INTVAL (operands[2]);
20951 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
20952 GEN_INT ((mask >> 0) & 3),
20953 GEN_INT ((mask >> 2) & 3),
20954 GEN_INT ((mask >> 4) & 3),
20955 GEN_INT ((mask >> 6) & 3),
20956 GEN_INT (((mask >> 0) & 3) + 4),
20957 GEN_INT (((mask >> 2) & 3) + 4),
20958 GEN_INT (((mask >> 4) & 3) + 4),
20959 GEN_INT (((mask >> 6) & 3) + 4)));
;; Masked variant; operands 3/4 are merge source and mask register.
20963 (define_expand "avx512f_perm<mode>_mask"
20964 [(match_operand:V8FI 0 "register_operand")
20965 (match_operand:V8FI 1 "nonimmediate_operand")
20966 (match_operand:SI 2 "const_0_to_255_operand")
20967 (match_operand:V8FI 3 "nonimm_or_0_operand")
20968 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20971 int mask = INTVAL (operands[2]);
20972 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
20973 GEN_INT ((mask >> 0) & 3),
20974 GEN_INT ((mask >> 2) & 3),
20975 GEN_INT ((mask >> 4) & 3),
20976 GEN_INT ((mask >> 6) & 3),
20977 GEN_INT (((mask >> 0) & 3) + 4),
20978 GEN_INT (((mask >> 2) & 3) + 4),
20979 GEN_INT (((mask >> 4) & 3) + 4),
20980 GEN_INT (((mask >> 6) & 3) + 4),
20981 operands[3], operands[4]));
;; Matching insn: the condition enforces that the upper-half selectors
;; mirror the lower-half ones (operand N+4 == operand N + 4), so a single
;; 8-bit immediate suffices.
20985 (define_insn "avx512f_perm<mode>_1<mask_name>"
20986 [(set (match_operand:V8FI 0 "register_operand" "=v")
20988 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
20989 (parallel [(match_operand 2 "const_0_to_3_operand")
20990 (match_operand 3 "const_0_to_3_operand")
20991 (match_operand 4 "const_0_to_3_operand")
20992 (match_operand 5 "const_0_to_3_operand")
20993 (match_operand 6 "const_4_to_7_operand")
20994 (match_operand 7 "const_4_to_7_operand")
20995 (match_operand 8 "const_4_to_7_operand")
20996 (match_operand 9 "const_4_to_7_operand")])))]
20997 "TARGET_AVX512F && <mask_mode512bit_condition>
20998 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
20999 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
21000 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
21001 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
21004 mask |= INTVAL (operands[2]) << 0;
21005 mask |= INTVAL (operands[3]) << 2;
21006 mask |= INTVAL (operands[4]) << 4;
21007 mask |= INTVAL (operands[5]) << 6;
21008 operands[2] = GEN_INT (mask);
21009 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
21011 [(set_attr "type" "sselog")
21012 (set_attr "prefix" "<mask_prefix2>")
21013 (set_attr "mode" "<sseinsnmode>")])
;; VPERM2I128: 128-bit-lane permute of two 256-bit integer sources,
;; controlled by an 8-bit immediate; modeled as an unspec.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
21015 (define_insn "avx2_permv2ti"
21016 [(set (match_operand:V4DI 0 "register_operand" "=x")
21018 [(match_operand:V4DI 1 "register_operand" "x")
21019 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
21020 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21023 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21024 [(set_attr "type" "sselog")
21025 (set_attr "prefix" "vex")
21026 (set_attr "mode" "OI")])
;; VBROADCASTSD: duplicate the low double of an XMM register into all
;; four V4DF elements.
21028 (define_insn "avx2_vec_dupv4df"
21029 [(set (match_operand:V4DF 0 "register_operand" "=v")
21030 (vec_duplicate:V4DF
21032 (match_operand:V2DF 1 "register_operand" "v")
21033 (parallel [(const_int 0)]))))]
21035 "vbroadcastsd\t{%1, %0|%0, %1}"
21036 [(set_attr "type" "sselog1")
21037 (set_attr "prefix" "maybe_evex")
21038 (set_attr "mode" "V4DF")])
;; EVEX broadcasts of element 0 of a (possibly full-width) source.
;; NOTE(review): extract is missing some original lines (numbering gaps,
;; e.g. enable conditions) — verify against upstream sse.md.
21040 (define_insn "<avx512>_vec_dup<mode>_1"
21041 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
21042 (vec_duplicate:VI_AVX512BW
21043 (vec_select:<ssescalarmode>
21044 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
21045 (parallel [(const_int 0)]))))]
21048 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
21049 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
21050 [(set_attr "type" "ssemov")
21051 (set_attr "prefix" "evex")
21052 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast from an XMM-sized source for 4/8-byte elements.
21054 (define_insn "<avx512>_vec_dup<mode><mask_name>"
21055 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
21056 (vec_duplicate:V48_AVX512VL
21057 (vec_select:<ssescalarmode>
21058 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
21059 (parallel [(const_int 0)]))))]
21062 /* There is no DF broadcast (in AVX-512*) to 128b register.
21063 Mimic it with integer variant. */
21064 if (<MODE>mode == V2DFmode)
21065 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
21067 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
21069 [(set_attr "type" "ssemov")
21070 (set_attr "prefix" "evex")
21071 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast for 1/2-byte elements (VPBROADCASTB/W).
21073 (define_insn "<avx512>_vec_dup<mode><mask_name>"
21074 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
21075 (vec_duplicate:VI12_AVX512VL
21076 (vec_select:<ssescalarmode>
21077 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
21078 (parallel [(const_int 0)]))))]
21080 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
21081 [(set_attr "type" "ssemov")
21082 (set_attr "prefix" "evex")
21083 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit-chunk broadcasts into 512-bit vectors.  Register source uses a
;; shuffle (no reg-to-reg broadcast form); memory source uses vbroadcast.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
21085 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
21086 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
21087 (vec_duplicate:V16FI
21088 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
21091 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
21092 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21093 [(set_attr "type" "ssemov")
21094 (set_attr "prefix" "evex")
21095 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit-chunk broadcast into 512-bit 8-element vectors (imm 0x44
;; replicates the low 2x128 pair).
21097 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
21098 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
21099 (vec_duplicate:V8FI
21100 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
21103 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
21104 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21105 [(set_attr "type" "ssemov")
21106 (set_attr "prefix" "evex")
21107 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar (vector-reg/mem, or GPR via %k1) to 1/2-byte-element
;; vectors.
21109 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
21110 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
21111 (vec_duplicate:VI12_AVX512VL
21112 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
21115 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
21116 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
21117 [(set_attr "type" "ssemov")
21118 (set_attr "prefix" "evex")
21119 (set_attr "mode" "<sseinsnmode>")])
;; Same for 4/8-byte elements; the GPR alternative is enabled only for
;; integer element modes (and DImode only on 64-bit targets).
21121 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
21122 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
21123 (vec_duplicate:V48_AVX512VL
21124 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
21126 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21127 [(set_attr "type" "ssemov")
21128 (set_attr "prefix" "evex")
21129 (set_attr "mode" "<sseinsnmode>")
21130 (set (attr "enabled")
21131 (if_then_else (eq_attr "alternative" "1")
21132 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
21133 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; Scalar-to-V4SF broadcast: AVX reg (vshufps imm 0), AVX mem
;; (vbroadcastss), or pre-AVX in-place shufps.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
21136 (define_insn "vec_dupv4sf"
21137 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
21138 (vec_duplicate:V4SF
21139 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
21142 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
21143 vbroadcastss\t{%1, %0|%0, %1}
21144 shufps\t{$0, %0, %0|%0, %0, 0}"
21145 [(set_attr "isa" "avx,avx,noavx")
21146 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
21147 (set_attr "length_immediate" "1,0,1")
21148 (set_attr "prefix_extra" "0,1,*")
21149 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
21150 (set_attr "mode" "V4SF")])
;; SImode broadcast: pshufd (SSE2), vbroadcastss (AVX mem), or shufps.
21152 (define_insn "*vec_dupv4si"
21153 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
21154 (vec_duplicate:V4SI
21155 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
21158 %vpshufd\t{$0, %1, %0|%0, %1, 0}
21159 vbroadcastss\t{%1, %0|%0, %1}
21160 shufps\t{$0, %0, %0|%0, %0, 0}"
21161 [(set_attr "isa" "sse2,avx,noavx")
21162 (set_attr "type" "sselog1,ssemov,sselog1")
21163 (set_attr "length_immediate" "1,0,1")
21164 (set_attr "prefix_extra" "0,1,*")
21165 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
21166 (set_attr "mode" "TI,V4SF,V4SF")])
;; DImode broadcast: punpcklqdq / vpunpcklqdq / movddup (SSE3) forms.
21168 (define_insn "*vec_dupv2di"
21169 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
21170 (vec_duplicate:V2DI
21171 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
21175 vpunpcklqdq\t{%d1, %0|%0, %d1}
21176 %vmovddup\t{%1, %0|%0, %1}
21178 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
21179 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
21180 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
21181 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Broadcast a 128-bit memory chunk into both halves of a 256-bit integer
;; vector; AVX512DQ/VL alternatives use the EVEX x2/x4 broadcast forms.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
21183 (define_insn "avx2_vbroadcasti128_<mode>"
21184 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
21186 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
21190 vbroadcasti128\t{%1, %0|%0, %1}
21191 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
21192 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
21193 [(set_attr "isa" "*,avx512dq,avx512vl")
21194 (set_attr "type" "ssemov")
21195 (set_attr "prefix_extra" "1")
21196 (set_attr "prefix" "vex,evex,evex")
21197 (set_attr "mode" "OI")])
21199 ;; Modes handled by AVX vec_dup patterns.
21200 (define_mode_iterator AVX_VEC_DUP_MODE
21201 [V8SI V8SF V4DI V4DF])
;; Scalar-suffix attribute used by the AVX (non-AVX2) vbroadcastss/sd
;; alternatives of vec_dup<mode> below.
21202 (define_mode_attr vecdupssescalarmodesuffix
21203 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
21204 ;; Modes handled by AVX2 vec_dup patterns.
21205 (define_mode_iterator AVX2_VEC_DUP_MODE
21206 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; Scalar broadcast for AVX2 byte/word/dword modes; the GPR alternative
;; ($r) is disabled when AVX512VL can broadcast from GPRs directly, and
;; is speed-preferred only with fast inter-unit moves.
;; NOTE(review): extract is missing some original lines (numbering gaps,
;; incl. the define_split headers at 21245/21268) — verify upstream.
21208 (define_insn "*vec_dup<mode>"
21209 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
21210 (vec_duplicate:AVX2_VEC_DUP_MODE
21211 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
21214 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
21215 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
21217 [(set_attr "isa" "*,*,noavx512vl")
21218 (set_attr "type" "ssemov")
21219 (set_attr "prefix_extra" "1")
21220 (set_attr "prefix" "maybe_evex")
21221 (set_attr "mode" "<sseinsnmode>")
21222 (set (attr "preferred_for_speed")
21223 (cond [(eq_attr "alternative" "2")
21224 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
21226 (symbol_ref "true")))])
;; Scalar broadcast for 256-bit 4/8-byte modes: AVX2 broadcast forms,
;; AVX-only vbroadcastss/sd from memory, and an AVX512F form that
;; broadcasts into the full %g0 register.
21228 (define_insn "vec_dup<mode>"
21229 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
21230 (vec_duplicate:AVX_VEC_DUP_MODE
21231 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
21234 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
21235 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
21236 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
21237 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
21239 [(set_attr "type" "ssemov")
21240 (set_attr "prefix_extra" "1")
21241 (set_attr "prefix" "maybe_evex")
21242 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
21243 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
;; Split a GPR-source broadcast into vec_set of element 0 followed by an
;; XMM-source pbroadcast, when no direct GPR broadcast insn exists.
21246 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
21247 (vec_duplicate:AVX2_VEC_DUP_MODE
21248 (match_operand:<ssescalarmode> 1 "register_operand")))]
21250 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
21251 available, because then we can broadcast from GPRs directly.
21252 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
21253 for V*SI mode it requires just -mavx512vl. */
21254 && !(TARGET_AVX512VL
21255 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
21256 && reload_completed && GENERAL_REG_P (operands[1])"
21259 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
21260 CONST0_RTX (V4SImode),
21261 gen_lowpart (SImode, operands[1])));
21262 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
21263 gen_lowpart (<ssexmmmode>mode,
;; AVX-without-AVX2 split: duplicate into the low 128-bit half, then
;; vec_concat the half with itself.
21269 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
21270 (vec_duplicate:AVX_VEC_DUP_MODE
21271 (match_operand:<ssescalarmode> 1 "register_operand")))]
21272 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
21273 [(set (match_dup 2)
21274 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
21276 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
21277 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
;; 128-bit chunk broadcast into a 256-bit vector: memory source uses
;; vbroadcast; register source ("0") uses vinsert of the low half into
;; the high half, or vperm2f128; EVEX alternatives mirror these.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
21279 (define_insn "avx_vbroadcastf128_<mode>"
21280 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
21282 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
21286 vbroadcast<i128>\t{%1, %0|%0, %1}
21287 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
21288 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
21289 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
21290 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
21291 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
21292 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
21293 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
21294 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
21295 (set_attr "prefix_extra" "1")
21296 (set_attr "length_immediate" "0,1,1,0,1,0,1")
21297 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
21298 (set_attr "mode" "<sseinsnmode>")])
21300 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
21301 (define_mode_iterator VI4F_BRCST32x2
21302 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
21303 V16SF (V8SF "TARGET_AVX512VL")])
;; Maps an 8-element 64-bit mode to its 2-element 128-bit chunk mode.
21305 (define_mode_attr 64x2mode
21306 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
;; Maps 32-bit-element modes to the 2-element (64-bit) chunk mode.
21308 (define_mode_attr 32x2mode
21309 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
21310 (V8SF "V2SF") (V4SI "V2SI")])
;; VBROADCAST[IF]32X2: duplicate a 64-bit pair of 32-bit elements
;; (elements 0 and 1 of the XMM source) across the destination.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
21312 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
21313 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
21314 (vec_duplicate:VI4F_BRCST32x2
21315 (vec_select:<32x2mode>
21316 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
21317 (parallel [(const_int 0) (const_int 1)]))))]
21319 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
21320 [(set_attr "type" "ssemov")
21321 (set_attr "prefix_extra" "1")
21322 (set_attr "prefix" "evex")
21323 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit chunk broadcast into 256-bit 32-bit-element vectors: register
;; source via vshuf*32x4 (imm 0), memory source via vbroadcast*32x4.
21325 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
21326 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
21327 (vec_duplicate:VI4F_256
21328 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
21331 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
21332 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21333 [(set_attr "type" "ssemov")
21334 (set_attr "prefix_extra" "1")
21335 (set_attr "prefix" "evex")
21336 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit chunk broadcast into 512-bit 16-element vectors (imm 0x44
;; replicates the low half) / vbroadcast*32x8 from memory.
21338 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
21339 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
21340 (vec_duplicate:V16FI
21341 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
21344 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
21345 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21346 [(set_attr "type" "ssemov")
21347 (set_attr "prefix_extra" "1")
21348 (set_attr "prefix" "evex")
21349 (set_attr "mode" "<sseinsnmode>")])
21351 ;; For broadcast[i|f]64x2
21352 (define_mode_iterator VI8F_BRCST64x2
21353 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; VBROADCAST[IF]64X2: 128-bit chunk (two 64-bit elements) broadcast;
;; register source uses vshuf*64x2 with imm 0.
21355 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
21356 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
21357 (vec_duplicate:VI8F_BRCST64x2
21358 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
21361 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
21362 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21363 [(set_attr "type" "ssemov")
21364 (set_attr "prefix_extra" "1")
21365 (set_attr "prefix" "evex")
21366 (set_attr "mode" "<sseinsnmode>")])
;; VPBROADCASTMB2Q: broadcast a mask register (QImode) into every qword
;; element.  Modeled with an operand wrapper elided in this extract.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
21368 (define_insn "avx512cd_maskb_vec_dup<mode>"
21369 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21370 (vec_duplicate:VI8_AVX512VL
21372 (match_operand:QI 1 "register_operand" "k"))))]
21374 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
21375 [(set_attr "type" "mskmov")
21376 (set_attr "prefix" "evex")
21377 (set_attr "mode" "XI")])
;; VPBROADCASTMW2D: broadcast an HImode mask register into dword elements.
21379 (define_insn "avx512cd_maskw_vec_dup<mode>"
21380 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
21381 (vec_duplicate:VI4_AVX512VL
21383 (match_operand:HI 1 "register_operand" "k"))))]
21385 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
21386 [(set_attr "type" "mskmov")
21387 (set_attr "prefix" "evex")
21388 (set_attr "mode" "XI")])
;; Variable in-lane permute (VPERMILPS/PD with a vector selector).
21390 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
21391 [(set (match_operand:VF 0 "register_operand" "=v")
21393 [(match_operand:VF 1 "register_operand" "v")
21394 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
21396 "TARGET_AVX && <mask_mode512bit_condition>"
21397 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21398 [(set_attr "type" "sselog")
21399 (set_attr "prefix_extra" "1")
21400 (set_attr "btver2_decode" "vector")
21401 (set_attr "prefix" "<mask_prefix>")
21402 (set_attr "mode" "<sseinsnmode>")])
;; Modes supporting the two-table permutes (VPERMI2*/VPERMT2*); VL-gated
;; entries require AVX512VL, byte/word entries additionally VBMI/BW.
;; NOTE(review): extract is missing some original lines (numbering gaps,
;; e.g. the first element line of VPERMI2I) — verify upstream.
21404 (define_mode_iterator VPERMI2
21405 [V16SI V16SF V8DI V8DF
21406 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
21407 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
21408 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
21409 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
21410 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
21411 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
21412 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
21413 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
;; Integer-only subset of VPERMI2 (used where the float patterns differ).
21415 (define_mode_iterator VPERMI2I
21417 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
21418 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
21419 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
21420 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
21421 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
21422 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
;; VPERMI2: two-table permute where the index operand (2) is also the
;; mask-merge fallback; the expander forces it into a register and
;; builds a same-mode lowpart view of it for the vec_merge.
;; NOTE(review): extract is missing some original lines (numbering gaps:
;; unspec wrappers, conditions) — verify against upstream sse.md.
21424 (define_expand "<avx512>_vpermi2var<mode>3_mask"
21425 [(set (match_operand:VPERMI2 0 "register_operand")
21428 [(match_operand:<sseintvecmode> 2 "register_operand")
21429 (match_operand:VPERMI2 1 "register_operand")
21430 (match_operand:VPERMI2 3 "nonimmediate_operand")]
21433 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
21436 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
21437 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
;; Integer-element masked VPERMI2: index operand tied to output ("0").
21440 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
21441 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
21442 (vec_merge:VPERMI2I
21444 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
21445 (match_operand:VPERMI2I 1 "register_operand" "v")
21446 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
21449 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21451 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21452 [(set_attr "type" "sselog")
21453 (set_attr "prefix" "evex")
21454 (set_attr "mode" "<sseinsnmode>")])
;; Float-element masked VPERMI2: merge fallback is a subreg view of the
;; integer index operand.
21456 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
21457 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21458 (vec_merge:VF_AVX512VL
21459 (unspec:VF_AVX512VL
21460 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
21461 (match_operand:VF_AVX512VL 1 "register_operand" "v")
21462 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
21464 (subreg:VF_AVX512VL (match_dup 2) 0)
21465 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21467 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21468 [(set_attr "type" "sselog")
21469 (set_attr "prefix" "evex")
21470 (set_attr "mode" "<sseinsnmode>")])
;; VPERMT2 zero-masked expander: delegates to the _maskz_1 pattern with a
;; zero vector as the merge source.
21472 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
21473 [(match_operand:VPERMI2 0 "register_operand")
21474 (match_operand:<sseintvecmode> 1 "register_operand")
21475 (match_operand:VPERMI2 2 "register_operand")
21476 (match_operand:VPERMI2 3 "nonimmediate_operand")
21477 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21480 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
21481 operands[0], operands[1], operands[2], operands[3],
21482 CONST0_RTX (<MODE>mode), operands[4]));
;; Unmasked/zero-masked two-table permute: emitted as vpermt2 when the
;; table operand is tied to the output, vpermi2 when the index is.
21486 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
21487 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
21489 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
21490 (match_operand:VPERMI2 2 "register_operand" "0,v")
21491 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
21495 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
21496 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
21497 [(set_attr "type" "sselog")
21498 (set_attr "prefix" "evex")
21499 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked VPERMT2 (first table operand tied to output).
21501 (define_insn "<avx512>_vpermt2var<mode>3_mask"
21502 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
21505 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
21506 (match_operand:VPERMI2 2 "register_operand" "0")
21507 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
21510 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21512 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21513 [(set_attr "type" "sselog")
21514 (set_attr "prefix" "evex")
21515 (set_attr "mode" "<sseinsnmode>")])
;; VPERM2F128 expander: when the immediate's zeroing bits (0x88) are
;; clear, lower to an explicit vec_select over the concatenation of the
;; two sources (so later passes can optimize it); otherwise keep unspec.
;; NOTE(review): extract is missing some original lines (numbering gaps) —
;; verify against upstream sse.md.
21517 (define_expand "avx_vperm2f128<mode>3"
21518 [(set (match_operand:AVX256MODE2P 0 "register_operand")
21519 (unspec:AVX256MODE2P
21520 [(match_operand:AVX256MODE2P 1 "register_operand")
21521 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
21522 (match_operand:SI 3 "const_0_to_255_operand")]
21523 UNSPEC_VPERMIL2F128))]
21526 int mask = INTVAL (operands[3]);
21527 if ((mask & 0x88) == 0)
21529 rtx perm[<ssescalarnum>], t1, t2;
21530 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
;; Low destination lane: selected by imm bits 0-1 (lane granularity).
21532 base = (mask & 3) * nelt2;
21533 for (i = 0; i < nelt2; ++i)
21534 perm[i] = GEN_INT (base + i);
;; High destination lane: selected by imm bits 4-5.
21536 base = ((mask >> 4) & 3) * nelt2;
21537 for (i = 0; i < nelt2; ++i)
21538 perm[i + nelt2] = GEN_INT (base + i);
21540 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
21541 operands[1], operands[2]);
21542 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
21543 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
21544 t2 = gen_rtx_SET (operands[0], t2);
21550 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
21551 ;; means that in order to represent this properly in rtl we'd have to
21552 ;; nest *another* vec_concat with a zero operand and do the select from
21553 ;; a 4x wide vector. That doesn't seem very nice.
21554 (define_insn "*avx_vperm2f128<mode>_full"
21555 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
21556 (unspec:AVX256MODE2P
21557 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
21558 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
21559 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21560 UNSPEC_VPERMIL2F128))]
21562 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21563 [(set_attr "type" "sselog")
21564 (set_attr "prefix_extra" "1")
21565 (set_attr "length_immediate" "1")
21566 (set_attr "prefix" "vex")
21567 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form (no zeroed lanes): avx_vperm2f128_parallel returns the
;; immediate + 1 when encodable; some masks simplify to vinsert<i128>.
21569 (define_insn "*avx_vperm2f128<mode>_nozero"
21570 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
21571 (vec_select:AVX256MODE2P
21572 (vec_concat:<ssedoublevecmode>
21573 (match_operand:AVX256MODE2P 1 "register_operand" "x")
21574 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
21575 (match_parallel 3 ""
21576 [(match_operand 4 "const_int_operand")])))]
21578 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
21580 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
21582 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
21584 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
21585 operands[3] = GEN_INT (mask);
21586 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
21588 [(set_attr "type" "sselog")
21589 (set_attr "prefix_extra" "1")
21590 (set_attr "length_immediate" "1")
21591 (set_attr "prefix" "vex")
21592 (set_attr "mode" "<sseinsnmode>")])
;; Single-source SSSE3 palignr matched as a vec_select whose parallel
;; satisfies palignr_operand.  Operand 3 is the first selected element
;; index; the prepare code below rewrites operand 2 into the byte shift
;; count the instruction takes.
21594 (define_insn "*ssse3_palignr<mode>_perm"
21595 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
21597 (match_operand:V_128 1 "register_operand" "0,x,v")
21598 (match_parallel 2 "palignr_operand"
21599 [(match_operand 3 "const_int_operand" "n,n,n")])))]
;; Byte shift = first selected element index * element size in bytes.
21602 operands[2] = (GEN_INT (INTVAL (operands[3])
21603 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
21605 switch (which_alternative)
;; Alternative 0: legacy SSSE3 two-operand form (destructive on %0).
21608 return "palignr\t{%2, %1, %0|%0, %1, %2}";
;; AVX/AVX512BW three-operand form; same register used for both sources.
21611 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
21613 gcc_unreachable ();
21616 [(set_attr "isa" "noavx,avx,avx512bw")
21617 (set_attr "type" "sseishft")
21618 (set_attr "atom_unit" "sishuf")
21619 (set_attr "prefix_data16" "1,*,*")
21620 (set_attr "prefix_extra" "1")
21621 (set_attr "length_immediate" "1")
21622 (set_attr "prefix" "orig,vex,evex")])
;; Masked 128-bit insertion into a 256-bit vector (AVX-512VL builtins).
;; Operand 3 (0 or 1) selects the low- or high-half insert pattern;
;; operands 4/5 are the merge source and the write mask.
21624 (define_expand "avx512vl_vinsert<mode>"
21625 [(match_operand:VI48F_256 0 "register_operand")
21626 (match_operand:VI48F_256 1 "register_operand")
21627 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
21628 (match_operand:SI 3 "const_0_to_1_operand")
21629 (match_operand:VI48F_256 4 "register_operand")
21630 (match_operand:<avx512fmaskmode> 5 "register_operand")]
21633 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21635 switch (INTVAL (operands[3]))
21638 insn = gen_vec_set_lo_<mode>_mask;
21641 insn = gen_vec_set_hi_<mode>_mask;
21644 gcc_unreachable ();
21647 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Unmasked AVX vinsertf128: same low/high dispatch on operand 3, but
;; through the plain (non-_mask) vec_set patterns.
21652 (define_expand "avx_vinsertf128<mode>"
21653 [(match_operand:V_256 0 "register_operand")
21654 (match_operand:V_256 1 "register_operand")
21655 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
21656 (match_operand:SI 3 "const_0_to_1_operand")]
21659 rtx (*insn)(rtx, rtx, rtx);
21661 switch (INTVAL (operands[3]))
21664 insn = gen_vec_set_lo_<mode>;
21667 insn = gen_vec_set_hi_<mode>;
21670 gcc_unreachable ();
21673 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the LOW 128-bit half of a 256-bit 64-bit-element vector
;; (operand 2 becomes elements 0..1, high half of operand 1 is kept).
;; Picks the narrowest available encoding: AVX512DQ 64x2, AVX512VL 32x4,
;; else plain AVX vinsert[fi]128.
21677 (define_insn "vec_set_lo_<mode><mask_name>"
21678 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
21679 (vec_concat:VI8F_256
21680 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
21681 (vec_select:<ssehalfvecmode>
21682 (match_operand:VI8F_256 1 "register_operand" "v")
21683 (parallel [(const_int 2) (const_int 3)]))))]
21684 "TARGET_AVX && <mask_avx512dq_condition>"
21686 if (TARGET_AVX512DQ)
21687 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21688 else if (TARGET_AVX512VL)
21689 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21691 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21693 [(set_attr "type" "sselog")
21694 (set_attr "prefix_extra" "1")
21695 (set_attr "length_immediate" "1")
21696 (set_attr "prefix" "vex")
21697 (set_attr "mode" "<sseinsnmode>")])
;; Replace the HIGH 128-bit half (elements 2..3) of a 256-bit
;; 64-bit-element vector; low half of operand 1 is kept.
21699 (define_insn "vec_set_hi_<mode><mask_name>"
21700 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
21701 (vec_concat:VI8F_256
21702 (vec_select:<ssehalfvecmode>
21703 (match_operand:VI8F_256 1 "register_operand" "v")
21704 (parallel [(const_int 0) (const_int 1)]))
21705 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21706 "TARGET_AVX && <mask_avx512dq_condition>"
21708 if (TARGET_AVX512DQ)
21709 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21710 else if (TARGET_AVX512VL)
21711 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21713 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21715 [(set_attr "type" "sselog")
21716 (set_attr "prefix_extra" "1")
21717 (set_attr "length_immediate" "1")
21718 (set_attr "prefix" "vex")
21719 (set_attr "mode" "<sseinsnmode>")])
;; Same low-half replacement for 256-bit 32-bit-element vectors
;; (elements 4..7 of operand 1 are the kept high half).
21721 (define_insn "vec_set_lo_<mode><mask_name>"
21722 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21723 (vec_concat:VI4F_256
21724 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
21725 (vec_select:<ssehalfvecmode>
21726 (match_operand:VI4F_256 1 "register_operand" "v")
21727 (parallel [(const_int 4) (const_int 5)
21728 (const_int 6) (const_int 7)]))))]
21731 if (TARGET_AVX512VL)
21732 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21734 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21736 [(set_attr "type" "sselog")
21737 (set_attr "prefix_extra" "1")
21738 (set_attr "length_immediate" "1")
21739 (set_attr "prefix" "vex")
21740 (set_attr "mode" "<sseinsnmode>")])
;; High-half replacement for 256-bit 32-bit-element vectors.
21742 (define_insn "vec_set_hi_<mode><mask_name>"
21743 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21744 (vec_concat:VI4F_256
21745 (vec_select:<ssehalfvecmode>
21746 (match_operand:VI4F_256 1 "register_operand" "v")
21747 (parallel [(const_int 0) (const_int 1)
21748 (const_int 2) (const_int 3)]))
21749 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21752 if (TARGET_AVX512VL)
21753 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21755 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21757 [(set_attr "type" "sselog")
21758 (set_attr "prefix_extra" "1")
21759 (set_attr "length_immediate" "1")
21760 (set_attr "prefix" "vex")
21761 (set_attr "mode" "<sseinsnmode>")])
;; 16/8-bit element variants of the half-vector insert.  These have no
;; mask forms; alternative 0 is AVX2 vinserti128 (vex), alternative 1 is
;; the EVEX-encoded vinserti32x4 for AVX-512VL register files.
;; Replace the low 8 halfwords of a V16HI; keep elements 8..15 of %1.
21763 (define_insn "vec_set_lo_v16hi"
21764 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21766 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
21768 (match_operand:V16HI 1 "register_operand" "x,v")
21769 (parallel [(const_int 8) (const_int 9)
21770 (const_int 10) (const_int 11)
21771 (const_int 12) (const_int 13)
21772 (const_int 14) (const_int 15)]))))]
21775 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21776 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21777 [(set_attr "type" "sselog")
21778 (set_attr "prefix_extra" "1")
21779 (set_attr "length_immediate" "1")
21780 (set_attr "prefix" "vex,evex")
21781 (set_attr "mode" "OI")])
;; Replace the high 8 halfwords of a V16HI; keep elements 0..7 of %1.
21783 (define_insn "vec_set_hi_v16hi"
21784 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21787 (match_operand:V16HI 1 "register_operand" "x,v")
21788 (parallel [(const_int 0) (const_int 1)
21789 (const_int 2) (const_int 3)
21790 (const_int 4) (const_int 5)
21791 (const_int 6) (const_int 7)]))
21792 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
21795 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21796 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21797 [(set_attr "type" "sselog")
21798 (set_attr "prefix_extra" "1")
21799 (set_attr "length_immediate" "1")
21800 (set_attr "prefix" "vex,evex")
21801 (set_attr "mode" "OI")])
;; Replace the low 16 bytes of a V32QI; keep elements 16..31 of %1.
21803 (define_insn "vec_set_lo_v32qi"
21804 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21806 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
21808 (match_operand:V32QI 1 "register_operand" "x,v")
21809 (parallel [(const_int 16) (const_int 17)
21810 (const_int 18) (const_int 19)
21811 (const_int 20) (const_int 21)
21812 (const_int 22) (const_int 23)
21813 (const_int 24) (const_int 25)
21814 (const_int 26) (const_int 27)
21815 (const_int 28) (const_int 29)
21816 (const_int 30) (const_int 31)]))))]
21819 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21820 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21821 [(set_attr "type" "sselog")
21822 (set_attr "prefix_extra" "1")
21823 (set_attr "length_immediate" "1")
21824 (set_attr "prefix" "vex,evex")
21825 (set_attr "mode" "OI")])
;; Replace the high 16 bytes of a V32QI; keep elements 0..15 of %1.
21827 (define_insn "vec_set_hi_v32qi"
21828 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21831 (match_operand:V32QI 1 "register_operand" "x,v")
21832 (parallel [(const_int 0) (const_int 1)
21833 (const_int 2) (const_int 3)
21834 (const_int 4) (const_int 5)
21835 (const_int 6) (const_int 7)
21836 (const_int 8) (const_int 9)
21837 (const_int 10) (const_int 11)
21838 (const_int 12) (const_int 13)
21839 (const_int 14) (const_int 15)]))
21840 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
21843 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21844 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21845 [(set_attr "type" "sselog")
21846 (set_attr "prefix_extra" "1")
21847 (set_attr "length_immediate" "1")
21848 (set_attr "prefix" "vex,evex")
21849 (set_attr "mode" "OI")])
;; AVX/AVX2 conditional loads: vmaskmovps/pd (AVX) or vpmaskmovd/q (AVX2),
;; with the per-element condition in vector register operand 2.
21851 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
21852 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
21854 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
21855 (match_operand:V48_AVX2 1 "memory_operand" "m")]
21858 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
21859 [(set_attr "type" "sselog1")
21860 (set_attr "prefix_extra" "1")
21861 (set_attr "prefix" "vex")
21862 (set_attr "btver2_decode" "vector")
21863 (set_attr "mode" "<sseinsnmode>")])
;; Matching conditional store: elements whose condition bit is clear are
;; left untouched in memory (hence operand 0 is "+m", read-modify-write).
21865 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
21866 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
21868 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
21869 (match_operand:V48_AVX2 2 "register_operand" "x")
21873 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21874 [(set_attr "type" "sselog1")
21875 (set_attr "prefix_extra" "1")
21876 (set_attr "prefix" "vex")
21877 (set_attr "btver2_decode" "vector")
21878 (set_attr "mode" "<sseinsnmode>")])
;; Standard-name expanders the vectorizer uses.  The AVX form takes a
;; same-width integer vector mask ...
21880 (define_expand "maskload<mode><sseintvecmodelower>"
21881 [(set (match_operand:V48_AVX2 0 "register_operand")
21883 [(match_operand:<sseintvecmode> 2 "register_operand")
21884 (match_operand:V48_AVX2 1 "memory_operand")]
;; ... while the AVX-512 forms take a k-register mask and are expressed
;; as a vec_merge of the memory source.
21888 (define_expand "maskload<mode><avx512fmaskmodelower>"
21889 [(set (match_operand:V48_AVX512VL 0 "register_operand")
21890 (vec_merge:V48_AVX512VL
21891 (match_operand:V48_AVX512VL 1 "memory_operand")
21893 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Byte/word element masked load (AVX512BW-style VI12 modes).
21896 (define_expand "maskload<mode><avx512fmaskmodelower>"
21897 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
21898 (vec_merge:VI12_AVX512VL
21899 (match_operand:VI12_AVX512VL 1 "memory_operand")
21901 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; maskstore counterparts of the three expanders above.
21904 (define_expand "maskstore<mode><sseintvecmodelower>"
21905 [(set (match_operand:V48_AVX2 0 "memory_operand")
21907 [(match_operand:<sseintvecmode> 2 "register_operand")
21908 (match_operand:V48_AVX2 1 "register_operand")
21913 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21914 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
21915 (vec_merge:V48_AVX512VL
21916 (match_operand:V48_AVX512VL 1 "register_operand")
21918 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21921 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21922 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
21923 (vec_merge:VI12_AVX512VL
21924 (match_operand:VI12_AVX512VL 1 "register_operand")
21926 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Conditional branch on a whole-vector comparison (standard cbranch
;; pattern).  The compare sets the flags register and the branch tests
;; eq/ne via bt_comparison_operator; everything is funnelled through
;; ix86_expand_branch, which emits the real compare+jump sequence.
21929 (define_expand "cbranch<mode>4"
21930 [(set (reg:CC FLAGS_REG)
21931 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
21932 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
21933 (set (pc) (if_then_else
21934 (match_operator 0 "bt_comparison_operator"
21935 [(reg:CC FLAGS_REG) (const_int 0)])
21936 (label_ref (match_operand 3))
21940 ix86_expand_branch (GET_CODE (operands[0]),
21941 operands[1], operands[2], operands[3]);
;; Cast a 128-bit vector to 256/512 bits with undefined upper half: the
;; upper part is an UNSPEC_CAST of zero, i.e. "don't care".  After reload
;; this degenerates into a plain move of the low half (or a subreg view
;; of the destination when it is a register), so it usually costs nothing.
21946 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
21947 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
21948 (vec_concat:AVX256MODE2P
21949 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
21950 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
21951 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21953 "&& reload_completed"
21954 [(set (match_dup 0) (match_dup 1))]
;; Register destination: move only into its low half.
21956 if (REG_P (operands[0]))
21957 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
;; Otherwise widen the source so the full-width store is well-formed.
21959 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21960 <ssehalfvecmode>mode);
21963 ;; Modes handled by vec_init expanders.
21964 (define_mode_iterator VEC_INIT_MODE
21965 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21966 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21967 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21968 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
21969 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21970 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
21971 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
21973 ;; Likewise, but for initialization from half sized vectors.
21974 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
21975 (define_mode_iterator VEC_INIT_HALF_MODE
21976 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21977 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21978 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21979 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
21980 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21981 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
21982 (V4TI "TARGET_AVX512F")])
;; vec_init from individual scalar elements; all work is delegated to
;; ix86_expand_vector_init.
21984 (define_expand "vec_init<mode><ssescalarmodelower>"
21985 [(match_operand:VEC_INIT_MODE 0 "register_operand")
21989 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init from two half-width vectors; same delegation.
21993 (define_expand "vec_init<mode><ssehalfvecmodelower>"
21994 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
21998 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Per-element variable-count shifts (AVX2 vpsravd etc., extended by
;; AVX-512 to more modes and masking).
;; Arithmetic right shift, 32/64-bit elements.
22002 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
22003 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
22004 (ashiftrt:VI48_AVX512F_AVX512VL
22005 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
22006 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
22007 "TARGET_AVX2 && <mask_mode512bit_condition>"
22008 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22009 [(set_attr "type" "sseishft")
22010 (set_attr "prefix" "maybe_evex")
22011 (set_attr "mode" "<sseinsnmode>")])
;; Arithmetic right shift, 16-bit elements (AVX512BW vpsravw).
22013 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
22014 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22015 (ashiftrt:VI2_AVX512VL
22016 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
22017 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
22019 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22020 [(set_attr "type" "sseishft")
22021 (set_attr "prefix" "maybe_evex")
22022 (set_attr "mode" "<sseinsnmode>")])
;; Logical left/right shifts (any_lshift -> vpsllv / vpsrlv), 32/64-bit
;; elements.
22024 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
22025 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
22026 (any_lshift:VI48_AVX512F
22027 (match_operand:VI48_AVX512F 1 "register_operand" "v")
22028 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
22029 "TARGET_AVX2 && <mask_mode512bit_condition>"
22030 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22031 [(set_attr "type" "sseishft")
22032 (set_attr "prefix" "maybe_evex")
22033 (set_attr "mode" "<sseinsnmode>")])
;; Logical left/right shifts, 16-bit elements.
22035 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
22036 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22037 (any_lshift:VI2_AVX512VL
22038 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
22039 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
22041 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22042 [(set_attr "type" "sseishft")
22043 (set_attr "prefix" "maybe_evex")
22044 (set_attr "mode" "<sseinsnmode>")])
;; Build a 256/512-bit vector from two half-width pieces.
;; Alternatives 0/1: real concat of two register/memory halves via
;; vinsert (the widest-granularity insert the ISA level allows).
;; Alternatives 2/3: operand 2 is the zero vector, so this is just a
;; zero-extending load/move of the low half — emitted as a plain vector
;; move of the appropriate width and alignment.
22046 (define_insn "avx_vec_concat<mode>"
22047 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
22048 (vec_concat:V_256_512
22049 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
22050 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
22052 && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
22053 || !MEM_P (operands[1]))"
22055 switch (which_alternative)
;; AVX/AVX2: single vinsert[fi]128 of the high half.
22058 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
;; EVEX alternative: pick the insert granularity from the vector size
;; and element size (32x8/64x4 for 512-bit, 64x2/32x4 for 256-bit).
22060 if (<MODE_SIZE> == 64)
22062 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
22063 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
22065 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
22069 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22070 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
22072 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
;; Zero-high-half alternatives: choose the move mnemonic from the insn
;; mode attribute, using unaligned forms for misaligned memory sources.
;; %t = 256-bit (ymm) view of the destination, %x = 128-bit (xmm) view.
22076 switch (get_attr_mode (insn))
22079 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22080 return "vmovups\t{%1, %t0|%t0, %1}";
22082 return "vmovaps\t{%1, %t0|%t0, %1}";
22084 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22085 return "vmovupd\t{%1, %t0|%t0, %1}";
22087 return "vmovapd\t{%1, %t0|%t0, %1}";
22089 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22090 return "vmovups\t{%1, %x0|%x0, %1}";
22092 return "vmovaps\t{%1, %x0|%x0, %1}";
22094 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22095 return "vmovupd\t{%1, %x0|%x0, %1}";
22097 return "vmovapd\t{%1, %x0|%x0, %1}";
;; Integer moves: alternative 2 (VEX regs) can use plain vmovdqu/a;
;; alternative 3 may need the EVEX-only 32/64-element-suffixed forms.
22099 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22101 if (which_alternative == 2)
22102 return "vmovdqu\t{%1, %t0|%t0, %1}";
22103 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22104 return "vmovdqu64\t{%1, %t0|%t0, %1}";
22106 return "vmovdqu32\t{%1, %t0|%t0, %1}";
22110 if (which_alternative == 2)
22111 return "vmovdqa\t{%1, %t0|%t0, %1}";
22112 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22113 return "vmovdqa64\t{%1, %t0|%t0, %1}";
22115 return "vmovdqa32\t{%1, %t0|%t0, %1}";
22118 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
22120 if (which_alternative == 2)
22121 return "vmovdqu\t{%1, %x0|%x0, %1}";
22122 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22123 return "vmovdqu64\t{%1, %x0|%x0, %1}";
22125 return "vmovdqu32\t{%1, %x0|%x0, %1}";
22129 if (which_alternative == 2)
22130 return "vmovdqa\t{%1, %x0|%x0, %1}";
22131 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
22132 return "vmovdqa64\t{%1, %x0|%x0, %1}";
22134 return "vmovdqa32\t{%1, %x0|%x0, %1}";
22137 gcc_unreachable ();
22140 gcc_unreachable ();
22143 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
22144 (set_attr "prefix_extra" "1,1,*,*")
22145 (set_attr "length_immediate" "1,1,*,*")
22146 (set_attr "prefix" "maybe_evex")
22147 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision -> single-precision conversions (F16C / AVX-512).
;; 128-bit form: converts the low 4 halfwords of a V8HI to V4SF; the
;; vec_select keeps only the 4 results actually produced.
22149 (define_insn "vcvtph2ps<mask_name>"
22150 [(set (match_operand:V4SF 0 "register_operand" "=v")
22152 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
22154 (parallel [(const_int 0) (const_int 1)
22155 (const_int 2) (const_int 3)])))]
22156 "TARGET_F16C || TARGET_AVX512VL"
22157 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22158 [(set_attr "type" "ssecvt")
22159 (set_attr "prefix" "maybe_evex")
22160 (set_attr "mode" "V4SF")])
;; Same conversion with a 64-bit memory source (4 halfwords loaded
;; directly).
22162 (define_insn "*vcvtph2ps_load<mask_name>"
22163 [(set (match_operand:V4SF 0 "register_operand" "=v")
22164 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
22165 UNSPEC_VCVTPH2PS))]
22166 "TARGET_F16C || TARGET_AVX512VL"
22167 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22168 [(set_attr "type" "ssecvt")
22169 (set_attr "prefix" "vex")
22170 (set_attr "mode" "V8SF")])
;; 256-bit form: 8 halfwords -> V8SF.
22172 (define_insn "vcvtph2ps256<mask_name>"
22173 [(set (match_operand:V8SF 0 "register_operand" "=v")
22174 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
22175 UNSPEC_VCVTPH2PS))]
22176 "TARGET_F16C || TARGET_AVX512VL"
22177 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22178 [(set_attr "type" "ssecvt")
22179 (set_attr "prefix" "vex")
22180 (set_attr "btver2_decode" "double")
22181 (set_attr "mode" "V8SF")])
;; 512-bit AVX-512F form with optional SAE (suppress-all-exceptions)
;; rounding modifier.
22183 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
22184 [(set (match_operand:V16SF 0 "register_operand" "=v")
22186 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
22187 UNSPEC_VCVTPH2PS))]
22189 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
22190 [(set_attr "type" "ssecvt")
22191 (set_attr "prefix" "evex")
22192 (set_attr "mode" "V16SF")])
;; Single-precision -> half-precision conversions (F16C / AVX-512).
;; Operand 2 is always the imm8 rounding-control byte.
;; Masked 128-bit expander: converts V4SF and merges under a QI mask;
;; the upper 4 halfwords of the V8HI result are supplied as zero
;; (operand 5, set in the preparation statement).
22194 (define_expand "vcvtps2ph_mask"
22195 [(set (match_operand:V8HI 0 "register_operand")
22198 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
22199 (match_operand:SI 2 "const_0_to_255_operand")]
22202 (match_operand:V8HI 3 "nonimm_or_0_operand")
22203 (match_operand:QI 4 "register_operand")))]
22205 "operands[5] = CONST0_RTX (V4HImode);")
;; Unmasked 128-bit expander; same zero-filled upper half.
22207 (define_expand "vcvtps2ph"
22208 [(set (match_operand:V8HI 0 "register_operand")
22210 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
22211 (match_operand:SI 2 "const_0_to_255_operand")]
22215 "operands[3] = CONST0_RTX (V4HImode);")
;; The insn the two expanders above match: register destination with a
;; const-zero high half.
22217 (define_insn "*vcvtps2ph<mask_name>"
22218 [(set (match_operand:V8HI 0 "register_operand" "=v")
22220 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
22221 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22223 (match_operand:V4HI 3 "const0_operand")))]
22224 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
22225 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
22226 [(set_attr "type" "ssecvt")
22227 (set_attr "prefix" "maybe_evex")
22228 (set_attr "mode" "V4SF")])
;; Direct-to-memory 64-bit store form (optionally merge-masked).
22230 (define_insn "*vcvtps2ph_store<merge_mask_name>"
22231 [(set (match_operand:V4HI 0 "memory_operand" "=m")
22232 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
22233 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22234 UNSPEC_VCVTPS2PH))]
22235 "TARGET_F16C || TARGET_AVX512VL"
22236 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22237 [(set_attr "type" "ssecvt")
22238 (set_attr "prefix" "maybe_evex")
22239 (set_attr "mode" "V4SF")])
;; 256-bit source -> V8HI register destination.
22241 (define_insn "vcvtps2ph256<mask_name>"
22242 [(set (match_operand:V8HI 0 "register_operand" "=v")
22243 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
22244 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22245 UNSPEC_VCVTPS2PH))]
22246 "TARGET_F16C || TARGET_AVX512VL"
22247 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22248 [(set_attr "type" "ssecvt")
22249 (set_attr "prefix" "maybe_evex")
22250 (set_attr "btver2_decode" "vector")
22251 (set_attr "mode" "V8SF")])
;; 256-bit source -> memory destination.
22253 (define_insn "*vcvtps2ph256<merge_mask_name>"
22254 [(set (match_operand:V8HI 0 "memory_operand" "=m")
22255 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
22256 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22257 UNSPEC_VCVTPS2PH))]
22258 "TARGET_F16C || TARGET_AVX512VL"
22259 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22260 [(set_attr "type" "ssecvt")
22261 (set_attr "prefix" "maybe_evex")
22262 (set_attr "btver2_decode" "vector")
22263 (set_attr "mode" "V8SF")])
;; 512-bit AVX-512F forms: register destination ...
22265 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
22266 [(set (match_operand:V16HI 0 "register_operand" "=v")
22268 [(match_operand:V16SF 1 "register_operand" "v")
22269 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22270 UNSPEC_VCVTPS2PH))]
22272 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22273 [(set_attr "type" "ssecvt")
22274 (set_attr "prefix" "evex")
22275 (set_attr "mode" "V16SF")])
;; ... and direct-to-memory destination.
22277 (define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
22278 [(set (match_operand:V16HI 0 "memory_operand" "=m")
22280 [(match_operand:V16SF 1 "register_operand" "v")
22281 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22282 UNSPEC_VCVTPS2PH))]
22284 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22285 [(set_attr "type" "ssecvt")
22286 (set_attr "prefix" "evex")
22287 (set_attr "mode" "V16SF")])
22289 ;; For gather* insn patterns
22290 (define_mode_iterator VEC_GATHER_MODE
22291 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode when indices are 32-bit (one SImode index per
;; gathered element).
22292 (define_mode_attr VEC_GATHER_IDXSI
22293 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
22294 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
22295 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
22296 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Index-vector mode when indices are 64-bit.  For 32-bit data modes
;; only half as many elements fit in the index vector's width, hence
;; e.g. V8SI -> V4DI.
22298 (define_mode_attr VEC_GATHER_IDXDI
22299 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
22300 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
22301 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
22302 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Source/destination mode actually produced by a DI-indexed gather:
;; with 64-bit indices a 32-bit-element gather yields only a half-width
;; result (e.g. V8SI data -> V4SI result).
22304 (define_mode_attr VEC_GATHER_SRCDI
22305 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
22306 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
22307 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
22308 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; AVX2 gather with 32-bit indices.  The expander wraps base, index and
;; scale in an UNSPEC_VSIBADDR address so the insn can print a VSIB
;; memory operand; operand 4 is the merge source / write-mask vector,
;; which the hardware clobbers (cleared as elements complete).
22310 (define_expand "avx2_gathersi<mode>"
22311 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
22312 (unspec:VEC_GATHER_MODE
22313 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
22314 (mem:<ssescalarmode>
22316 [(match_operand 2 "vsib_address_operand")
22317 (match_operand:<VEC_GATHER_IDXSI>
22318 3 "register_operand")
22319 (match_operand:SI 5 "const1248_operand ")]))
22320 (mem:BLK (scratch))
22321 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
22323 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
22327 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22328 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn.  Destination and mask are earlyclobber/clobbered:
;; vgatherd* requires dst, index and mask to be distinct registers.
22331 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
22332 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22333 (unspec:VEC_GATHER_MODE
22334 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
22335 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22337 [(match_operand:P 3 "vsib_address_operand" "Tv")
22338 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
22339 (match_operand:SI 6 "const1248_operand" "n")]
22341 (mem:BLK (scratch))
22342 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
22344 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22346 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
22347 [(set_attr "type" "ssemov")
22348 (set_attr "prefix" "vex")
22349 (set_attr "mode" "<sseinsnmode>")])
;; Variant matched when the previous-destination input is unused
;; (combine-simplified form); same emitted instruction.
22351 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
22352 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22353 (unspec:VEC_GATHER_MODE
22355 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22357 [(match_operand:P 2 "vsib_address_operand" "Tv")
22358 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
22359 (match_operand:SI 5 "const1248_operand" "n")]
22361 (mem:BLK (scratch))
22362 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
22364 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22366 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
22367 [(set_attr "type" "ssemov")
22368 (set_attr "prefix" "vex")
22369 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 gather with 64-bit indices.  Note the source/merge operands use
;; VEC_GATHER_SRCDI: for 32-bit data with 64-bit indices only a
;; half-width result is produced.
22371 (define_expand "avx2_gatherdi<mode>"
22372 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
22373 (unspec:VEC_GATHER_MODE
22374 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
22375 (mem:<ssescalarmode>
22377 [(match_operand 2 "vsib_address_operand")
22378 (match_operand:<VEC_GATHER_IDXDI>
22379 3 "register_operand")
22380 (match_operand:SI 5 "const1248_operand ")]))
22381 (mem:BLK (scratch))
22382 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
22384 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
22388 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22389 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the DI-indexed gather.
22392 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
22393 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22394 (unspec:VEC_GATHER_MODE
22395 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
22396 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22398 [(match_operand:P 3 "vsib_address_operand" "Tv")
22399 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
22400 (match_operand:SI 6 "const1248_operand" "n")]
22402 (mem:BLK (scratch))
22403 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
22405 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22407 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
22408 [(set_attr "type" "ssemov")
22409 (set_attr "prefix" "vex")
22410 (set_attr "mode" "<sseinsnmode>")])
;; Combine-simplified form without the previous-destination input.  When
;; the result is narrower than the nominal mode, print the xmm (%x0)
;; view of the destination.
22412 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
22413 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22414 (unspec:VEC_GATHER_MODE
22416 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22418 [(match_operand:P 2 "vsib_address_operand" "Tv")
22419 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
22420 (match_operand:SI 5 "const1248_operand" "n")]
22422 (mem:BLK (scratch))
22423 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
22425 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22428 if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
22429 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
22430 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
22432 [(set_attr "type" "ssemov")
22433 (set_attr "prefix" "vex")
22434 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit 32-bit-element DI-indexed gather where only the low half of
;; the full result is selected (elements 0..3).
22436 (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
22437 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
22438 (vec_select:<VEC_GATHER_SRCDI>
22440 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
22441 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22443 [(match_operand:P 3 "vsib_address_operand" "Tv")
22444 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
22445 (match_operand:SI 6 "const1248_operand" "n")]
22447 (mem:BLK (scratch))
22448 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
22450 (parallel [(const_int 0) (const_int 1)
22451 (const_int 2) (const_int 3)])))
22452 (clobber (match_scratch:VI4F_256 1 "=&x"))]
22454 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
22455 [(set_attr "type" "ssemov")
22456 (set_attr "prefix" "vex")
22457 (set_attr "mode" "<sseinsnmode>")])
;; Same low-half-select form without the previous-destination input.
22459 (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
22460 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
22461 (vec_select:<VEC_GATHER_SRCDI>
22464 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22466 [(match_operand:P 2 "vsib_address_operand" "Tv")
22467 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
22468 (match_operand:SI 5 "const1248_operand" "n")]
22470 (mem:BLK (scratch))
22471 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
22473 (parallel [(const_int 0) (const_int 1)
22474 (const_int 2) (const_int 3)])))
22475 (clobber (match_scratch:VI4F_256 1 "=&x"))]
22477 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
22478 [(set_attr "type" "ssemov")
22479 (set_attr "prefix" "vex")
22480 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 gather with 32-bit indices.  Unlike the AVX2 patterns, the
;; mask lives in a k-register (operand 4) and the hardware clears it as
;; elements complete, hence the k-register clobber in the insns.
22482 (define_expand "<avx512>_gathersi<mode>"
22483 [(parallel [(set (match_operand:VI48F 0 "register_operand")
22485 [(match_operand:VI48F 1 "register_operand")
22486 (match_operand:<avx512fmaskmode> 4 "register_operand")
22487 (mem:<ssescalarmode>
22489 [(match_operand 2 "vsib_address_operand")
22490 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
22491 (match_operand:SI 5 "const1248_operand")]))]
22493 (clobber (match_scratch:<avx512fmaskmode> 7))])]
22497 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22498 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn: destination is earlyclobber; the mask register input
;; is tied to the clobbered k-register scratch.
22501 (define_insn "*avx512f_gathersi<VI48F:mode>"
22502 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22504 [(match_operand:VI48F 1 "register_operand" "0")
22505 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
22506 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22508 [(match_operand:P 4 "vsib_address_operand" "Tv")
22509 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
22510 (match_operand:SI 5 "const1248_operand" "n")]
22511 UNSPEC_VSIBADDR)])]
22513 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
22515 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
22516 ;; gas changed what it requires incompatibly.
22517 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
22518 [(set_attr "type" "ssemov")
22519 (set_attr "prefix" "evex")
22520 (set_attr "mode" "<sseinsnmode>")])
;; Combine-simplified form without the previous-destination input.
22522 (define_insn "*avx512f_gathersi<VI48F:mode>_2"
22523 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22526 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
22527 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
22529 [(match_operand:P 3 "vsib_address_operand" "Tv")
22530 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
22531 (match_operand:SI 4 "const1248_operand" "n")]
22532 UNSPEC_VSIBADDR)])]
22534 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
22536 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22537 ;; gas changed what it requires incompatibly.
22538 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
22539 [(set_attr "type" "ssemov")
22540 (set_attr "prefix" "evex")
22541 (set_attr "mode" "<sseinsnmode>")])
22544 (define_expand "<avx512>_gatherdi<mode>"
22545 [(parallel [(set (match_operand:VI48F 0 "register_operand")
22547 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
22548 (match_operand:QI 4 "register_operand")
22549 (mem:<ssescalarmode>
22551 [(match_operand 2 "vsib_address_operand")
22552 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
22553 (match_operand:SI 5 "const1248_operand")]))]
22555 (clobber (match_scratch:QI 7))])]
22559 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22560 operands[5]), UNSPEC_VSIBADDR);
22563 (define_insn "*avx512f_gatherdi<VI48F:mode>"
22564 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22566 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
22567 (match_operand:QI 7 "register_operand" "2")
22568 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22570 [(match_operand:P 4 "vsib_address_operand" "Tv")
22571 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
22572 (match_operand:SI 5 "const1248_operand" "n")]
22573 UNSPEC_VSIBADDR)])]
22575 (clobber (match_scratch:QI 2 "=&Yk"))]
22577 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
22578 ;; gas changed what it requires incompatibly.
22579 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
22580 [(set_attr "type" "ssemov")
22581 (set_attr "prefix" "evex")
22582 (set_attr "mode" "<sseinsnmode>")])
22584 (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
22585 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22588 (match_operand:QI 6 "register_operand" "1")
22589 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
22591 [(match_operand:P 3 "vsib_address_operand" "Tv")
22592 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
22593 (match_operand:SI 4 "const1248_operand" "n")]
22594 UNSPEC_VSIBADDR)])]
22596 (clobber (match_scratch:QI 1 "=&Yk"))]
22599 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22600 gas changed what it requires incompatibly. */
22601 if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
22603 if (<VI48F:MODE_SIZE> != 64)
22604 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
22606 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
22608 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
22610 [(set_attr "type" "ssemov")
22611 (set_attr "prefix" "evex")
22612 (set_attr "mode" "<sseinsnmode>")])
22614 (define_expand "<avx512>_scattersi<mode>"
22615 [(parallel [(set (mem:VI48F
22617 [(match_operand 0 "vsib_address_operand")
22618 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
22619 (match_operand:SI 4 "const1248_operand")]))
22621 [(match_operand:<avx512fmaskmode> 1 "register_operand")
22622 (match_operand:VI48F 3 "register_operand")]
22624 (clobber (match_scratch:<avx512fmaskmode> 6))])]
22628 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
22629 operands[4]), UNSPEC_VSIBADDR);
22632 (define_insn "*avx512f_scattersi<VI48F:mode>"
22633 [(set (match_operator:VI48F 5 "vsib_mem_operator"
22635 [(match_operand:P 0 "vsib_address_operand" "Tv")
22636 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
22637 (match_operand:SI 4 "const1248_operand" "n")]
22640 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
22641 (match_operand:VI48F 3 "register_operand" "v")]
22643 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
22645 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22646 ;; gas changed what it requires incompatibly.
22647 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
22648 [(set_attr "type" "ssemov")
22649 (set_attr "prefix" "evex")
22650 (set_attr "mode" "<sseinsnmode>")])
22652 (define_expand "<avx512>_scatterdi<mode>"
22653 [(parallel [(set (mem:VI48F
22655 [(match_operand 0 "vsib_address_operand")
22656 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
22657 (match_operand:SI 4 "const1248_operand")]))
22659 [(match_operand:QI 1 "register_operand")
22660 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
22662 (clobber (match_scratch:QI 6))])]
22666 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
22667 operands[4]), UNSPEC_VSIBADDR);
22670 (define_insn "*avx512f_scatterdi<VI48F:mode>"
22671 [(set (match_operator:VI48F 5 "vsib_mem_operator"
22673 [(match_operand:P 0 "vsib_address_operand" "Tv")
22674 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
22675 (match_operand:SI 4 "const1248_operand" "n")]
22678 [(match_operand:QI 6 "register_operand" "1")
22679 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
22681 (clobber (match_scratch:QI 1 "=&Yk"))]
22683 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22684 ;; gas changed what it requires incompatibly.
22685 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
22686 [(set_attr "type" "ssemov")
22687 (set_attr "prefix" "evex")
22688 (set_attr "mode" "<sseinsnmode>")])
22690 (define_insn "<avx512>_compress<mode>_mask"
22691 [(set (match_operand:VI48F 0 "register_operand" "=v")
22693 [(match_operand:VI48F 1 "register_operand" "v")
22694 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
22695 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
22698 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22699 [(set_attr "type" "ssemov")
22700 (set_attr "prefix" "evex")
22701 (set_attr "mode" "<sseinsnmode>")])
22703 (define_insn "compress<mode>_mask"
22704 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
22705 (unspec:VI12_AVX512VLBW
22706 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
22707 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
22708 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
22710 "TARGET_AVX512VBMI2"
22711 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22712 [(set_attr "type" "ssemov")
22713 (set_attr "prefix" "evex")
22714 (set_attr "mode" "<sseinsnmode>")])
22716 (define_insn "<avx512>_compressstore<mode>_mask"
22717 [(set (match_operand:VI48F 0 "memory_operand" "=m")
22719 [(match_operand:VI48F 1 "register_operand" "x")
22721 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
22722 UNSPEC_COMPRESS_STORE))]
22724 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
22725 [(set_attr "type" "ssemov")
22726 (set_attr "prefix" "evex")
22727 (set_attr "memory" "store")
22728 (set_attr "mode" "<sseinsnmode>")])
22730 (define_insn "compressstore<mode>_mask"
22731 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
22732 (unspec:VI12_AVX512VLBW
22733 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
22735 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
22736 UNSPEC_COMPRESS_STORE))]
22737 "TARGET_AVX512VBMI2"
22738 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
22739 [(set_attr "type" "ssemov")
22740 (set_attr "prefix" "evex")
22741 (set_attr "memory" "store")
22742 (set_attr "mode" "<sseinsnmode>")])
22744 (define_expand "<avx512>_expand<mode>_maskz"
22745 [(set (match_operand:VI48F 0 "register_operand")
22747 [(match_operand:VI48F 1 "nonimmediate_operand")
22748 (match_operand:VI48F 2 "nonimm_or_0_operand")
22749 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22752 "operands[2] = CONST0_RTX (<MODE>mode);")
22754 (define_insn "<avx512>_expand<mode>_mask"
22755 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
22757 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
22758 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
22759 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22762 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22763 [(set_attr "type" "ssemov")
22764 (set_attr "prefix" "evex")
22765 (set_attr "memory" "none,load")
22766 (set_attr "mode" "<sseinsnmode>")])
22768 (define_insn "expand<mode>_mask"
22769 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
22770 (unspec:VI12_AVX512VLBW
22771 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
22772 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
22773 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22775 "TARGET_AVX512VBMI2"
22776 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22777 [(set_attr "type" "ssemov")
22778 (set_attr "prefix" "evex")
22779 (set_attr "memory" "none,load")
22780 (set_attr "mode" "<sseinsnmode>")])
22782 (define_expand "expand<mode>_maskz"
22783 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
22784 (unspec:VI12_AVX512VLBW
22785 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
22786 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
22787 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22789 "TARGET_AVX512VBMI2"
22790 "operands[2] = CONST0_RTX (<MODE>mode);")
22792 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
22793 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22794 (unspec:VF_AVX512VL
22795 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
22796 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
22797 (match_operand:SI 3 "const_0_to_15_operand")]
22799 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
22800 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
22801 [(set_attr "type" "sse")
22802 (set_attr "prefix" "evex")
22803 (set_attr "mode" "<MODE>")])
22805 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
22806 [(set (match_operand:VF_128 0 "register_operand" "=v")
22809 [(match_operand:VF_128 1 "register_operand" "v")
22810 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22811 (match_operand:SI 3 "const_0_to_15_operand")]
22816 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
22817 [(set_attr "type" "sse")
22818 (set_attr "prefix" "evex")
22819 (set_attr "mode" "<MODE>")])
22821 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
22822 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22823 (unspec:<avx512fmaskmode>
22824 [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
22825 (match_operand 2 "const_0_to_255_operand" "n")]
22828 "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
22829 [(set_attr "type" "sse")
22830 (set_attr "length_immediate" "1")
22831 (set_attr "prefix" "evex")
22832 (set_attr "mode" "<MODE>")])
22834 (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
22835 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22836 (and:<avx512fmaskmode>
22837 (unspec:<avx512fmaskmode>
22838 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
22839 (match_operand 2 "const_0_to_255_operand" "n")]
22843 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
22844 [(set_attr "type" "sse")
22845 (set_attr "length_immediate" "1")
22846 (set_attr "prefix" "evex")
22847 (set_attr "mode" "<MODE>")])
22849 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
22850 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22851 (unspec:VF_AVX512VL
22852 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
22853 (match_operand:SI 2 "const_0_to_15_operand")]
22856 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
22857 [(set_attr "prefix" "evex")
22858 (set_attr "mode" "<MODE>")])
22860 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
22861 [(set (match_operand:VF_128 0 "register_operand" "=v")
22864 [(match_operand:VF_128 1 "register_operand" "v")
22865 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22866 (match_operand:SI 3 "const_0_to_15_operand")]
22871 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
22872 [(set_attr "prefix" "evex")
22873 (set_attr "mode" "<ssescalarmode>")])
22875 ;; The correct representation for this is absolutely enormous, and
22876 ;; surely not generally useful.
22877 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
22878 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22879 (unspec:VI2_AVX512VL
22880 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
22881 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
22882 (match_operand:SI 3 "const_0_to_255_operand")]
22885 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
22886 [(set_attr "type" "sselog1")
22887 (set_attr "length_immediate" "1")
22888 (set_attr "prefix" "evex")
22889 (set_attr "mode" "<sseinsnmode>")])
22891 (define_insn "clz<mode>2<mask_name>"
22892 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22894 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
22896 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22897 [(set_attr "type" "sse")
22898 (set_attr "prefix" "evex")
22899 (set_attr "mode" "<sseinsnmode>")])
22901 (define_insn "<mask_codefor>conflict<mode><mask_name>"
22902 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22903 (unspec:VI48_AVX512VL
22904 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
22907 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22908 [(set_attr "type" "sse")
22909 (set_attr "prefix" "evex")
22910 (set_attr "mode" "<sseinsnmode>")])
22912 (define_insn "sha1msg1"
22913 [(set (match_operand:V4SI 0 "register_operand" "=x")
22915 [(match_operand:V4SI 1 "register_operand" "0")
22916 (match_operand:V4SI 2 "vector_operand" "xBm")]
22919 "sha1msg1\t{%2, %0|%0, %2}"
22920 [(set_attr "type" "sselog1")
22921 (set_attr "mode" "TI")])
22923 (define_insn "sha1msg2"
22924 [(set (match_operand:V4SI 0 "register_operand" "=x")
22926 [(match_operand:V4SI 1 "register_operand" "0")
22927 (match_operand:V4SI 2 "vector_operand" "xBm")]
22930 "sha1msg2\t{%2, %0|%0, %2}"
22931 [(set_attr "type" "sselog1")
22932 (set_attr "mode" "TI")])
22934 (define_insn "sha1nexte"
22935 [(set (match_operand:V4SI 0 "register_operand" "=x")
22937 [(match_operand:V4SI 1 "register_operand" "0")
22938 (match_operand:V4SI 2 "vector_operand" "xBm")]
22939 UNSPEC_SHA1NEXTE))]
22941 "sha1nexte\t{%2, %0|%0, %2}"
22942 [(set_attr "type" "sselog1")
22943 (set_attr "mode" "TI")])
22945 (define_insn "sha1rnds4"
22946 [(set (match_operand:V4SI 0 "register_operand" "=x")
22948 [(match_operand:V4SI 1 "register_operand" "0")
22949 (match_operand:V4SI 2 "vector_operand" "xBm")
22950 (match_operand:SI 3 "const_0_to_3_operand" "n")]
22951 UNSPEC_SHA1RNDS4))]
22953 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
22954 [(set_attr "type" "sselog1")
22955 (set_attr "length_immediate" "1")
22956 (set_attr "mode" "TI")])
22958 (define_insn "sha256msg1"
22959 [(set (match_operand:V4SI 0 "register_operand" "=x")
22961 [(match_operand:V4SI 1 "register_operand" "0")
22962 (match_operand:V4SI 2 "vector_operand" "xBm")]
22963 UNSPEC_SHA256MSG1))]
22965 "sha256msg1\t{%2, %0|%0, %2}"
22966 [(set_attr "type" "sselog1")
22967 (set_attr "mode" "TI")])
22969 (define_insn "sha256msg2"
22970 [(set (match_operand:V4SI 0 "register_operand" "=x")
22972 [(match_operand:V4SI 1 "register_operand" "0")
22973 (match_operand:V4SI 2 "vector_operand" "xBm")]
22974 UNSPEC_SHA256MSG2))]
22976 "sha256msg2\t{%2, %0|%0, %2}"
22977 [(set_attr "type" "sselog1")
22978 (set_attr "mode" "TI")])
22980 (define_insn "sha256rnds2"
22981 [(set (match_operand:V4SI 0 "register_operand" "=x")
22983 [(match_operand:V4SI 1 "register_operand" "0")
22984 (match_operand:V4SI 2 "vector_operand" "xBm")
22985 (match_operand:V4SI 3 "register_operand" "Yz")]
22986 UNSPEC_SHA256RNDS2))]
22988 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
22989 [(set_attr "type" "sselog1")
22990 (set_attr "length_immediate" "1")
22991 (set_attr "mode" "TI")])
22993 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
22994 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22995 (vec_concat:AVX512MODE2P
22996 (vec_concat:<ssehalfvecmode>
22997 (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
22998 (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
22999 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
23000 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23002 "&& reload_completed"
23003 [(set (match_dup 0) (match_dup 1))]
23005 if (REG_P (operands[0]))
23006 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
23008 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
23009 <ssequartermode>mode);
23012 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
23013 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
23014 (vec_concat:AVX512MODE2P
23015 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
23016 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
23017 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23019 "&& reload_completed"
23020 [(set (match_dup 0) (match_dup 1))]
23022 if (REG_P (operands[0]))
23023 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
23025 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
23026 <ssehalfvecmode>mode);
23029 (define_int_iterator VPMADD52
23030 [UNSPEC_VPMADD52LUQ
23031 UNSPEC_VPMADD52HUQ])
23033 (define_int_attr vpmadd52type
23034 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
23036 (define_expand "vpamdd52huq<mode>_maskz"
23037 [(match_operand:VI8_AVX512VL 0 "register_operand")
23038 (match_operand:VI8_AVX512VL 1 "register_operand")
23039 (match_operand:VI8_AVX512VL 2 "register_operand")
23040 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
23041 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23042 "TARGET_AVX512IFMA"
23044 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
23045 operands[0], operands[1], operands[2], operands[3],
23046 CONST0_RTX (<MODE>mode), operands[4]));
23050 (define_expand "vpamdd52luq<mode>_maskz"
23051 [(match_operand:VI8_AVX512VL 0 "register_operand")
23052 (match_operand:VI8_AVX512VL 1 "register_operand")
23053 (match_operand:VI8_AVX512VL 2 "register_operand")
23054 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
23055 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23056 "TARGET_AVX512IFMA"
23058 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
23059 operands[0], operands[1], operands[2], operands[3],
23060 CONST0_RTX (<MODE>mode), operands[4]));
23064 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
23065 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
23066 (unspec:VI8_AVX512VL
23067 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
23068 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
23069 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
23071 "TARGET_AVX512IFMA"
23072 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
23073 [(set_attr "type" "ssemuladd")
23074 (set_attr "prefix" "evex")
23075 (set_attr "mode" "<sseinsnmode>")])
23077 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
23078 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
23079 (vec_merge:VI8_AVX512VL
23080 (unspec:VI8_AVX512VL
23081 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
23082 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
23083 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
23086 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23087 "TARGET_AVX512IFMA"
23088 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
23089 [(set_attr "type" "ssemuladd")
23090 (set_attr "prefix" "evex")
23091 (set_attr "mode" "<sseinsnmode>")])
23093 (define_insn "vpmultishiftqb<mode><mask_name>"
23094 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
23095 (unspec:VI1_AVX512VL
23096 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
23097 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
23098 UNSPEC_VPMULTISHIFT))]
23099 "TARGET_AVX512VBMI"
23100 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23101 [(set_attr "type" "sselog")
23102 (set_attr "prefix" "evex")
23103 (set_attr "mode" "<sseinsnmode>")])
23105 (define_mode_iterator IMOD4
23106 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
23108 (define_mode_attr imod4_narrow
23109 [(V64SF "V16SF") (V64SI "V16SI")])
23111 (define_expand "mov<mode>"
23112 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
23113 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
23116 ix86_expand_vector_move (<MODE>mode, operands);
23120 (define_insn_and_split "*mov<mode>_internal"
23121 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
23122 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
23124 && (register_operand (operands[0], <MODE>mode)
23125 || register_operand (operands[1], <MODE>mode))"
23127 "&& reload_completed"
23133 for (i = 0; i < 4; i++)
23135 op0 = simplify_subreg
23136 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
23137 op1 = simplify_subreg
23138 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
23139 emit_move_insn (op0, op1);
23144 (define_insn "avx5124fmaddps_4fmaddps"
23145 [(set (match_operand:V16SF 0 "register_operand" "=v")
23147 [(match_operand:V16SF 1 "register_operand" "0")
23148 (match_operand:V64SF 2 "register_operand" "v")
23149 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
23150 "TARGET_AVX5124FMAPS"
23151 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
23152 [(set_attr ("type") ("ssemuladd"))
23153 (set_attr ("prefix") ("evex"))
23154 (set_attr ("mode") ("V16SF"))])
23156 (define_insn "avx5124fmaddps_4fmaddps_mask"
23157 [(set (match_operand:V16SF 0 "register_operand" "=v")
23160 [(match_operand:V64SF 1 "register_operand" "v")
23161 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
23162 (match_operand:V16SF 3 "register_operand" "0")
23163 (match_operand:HI 4 "register_operand" "Yk")))]
23164 "TARGET_AVX5124FMAPS"
23165 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23166 [(set_attr ("type") ("ssemuladd"))
23167 (set_attr ("prefix") ("evex"))
23168 (set_attr ("mode") ("V16SF"))])
23170 (define_insn "avx5124fmaddps_4fmaddps_maskz"
23171 [(set (match_operand:V16SF 0 "register_operand" "=v")
23174 [(match_operand:V16SF 1 "register_operand" "0")
23175 (match_operand:V64SF 2 "register_operand" "v")
23176 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
23177 (match_operand:V16SF 4 "const0_operand" "C")
23178 (match_operand:HI 5 "register_operand" "Yk")))]
23179 "TARGET_AVX5124FMAPS"
23180 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23181 [(set_attr ("type") ("ssemuladd"))
23182 (set_attr ("prefix") ("evex"))
23183 (set_attr ("mode") ("V16SF"))])
23185 (define_insn "avx5124fmaddps_4fmaddss"
23186 [(set (match_operand:V4SF 0 "register_operand" "=v")
23188 [(match_operand:V4SF 1 "register_operand" "0")
23189 (match_operand:V64SF 2 "register_operand" "v")
23190 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
23191 "TARGET_AVX5124FMAPS"
23192 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
23193 [(set_attr ("type") ("ssemuladd"))
23194 (set_attr ("prefix") ("evex"))
23195 (set_attr ("mode") ("SF"))])
23197 (define_insn "avx5124fmaddps_4fmaddss_mask"
23198 [(set (match_operand:V4SF 0 "register_operand" "=v")
23201 [(match_operand:V64SF 1 "register_operand" "v")
23202 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
23203 (match_operand:V4SF 3 "register_operand" "0")
23204 (match_operand:QI 4 "register_operand" "Yk")))]
23205 "TARGET_AVX5124FMAPS"
23206 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
23207 [(set_attr ("type") ("ssemuladd"))
23208 (set_attr ("prefix") ("evex"))
23209 (set_attr ("mode") ("SF"))])
23211 (define_insn "avx5124fmaddps_4fmaddss_maskz"
23212 [(set (match_operand:V4SF 0 "register_operand" "=v")
23215 [(match_operand:V4SF 1 "register_operand" "0")
23216 (match_operand:V64SF 2 "register_operand" "v")
23217 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
23218 (match_operand:V4SF 4 "const0_operand" "C")
23219 (match_operand:QI 5 "register_operand" "Yk")))]
23220 "TARGET_AVX5124FMAPS"
23221 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
23222 [(set_attr ("type") ("ssemuladd"))
23223 (set_attr ("prefix") ("evex"))
23224 (set_attr ("mode") ("SF"))])
23226 (define_insn "avx5124fmaddps_4fnmaddps"
23227 [(set (match_operand:V16SF 0 "register_operand" "=v")
23229 [(match_operand:V16SF 1 "register_operand" "0")
23230 (match_operand:V64SF 2 "register_operand" "v")
23231 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
23232 "TARGET_AVX5124FMAPS"
23233 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
23234 [(set_attr ("type") ("ssemuladd"))
23235 (set_attr ("prefix") ("evex"))
23236 (set_attr ("mode") ("V16SF"))])
23238 (define_insn "avx5124fmaddps_4fnmaddps_mask"
23239 [(set (match_operand:V16SF 0 "register_operand" "=v")
23242 [(match_operand:V64SF 1 "register_operand" "v")
23243 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23244 (match_operand:V16SF 3 "register_operand" "0")
23245 (match_operand:HI 4 "register_operand" "Yk")))]
23246 "TARGET_AVX5124FMAPS"
23247 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23248 [(set_attr ("type") ("ssemuladd"))
23249 (set_attr ("prefix") ("evex"))
23250 (set_attr ("mode") ("V16SF"))])
23252 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
23253 [(set (match_operand:V16SF 0 "register_operand" "=v")
23256 [(match_operand:V16SF 1 "register_operand" "0")
23257 (match_operand:V64SF 2 "register_operand" "v")
23258 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23259 (match_operand:V16SF 4 "const0_operand" "C")
23260 (match_operand:HI 5 "register_operand" "Yk")))]
23261 "TARGET_AVX5124FMAPS"
23262 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23263 [(set_attr ("type") ("ssemuladd"))
23264 (set_attr ("prefix") ("evex"))
23265 (set_attr ("mode") ("V16SF"))])
23267 (define_insn "avx5124fmaddps_4fnmaddss"
23268 [(set (match_operand:V4SF 0 "register_operand" "=v")
23270 [(match_operand:V4SF 1 "register_operand" "0")
23271 (match_operand:V64SF 2 "register_operand" "v")
23272 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
23273 "TARGET_AVX5124FMAPS"
23274 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
23275 [(set_attr ("type") ("ssemuladd"))
23276 (set_attr ("prefix") ("evex"))
23277 (set_attr ("mode") ("SF"))])
23279 (define_insn "avx5124fmaddps_4fnmaddss_mask"
23280 [(set (match_operand:V4SF 0 "register_operand" "=v")
23283 [(match_operand:V64SF 1 "register_operand" "v")
23284 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23285 (match_operand:V4SF 3 "register_operand" "0")
23286 (match_operand:QI 4 "register_operand" "Yk")))]
23287 "TARGET_AVX5124FMAPS"
23288 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
23289 [(set_attr ("type") ("ssemuladd"))
23290 (set_attr ("prefix") ("evex"))
23291 (set_attr ("mode") ("SF"))])
23293 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
23294 [(set (match_operand:V4SF 0 "register_operand" "=v")
23297 [(match_operand:V4SF 1 "register_operand" "0")
23298 (match_operand:V64SF 2 "register_operand" "v")
23299 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23300 (match_operand:V4SF 4 "const0_operand" "C")
23301 (match_operand:QI 5 "register_operand" "Yk")))]
23302 "TARGET_AVX5124FMAPS"
23303 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
23304 [(set_attr ("type") ("ssemuladd"))
23305 (set_attr ("prefix") ("evex"))
23306 (set_attr ("mode") ("SF"))])
23308 (define_insn "avx5124vnniw_vp4dpwssd"
23309 [(set (match_operand:V16SI 0 "register_operand" "=v")
23311 [(match_operand:V16SI 1 "register_operand" "0")
23312 (match_operand:V64SI 2 "register_operand" "v")
23313 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
23314 "TARGET_AVX5124VNNIW"
23315 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
23316 [(set_attr ("type") ("ssemuladd"))
23317 (set_attr ("prefix") ("evex"))
23318 (set_attr ("mode") ("TI"))])
23320 (define_insn "avx5124vnniw_vp4dpwssd_mask"
23321 [(set (match_operand:V16SI 0 "register_operand" "=v")
23324 [(match_operand:V64SI 1 "register_operand" "v")
23325 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
23326 (match_operand:V16SI 3 "register_operand" "0")
23327 (match_operand:HI 4 "register_operand" "Yk")))]
23328 "TARGET_AVX5124VNNIW"
23329 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23330 [(set_attr ("type") ("ssemuladd"))
23331 (set_attr ("prefix") ("evex"))
23332 (set_attr ("mode") ("TI"))])
;; AVX5124VNNIW vp4dpwssd, zero-masked form: 4-iteration word dot-product
;; accumulated into V16SI; lanes with a clear mask bit in operand 5 are
;; zeroed (operand 4 is the required const0 merge value).
;; NOTE(review): %g2 appears to print the 4-register source group named by
;; the V64SI operand — confirm against i386 print_operand.
23334 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
23335 [(set (match_operand:V16SI 0 "register_operand" "=v")
23338 [(match_operand:V16SI 1 "register_operand" "0")
23339 (match_operand:V64SI 2 "register_operand" "v")
23340 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
23341 (match_operand:V16SI 4 "const0_operand" "C")
23342 (match_operand:HI 5 "register_operand" "Yk")))]
23343 "TARGET_AVX5124VNNIW"
23344 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23345 [(set_attr ("type") ("ssemuladd"))
23346 (set_attr ("prefix") ("evex"))
23347 (set_attr ("mode") ("TI"))])
;; Saturating variant, unmasked: operand 1 ("0") is the accumulator tied
;; to the destination; operand 3 is the V4SI memory block consumed by the
;; 4 iterations.
23349 (define_insn "avx5124vnniw_vp4dpwssds"
23350 [(set (match_operand:V16SI 0 "register_operand" "=v")
23352 [(match_operand:V16SI 1 "register_operand" "0")
23353 (match_operand:V64SI 2 "register_operand" "v")
23354 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
23355 "TARGET_AVX5124VNNIW"
23356 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
23357 [(set_attr ("type") ("ssemuladd"))
23358 (set_attr ("prefix") ("evex"))
23359 (set_attr ("mode") ("TI"))])
;; Saturating variant, merge-masked: operand 3 ("0") supplies the
;; passthrough lanes selected by mask operand 4.
23361 (define_insn "avx5124vnniw_vp4dpwssds_mask"
23362 [(set (match_operand:V16SI 0 "register_operand" "=v")
23365 [(match_operand:V64SI 1 "register_operand" "v")
23366 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
23367 (match_operand:V16SI 3 "register_operand" "0")
23368 (match_operand:HI 4 "register_operand" "Yk")))]
23369 "TARGET_AVX5124VNNIW"
23370 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23371 [(set_attr ("type") ("ssemuladd"))
23372 (set_attr ("prefix") ("evex"))
23373 (set_attr ("mode") ("TI"))])
;; Saturating variant, zero-masked (%{z%} suffix on the destination).
23375 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
23376 [(set (match_operand:V16SI 0 "register_operand" "=v")
23379 [(match_operand:V16SI 1 "register_operand" "0")
23380 (match_operand:V64SI 2 "register_operand" "v")
23381 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
23382 (match_operand:V16SI 4 "const0_operand" "C")
23383 (match_operand:HI 5 "register_operand" "Yk")))]
23384 "TARGET_AVX5124VNNIW"
23385 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23386 [(set_attr ("type") ("ssemuladd"))
23387 (set_attr ("prefix") ("evex"))
23388 (set_attr ("mode") ("TI"))])
;; Vector population count for dword/qword elements (AVX512VPOPCNTDQ).
;; The expander exposes the standard popcount<mode>2 optab name; the insn
;; below is the masked-capable pattern it maps onto.
23390 (define_expand "popcount<mode>2"
23391 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
23392 (popcount:VI48_AVX512VL
23393 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
23394 "TARGET_AVX512VPOPCNTDQ")
;; vpopcntd/vpopcntq with optional embedded masking via <mask_name>.
23396 (define_insn "vpopcount<mode><mask_name>"
23397 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
23398 (popcount:VI48_AVX512VL
23399 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
23400 "TARGET_AVX512VPOPCNTDQ"
23401 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; Out-of-line register save/restore thunks, matched as whole PARALLELs by
;; the save_multiple/restore_multiple predicates; operand 1 names the
;; out-of-line routine.  NOTE(review): the output templates for these
;; patterns are not visible in this extract.
23403 ;; Save multiple registers out-of-line.
23404 (define_insn "*save_multiple<mode>"
23405 [(match_parallel 0 "save_multiple"
23406 [(use (match_operand:P 1 "symbol_operand"))])]
23407 "TARGET_SSE && TARGET_64BIT"
23410 ;; Restore multiple registers out-of-line.
23411 (define_insn "*restore_multiple<mode>"
23412 [(match_parallel 0 "restore_multiple"
23413 [(use (match_operand:P 1 "symbol_operand"))])]
23414 "TARGET_SSE && TARGET_64BIT"
;; This variant additionally restores the stack pointer from r10 before
;; returning.
23417 ;; Restore multiple registers out-of-line and return.
23418 (define_insn "*restore_multiple_and_return<mode>"
23419 [(match_parallel 0 "restore_multiple"
23421 (use (match_operand:P 1 "symbol_operand"))
23422 (set (reg:DI SP_REG) (reg:DI R10_REG))
23424 "TARGET_SSE && TARGET_64BIT"
;; Frame-pointer variant: pops the saved frame pointer (leave-style
;; sequence: sp = bp + 8, bp = [bp]) and clobbers memory.
23427 ;; Restore multiple registers out-of-line when hard frame pointer is used,
23428 ;; perform the leave operation prior to returning (from the function).
23429 (define_insn "*restore_multiple_leave_return<mode>"
23430 [(match_parallel 0 "restore_multiple"
23432 (use (match_operand:P 1 "symbol_operand"))
23433 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
23434 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
23435 (clobber (mem:BLK (scratch)))
23437 "TARGET_SSE && TARGET_64BIT"
;; Vector population count for byte/word elements (AVX512BITALG),
;; standard popcount<mode>2 optab entry point.
;; Constraint strings are meaningless in a define_expand (GCC ignores
;; them there); dropped for consistency with the VI48 popcount expander
;; above.
23440 (define_expand "popcount<mode>2"
23441 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
23442 (popcount:VI12_AVX512VL
23443 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")))]
23444 "TARGET_AVX512BITALG")
;; vpopcntb/vpopcntw with optional embedded masking via <mask_name>.
23446 (define_insn "vpopcount<mode><mask_name>"
23447 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
23448 (popcount:VI12_AVX512VL
23449 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
23450 "TARGET_AVX512BITALG"
23451 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; GFNI patterns.  Alternative 0 is the legacy-SSE two-operand encoding
;; (destination tied to operand 1), alternative 1 the VEX/EVEX
;; three-operand form with optional masking via <mask_name>.
;; NOTE(review): the insn conditions (TARGET_GFNI etc.) are on lines not
;; visible in this extract.  The blank after '|' in the templates puts a
;; leading space in Intel-dialect output — harmless, but worth confirming
;; whether upstream intended it.
23453 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
23454 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23455 (unspec:VI1_AVX512F
23456 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
23457 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
23458 (match_operand 3 "const_0_to_255_operand" "n,n")]
23459 UNSPEC_GF2P8AFFINEINV))]
23462 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
23463 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
23464 [(set_attr "isa" "noavx,avx")
23465 (set_attr "prefix_data16" "1,*")
23466 (set_attr "prefix_extra" "1")
23467 (set_attr "prefix" "orig,maybe_evex")
23468 (set_attr "mode" "<sseinsnmode>")])
;; GF(2^8) affine transform (no inversion), same alternative layout.
23470 (define_insn "vgf2p8affineqb_<mode><mask_name>"
23471 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23472 (unspec:VI1_AVX512F
23473 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
23474 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
23475 (match_operand 3 "const_0_to_255_operand" "n,n")]
23476 UNSPEC_GF2P8AFFINE))]
23479 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
23480 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
23481 [(set_attr "isa" "noavx,avx")
23482 (set_attr "prefix_data16" "1,*")
23483 (set_attr "prefix_extra" "1")
23484 (set_attr "prefix" "orig,maybe_evex")
23485 (set_attr "mode" "<sseinsnmode>")])
;; GF(2^8) multiply; "%0,v" on operand 1 marks it commutative for the
;; register allocator.
23487 (define_insn "vgf2p8mulb_<mode><mask_name>"
23488 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23489 (unspec:VI1_AVX512F
23490 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
23491 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
23495 gf2p8mulb\t{%2, %0| %0, %2}
23496 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
23497 [(set_attr "isa" "noavx,avx")
23498 (set_attr "prefix_data16" "1,*")
23499 (set_attr "prefix_extra" "1")
23500 (set_attr "prefix" "orig,maybe_evex")
23501 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 VPSHRD: concatenated right shift by immediate operand 3,
;; optional embedded masking via <mask_name>.
;; Dropped the stray blank before '}' so the Intel-dialect template does
;; not emit trailing whitespace (matches the style of sibling patterns).
23503 (define_insn "vpshrd_<mode><mask_name>"
23504 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23505 (unspec:VI248_AVX512VL
23506 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
23507 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
23508 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23510 "TARGET_AVX512VBMI2"
23511 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
23512 [(set_attr ("prefix") ("evex"))])
;; AVX512VBMI2 VPSHLD: concatenated left shift by immediate operand 3,
;; optional embedded masking via <mask_name>.
;; Dropped the stray blank before '}' so the Intel-dialect template does
;; not emit trailing whitespace.
23514 (define_insn "vpshld_<mode><mask_name>"
23515 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23516 (unspec:VI248_AVX512VL
23517 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
23518 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
23519 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23521 "TARGET_AVX512VBMI2"
23522 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
23523 [(set_attr ("prefix") ("evex"))])
;; VPSHRDV: concatenated right shift with per-element variable counts from
;; operand 3; destination is tied to accumulator operand 1.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23525 (define_insn "vpshrdv_<mode>"
23526 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23527 (unspec:VI248_AVX512VL
23528 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23529 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23530 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23532 "TARGET_AVX512VBMI2"
23533 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
23534 [(set_attr ("prefix") ("evex"))
23535 (set_attr "mode" "<sseinsnmode>")])
;; VPSHRDV, merge-masked: mask operand 4 selects between the computed
;; result and the accumulator lanes.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23537 (define_insn "vpshrdv_<mode>_mask"
23538 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23539 (vec_merge:VI248_AVX512VL
23540 (unspec:VI248_AVX512VL
23541 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23542 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23543 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23546 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23547 "TARGET_AVX512VBMI2"
23548 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
23549 [(set_attr ("prefix") ("evex"))
23550 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked VPSHRDV expander: forwards to the _maskz_1 insn with an
;; explicit const0 merge vector.
23552 (define_expand "vpshrdv_<mode>_maskz"
23553 [(match_operand:VI248_AVX512VL 0 "register_operand")
23554 (match_operand:VI248_AVX512VL 1 "register_operand")
23555 (match_operand:VI248_AVX512VL 2 "register_operand")
23556 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
23557 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23558 "TARGET_AVX512VBMI2"
23560 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
23561 operands[2], operands[3],
23562 CONST0_RTX (<MODE>mode),
;; VPSHRDV, zero-masked: operand 4 is the required const0 merge vector,
;; operand 5 the mask; %{z%} requests zeroing-masking in the output.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23567 (define_insn "vpshrdv_<mode>_maskz_1"
23568 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23569 (vec_merge:VI248_AVX512VL
23570 (unspec:VI248_AVX512VL
23571 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23572 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23573 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23575 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
23576 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23577 "TARGET_AVX512VBMI2"
23578 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
23579 [(set_attr ("prefix") ("evex"))
23580 (set_attr "mode" "<sseinsnmode>")])
;; VPSHLDV: concatenated left shift with per-element variable counts from
;; operand 3; destination is tied to accumulator operand 1.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23582 (define_insn "vpshldv_<mode>"
23583 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23584 (unspec:VI248_AVX512VL
23585 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23586 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23587 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23589 "TARGET_AVX512VBMI2"
23590 "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
23591 [(set_attr ("prefix") ("evex"))
23592 (set_attr "mode" "<sseinsnmode>")])
;; VPSHLDV, merge-masked: mask operand 4 selects between the computed
;; result and the accumulator lanes.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23594 (define_insn "vpshldv_<mode>_mask"
23595 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23596 (vec_merge:VI248_AVX512VL
23597 (unspec:VI248_AVX512VL
23598 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23599 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23600 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23603 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23604 "TARGET_AVX512VBMI2"
23605 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
23606 [(set_attr ("prefix") ("evex"))
23607 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked VPSHLDV expander: forwards to the _maskz_1 insn with an
;; explicit const0 merge vector.
23609 (define_expand "vpshldv_<mode>_maskz"
23610 [(match_operand:VI248_AVX512VL 0 "register_operand")
23611 (match_operand:VI248_AVX512VL 1 "register_operand")
23612 (match_operand:VI248_AVX512VL 2 "register_operand")
23613 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
23614 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23615 "TARGET_AVX512VBMI2"
23617 emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
23618 operands[2], operands[3],
23619 CONST0_RTX (<MODE>mode),
;; VPSHLDV, zero-masked: operand 4 is the required const0 merge vector,
;; operand 5 the mask; %{z%} requests zeroing-masking in the output.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23624 (define_insn "vpshldv_<mode>_maskz_1"
23625 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23626 (vec_merge:VI248_AVX512VL
23627 (unspec:VI248_AVX512VL
23628 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23629 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23630 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23632 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
23633 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23634 "TARGET_AVX512VBMI2"
23635 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
23636 [(set_attr ("prefix") ("evex"))
23637 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VNNI vpdpbusd, 512-bit form: unsigned-byte x signed-byte
;; multiply-accumulate into dword accumulator operand 1 (tied to dest).
23639 (define_insn "vpdpbusd_v16si"
23640 [(set (match_operand:V16SI 0 "register_operand" "=v")
23642 [(match_operand:V16SI 1 "register_operand" "0")
23643 (match_operand:V16SI 2 "register_operand" "v")
23644 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
23645 UNSPEC_VPMADDUBSWACCD))]
23646 "TARGET_AVX512VNNI"
23647 "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
23648 [(set_attr ("prefix") ("evex"))])
;; 128/256-bit form.  Alternative 0 is AVX-VNNI (VEX-encoded, forced by
;; the %{vex%} prefix), alternative 1 AVX512VNNI+VL (EVEX).
23650 (define_insn "vpdpbusd_<mode>"
23651 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
23653 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
23654 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
23655 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
23656 UNSPEC_VPMADDUBSWACCD))]
23657 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
23659 %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
23660 vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
23661 [(set_attr ("prefix") ("vex,evex"))
23662 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
;; vpdpbusd, merge-masked: mask operand 4 selects between the computed
;; result and the accumulator lanes.
;; Dropped the stray blank before '}' so the Intel-dialect template does
;; not emit trailing whitespace (the unmasked sibling above has none).
23664 (define_insn "vpdpbusd_<mode>_mask"
23665 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23666 (vec_merge:VI4_AVX512VL
23667 (unspec:VI4_AVX512VL
23668 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23669 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23670 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23671 UNSPEC_VPMADDUBSWACCD)
23673 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23674 "TARGET_AVX512VNNI"
23675 "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
23676 [(set_attr ("prefix") ("evex"))])
;; Zero-masked vpdpbusd expander: forwards to the _maskz_1 insn with an
;; explicit const0 merge vector.
23678 (define_expand "vpdpbusd_<mode>_maskz"
23679 [(match_operand:VI4_AVX512VL 0 "register_operand")
23680 (match_operand:VI4_AVX512VL 1 "register_operand")
23681 (match_operand:VI4_AVX512VL 2 "register_operand")
23682 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
23683 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23684 "TARGET_AVX512VNNI"
23686 emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
23687 operands[2], operands[3],
23688 CONST0_RTX (<MODE>mode),
;; vpdpbusd, zero-masked: operand 4 is the required const0 merge vector,
;; operand 5 the mask; %{z%} requests zeroing-masking.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23693 (define_insn "vpdpbusd_<mode>_maskz_1"
23694 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23695 (vec_merge:VI4_AVX512VL
23696 (unspec:VI4_AVX512VL
23697 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23698 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23699 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")
23700 ] UNSPEC_VPMADDUBSWACCD)
23701 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
23702 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23703 "TARGET_AVX512VNNI"
23704 "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
23705 [(set_attr ("prefix") ("evex"))])
;; vpdpbusds: saturating variant of vpdpbusd, 512-bit form.
23707 (define_insn "vpdpbusds_v16si"
23708 [(set (match_operand:V16SI 0 "register_operand" "=v")
23710 [(match_operand:V16SI 1 "register_operand" "0")
23711 (match_operand:V16SI 2 "register_operand" "v")
23712 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
23713 UNSPEC_VPMADDUBSWACCSSD))]
23714 "TARGET_AVX512VNNI"
23715 "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
23716 [(set_attr ("prefix") ("evex"))])
;; 128/256-bit form; alternative 0 AVX-VNNI (VEX), alternative 1
;; AVX512VNNI+VL (EVEX).
23718 (define_insn "vpdpbusds_<mode>"
23719 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
23721 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
23722 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
23723 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
23724 UNSPEC_VPMADDUBSWACCSSD))]
23725 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
23727 %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
23728 vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
23729 [(set_attr ("prefix") ("vex,evex"))
23730 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
;; vpdpbusds, merge-masked.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23732 (define_insn "vpdpbusds_<mode>_mask"
23733 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23734 (vec_merge:VI4_AVX512VL
23735 (unspec:VI4_AVX512VL
23736 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23737 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23738 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23739 UNSPEC_VPMADDUBSWACCSSD)
23741 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23742 "TARGET_AVX512VNNI"
23743 "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
23744 [(set_attr ("prefix") ("evex"))])
;; Zero-masked vpdpbusds expander: forwards to the _maskz_1 insn with an
;; explicit const0 merge vector.
23746 (define_expand "vpdpbusds_<mode>_maskz"
23747 [(match_operand:VI4_AVX512VL 0 "register_operand")
23748 (match_operand:VI4_AVX512VL 1 "register_operand")
23749 (match_operand:VI4_AVX512VL 2 "register_operand")
23750 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
23751 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23752 "TARGET_AVX512VNNI"
23754 emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
23755 operands[2], operands[3],
23756 CONST0_RTX (<MODE>mode),
;; vpdpbusds, zero-masked.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23761 (define_insn "vpdpbusds_<mode>_maskz_1"
23762 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23763 (vec_merge:VI4_AVX512VL
23764 (unspec:VI4_AVX512VL
23765 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23766 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23767 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23768 UNSPEC_VPMADDUBSWACCSSD)
23769 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
23770 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23771 "TARGET_AVX512VNNI"
23772 "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
23773 [(set_attr ("prefix") ("evex"))])
;; vpdpwssd: signed-word multiply-accumulate into dwords, 512-bit form.
23775 (define_insn "vpdpwssd_v16si"
23776 [(set (match_operand:V16SI 0 "register_operand" "=v")
23778 [(match_operand:V16SI 1 "register_operand" "0")
23779 (match_operand:V16SI 2 "register_operand" "v")
23780 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
23781 UNSPEC_VPMADDWDACCD))]
23782 "TARGET_AVX512VNNI"
23783 "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
23784 [(set_attr ("prefix") ("evex"))])
;; 128/256-bit form; alternative 0 AVX-VNNI (VEX), alternative 1
;; AVX512VNNI+VL (EVEX).
23786 (define_insn "vpdpwssd_<mode>"
23787 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
23789 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
23790 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
23791 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
23792 UNSPEC_VPMADDWDACCD))]
23793 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
23795 %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
23796 vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
23797 [(set_attr ("prefix") ("vex,evex"))
23798 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
;; vpdpwssd, merge-masked.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23800 (define_insn "vpdpwssd_<mode>_mask"
23801 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23802 (vec_merge:VI4_AVX512VL
23803 (unspec:VI4_AVX512VL
23804 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23805 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23806 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23807 UNSPEC_VPMADDWDACCD)
23809 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23810 "TARGET_AVX512VNNI"
23811 "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
23812 [(set_attr ("prefix") ("evex"))])
;; Zero-masked vpdpwssd expander: forwards to the _maskz_1 insn with an
;; explicit const0 merge vector.
23814 (define_expand "vpdpwssd_<mode>_maskz"
23815 [(match_operand:VI4_AVX512VL 0 "register_operand")
23816 (match_operand:VI4_AVX512VL 1 "register_operand")
23817 (match_operand:VI4_AVX512VL 2 "register_operand")
23818 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
23819 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23820 "TARGET_AVX512VNNI"
23822 emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
23823 operands[2], operands[3],
23824 CONST0_RTX (<MODE>mode),
;; vpdpwssd, zero-masked.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23829 (define_insn "vpdpwssd_<mode>_maskz_1"
23830 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23831 (vec_merge:VI4_AVX512VL
23832 (unspec:VI4_AVX512VL
23833 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23834 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23835 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23836 UNSPEC_VPMADDWDACCD)
23837 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
23838 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23839 "TARGET_AVX512VNNI"
23840 "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
23841 [(set_attr ("prefix") ("evex"))])
;; vpdpwssds: saturating variant of vpdpwssd, 512-bit form.
23843 (define_insn "vpdpwssds_v16si"
23844 [(set (match_operand:V16SI 0 "register_operand" "=v")
23846 [(match_operand:V16SI 1 "register_operand" "0")
23847 (match_operand:V16SI 2 "register_operand" "v")
23848 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
23849 UNSPEC_VPMADDWDACCSSD))]
23850 "TARGET_AVX512VNNI"
23851 "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
23852 [(set_attr ("prefix") ("evex"))])
;; 128/256-bit form; alternative 0 AVX-VNNI (VEX), alternative 1
;; AVX512VNNI+VL (EVEX).
23854 (define_insn "vpdpwssds_<mode>"
23855 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
23857 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
23858 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
23859 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
23860 UNSPEC_VPMADDWDACCSSD))]
23861 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
23863 %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
23864 vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
23865 [(set_attr ("prefix") ("vex,evex"))
23866 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
;; vpdpwssds, merge-masked.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23868 (define_insn "vpdpwssds_<mode>_mask"
23869 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23870 (vec_merge:VI4_AVX512VL
23871 (unspec:VI4_AVX512VL
23872 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23873 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23874 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23875 UNSPEC_VPMADDWDACCSSD)
23877 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23878 "TARGET_AVX512VNNI"
23879 "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
23880 [(set_attr ("prefix") ("evex"))])
;; Zero-masked vpdpwssds expander: forwards to the _maskz_1 insn with an
;; explicit const0 merge vector.
23882 (define_expand "vpdpwssds_<mode>_maskz"
23883 [(match_operand:VI4_AVX512VL 0 "register_operand")
23884 (match_operand:VI4_AVX512VL 1 "register_operand")
23885 (match_operand:VI4_AVX512VL 2 "register_operand")
23886 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
23887 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23888 "TARGET_AVX512VNNI"
23890 emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
23891 operands[2], operands[3],
23892 CONST0_RTX (<MODE>mode),
;; vpdpwssds, zero-masked.
;; Dropped the stray blank before '}' (trailing space in Intel output).
23897 (define_insn "vpdpwssds_<mode>_maskz_1"
23898 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
23899 (vec_merge:VI4_AVX512VL
23900 (unspec:VI4_AVX512VL
23901 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
23902 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
23903 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
23904 UNSPEC_VPMADDWDACCSSD)
23905 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
23906 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23907 "TARGET_AVX512VNNI"
23908 "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
23909 [(set_attr ("prefix") ("evex"))])
;; VAES patterns (EVEX-encoded AES on wider vectors).  NOTE(review): the
;; insn conditions and the unspec names for vaesdec/vaesenc are on lines
;; not visible in this extract.
23911 (define_insn "vaesdec_<mode>"
23912 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
23913 (unspec:VI1_AVX512VL_F
23914 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
23915 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
23918 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
23921 (define_insn "vaesdeclast_<mode>"
23922 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
23923 (unspec:VI1_AVX512VL_F
23924 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
23925 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
23926 UNSPEC_VAESDECLAST))]
23928 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
23931 (define_insn "vaesenc_<mode>"
23932 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
23933 (unspec:VI1_AVX512VL_F
23934 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
23935 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
23938 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
23941 (define_insn "vaesenclast_<mode>"
23942 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
23943 (unspec:VI1_AVX512VL_F
23944 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
23945 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
23946 UNSPEC_VAESENCLAST))]
23948 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
;; VPCLMULQDQ: carry-less multiply of quadword pairs selected by
;; immediate operand 3.
23951 (define_insn "vpclmulqdq_<mode>"
23952 [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
23953 (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
23954 (match_operand:VI8_FVL 2 "vector_operand" "vm")
23955 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23956 UNSPEC_VPCLMULQDQ))]
23957 "TARGET_VPCLMULQDQ"
23958 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
23959 [(set_attr "mode" "DI")])
;; AVX512BITALG vpshufbitqmb: result is a mask register (one bit per byte
;; element); optional mask-merge via <mask_scalar_merge_name>.
23961 (define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
23962 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
23963 (unspec:<avx512fmaskmode>
23964 [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
23965 (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
23966 UNSPEC_VPSHUFBIT))]
23967 "TARGET_AVX512BITALG"
23968 "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
23969 [(set_attr "prefix" "evex")
23970 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VP2INTERSECT support.  VI48_AVX512VP2VL lists the dword/qword
;; vector modes (sub-512-bit ones gated on AVX512VL); MASK_DWI is the
;; pair-of-mask-registers mode the instruction writes.
23972 (define_mode_iterator VI48_AVX512VP2VL
23974 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
23975 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
23977 (define_mode_iterator MASK_DWI [P2QI P2HI])
;; Move expander for the mask-pair modes; legitimizes mem-to-mem moves by
;; forcing the source into a register.
23979 (define_expand "mov<mode>"
23980 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand")
23981 (match_operand:MASK_DWI 1 "nonimmediate_operand"))]
23982 "TARGET_AVX512VP2INTERSECT"
23984 if (MEM_P (operands[0]) && MEM_P (operands[1]))
23985 operands[1] = force_reg (<MODE>mode, operands[1]);
;; After reload, a mask-pair move is split into two single-mask moves via
;; split_double_mode.
23988 (define_insn_and_split "*mov<mode>_internal"
23989 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand" "=k,o")
23990 (match_operand:MASK_DWI 1 "nonimmediate_operand" "ko,k"))]
23991 "TARGET_AVX512VP2INTERSECT
23992 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23994 "&& reload_completed"
23995 [(set (match_dup 0) (match_dup 1))
23996 (set (match_dup 2) (match_dup 3))]
23998 split_double_mode (<MODE>mode, &operands[0], 2, &operands[0], &operands[2]);
;; vp2intersect: writes an adjacent pair of mask registers (P2QI here).
24001 (define_insn "avx512vp2intersect_2intersect<mode>"
24002 [(set (match_operand:P2QI 0 "register_operand" "=k")
24004 [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
24005 (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
24006 UNSPEC_VP2INTERSECT))]
24007 "TARGET_AVX512VP2INTERSECT"
24008 "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24009 [(set_attr ("prefix") ("evex"))])
;; 512-bit dword variant uses the P2HI mask-pair mode.
24011 (define_insn "avx512vp2intersect_2intersectv16si"
24012 [(set (match_operand:P2HI 0 "register_operand" "=k")
24013 (unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
24014 (match_operand:V16SI 2 "vector_operand" "vm")]
24015 UNSPEC_VP2INTERSECT))]
24016 "TARGET_AVX512VP2INTERSECT"
24017 "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
24018 [(set_attr ("prefix") ("evex"))])
;; AVX512BF16 mode machinery: BF16 lists the bfloat16 vector modes (held
;; in HImode-element vectors); the attrs map between BF16 vectors and the
;; SF vectors they convert to/from.
24020 (define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
24021 ;; Converting from BF to SF
24022 (define_mode_attr bf16_cvt_2sf
24023 [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
24024 ;; Converting from SF to BF
24025 (define_mode_attr sf_cvt_bf16
24026 [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
24027 ;; Mapping from BF to SF
24028 (define_mode_attr sf_bf16
24029 [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
;; vcvtne2ps2bf16: convert two SF vectors to one packed BF16 vector.
;; The _maskz expander forwards to the _mask pattern with a const0 merge
;; vector.
24031 (define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
24032 [(match_operand:BF16 0 "register_operand")
24033 (match_operand:<bf16_cvt_2sf> 1 "register_operand")
24034 (match_operand:<bf16_cvt_2sf> 2 "register_operand")
24035 (match_operand:<avx512fmaskmode> 3 "register_operand")]
24036 "TARGET_AVX512BF16"
24038 emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
24039 operands[2], CONST0_RTX(<MODE>mode), operands[3]));
24043 (define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
24044 [(set (match_operand:BF16 0 "register_operand" "=v")
24046 [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
24047 (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
24048 UNSPEC_VCVTNE2PS2BF16))]
24049 "TARGET_AVX512BF16"
24050 "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
;; vcvtneps2bf16: convert one SF vector to BF16 (narrowing); same
;; maskz-to-mask forwarding scheme.
24052 (define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
24053 [(match_operand:<sf_cvt_bf16> 0 "register_operand")
24054 (match_operand:VF1_AVX512VL 1 "register_operand")
24055 (match_operand:<avx512fmaskmode> 2 "register_operand")]
24056 "TARGET_AVX512BF16"
24058 emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
24059 CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
24063 (define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
24064 [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
24065 (unspec:<sf_cvt_bf16>
24066 [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
24067 UNSPEC_VCVTNEPS2BF16))]
24068 "TARGET_AVX512BF16"
24069 "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; vdpbf16ps: BF16 dot-product accumulated into SF vector operand 1
;; (tied to the destination).  The mask operands use the half-width mask
;; mode because each SF lane corresponds to a BF16 pair.
24071 (define_expand "avx512f_dpbf16ps_<mode>_maskz"
24072 [(match_operand:VF1_AVX512VL 0 "register_operand")
24073 (match_operand:VF1_AVX512VL 1 "register_operand")
24074 (match_operand:<sf_bf16> 2 "register_operand")
24075 (match_operand:<sf_bf16> 3 "register_operand")
24076 (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
24077 "TARGET_AVX512BF16"
24079 emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
24080 operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
;; Unmasked / zero-masked insn (via <maskz_half_name>).
24084 (define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
24085 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
24086 (unspec:VF1_AVX512VL
24087 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
24088 (match_operand:<sf_bf16> 2 "register_operand" "v")
24089 (match_operand:<sf_bf16> 3 "register_operand" "v")]
24090 UNSPEC_VDPBF16PS))]
24091 "TARGET_AVX512BF16"
24092 "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
;; Merge-masked insn: mask operand 4 selects result vs. accumulator lanes.
24094 (define_insn "avx512f_dpbf16ps_<mode>_mask"
24095 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
24096 (vec_merge:VF1_AVX512VL
24097 (unspec:VF1_AVX512VL
24098 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
24099 (match_operand:<sf_bf16> 2 "register_operand" "v")
24100 (match_operand:<sf_bf16> 3 "register_operand" "v")]
24103 (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
24104 "TARGET_AVX512BF16"
24105 "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
;; Key Locker LOADIWKEY: loads the internal wrapping key from xmm regs;
;; operand 2 must be xmm0 ("Yz"), operand 3 is eax ("a").  Modeled as a
;; volatile unspec with a flags clobber since it only has side effects.
24108 (define_insn "loadiwkey"
24109 [(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "v")
24110 (match_operand:V2DI 1 "register_operand" "v")
24111 (match_operand:V2DI 2 "register_operand" "Yz")
24112 (match_operand:SI 3 "register_operand" "a")]
24114 (clobber (reg:CC FLAGS_REG))]
24116 "loadiwkey\t{%0, %1|%1, %0}"
24117 [(set_attr "type" "other")])
;; Key Locker ENCODEKEY128.  The expander builds the full 8-element
;; PARALLEL by hand (result set, xmm0-xmm2 outputs, xmm4-xmm6 zeroed,
;; flags clobber) so the *insn below can match it with the
;; encodekey128_operation predicate.
24119 (define_expand "encodekey128u32"
24121 [(set (match_operand:SI 0 "register_operand")
24122 (unspec_volatile:SI
24123 [(match_operand:SI 1 "register_operand")
24124 (reg:V2DI XMM0_REG)]
24125 UNSPECV_ENCODEKEY128U32))])]
24132 /* parallel rtx for encodekey128 predicate */
24133 operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));
24135 for (i = 0; i < 7; i++)
24136 xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
;; Element 0: the SImode result of the volatile unspec.
24139 = gen_rtx_UNSPEC_VOLATILE (SImode,
24140 gen_rtvec (2, operands[1], xmm_regs[0]),
24141 UNSPECV_ENCODEKEY128U32);
24143 XVECEXP (operands[2], 0, 0)
24144 = gen_rtx_SET (operands[0], tmp_unspec);
;; Elements 1-3: xmm0-xmm2 receive the key handle (modeled as volatile
;; unspec sets).
24147 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
24148 gen_rtvec (1, const0_rtx),
24149 UNSPECV_ENCODEKEY128U32);
24151 for (i = 0; i < 3; i++)
24152 XVECEXP (operands[2], 0, i + 1)
24153 = gen_rtx_SET (xmm_regs[i], tmp_unspec);
;; Elements 4-6: xmm4-xmm6 are zeroed by the instruction.
24155 for (i = 4; i < 7; i++)
24156 XVECEXP (operands[2], 0, i)
24157 = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
;; Element 7: flags clobber.
24159 XVECEXP (operands[2], 0, 7)
24160 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
;; Matching insn: whole PARALLEL checked by encodekey128_operation.
24163 (define_insn "*encodekey128u32"
24164 [(match_parallel 2 "encodekey128_operation"
24165 [(set (match_operand:SI 0 "register_operand" "=r")
24166 (unspec_volatile:SI
24167 [(match_operand:SI 1 "register_operand" "r")
24168 (reg:V2DI XMM0_REG)]
24169 UNSPECV_ENCODEKEY128U32))])]
24171 "encodekey128\t{%1, %0|%0, %1}"
24172 [(set_attr "type" "other")])
;; Key Locker ENCODEKEY256: same scheme as encodekey128 but the input key
;; spans xmm0/xmm1, so the PARALLEL has 9 elements (result, xmm0-xmm3
;; handle outputs, xmm4-xmm6 zeroed, flags clobber).
24174 (define_expand "encodekey256u32"
24176 [(set (match_operand:SI 0 "register_operand")
24177 (unspec_volatile:SI
24178 [(match_operand:SI 1 "register_operand")
24179 (reg:V2DI XMM0_REG)
24180 (reg:V2DI XMM1_REG)]
24181 UNSPECV_ENCODEKEY256U32))])]
24188 /* parallel rtx for encodekey256 predicate */
24189 operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
24191 for (i = 0; i < 7; i++)
24192 xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
;; Element 0: the SImode result of the volatile unspec.
24195 = gen_rtx_UNSPEC_VOLATILE (SImode,
24196 gen_rtvec (3, operands[1],
24197 xmm_regs[0], xmm_regs[1]),
24198 UNSPECV_ENCODEKEY256U32);
24200 XVECEXP (operands[2], 0, 0)
24201 = gen_rtx_SET (operands[0], tmp_unspec);
;; Elements 1-4: xmm0-xmm3 receive the key handle.
24204 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
24205 gen_rtvec (1, const0_rtx),
24206 UNSPECV_ENCODEKEY256U32);
24208 for (i = 0; i < 4; i++)
24209 XVECEXP (operands[2], 0, i + 1)
24210 = gen_rtx_SET (xmm_regs[i], tmp_unspec);
;; Elements 5-7: xmm4-xmm6 are zeroed by the instruction.
24212 for (i = 4; i < 7; i++)
24213 XVECEXP (operands[2], 0, i + 1)
24214 = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
;; Element 8: flags clobber.
24216 XVECEXP (operands[2], 0, 8)
24217 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
;; Matching insn: whole PARALLEL checked by encodekey256_operation.
24220 (define_insn "*encodekey256u32"
24221 [(match_parallel 2 "encodekey256_operation"
24222 [(set (match_operand:SI 0 "register_operand" "=r")
24223 (unspec_volatile:SI
24224 [(match_operand:SI 1 "register_operand" "r")
24225 (reg:V2DI XMM0_REG)
24226 (reg:V2DI XMM1_REG)]
24227 UNSPECV_ENCODEKEY256U32))])]
24229 "encodekey256\t{%1, %0|%0, %1}"
24230 [(set_attr "type" "other")])
;; Iterator over the four non-wide Key Locker AES unspec_volatile
;; codes (dec/enc x 128/256-bit key handle).
24232 (define_int_iterator AESDECENCKL
24233 [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
24234 UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])
;; Maps each AESDECENCKL unspec to its mnemonic suffix, giving pattern
;; names / asm mnemonics aesdec128kl, aesdec256kl, aesenc128kl,
;; aesenc256kl.
24236 (define_int_attr aesklvariant
24237 [(UNSPECV_AESDEC128KLU8 "dec128kl")
24238 (UNSPECV_AESDEC256KLU8 "dec256kl")
24239 (UNSPECV_AESENC128KLU8 "enc128kl")
24240 (UNSPECV_AESENC256KLU8 "enc256kl")])
;; Single-block Key Locker AES{DEC,ENC}{128,256}KL.  Operand 0/1 is the
;; V2DI data block transformed in place (constraint "0"), operand 2 the
;; in-memory key handle (BLK).  A second set records that the
;; instruction also writes ZF (CCZ) - read via the unspec so the flags
;; result is treated as opaque.
24242 (define_insn "aes<aesklvariant>u8"
24243 [(set (match_operand:V2DI 0 "register_operand" "=v")
24244 (unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
24245 (match_operand:BLK 2 "memory_operand" "m")]
24247 (set (reg:CCZ FLAGS_REG)
24248 (unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
24250 "aes<aesklvariant>\t{%2, %0|%0, %2}"
24251 [(set_attr "type" "other")])
;; Iterator over the four wide (8-block) Key Locker AES unspec_volatile
;; codes (dec/enc x 128/256-bit key handle).
24253 (define_int_iterator AESDECENCWIDEKL
24254 [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
24255 UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])
;; Lower-case mnemonic suffix for each wide-KL unspec, used in pattern
;; names and asm output (aesdecwide128kl etc.).
24257 (define_int_attr aeswideklvariant
24258 [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
24259 (UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
24260 (UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
24261 (UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])
;; Upper-case counterpart of aeswideklvariant: yields the unspec-name
;; stem so the expander's C code can write UNSPECV_<AESWIDEKLVARIANT>
;; and have it substituted per iterator value.
24263 (define_int_attr AESWIDEKLVARIANT
24264 [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
24265 (UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
24266 (UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
24267 (UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])
;; Wide Key Locker AES expander (8 blocks per instruction).  Only the
;; flags set over the memory key handle (operand 0) is written in the
;; pattern; the preparation code builds the 9-element parallel that
;; *aes<aeswideklvariant>u8 (via aeswidekl_operation) matches:
;;   elt 0    : CCZ flags <- unspec (key-handle memory)
;;   elts 1-8 : XMM0-XMM7 <- unspec over themselves (transformed
;;              in place, each register both input and output)
24269 (define_expand "aes<aeswideklvariant>u8"
24271 [(set (reg:CCZ FLAGS_REG)
24272 (unspec_volatile:CCZ
24273 [(match_operand:BLK 0 "memory_operand")]
24274 AESDECENCWIDEKL))])]
24280 /* parallel rtx for widekl predicate */
24281 operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
/* Flags unspec reading the in-memory key handle.  */
24284 = gen_rtx_UNSPEC_VOLATILE (CCZmode,
24285 gen_rtvec (1, operands[0]),
24286 UNSPECV_<AESWIDEKLVARIANT>);
/* Element 0: the CCZ flags result.  */
24288 XVECEXP (operands[1], 0, 0)
24289 = gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
/* Elements 1-8: XMM0-XMM7, each set from an unspec over itself.  */
24292 for (i = 0; i < 8; i++)
24294 rtx xmm_reg = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
24297 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
24298 gen_rtvec (1, xmm_reg),
24299 UNSPECV_<AESWIDEKLVARIANT>);
24300 XVECEXP (operands[1], 0, i + 1)
24301 = gen_rtx_SET (xmm_reg, tmp_unspec);
;; Wide Key Locker AES insn proper.  Matches the parallel built by the
;; expander above: the spelled-out element is the CCZ flags set from
;; the key-handle memory operand; the XMM0-XMM7 in-place updates are
;; the remaining elements, checked by aeswidekl_operation.  Only the
;; memory operand appears in the asm template.
24305 (define_insn "*aes<aeswideklvariant>u8"
24306 [(match_parallel 1 "aeswidekl_operation"
24307 [(set (reg:CCZ FLAGS_REG)
24308 (unspec_volatile:CCZ
24309 [(match_operand:BLK 0 "memory_operand" "m")]
24310 AESDECENCWIDEKL))])]
24312 "aes<aeswideklvariant>\t{%0}"
24313 [(set_attr "type" "other")])