1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2021 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
56 UNSPEC_XOP_UNSIGNED_CMP
67 UNSPEC_AESKEYGENASSIST
88 ;; For AVX512F support
90 UNSPEC_UNSIGNED_FIX_NOTRUNC
105 UNSPEC_COMPRESS_STORE
117 ;; For embed. rounding feature
118 UNSPEC_EMBEDDED_ROUNDING
120 ;; For AVX512PF support
121 UNSPEC_GATHER_PREFETCH
122 UNSPEC_SCATTER_PREFETCH
124 ;; For AVX512ER support
138 ;; For AVX512BW support
146 ;; For AVX512DQ support
151 ;; For AVX512IFMA support
155 ;; For AVX512VBMI support
158 ;; For AVX5124FMAPS/AVX5124VNNIW support
165 UNSPEC_GF2P8AFFINEINV
169 ;; For AVX512VBMI2 support
175 ;; For AVX512VNNI support
176 UNSPEC_VPMADDUBSWACCD
177 UNSPEC_VPMADDUBSWACCSSD
179 UNSPEC_VPMADDWDACCSSD
187 ;; For VPCLMULQDQ support
190 ;; For AVX512BITALG support
193 ;; For VP2INTERSECT support
196 ;; For AVX512BF16 support
197 UNSPEC_VCVTNE2PS2BF16
202 (define_c_enum "unspecv" [
213 UNSPECV_AESDEC128KLU8
214 UNSPECV_AESENC128KLU8
215 UNSPECV_AESDEC256KLU8
216 UNSPECV_AESENC256KLU8
217 UNSPECV_AESDECWIDE128KLU8
218 UNSPECV_AESENCWIDE128KLU8
219 UNSPECV_AESDECWIDE256KLU8
220 UNSPECV_AESENCWIDE256KLU8
221 UNSPECV_ENCODEKEY128U32
222 UNSPECV_ENCODEKEY256U32
225 ;; All vector modes including V?TImode, used in move patterns.
226 (define_mode_iterator VMOVE
227 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
228 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
229 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
230 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
231 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
232 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
233 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
235 ;; All AVX-512{F,VL} vector modes.  Assumes TARGET_AVX512F as the baseline.
236 (define_mode_iterator V48_AVX512VL
237 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
238 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
239 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
240 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
242 ;; 1,2 byte AVX-512{BW,VL} vector modes.  Assumes TARGET_AVX512BW as the baseline.
243 (define_mode_iterator VI12_AVX512VL
244 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
245 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
247 ;; Same iterator, but without the assumed TARGET_AVX512BW baseline.
248 (define_mode_iterator VI12_AVX512VLBW
249 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
250 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
251 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; QImode integer vector modes.  V64QI is unconditional here (presumably
;; assuming a TARGET_AVX512BW baseline like VI12_AVX512VL above — confirm
;; at use sites); the 128/256-bit forms additionally need AVX512VL.
253 (define_mode_iterator VI1_AVX512VL
254 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
257 (define_mode_iterator V
258 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
259 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
260 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
261 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
262 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
263 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
265 ;; All 128bit vector modes
266 (define_mode_iterator V_128
267 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
269 ;; All 256bit vector modes
270 (define_mode_iterator V_256
271 [V32QI V16HI V8SI V4DI V8SF V4DF])
273 ;; All 128bit and 256bit vector modes
274 (define_mode_iterator V_128_256
275 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
277 ;; All 512bit vector modes
278 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
280 ;; All 256bit and 512bit vector modes
281 (define_mode_iterator V_256_512
282 [V32QI V16HI V8SI V4DI V8SF V4DF
283 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
284 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
286 ;; All vector float modes
287 (define_mode_iterator VF
288 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
289 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
291 ;; 128- and 256-bit float vector modes
292 (define_mode_iterator VF_128_256
293 [(V8SF "TARGET_AVX") V4SF
294 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
296 ;; All SFmode vector float modes
297 (define_mode_iterator VF1
298 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
;; All SFmode vector float modes, with the 256-bit form gated on AVX2
;; rather than AVX (compare VF1 above).
300 (define_mode_iterator VF1_AVX2
301 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
303 ;; 128- and 256-bit SF vector modes
304 (define_mode_iterator VF1_128_256
305 [(V8SF "TARGET_AVX") V4SF])
;; 256- and 128-bit SF vector modes; V8SF is unconditional, the 128-bit
;; form requires AVX512VL.
307 (define_mode_iterator VF1_128_256VL
308 [V8SF (V4SF "TARGET_AVX512VL")])
310 ;; All DFmode vector float modes
311 (define_mode_iterator VF2
312 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
314 ;; 128- and 256-bit DF vector modes
315 (define_mode_iterator VF2_128_256
316 [(V4DF "TARGET_AVX") V2DF])
;; 512- and 256-bit DF vector modes; the 512-bit form requires AVX512F.
318 (define_mode_iterator VF2_512_256
319 [(V8DF "TARGET_AVX512F") V4DF])
;; 512- and 256-bit DF vector modes; V8DF is unconditional, the 256-bit
;; form requires AVX512VL.
321 (define_mode_iterator VF2_512_256VL
322 [V8DF (V4DF "TARGET_AVX512VL")])
324 ;; All 128bit vector float modes
325 (define_mode_iterator VF_128
326 [V4SF (V2DF "TARGET_SSE2")])
328 ;; All 256bit vector float modes
329 (define_mode_iterator VF_256
332 ;; All 512bit vector float modes
333 (define_mode_iterator VF_512
;; 4/8-byte integer AVX-512 vector modes; the 512-bit forms are
;; unconditional, the 128/256-bit forms require AVX512VL.
336 (define_mode_iterator VI48_AVX512VL
337 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
338 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; All vector float modes under AVX-512; the 512-bit forms are
;; unconditional, the 128/256-bit forms require AVX512VL.
340 (define_mode_iterator VF_AVX512VL
341 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
342 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
344 ;; AVX512ER SF plus 128- and 256-bit SF vector modes
345 (define_mode_iterator VF1_AVX512ER_128_256
346 [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
;; DFmode vector float modes under AVX-512; V8DF is unconditional, the
;; 128/256-bit forms require AVX512VL.
348 (define_mode_iterator VF2_AVX512VL
349 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SFmode vector float modes under AVX-512; V16SF is unconditional, the
;; 128/256-bit forms require AVX512VL.
351 (define_mode_iterator VF1_AVX512VL
352 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
354 ;; All vector integer modes
355 (define_mode_iterator VI
356 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
357 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
358 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
359 (V8SI "TARGET_AVX") V4SI
360 (V4DI "TARGET_AVX") V2DI])
;; All integer vector modes, with the 256-bit forms gated on AVX2
;; instead of AVX (compare the VI iterator above).
362 (define_mode_iterator VI_AVX2
363 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
364 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
365 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
366 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
368 ;; All QImode vector integer modes
369 (define_mode_iterator VI1
370 [(V32QI "TARGET_AVX") V16QI])
372 ;; All 128-bit vector modes, plus their 256-bit AVX counterparts.
373 (define_mode_iterator V_AVX
374 [V16QI V8HI V4SI V2DI V4SF V2DF
375 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
376 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
377 (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
379 (define_mode_iterator VI48_AVX
381 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
;; All DImode vector integer modes.
383 (define_mode_iterator VI8
384 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vector modes with V4DI unconditional: V8DI requires AVX512F
;; and V2DI requires AVX512VL.
386 (define_mode_iterator VI8_FVL
387 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
389 (define_mode_iterator VI8_AVX512VL
390 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 512- and 256-bit DI vector modes; the 256-bit form requires AVX512VL.
392 (define_mode_iterator VI8_256_512
393 [V8DI (V4DI "TARGET_AVX512VL")])
395 (define_mode_iterator VI1_AVX2
396 [(V32QI "TARGET_AVX2") V16QI])
398 (define_mode_iterator VI1_AVX512
399 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
401 (define_mode_iterator VI1_AVX512F
402 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
404 (define_mode_iterator VI2_AVX2
405 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
407 (define_mode_iterator VI2_AVX512F
408 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
410 (define_mode_iterator VI4_AVX
411 [(V8SI "TARGET_AVX") V4SI])
413 (define_mode_iterator VI4_AVX2
414 [(V8SI "TARGET_AVX2") V4SI])
416 (define_mode_iterator VI4_AVX512F
417 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
419 (define_mode_iterator VI4_AVX512VL
420 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
422 (define_mode_iterator VI48_AVX512F_AVX512VL
423 [V4SI V8SI (V16SI "TARGET_AVX512F")
424 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
426 (define_mode_iterator VI2_AVX512VL
427 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
429 (define_mode_iterator VI1_AVX512VL_F
430 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
432 (define_mode_iterator VI8_AVX2_AVX512BW
433 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
435 (define_mode_iterator VI8_AVX2
436 [(V4DI "TARGET_AVX2") V2DI])
438 (define_mode_iterator VI8_AVX2_AVX512F
439 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
441 (define_mode_iterator VI8_AVX_AVX512F
442 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
444 (define_mode_iterator VI4_128_8_256
448 (define_mode_iterator V8FI
452 (define_mode_iterator V16FI
455 ;; ??? We should probably use TImode instead.
456 (define_mode_iterator VIMAX_AVX2_AVX512BW
457 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
459 ;; Assumes TARGET_AVX512BW as the baseline.
460 (define_mode_iterator VIMAX_AVX512VL
461 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
463 (define_mode_iterator VIMAX_AVX2
464 [(V2TI "TARGET_AVX2") V1TI])
466 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
467 (define_mode_iterator SSESCALARMODE
468 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
470 (define_mode_iterator VI12_AVX2
471 [(V32QI "TARGET_AVX2") V16QI
472 (V16HI "TARGET_AVX2") V8HI])
474 (define_mode_iterator VI12_AVX2_AVX512BW
475 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
476 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
478 (define_mode_iterator VI24_AVX2
479 [(V16HI "TARGET_AVX2") V8HI
480 (V8SI "TARGET_AVX2") V4SI])
482 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
483 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
484 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
485 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
487 (define_mode_iterator VI124_AVX2
488 [(V32QI "TARGET_AVX2") V16QI
489 (V16HI "TARGET_AVX2") V8HI
490 (V8SI "TARGET_AVX2") V4SI])
492 (define_mode_iterator VI2_AVX2_AVX512BW
493 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
495 (define_mode_iterator VI248_AVX512VL
497 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
498 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
499 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
501 (define_mode_iterator VI48_AVX2
502 [(V8SI "TARGET_AVX2") V4SI
503 (V4DI "TARGET_AVX2") V2DI])
505 (define_mode_iterator VI248_AVX2
506 [(V16HI "TARGET_AVX2") V8HI
507 (V8SI "TARGET_AVX2") V4SI
508 (V4DI "TARGET_AVX2") V2DI])
510 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
511 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
512 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
513 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
515 (define_mode_iterator VI248_AVX512BW
516 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
518 (define_mode_iterator VI248_AVX512BW_AVX512VL
519 [(V32HI "TARGET_AVX512BW")
520 (V4DI "TARGET_AVX512VL") V16SI V8DI])
522 ;; Assumes TARGET_AVX512VL as the baseline.
523 (define_mode_iterator VI248_AVX512BW_1
524 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
528 (define_mode_iterator VI248_AVX512BW_2
529 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
533 (define_mode_iterator VI48_AVX512F
534 [(V16SI "TARGET_AVX512F") V8SI V4SI
535 (V8DI "TARGET_AVX512F") V4DI V2DI])
537 (define_mode_iterator VI48_AVX_AVX512F
538 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
539 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
541 (define_mode_iterator VI12_AVX_AVX512F
542 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
543 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
545 (define_mode_iterator V48_AVX2
548 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
549 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
551 (define_mode_iterator VI1_AVX512VLBW
552 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
553 (V16QI "TARGET_AVX512VL")])
;; AVX-512 ISA name associated with each mode, used in pattern-name
;; substitution (e.g. the <avx512>_load<mode>_mask patterns below):
;; 128/256-bit modes map to avx512vl, 512-bit QI/HI modes to avx512bw,
;; and 512-bit SI/DI/SF/DF modes to avx512f.
555 (define_mode_attr avx512
556 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
557 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
558 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
559 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
560 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
561 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
563 (define_mode_attr sse2_avx_avx512f
564 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
565 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
566 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
567 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
568 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
569 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
571 (define_mode_attr sse2_avx2
572 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
573 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
574 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
575 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
576 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
578 (define_mode_attr ssse3_avx2
579 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
580 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
581 (V4SI "ssse3") (V8SI "avx2")
582 (V2DI "ssse3") (V4DI "avx2")
583 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
585 (define_mode_attr sse4_1_avx2
586 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
587 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
588 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
589 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
591 (define_mode_attr avx_avx2
592 [(V4SF "avx") (V2DF "avx")
593 (V8SF "avx") (V4DF "avx")
594 (V4SI "avx2") (V2DI "avx2")
595 (V8SI "avx2") (V4DI "avx2")])
597 (define_mode_attr vec_avx2
598 [(V16QI "vec") (V32QI "avx2")
599 (V8HI "vec") (V16HI "avx2")
600 (V4SI "vec") (V8SI "avx2")
601 (V2DI "vec") (V4DI "avx2")])
603 (define_mode_attr avx2_avx512
604 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
605 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
606 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
607 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
608 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
610 (define_mode_attr shuffletype
611 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
612 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
613 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
614 (V32HI "i") (V16HI "i") (V8HI "i")
615 (V64QI "i") (V32QI "i") (V16QI "i")
616 (V4TI "i") (V2TI "i") (V1TI "i")])
;; Mapping of 512-bit vector modes to the vector mode of a quarter
;; of their size (same element type, a quarter of the elements).
618 (define_mode_attr ssequartermode
619 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Like ssequartermode, but giving the insn mode: the integer quarters
;; are expressed as TI rather than as an integer vector mode.
621 (define_mode_attr ssequarterinsnmode
622 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
624 (define_mode_attr vecmemsuffix
625 [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
626 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
628 (define_mode_attr ssedoublemodelower
629 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
630 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
631 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
633 (define_mode_attr ssedoublemode
634 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
635 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
636 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
637 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
638 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
639 (V4DI "V8DI") (V8DI "V16DI")])
641 (define_mode_attr ssebytemode
642 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
643 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
645 ;; All 128bit vector integer modes
646 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
648 ;; All 256bit vector integer modes
649 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
651 ;; Various 128bit vector integer mode combinations
652 (define_mode_iterator VI12_128 [V16QI V8HI])
653 (define_mode_iterator VI14_128 [V16QI V4SI])
654 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
655 (define_mode_iterator VI24_128 [V8HI V4SI])
656 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
657 (define_mode_iterator VI48_128 [V4SI V2DI])
659 ;; Various 256bit and 512 vector integer mode combinations
660 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
661 (define_mode_iterator VI124_256_AVX512F_AVX512BW
663 (V64QI "TARGET_AVX512BW")
664 (V32HI "TARGET_AVX512BW")
665 (V16SI "TARGET_AVX512F")])
666 (define_mode_iterator VI48_256 [V8SI V4DI])
667 (define_mode_iterator VI48_512 [V16SI V8DI])
668 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
669 (define_mode_iterator VI_AVX512BW
670 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
672 ;; Int-float size matches
673 (define_mode_iterator VI4F_128 [V4SI V4SF])
674 (define_mode_iterator VI8F_128 [V2DI V2DF])
675 (define_mode_iterator VI4F_256 [V8SI V8SF])
676 (define_mode_iterator VI8F_256 [V4DI V4DF])
677 (define_mode_iterator VI4F_256_512
679 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
680 (define_mode_iterator VI48F_256_512
682 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
683 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
684 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
685 (define_mode_iterator VF48_I1248
686 [V16SI V16SF V8DI V8DF V32HI V64QI])
687 (define_mode_iterator VI48F
688 [V16SI V16SF V8DI V8DF
689 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
690 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
691 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
692 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
693 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
695 (define_mode_iterator VF_AVX512
696 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
697 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
700 (define_mode_attr avx512bcst
701 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
702 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
703 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
704 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
705 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
706 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
708 ;; Mapping from float mode to required SSE level
709 (define_mode_attr sse
710 [(SF "sse") (DF "sse2")
711 (V4SF "sse") (V2DF "sse2")
712 (V16SF "avx512f") (V8SF "avx")
713 (V8DF "avx512f") (V4DF "avx")])
715 (define_mode_attr sse2
716 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
717 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
719 (define_mode_attr sse3
720 [(V16QI "sse3") (V32QI "avx")])
722 (define_mode_attr sse4_1
723 [(V4SF "sse4_1") (V2DF "sse4_1")
724 (V8SF "avx") (V4DF "avx")
726 (V4DI "avx") (V2DI "sse4_1")
727 (V8SI "avx") (V4SI "sse4_1")
728 (V16QI "sse4_1") (V32QI "avx")
729 (V8HI "sse4_1") (V16HI "avx")])
731 (define_mode_attr avxsizesuffix
732 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
733 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
734 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
735 (V16SF "512") (V8DF "512")
736 (V8SF "256") (V4DF "256")
737 (V4SF "") (V2DF "")])
739 ;; SSE instruction mode
740 (define_mode_attr sseinsnmode
741 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
742 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
743 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
744 (V16SF "V16SF") (V8DF "V8DF")
745 (V8SF "V8SF") (V4DF "V4DF")
746 (V4SF "V4SF") (V2DF "V2DF")
749 ;; Mapping of vector modes to corresponding mask size
750 (define_mode_attr avx512fmaskmode
751 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
752 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
753 (V16SI "HI") (V8SI "QI") (V4SI "QI")
754 (V8DI "QI") (V4DI "QI") (V2DI "QI")
755 (V16SF "HI") (V8SF "QI") (V4SF "QI")
756 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
758 ;; Lowercase variant of avx512fmaskmode, for use in pattern names.
759 (define_mode_attr avx512fmaskmodelower
760 [(V64QI "di") (V32QI "si") (V16QI "hi")
761 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
762 (V16SI "hi") (V8SI "qi") (V4SI "qi")
763 (V8DI "qi") (V4DI "qi") (V2DI "qi")
764 (V16SF "hi") (V8SF "qi") (V4SF "qi")
765 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
767 ;; Mapping of vector modes to corresponding mask half size
768 (define_mode_attr avx512fmaskhalfmode
769 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
770 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
771 (V16SI "QI") (V8SI "QI") (V4SI "QI")
772 (V8DI "QI") (V4DI "QI") (V2DI "QI")
773 (V16SF "QI") (V8SF "QI") (V4SF "QI")
774 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
776 ;; Mapping of vector float modes to an integer mode of the same size
777 (define_mode_attr sseintvecmode
778 [(V16SF "V16SI") (V8DF "V8DI")
779 (V8SF "V8SI") (V4DF "V4DI")
780 (V4SF "V4SI") (V2DF "V2DI")
781 (V16SI "V16SI") (V8DI "V8DI")
782 (V8SI "V8SI") (V4DI "V4DI")
783 (V4SI "V4SI") (V2DI "V2DI")
784 (V16HI "V16HI") (V8HI "V8HI")
785 (V32HI "V32HI") (V64QI "V64QI")
786 (V32QI "V32QI") (V16QI "V16QI")])
;; Integer insn mode (TI/OI/XI) of the same total size as the
;; vector float mode.
788 (define_mode_attr sseintvecmode2
789 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
790 (V8SF "OI") (V4SF "TI")])
792 (define_mode_attr sseintvecmodelower
793 [(V16SF "v16si") (V8DF "v8di")
794 (V8SF "v8si") (V4DF "v4di")
795 (V4SF "v4si") (V2DF "v2di")
796 (V8SI "v8si") (V4DI "v4di")
797 (V4SI "v4si") (V2DI "v2di")
798 (V16HI "v16hi") (V8HI "v8hi")
799 (V32QI "v32qi") (V16QI "v16qi")])
801 ;; Mapping of vector modes to a vector mode of double size
802 (define_mode_attr ssedoublevecmode
803 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
804 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
805 (V8SF "V16SF") (V4DF "V8DF")
806 (V4SF "V8SF") (V2DF "V4DF")])
808 ;; Mapping of vector modes to a vector mode of half size
809 (define_mode_attr ssehalfvecmode
810 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
811 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
812 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
813 (V16SF "V8SF") (V8DF "V4DF")
814 (V8SF "V4SF") (V4DF "V2DF")
817 (define_mode_attr ssehalfvecmodelower
818 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
819 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
820 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
821 (V16SF "v8sf") (V8DF "v4df")
822 (V8SF "v4sf") (V4DF "v2df")
825 ;; Mapping of vector modes to the packed single mode of the same size.
826 (define_mode_attr ssePSmode
827 [(V16SI "V16SF") (V8DF "V16SF")
828 (V16SF "V16SF") (V8DI "V16SF")
829 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
830 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
831 (V8SI "V8SF") (V4SI "V4SF")
832 (V4DI "V8SF") (V2DI "V4SF")
833 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
834 (V8SF "V8SF") (V4SF "V4SF")
835 (V4DF "V8SF") (V2DF "V4SF")])
;; Packed single mode with the same element count as the DI vector mode
;; (i.e. half the total width, unlike ssePSmode above).
837 (define_mode_attr ssePSmode2
838 [(V8DI "V8SF") (V4DI "V4SF")])
840 ;; Mapping of vector modes back to the scalar modes
841 (define_mode_attr ssescalarmode
842 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
843 (V32HI "HI") (V16HI "HI") (V8HI "HI")
844 (V16SI "SI") (V8SI "SI") (V4SI "SI")
845 (V8DI "DI") (V4DI "DI") (V2DI "DI")
846 (V16SF "SF") (V8SF "SF") (V4SF "SF")
847 (V8DF "DF") (V4DF "DF") (V2DF "DF")
848 (V4TI "TI") (V2TI "TI")])
850 ;; Mapping of vector modes back to the scalar modes
851 (define_mode_attr ssescalarmodelower
852 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
853 (V32HI "hi") (V16HI "hi") (V8HI "hi")
854 (V16SI "si") (V8SI "si") (V4SI "si")
855 (V8DI "di") (V4DI "di") (V2DI "di")
856 (V16SF "sf") (V8SF "sf") (V4SF "sf")
857 (V8DF "df") (V4DF "df") (V2DF "df")
858 (V4TI "ti") (V2TI "ti")])
860 ;; Mapping of vector modes to the 128bit modes
861 (define_mode_attr ssexmmmode
862 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
863 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
864 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
865 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
866 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
867 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
869 ;; Pointer size override for scalar modes (Intel asm dialect)
870 (define_mode_attr iptr
871 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
872 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
873 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
874 (V16SF "k") (V8DF "q")
875 (V8SF "k") (V4DF "q")
876 (V4SF "k") (V2DF "q")
879 ;; Mapping of vector modes to VPTERNLOG suffix
880 (define_mode_attr ternlogsuffix
881 [(V8DI "q") (V4DI "q") (V2DI "q")
882 (V16SI "d") (V8SI "d") (V4SI "d")
883 (V32HI "d") (V16HI "d") (V8HI "d")
884 (V64QI "d") (V32QI "d") (V16QI "d")])
886 ;; Number of scalar elements in each vector type
887 (define_mode_attr ssescalarnum
888 [(V64QI "64") (V16SI "16") (V8DI "8")
889 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
890 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
891 (V16SF "16") (V8DF "8")
892 (V8SF "8") (V4DF "4")
893 (V4SF "4") (V2DF "2")])
895 ;; Mask of scalar elements in each vector type
896 (define_mode_attr ssescalarnummask
897 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
898 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
899 (V8SF "7") (V4DF "3")
900 (V4SF "3") (V2DF "1")])
;; Element size in bits, used as an instruction mnemonic suffix
;; (e.g. vmovdqu<ssescalarsize> in the masked-load patterns below).
;; NOTE(review): the TI vector modes map to 64, not 128 — presumably
;; because no 128-bit element suffix exists; confirm at use sites.
902 (define_mode_attr ssescalarsize
903 [(V4TI "64") (V2TI "64") (V1TI "64")
904 (V8DI "64") (V4DI "64") (V2DI "64")
905 (V64QI "8") (V32QI "8") (V16QI "8")
906 (V32HI "16") (V16HI "16") (V8HI "16")
907 (V16SI "32") (V8SI "32") (V4SI "32")
908 (V16SF "32") (V8SF "32") (V4SF "32")
909 (V8DF "64") (V4DF "64") (V2DF "64")])
911 ;; SSE prefix for integer vector modes
912 (define_mode_attr sseintprefix
913 [(V2DI "p") (V2DF "")
918 (V16SI "p") (V16SF "")
919 (V16QI "p") (V8HI "p")
920 (V32QI "p") (V16HI "p")
921 (V64QI "p") (V32HI "p")])
923 ;; SSE scalar suffix for vector modes
924 (define_mode_attr ssescalarmodesuffix
926 (V16SF "ss") (V8DF "sd")
927 (V8SF "ss") (V4DF "sd")
928 (V4SF "ss") (V2DF "sd")
929 (V16SI "d") (V8DI "q")
930 (V8SI "d") (V4DI "q")
931 (V4SI "d") (V2DI "q")])
933 ;; Pack/unpack vector modes
934 (define_mode_attr sseunpackmode
935 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
936 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
937 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
939 (define_mode_attr ssepackmode
940 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
941 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
942 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
944 ;; Mapping of the max integer size for xop rotate immediate constraint
945 (define_mode_attr sserotatemax
946 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
948 ;; Mapping of mode to cast intrinsic name
949 (define_mode_attr castmode
950 [(V8SI "si") (V8SF "ps") (V4DF "pd")
951 (V16SI "si") (V16SF "ps") (V8DF "pd")])
953 ;; Instruction suffix for sign and zero extensions.
954 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
956 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
957 ;; i64x4 or f64x4 for 512bit modes.
958 (define_mode_attr i128
959 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
960 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
961 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
963 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
964 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
965 (define_mode_attr i128vldq
966 [(V8SF "f32x4") (V4DF "f64x2")
967 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
970 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
971 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
973 ;; Mapping for dbpsadbw modes.
974 (define_mode_attr dbpsadbwmode
975 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
977 ;; Mapping suffixes for broadcast
978 (define_mode_attr bcstscalarsuff
979 [(V64QI "b") (V32QI "b") (V16QI "b")
980 (V32HI "w") (V16HI "w") (V8HI "w")
981 (V16SI "d") (V8SI "d") (V4SI "d")
982 (V8DI "q") (V4DI "q") (V2DI "q")
983 (V16SF "ss") (V8SF "ss") (V4SF "ss")
984 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
986 ;; Tie mode of assembler operand to mode iterator
987 (define_mode_attr xtg_mode
988 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
989 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
990 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
992 ;; Half mask mode for unpacks
993 (define_mode_attr HALFMASKMODE
994 [(DI "SI") (SI "HI")])
996 ;; Double mask mode for packs
997 (define_mode_attr DOUBLEMASKMODE
998 [(HI "SI") (SI "DI")])
1001 ;; Include define_subst patterns for instructions with mask
1002 (include "subst.md")
1004 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
1006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1012 ;; All of these patterns are enabled for SSE1 as well as SSE2.
1013 ;; This is essential for maintaining stable calling conventions.
1015 (define_expand "mov<mode>"
1016 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1017 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1020 ix86_expand_vector_move (<MODE>mode, operands);
1024 (define_insn "mov<mode>_internal"
1025 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
1027 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
1030 && (register_operand (operands[0], <MODE>mode)
1031 || register_operand (operands[1], <MODE>mode))"
1033 switch (get_attr_type (insn))
1036 return standard_sse_constant_opcode (insn, operands);
1039 return ix86_output_ssemov (insn, operands);
1045 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1046 (set_attr "prefix" "maybe_vex")
1048 (cond [(match_test "TARGET_AVX")
1049 (const_string "<sseinsnmode>")
1050 (ior (not (match_test "TARGET_SSE2"))
1051 (match_test "optimize_function_for_size_p (cfun)"))
1052 (const_string "V4SF")
1053 (and (match_test "<MODE>mode == V2DFmode")
1054 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1055 (const_string "V4SF")
1056 (and (eq_attr "alternative" "3")
1057 (match_test "TARGET_SSE_TYPELESS_STORES"))
1058 (const_string "V4SF")
1059 (and (eq_attr "alternative" "0")
1060 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1063 (const_string "<sseinsnmode>")))
1064 (set (attr "enabled")
1065 (cond [(and (match_test "<MODE_SIZE> == 16")
1066 (eq_attr "alternative" "1"))
1067 (symbol_ref "TARGET_SSE2")
1068 (and (match_test "<MODE_SIZE> == 32")
1069 (eq_attr "alternative" "1"))
1070 (symbol_ref "TARGET_AVX2")
1072 (symbol_ref "true")))])
1074 ;; If mem_addr points to a memory region with fewer than whole-vector-size bytes
1075 ;; of accessible memory, and k is a mask that would prevent reading the inaccessible
1076 ;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent the load from being transformed into vpblendd.
1078 (define_expand "<avx512>_load<mode>_mask"
1079 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1080 (vec_merge:V48_AVX512VL
1081 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
1082 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
1083 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1086 if (CONST_INT_P (operands[3]))
1088 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1091 else if (MEM_P (operands[1]))
1092 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1093 gen_rtvec(1, operands[1]),
;; Masked load from memory for 32/64-bit element modes.  Chooses the
;; FP (vmovu*/vmova*) or integer (vmovdqu/vmovdqa) mnemonic from the
;; element mode, and the unaligned form when the operand is misaligned.
;; %{%3%} prints the mask register, %N2 prints {z} for a zero source.
1097 (define_insn "*<avx512>_load<mode>_mask"
1098 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1099 (vec_merge:V48_AVX512VL
1100 (unspec:V48_AVX512VL
1101 [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
1103 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
1104 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1107 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1109 if (misaligned_operand (operands[1], <MODE>mode))
1110 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1112 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1116 if (misaligned_operand (operands[1], <MODE>mode))
1117 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1119 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1122 [(set_attr "type" "ssemov")
1123 (set_attr "prefix" "evex")
1124 (set_attr "mode" "<sseinsnmode>")])
;; Collapse the UNSPEC wrapper back to a plain load once the mask has
;; been optimized away (the load is no longer a masked access).
1126 (define_insn_and_split "*<avx512>_load<mode>"
1127 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1128 (unspec:V48_AVX512VL
1129 [(match_operand:V48_AVX512VL 1 "memory_operand")]
1134 [(set (match_dup 0) (match_dup 1))])
;; Same masked-load expander/insn/split trio as above, but for 8/16-bit
;; element modes (AVX512BW territory).  Only the unaligned vmovdqu<size>
;; form is emitted here, per the template below.
1136 (define_expand "<avx512>_load<mode>_mask"
1137 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
1138 (vec_merge:VI12_AVX512VL
1139 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
1140 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
1141 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1144 if (CONST_INT_P (operands[3]))
;; Constant all-ones mask: degrade to an unmasked move.
1146 emit_insn (gen_rtx_SET (operands[0], operands[1]))
1149 else if (MEM_P (operands[1]))
;; Wrap memory source in an unspec so it cannot be widened (see above).
1150 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1151 gen_rtvec(1, operands[1]),
1156 (define_insn "*<avx512>_load<mode>_mask"
1157 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1158 (vec_merge:VI12_AVX512VL
1159 (unspec:VI12_AVX512VL
1160 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1162 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
1163 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1165 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1166 [(set_attr "type" "ssemov")
1167 (set_attr "prefix" "evex")
1168 (set_attr "mode" "<sseinsnmode>")])
;; Drop the unspec once the mask is gone; becomes an ordinary load.
1170 (define_insn_and_split "*<avx512>_load<mode>"
1171 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1172 (unspec:VI12_AVX512VL
1173 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1178 [(set (match_dup 0) (match_dup 1))])
;; Masked scalar move: element 0 of operand 2 is merged (under mask bit
;; %4, with %N3 selecting {z} when operand 3 is zero) over operand 1's
;; low element; upper elements come from operand 1.
1180 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1181 [(set (match_operand:VF_128 0 "register_operand" "=v")
1184 (match_operand:VF_128 2 "register_operand" "v")
1185 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1186 (match_operand:QI 4 "register_operand" "Yk"))
1187 (match_operand:VF_128 1 "register_operand" "v")
1190 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1191 [(set_attr "type" "ssemov")
1192 (set_attr "prefix" "evex")
1193 (set_attr "mode" "<ssescalarmode>")])
;; Expander for a masked scalar load: broadcasts the memory scalar into
;; a vector position 0, merges under the mask, and zeroes the upper
;; elements (operand 4 is materialized as the zero vector below).
1195 (define_expand "avx512f_load<mode>_mask"
1196 [(set (match_operand:<ssevecmode> 0 "register_operand")
1197 (vec_merge:<ssevecmode>
1198 (vec_merge:<ssevecmode>
1199 (vec_duplicate:<ssevecmode>
1200 (match_operand:MODEF 1 "memory_operand"))
1201 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1202 (match_operand:QI 3 "register_operand"))
1206 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
;; Masked scalar load insn matching the expander above.
;; Fix: the Intel-syntax half of the template read "%0%{3%}%N2" — the
;; missing '%' before '3' meant the mask register was printed as the
;; literal "3" instead of operand 3.  Both syntaxes now use %{%3%},
;; matching every sibling masked-move template in this file.
1208 (define_insn "*avx512f_load<mode>_mask"
1209 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1210 (vec_merge:<ssevecmode>
1211 (vec_merge:<ssevecmode>
1212 (vec_duplicate:<ssevecmode>
1213 (match_operand:MODEF 1 "memory_operand" "m"))
1214 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1215 (match_operand:QI 3 "register_operand" "Yk"))
1216 (match_operand:<ssevecmode> 4 "const0_operand" "C")
1219 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1220 [(set_attr "type" "ssemov")
1221 (set_attr "prefix" "evex")
1222 (set_attr "memory" "load")
1223 (set_attr "mode" "<MODE>")])
;; Masked scalar store: writes element 0 of operand 1 to memory iff
;; mask bit 0 of operand 2 is set.
1225 (define_insn "avx512f_store<mode>_mask"
1226 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1228 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1231 (match_operand:<ssevecmode> 1 "register_operand" "v")
1232 (parallel [(const_int 0)]))
1235 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1236 [(set_attr "type" "ssemov")
1237 (set_attr "prefix" "evex")
1238 (set_attr "memory" "store")
1239 (set_attr "mode" "<MODE>")])
;; Masked blend/move for 32/64-bit element modes.  Alternative 0
;; (operand 1 matches the destination or is zero) emits a masked move;
;; alternative 1 (distinct register source) emits vblendm.
1241 (define_insn "<avx512>_blendm<mode>"
1242 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1243 (vec_merge:V48_AVX512VL
1244 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1245 (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1246 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1249 if (REG_P (operands[1])
1250 && REGNO (operands[1]) != REGNO (operands[0]))
1251 return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
;; Fall through: source 1 is %0 or zero, so a masked move suffices.
1253 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1255 if (misaligned_operand (operands[2], <MODE>mode))
1256 return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1258 return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1262 if (misaligned_operand (operands[2], <MODE>mode))
1263 return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1265 return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1268 [(set_attr "type" "ssemov")
1269 (set_attr "prefix" "evex")
1270 (set_attr "mode" "<sseinsnmode>")])
;; 8/16-bit element variant: masked vmovdqu for alternative 0,
;; vpblendm for alternative 1.
1272 (define_insn "<avx512>_blendm<mode>"
1273 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1274 (vec_merge:VI12_AVX512VL
1275 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1276 (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1277 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1280 vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
1281 vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1282 [(set_attr "type" "ssemov")
1283 (set_attr "prefix" "evex")
1284 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector stores.  32/64-bit element version picks the FP or
;; integer, aligned or unaligned mnemonic; 8/16-bit version always
;; uses vmovdqu<size>.
1286 (define_insn "<avx512>_store<mode>_mask"
1287 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1288 (vec_merge:V48_AVX512VL
1289 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1291 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1294 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1296 if (misaligned_operand (operands[0], <MODE>mode))
1297 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1299 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1303 if (misaligned_operand (operands[0], <MODE>mode))
1304 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1306 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1309 [(set_attr "type" "ssemov")
1310 (set_attr "prefix" "evex")
1311 (set_attr "memory" "store")
1312 (set_attr "mode" "<sseinsnmode>")])
;; 8/16-bit element masked store.
1314 (define_insn "<avx512>_store<mode>_mask"
1315 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1316 (vec_merge:VI12_AVX512VL
1317 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1319 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1321 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1322 [(set_attr "type" "ssemov")
1323 (set_attr "prefix" "evex")
1324 (set_attr "memory" "store")
1325 (set_attr "mode" "<sseinsnmode>")])
;; movq: copy the low quadword of a V2DI operand (upper half zeroed by
;; the instruction's architectural behavior; the elided RTL presumably
;; expresses the zeroing — confirm against full source).
1327 (define_insn "sse2_movq128"
1328 [(set (match_operand:V2DI 0 "register_operand" "=v")
1331 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1332 (parallel [(const_int 0)]))
1335 "%vmovq\t{%1, %0|%0, %q1}"
1336 [(set_attr "type" "ssemov")
1337 (set_attr "prefix" "maybe_vex")
1338 (set_attr "mode" "TI")])
1340 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1341 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1342 ;; from memory, we'd prefer to load the memory directly into the %xmm
1343 ;; register. To facilitate this happy circumstance, this pattern won't
1344 ;; split until after register allocation. If the 64-bit value didn't
1345 ;; come from memory, this is the best we can do. This is much better
1346 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; See the block comment above: move a DImode value into an xmm register
;; on 32-bit targets.  Splits after reload.  Register path: build the
;; 64-bit value from the low/high SImode halves, either with pinsrd
;; (SSE4.1, alternative 0) or via a scratch xmm + punpckldq.  Memory
;; path: load directly with vec_concat against zero.
1349 (define_insn_and_split "movdi_to_sse"
1350 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1351 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1352 UNSPEC_MOVDI_TO_SSE))
1353 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1354 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1356 "&& reload_completed"
1359 if (register_operand (operands[1], DImode))
1361 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1362 Assemble the 64-bit DImode value in an xmm register.  */
1363 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1364 gen_lowpart (SImode, operands[1])));
;; SSE4.1: insert the high half directly into element 1.
1366 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1367 gen_highpart (SImode, operands[1]),
;; Pre-SSE4.1: load the high half into the scratch and interleave.
1371 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1372 gen_highpart (SImode, operands[1])));
1373 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1377 else if (memory_operand (operands[1], DImode))
1378 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1379 operands[1], const0_rtx));
1384 [(set_attr "isa" "sse4,*,*")])
;; Post-reload splits: rewrite a vector load of a zero-extended scalar
;; constant-pool/memory operand as a scalar load combined with zero.
;; V4SF case (duplicate + merge with zero vector):
1387 [(set (match_operand:V4SF 0 "register_operand")
1388 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1389 "TARGET_SSE && reload_completed"
1392 (vec_duplicate:V4SF (match_dup 1))
1396 operands[1] = gen_lowpart (SFmode, operands[1]);
1397 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF case (concat the DFmode scalar with a zero DF element):
1401 [(set (match_operand:V2DF 0 "register_operand")
1402 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1403 "TARGET_SSE2 && reload_completed"
1404 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1406 operands[1] = gen_lowpart (DFmode, operands[1]);
1407 operands[2] = CONST0_RTX (DFmode);
;; Misaligned vector move: defer entirely to the C helper, which picks
;; the best unaligned move sequence for the target.
1410 (define_expand "movmisalign<mode>"
1411 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1412 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1415 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1419 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Peephole-style splits merging a low-half load + high-half load pair
;; into one unaligned V2DF load (movupd), when the target says unaligned
;; loads are optimal and the two addresses are verified consecutive by
;; ix86_operands_ok_for_move_multiple.
1421 [(set (match_operand:V2DF 0 "sse_reg_operand")
1422 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1423 (match_operand:DF 4 "const0_operand")))
1424 (set (match_operand:V2DF 2 "sse_reg_operand")
1425 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1426 (parallel [(const_int 0)]))
1427 (match_operand:DF 3 "memory_operand")))]
1428 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1429 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1430 [(set (match_dup 2) (match_dup 5))]
1431 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
;; Variant where the first insn is a plain DF load into an SSE register
;; that the second insn then uses as the low half (REGNO check ties
;; operands 4 and 2 together).
1434 [(set (match_operand:DF 0 "sse_reg_operand")
1435 (match_operand:DF 1 "memory_operand"))
1436 (set (match_operand:V2DF 2 "sse_reg_operand")
1437 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1438 (match_operand:DF 3 "memory_operand")))]
1439 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1440 && REGNO (operands[4]) == REGNO (operands[2])
1441 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1442 [(set (match_dup 2) (match_dup 5))]
1443 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1445 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1447 [(set (match_operand:DF 0 "memory_operand")
1448 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1449 (parallel [(const_int 0)])))
1450 (set (match_operand:DF 2 "memory_operand")
1451 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1452 (parallel [(const_int 1)])))]
1453 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1454 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1455 [(set (match_dup 4) (match_dup 1))]
1456 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
;; SSE3 lddqu: unaligned integer-vector load tuned for cacheline-split
;; accesses; kept as an unspec so it is never merged with normal loads.
1458 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1459 [(set (match_operand:VI1 0 "register_operand" "=x")
1460 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1463 "%vlddqu\t{%1, %0|%0, %1}"
1464 [(set_attr "type" "ssemov")
1465 (set_attr "movu" "1")
1466 (set (attr "prefix_data16")
1468 (match_test "TARGET_AVX")
1470 (const_string "0")))
1471 (set (attr "prefix_rep")
1473 (match_test "TARGET_AVX")
1475 (const_string "1")))
1476 (set_attr "prefix" "maybe_vex")
1477 (set_attr "mode" "<sseinsnmode>")])
;; Non-temporal stores (bypass caches).  Unspecs prevent the stores
;; from being combined with ordinary memory accesses.
;; Scalar integer movnti:
1479 (define_insn "sse2_movnti<mode>"
1480 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1481 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1484 "movnti\t{%1, %0|%0, %1}"
1485 [(set_attr "type" "ssemov")
1486 (set_attr "prefix_data16" "0")
1487 (set_attr "mode" "<MODE>")])
;; FP vector movntps/movntpd:
1489 (define_insn "<sse>_movnt<mode>"
1490 [(set (match_operand:VF 0 "memory_operand" "=m")
1492 [(match_operand:VF 1 "register_operand" "v")]
1495 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1496 [(set_attr "type" "ssemov")
1497 (set_attr "prefix" "maybe_vex")
1498 (set_attr "mode" "<MODE>")])
;; Integer vector movntdq:
1500 (define_insn "<sse2>_movnt<mode>"
1501 [(set (match_operand:VI8 0 "memory_operand" "=m")
1502 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1505 "%vmovntdq\t{%1, %0|%0, %1}"
1506 [(set_attr "type" "ssecvt")
1507 (set (attr "prefix_data16")
1509 (match_test "TARGET_AVX")
1511 (const_string "1")))
1512 (set_attr "prefix" "maybe_vex")
1513 (set_attr "mode" "<sseinsnmode>")])
1515 ; Expand patterns for non-temporal stores. At the moment, only those
1516 ; that directly map to insns are defined; it would be possible to
1517 ; define patterns for other modes that would expand to several insns.
1519 ;; Modes handled by storent patterns.
;; Scalar and vector modes that have a direct non-temporal store insn;
;; each entry is gated on the ISA that provides it.
1520 (define_mode_iterator STORENT_MODE
1521 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1522 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1523 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1524 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1525 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Standard-named expander mapped onto the movnt insns above.
1527 (define_expand "storent<mode>"
1528 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1529 (unspec:STORENT_MODE
1530 [(match_operand:STORENT_MODE 1 "register_operand")]
1534 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1538 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1540 ;; All integer modes with AVX512BW/DQ.
;; Mask-register mode iterators.  QI/HI/SI/DI masks need different ISA
;; extensions depending on the operation (DQ for byte ops, BW for
;; 32/64-bit masks); the three iterators encode those gatings.
1541 (define_mode_iterator SWI1248_AVX512BWDQ
1542 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1544 ;; All integer modes with AVX512BW, where HImode operation
1545 ;; can be used instead of QImode.
1546 (define_mode_iterator SWI1248_AVX512BW
1547 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1549 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1550 (define_mode_iterator SWI1248_AVX512BWDQ2
1551 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1552 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
;; kmovb/w/d/q expander; rejects mem-to-mem like every move expander.
1554 (define_expand "kmov<mskmodesuffix>"
1555 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1556 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1558 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
;; Mask-register logic (kand/kor/kxor).  The UNSPEC_MASKOP parallel
;; marks the insn as a dedicated mask operation so it is not rewritten
;; into GPR logic.  QImode without DQ falls back to the word form
;; (kandw etc.), which is safe because the upper bits are don't-care.
1560 (define_insn "k<code><mode>"
1561 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1562 (any_logic:SWI1248_AVX512BW
1563 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1564 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1565 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1568 if (get_attr_mode (insn) == MODE_HI)
1569 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1571 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1573 [(set_attr "type" "msklog")
1574 (set_attr "prefix" "vex")
1576 (cond [(and (match_test "<MODE>mode == QImode")
1577 (not (match_test "TARGET_AVX512DQ")))
1580 (const_string "<MODE>")))])
;; Post-reload split: a plain logic op whose operands all live in mask
;; registers is re-emitted as the dedicated mask insn above (dropping
;; the flags clobber, which k-ops do not have).
1583 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1584 (any_logic:SWI1248_AVX512BW
1585 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
1586 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1587 (clobber (reg:CC FLAGS_REG))]
1588 "TARGET_AVX512F && reload_completed"
1591 (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
1592 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kandn: ~op1 & op2 on mask registers, plus the matching post-reload
;; split from the generic and-not form.  Same QI-without-DQ word
;; fallback as the other k-logic patterns.
1594 (define_insn "kandn<mode>"
1595 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1596 (and:SWI1248_AVX512BW
1597 (not:SWI1248_AVX512BW
1598 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1599 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1600 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1603 if (get_attr_mode (insn) == MODE_HI)
1604 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1606 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1608 [(set_attr "type" "msklog")
1609 (set_attr "prefix" "vex")
1611 (cond [(and (match_test "<MODE>mode == QImode")
1612 (not (match_test "TARGET_AVX512DQ")))
1615 (const_string "<MODE>")))])
;; Split plain and-not on mask registers into the kandn insn.
1618 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1619 (and:SWI1248_AVX512BW
1620 (not:SWI1248_AVX512BW
1621 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand"))
1622 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1623 (clobber (reg:CC FLAGS_REG))]
1624 "TARGET_AVX512F && reload_completed"
1627 (and:SWI1248_AVX512BW
1628 (not:SWI1248_AVX512BW (match_dup 1))
1630 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kxnor: ~(op1 ^ op2) on mask registers; word-form fallback for
;; QImode without AVX512DQ.
1632 (define_insn "kxnor<mode>"
1633 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1634 (not:SWI1248_AVX512BW
1635 (xor:SWI1248_AVX512BW
1636 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1637 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1638 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1641 if (get_attr_mode (insn) == MODE_HI)
1642 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1644 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1646 [(set_attr "type" "msklog")
1647 (set_attr "prefix" "vex")
1649 (cond [(and (match_test "<MODE>mode == QImode")
1650 (not (match_test "TARGET_AVX512DQ")))
1653 (const_string "<MODE>")))])
;; knot: bitwise complement of a mask register, plus the post-reload
;; split from generic one's-complement on mask registers.
1655 (define_insn "knot<mode>"
1656 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1657 (not:SWI1248_AVX512BW
1658 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1659 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1662 if (get_attr_mode (insn) == MODE_HI)
1663 return "knotw\t{%1, %0|%0, %1}";
1665 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1667 [(set_attr "type" "msklog")
1668 (set_attr "prefix" "vex")
1670 (cond [(and (match_test "<MODE>mode == QImode")
1671 (not (match_test "TARGET_AVX512DQ")))
1674 (const_string "<MODE>")))])
;; Split plain NOT on a mask register into the knot insn.
1677 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1678 (not:SWI1248_AVX512BW
1679 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")))]
1680 "TARGET_AVX512F && reload_completed"
1683 (not:SWI1248_AVX512BW (match_dup 1)))
1684 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; knotd with implicit zero-extension of the SImode result to DImode
;; (kmov/knot on a 32-bit mask zero the upper 32 bits).
;; Fix: dropped a stray ';' after the output-template string; it was
;; harmless only because ';' starts an md comment, but it was noise.
1686 (define_insn "*knotsi_1_zext"
1687 [(set (match_operand:DI 0 "register_operand" "=k")
1689 (not:SI (match_operand:SI 1 "register_operand" "k"))))
1690 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1692 "knotd\t{%1, %0|%0, %1}"
1693 [(set_attr "type" "msklog")
1694 (set_attr "prefix" "vex")
1695 (set_attr "mode" "SI")])
;; Post-reload split matching the zero-extended NOT above; re-emits it
;; with the UNSPEC_MASKOP tag so it becomes the dedicated knotd insn.
1698 [(set (match_operand:DI 0 "mask_reg_operand")
1700 (not:SI (match_operand:SI 1 "mask_reg_operand"))))]
1701 "TARGET_AVX512BW && reload_completed"
1705 (not:SI (match_dup 1))))
1706 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kadd: mask-register addition; QI/HI require AVX512DQ, SI/DI require
;; AVX512BW (the BWDQ2 iterator encodes this).
1708 (define_insn "kadd<mode>"
1709 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1710 (plus:SWI1248_AVX512BWDQ2
1711 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1712 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1713 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1715 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1716 [(set_attr "type" "msklog")
1717 (set_attr "prefix" "vex")
1718 (set_attr "mode" "<MODE>")])
1720 ;; Mask variant shift mnemonics
1721 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
;; kshiftl/kshiftr by an immediate (0..255) on mask registers.
1723 (define_insn "k<code><mode>"
1724 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1725 (any_lshift:SWI1248_AVX512BWDQ
1726 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1727 (match_operand 2 "const_0_to_255_operand" "n")))
1728 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1730 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1731 [(set_attr "type" "msklog")
1732 (set_attr "prefix" "vex")
1733 (set_attr "mode" "<MODE>")])
;; Mask-register flag-setting tests.  ktest needs DQ for QI/HI; kortest
;; needs DQ only for QI (per the respective iterators).
1735 (define_insn "ktest<mode>"
1736 [(set (reg:CC FLAGS_REG)
1738 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1739 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1742 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1743 [(set_attr "mode" "<MODE>")
1744 (set_attr "type" "msklog")
1745 (set_attr "prefix" "vex")])
;; kortest: sets CF/ZF from the OR of two masks.
1747 (define_insn "kortest<mode>"
1748 [(set (reg:CC FLAGS_REG)
1750 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1751 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1754 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1755 [(set_attr "mode" "<MODE>")
1756 (set_attr "type" "msklog")
1757 (set_attr "prefix" "vex")])
;; kunpckbw: concatenate two QImode masks into one HImode mask
;; (operand 1 presumably forms the high byte — the shift RTL is elided
;; in this view; confirm against full source).
1759 (define_insn "kunpckhi"
1760 [(set (match_operand:HI 0 "register_operand" "=k")
1763 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1765 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1767 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1768 [(set_attr "mode" "HI")
1769 (set_attr "type" "msklog")
1770 (set_attr "prefix" "vex")])
;; kunpckwd: concatenate two HImode masks into one SImode mask.
;; Consistency fix: added the "type"/"prefix" attributes that the
;; sibling kunpckhi pattern sets — kunpckwd is a VEX-encoded mask op
;; just like kunpckbw, so it should be classified identically for
;; scheduling and length computation.
1772 (define_insn "kunpcksi"
1773 [(set (match_operand:SI 0 "register_operand" "=k")
1776 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1778 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1780 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1781 [(set_attr "mode" "SI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
;; kunpckdq: concatenate two SImode masks into one DImode mask.
;; Consistency fix: added "type"/"prefix" attributes to match the
;; sibling kunpckhi pattern (same VEX-encoded mask-op family).
1783 (define_insn "kunpckdi"
1784 [(set (match_operand:DI 0 "register_operand" "=k")
1787 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1789 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1791 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1792 [(set_attr "mode" "DI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1797 ;; Parallel floating point arithmetic
1799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FP abs/neg: expanded via the C helper into masked and/xor/or with a
;; sign-bit constant, then the insn_and_split patterns below commit to
;; the concrete logic op after reload.  Operand order is canonicalized
;; so the memory/constant mask ends up as operand 2.
1801 (define_expand "<code><mode>2"
1802 [(set (match_operand:VF 0 "register_operand")
1804 (match_operand:VF 1 "register_operand")))]
1806 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg kernel: (absneg_op x mask), split post-reload.
1808 (define_insn_and_split "*<code><mode>2"
1809 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1811 (match_operand:VF 1 "vector_operand" "0,xBm,v,m")))
1812 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1815 "&& reload_completed"
1817 (<absneg_op>:VF (match_dup 1) (match_dup 2)))]
;; Keep the register operand first; match the destination when possible.
1821 if (MEM_P (operands[1]))
1822 std::swap (operands[1], operands[2]);
1826 if (operands_match_p (operands[0], operands[2]))
1827 std::swap (operands[1], operands[2]);
1830 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; nabs (negated absolute value): OR with the sign-bit mask.
1832 (define_insn_and_split "*nabs<mode>2"
1833 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1836 (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))))
1837 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1840 "&& reload_completed"
1842 (ior:VF (match_dup 1) (match_dup 2)))]
1846 if (MEM_P (operands[1]))
1847 std::swap (operands[1], operands[2]);
1851 if (operands_match_p (operands[0], operands[2]))
1852 std::swap (operands[1], operands[2]);
1855 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector FP add/sub, with optional masking and embedded rounding via
;; the <mask_name>/<round_name> subst machinery.
1857 (define_expand "<insn><mode>3<mask_name><round_name>"
1858 [(set (match_operand:VF 0 "register_operand")
1860 (match_operand:VF 1 "<round_nimm_predicate>")
1861 (match_operand:VF 2 "<round_nimm_predicate>")))]
1862 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1863 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Two alternatives: legacy SSE two-operand form and VEX/EVEX
;; three-operand form (with broadcast/rounding/mask decorations).
1865 (define_insn "*<insn><mode>3<mask_name><round_name>"
1866 [(set (match_operand:VF 0 "register_operand" "=x,v")
1868 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
1869 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1870 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1871 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1873 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1874 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1875 [(set_attr "isa" "noavx,avx")
1876 (set_attr "type" "sseadd")
1877 (set_attr "prefix" "<bcst_mask_prefix3>")
1878 (set_attr "mode" "<MODE>")])
1880 ;; Standard scalar operation patterns which preserve the rest of the
1881 ;; vector for combiner.
;; Scalar add/sub on element 0 with the rest of the vector preserved
;; from operand 1 — the shape the combiner produces from scalar code.
1882 (define_insn "*<sse>_vm<insn><mode>3"
1883 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1885 (vec_duplicate:VF_128
1886 (plusminus:<ssescalarmode>
1887 (vec_select:<ssescalarmode>
1888 (match_operand:VF_128 1 "register_operand" "0,v")
1889 (parallel [(const_int 0)]))
1890 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1895 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1896 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1897 [(set_attr "isa" "noavx,avx")
1898 (set_attr "type" "sseadd")
1899 (set_attr "prefix" "orig,vex")
1900 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic-level scalar add/sub with optional mask and rounding.
1902 (define_insn "<sse>_vm<insn><mode>3<mask_scalar_name><round_scalar_name>"
1903 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1906 (match_operand:VF_128 1 "register_operand" "0,v")
1907 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1912 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1913 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1914 [(set_attr "isa" "noavx,avx")
1915 (set_attr "type" "sseadd")
1916 (set_attr "prefix" "<round_scalar_prefix>")
1917 (set_attr "mode" "<ssescalarmode>")])
;; Vector FP multiply; "%0" in the constraint marks commutativity.
1919 (define_expand "mul<mode>3<mask_name><round_name>"
1920 [(set (match_operand:VF 0 "register_operand")
1922 (match_operand:VF 1 "<round_nimm_predicate>")
1923 (match_operand:VF 2 "<round_nimm_predicate>")))]
1924 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1925 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1927 (define_insn "*mul<mode>3<mask_name><round_name>"
1928 [(set (match_operand:VF 0 "register_operand" "=x,v")
1930 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
1931 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1932 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
1933 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1935 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1936 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1937 [(set_attr "isa" "noavx,avx")
1938 (set_attr "type" "ssemul")
1939 (set_attr "prefix" "<bcst_mask_prefix3>")
1940 (set_attr "btver2_decode" "direct,double")
1941 (set_attr "mode" "<MODE>")])
1943 ;; Standard scalar operation patterns which preserve the rest of the
1944 ;; vector for combiner.
;; Scalar mul/div on element 0, rest of the vector preserved from
;; operand 1 (combiner shape), mirroring the vm add/sub patterns above.
1945 (define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
1946 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1948 (vec_duplicate:VF_128
1949 (multdiv:<ssescalarmode>
1950 (vec_select:<ssescalarmode>
1951 (match_operand:VF_128 1 "register_operand" "0,v")
1952 (parallel [(const_int 0)]))
1953 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1958 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1959 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1960 [(set_attr "isa" "noavx,avx")
1961 (set_attr "type" "sse<multdiv_mnemonic>")
1962 (set_attr "prefix" "orig,vex")
1963 (set_attr "btver2_decode" "direct,double")
1964 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic-level scalar mul/div with optional mask and rounding.
1966 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1967 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1970 (match_operand:VF_128 1 "register_operand" "0,v")
1971 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1976 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1977 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1978 [(set_attr "isa" "noavx,avx")
1979 (set_attr "type" "sse<multdiv_mnemonic>")
1980 (set_attr "prefix" "<round_scalar_prefix>")
1981 (set_attr "btver2_decode" "direct,double")
1982 (set_attr "mode" "<ssescalarmode>")])
;; Vector FP division.  The single-float expander can substitute the
;; Newton-Raphson reciprocal approximation (ix86_emit_swdivsf) under
;; -ffast-math-style flags; the double expander never does.
1984 (define_expand "div<mode>3"
1985 [(set (match_operand:VF2 0 "register_operand")
1986 (div:VF2 (match_operand:VF2 1 "register_operand")
1987 (match_operand:VF2 2 "vector_operand")))]
1989 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1991 (define_expand "div<mode>3"
1992 [(set (match_operand:VF1 0 "register_operand")
1993 (div:VF1 (match_operand:VF1 1 "register_operand")
1994 (match_operand:VF1 2 "vector_operand")))]
1997 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
;; Software reciprocal division only when speed matters and the math
;; flags permit the accuracy loss.
2000 && TARGET_RECIP_VEC_DIV
2001 && !optimize_insn_for_size_p ()
2002 && flag_finite_math_only && !flag_trapping_math
2003 && flag_unsafe_math_optimizations)
2005 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Hardware divps/divpd insn (with optional mask/rounding).
2010 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
2011 [(set (match_operand:VF 0 "register_operand" "=x,v")
2013 (match_operand:VF 1 "register_operand" "0,v")
2014 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2015 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2017 div<ssemodesuffix>\t{%2, %0|%0, %2}
2018 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2019 [(set_attr "isa" "noavx,avx")
2020 (set_attr "type" "ssediv")
2021 (set_attr "prefix" "<bcst_mask_prefix3>")
2022 (set_attr "mode" "<MODE>")])
;; rcpps: approximate reciprocal (single precision, 128/256-bit).
2024 (define_insn "<sse>_rcp<mode>2"
2025 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2027 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
2029 "%vrcpps\t{%1, %0|%0, %1}"
2030 [(set_attr "type" "sse")
2031 (set_attr "atom_sse_attr" "rcp")
2032 (set_attr "btver2_sse_attr" "rcp")
2033 (set_attr "prefix" "maybe_vex")
2034 (set_attr "mode" "<MODE>")])
;; rcpss, whole-vector source form (%k1 prints the scalar part).
2036 (define_insn "sse_vmrcpv4sf2"
2037 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2039 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2041 (match_operand:V4SF 2 "register_operand" "0,x")
2045 rcpss\t{%1, %0|%0, %k1}
2046 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
2047 [(set_attr "isa" "noavx,avx")
2048 (set_attr "type" "sse")
2049 (set_attr "atom_sse_attr" "rcp")
2050 (set_attr "btver2_sse_attr" "rcp")
2051 (set_attr "prefix" "orig,vex")
2052 (set_attr "mode" "SF")])
;; rcpss, scalar-source variant of the same instruction.
2054 (define_insn "*sse_vmrcpv4sf2"
2055 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2058 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2060 (match_operand:V4SF 2 "register_operand" "0,x")
2064 rcpss\t{%1, %0|%0, %1}
2065 vrcpss\t{%1, %2, %0|%0, %2, %1}"
2066 [(set_attr "isa" "noavx,avx")
2067 (set_attr "type" "sse")
2068 (set_attr "atom_sse_attr" "rcp")
2069 (set_attr "btver2_sse_attr" "rcp")
2070 (set_attr "prefix" "orig,vex")
2071 (set_attr "mode" "SF")])
;; AVX-512 vrcp14: reciprocal approximation with 2^-14 relative error.
;; Vector form (optionally masked via the subst machinery):
2073 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
2074 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2076 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2079 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2080 [(set_attr "type" "sse")
2081 (set_attr "prefix" "evex")
2082 (set_attr "mode" "<MODE>")])
;; Scalar form: element 0 from the approximation, rest from operand 2.
2084 (define_insn "srcp14<mode>"
2085 [(set (match_operand:VF_128 0 "register_operand" "=v")
2088 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2090 (match_operand:VF_128 2 "register_operand" "v")
2093 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2094 [(set_attr "type" "sse")
2095 (set_attr "prefix" "evex")
2096 (set_attr "mode" "<MODE>")])
;; Masked scalar form (mask %4, merge-or-zero source %3).
2098 (define_insn "srcp14<mode>_mask"
2099 [(set (match_operand:VF_128 0 "register_operand" "=v")
2103 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2105 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2106 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2107 (match_operand:VF_128 2 "register_operand" "v")
2110 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2111 [(set_attr "type" "sse")
2112 (set_attr "prefix" "evex")
2113 (set_attr "mode" "<MODE>")])
;; Standard-name expander for double-precision vector sqrt (VF2 modes).
2115 (define_expand "sqrt<mode>2"
2116 [(set (match_operand:VF2 0 "register_operand")
2117 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
;; Single-precision expander: under -mrecip-style flags (finite math, no
;; trapping, unsafe optimizations, not optimizing for size) it emits the
;; Newton-Raphson software sqrt sequence via ix86_emit_swsqrtsf instead
;; of the hardware sqrt instruction.
2120 (define_expand "sqrt<mode>2"
2121 [(set (match_operand:VF1 0 "register_operand")
2122 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2126 && TARGET_RECIP_VEC_SQRT
2127 && !optimize_insn_for_size_p ()
2128 && flag_finite_math_only && !flag_trapping_math
2129 && flag_unsafe_math_optimizations)
2131 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed SQRTPS/PD / VSQRTPS/PD, with AVX-512 masking and embedded
;; rounding hooks (<mask_name>/<round_name> substitutions).
2136 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2137 [(set (match_operand:VF 0 "register_operand" "=x,v")
2138 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2139 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2141 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2142 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2143 [(set_attr "isa" "noavx,avx")
2144 (set_attr "type" "sse")
2145 (set_attr "atom_sse_attr" "sqrt")
2146 (set_attr "btver2_sse_attr" "sqrt")
2147 (set_attr "prefix" "maybe_vex")
2148 (set_attr "mode" "<MODE>")])
;; Scalar SQRTSS/SD / VSQRTSS/SD; operand 2 supplies the pass-through
;; upper vector elements.  Supports scalar masking and embedded rounding
;; through the <mask_scalar_name>/<round_scalar_name> substitutions.
2150 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2151 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2154 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2155 (match_operand:VF_128 2 "register_operand" "0,v")
2159 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2160 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2161 [(set_attr "isa" "noavx,avx")
2162 (set_attr "type" "sse")
2163 (set_attr "atom_sse_attr" "sqrt")
2164 (set_attr "prefix" "<round_scalar_prefix>")
2165 (set_attr "btver2_sse_attr" "sqrt")
2166 (set_attr "mode" "<ssescalarmode>")])
;; Combiner variant: scalar source matched directly as <ssescalarmode>,
;; with the result duplicated and merged into the destination vector.
2168 (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2169 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2171 (vec_duplicate:VF_128
2172 (sqrt:<ssescalarmode>
2173 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2174 (match_operand:VF_128 2 "register_operand" "0,v")
2178 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2179 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2180 [(set_attr "isa" "noavx,avx")
2181 (set_attr "type" "sse")
2182 (set_attr "atom_sse_attr" "sqrt")
2183 (set_attr "prefix" "<round_scalar_prefix>")
2184 (set_attr "btver2_sse_attr" "sqrt")
2185 (set_attr "mode" "<ssescalarmode>")])
;; Standard-name expander for reciprocal square root: always lowers to
;; the software Newton-Raphson sequence (last arg 'true' = rsqrt).
2187 (define_expand "rsqrt<mode>2"
2188 [(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
2189 (unspec:VF1_AVX512ER_128_256
2190 [(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
2192 "TARGET_SSE && TARGET_SSE_MATH"
2194 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed RSQRTPS / VRSQRTPS approximation.
2198 (define_insn "<sse>_rsqrt<mode>2"
2199 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2201 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2203 "%vrsqrtps\t{%1, %0|%0, %1}"
2204 [(set_attr "type" "sse")
2205 (set_attr "prefix" "maybe_vex")
2206 (set_attr "mode" "<MODE>")])
;; AVX-512 VRSQRT14PS/PD: packed 14-bit-precision rsqrt, optionally masked.
2208 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2209 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2211 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2214 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2215 [(set_attr "type" "sse")
2216 (set_attr "prefix" "evex")
2217 (set_attr "mode" "<MODE>")])
;; Scalar VRSQRT14SS/SD; operand 2 carries the upper elements through.
2219 (define_insn "rsqrt14<mode>"
2220 [(set (match_operand:VF_128 0 "register_operand" "=v")
2223 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2225 (match_operand:VF_128 2 "register_operand" "v")
2228 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2229 [(set_attr "type" "sse")
2230 (set_attr "prefix" "evex")
2231 (set_attr "mode" "<MODE>")])
;; Masked scalar VRSQRT14: operand 4 = k-mask, operand 3 = merge-or-zero
;; source (%N3 emits {z} for the zero-masking form).
2233 (define_insn "rsqrt14_<mode>_mask"
2234 [(set (match_operand:VF_128 0 "register_operand" "=v")
2238 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2240 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2241 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2242 (match_operand:VF_128 2 "register_operand" "v")
2245 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2246 [(set_attr "type" "sse")
2247 (set_attr "prefix" "evex")
2248 (set_attr "mode" "<MODE>")])
;; Scalar RSQRTSS / VRSQRTSS; operand 2 supplies the untouched upper
;; elements (tied to the destination in the legacy-SSE alternative).
2250 (define_insn "sse_vmrsqrtv4sf2"
2251 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2253 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2255 (match_operand:V4SF 2 "register_operand" "0,x")
2259 rsqrtss\t{%1, %0|%0, %k1}
2260 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2261 [(set_attr "isa" "noavx,avx")
2262 (set_attr "type" "sse")
2263 (set_attr "prefix" "orig,vex")
2264 (set_attr "mode" "SF")])
;; Combiner variant with the scalar source matched directly as SF.
2266 (define_insn "*sse_vmrsqrtv4sf2"
2267 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2270 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2272 (match_operand:V4SF 2 "register_operand" "0,x")
2276 rsqrtss\t{%1, %0|%0, %1}
2277 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2278 [(set_attr "isa" "noavx,avx")
2279 (set_attr "type" "sse")
2280 (set_attr "prefix" "orig,vex")
2281 (set_attr "mode" "SF")])
;; Standard-name smax/smin expander.  When -0.0 or NaN semantics matter
;; (no finite-math-only, or signed zeros), it routes through the
;; order-sensitive ieee_* patterns below; otherwise it falls through to
;; the commutative insn via ix86_fixup_binary_operands_no_copy.
2283 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2284 [(set (match_operand:VF 0 "register_operand")
2286 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2287 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2288 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2290 if (!flag_finite_math_only || flag_signed_zeros)
2292 operands[1] = force_reg (<MODE>mode, operands[1]);
2293 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2294 (operands[0], operands[1], operands[2]
2295 <mask_operand_arg34>
2296 <round_saeonly_mask_arg3>));
2300 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2303 ;; These versions of the min/max patterns are intentionally ignorant of
2304 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2305 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2306 ;; are undefined in this condition, we're certain this is correct.
2308 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2309 [(set (match_operand:VF 0 "register_operand" "=x,v")
2311 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2312 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2314 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2315 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2317 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2318 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2319 [(set_attr "isa" "noavx,avx")
2320 (set_attr "type" "sseadd")
2321 (set_attr "btver2_sse_attr" "maxmin")
2322 (set_attr "prefix" "<mask_prefix3>")
2323 (set_attr "mode" "<MODE>")])
2325 ;; These versions of the min/max patterns implement exactly the operations
2326 ;;   min = (op1 < op2 ? op1 : op2)
2327 ;;   max = (!(op1 < op2) ? op1 : op2)
2328 ;; Their operands are not commutative, and thus they may be used in the
2329 ;; presence of -0.0 and NaN.
2331 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2332 [(set (match_operand:VF 0 "register_operand" "=x,v")
2334 [(match_operand:VF 1 "register_operand" "0,v")
2335 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2338 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2340 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2341 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2342 [(set_attr "isa" "noavx,avx")
2343 (set_attr "type" "sseadd")
2344 (set_attr "btver2_sse_attr" "maxmin")
2345 (set_attr "prefix" "<mask_prefix3>")
2346 (set_attr "mode" "<MODE>")])
2348 ;; Standard scalar operation patterns which preserve the rest of the
2349 ;; vector for combiner.
2350 (define_insn "*ieee_<ieee_maxmin><mode>3"
2351 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2353 (vec_duplicate:VF_128
2354 (unspec:<ssescalarmode>
2355 [(vec_select:<ssescalarmode>
2356 (match_operand:VF_128 1 "register_operand" "0,v")
2357 (parallel [(const_int 0)]))
2358 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2364 <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2365 v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2366 [(set_attr "isa" "noavx,avx")
2367 (set_attr "type" "sseadd")
2368 (set_attr "btver2_sse_attr" "maxmin")
2369 (set_attr "prefix" "orig,vex")
2370 (set_attr "mode" "<ssescalarmode>")])
;; Scalar MINSS/MAXSS-style insn (vm variant): operand 1 also supplies
;; the pass-through upper elements.
2372 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2373 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2376 (match_operand:VF_128 1 "register_operand" "0,v")
2377 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2382 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2383 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2384 [(set_attr "isa" "noavx,avx")
2385 (set_attr "type" "sse")
2386 (set_attr "btver2_sse_attr" "maxmin")
2387 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2388 (set_attr "mode" "<ssescalarmode>")])
;; SSE3/AVX ADDSUB: subtract in even elements, add in odd elements,
;; expressed as a vec_merge of (minus ...) and (plus ...) of the same
;; operands.  One pattern per mode/ISA combination below.
;; 256-bit VADDSUBPD (AVX, VEX-encoded, three-operand).
2390 (define_insn "avx_addsubv4df3"
2391 [(set (match_operand:V4DF 0 "register_operand" "=x")
2394 (match_operand:V4DF 1 "register_operand" "x")
2395 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2396 (plus:V4DF (match_dup 1) (match_dup 2))
2399 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2400 [(set_attr "type" "sseadd")
2401 (set_attr "prefix" "vex")
2402 (set_attr "mode" "V4DF")])
;; 128-bit ADDSUBPD / VADDSUBPD (SSE3 two-operand and AVX alternatives).
2404 (define_insn "sse3_addsubv2df3"
2405 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2408 (match_operand:V2DF 1 "register_operand" "0,x")
2409 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2410 (plus:V2DF (match_dup 1) (match_dup 2))
2414 addsubpd\t{%2, %0|%0, %2}
2415 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2416 [(set_attr "isa" "noavx,avx")
2417 (set_attr "type" "sseadd")
2418 (set_attr "atom_unit" "complex")
2419 (set_attr "prefix" "orig,vex")
2420 (set_attr "mode" "V2DF")])
;; 256-bit VADDSUBPS.
2422 (define_insn "avx_addsubv8sf3"
2423 [(set (match_operand:V8SF 0 "register_operand" "=x")
2426 (match_operand:V8SF 1 "register_operand" "x")
2427 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2428 (plus:V8SF (match_dup 1) (match_dup 2))
2431 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2432 [(set_attr "type" "sseadd")
2433 (set_attr "prefix" "vex")
2434 (set_attr "mode" "V8SF")])
;; 128-bit ADDSUBPS / VADDSUBPS.
2436 (define_insn "sse3_addsubv4sf3"
2437 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2440 (match_operand:V4SF 1 "register_operand" "0,x")
2441 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2442 (plus:V4SF (match_dup 1) (match_dup 2))
2446 addsubps\t{%2, %0|%0, %2}
2447 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2448 [(set_attr "isa" "noavx,avx")
2449 (set_attr "type" "sseadd")
2450 (set_attr "prefix" "orig,vex")
2451 (set_attr "prefix_rep" "1,*")
2452 (set_attr "mode" "V4SF")])
;; Four define_splits that canonicalize vec_merge / vec_select forms of
;; blended plus/minus into the addsub vec_merge shape matched by the
;; insns above.  NOTE(review): the (define_split ...) opener lines and
;; some interior lines were lost in extraction — verify against upstream
;; sse.md before editing this region.
;; Split 1: addsub_vm_operator with (minus, plus) in source order;
;; operands 1/2 must pair with 3/4 (in either order).
2455 [(set (match_operand:VF_128_256 0 "register_operand")
2456 (match_operator:VF_128_256 6 "addsub_vm_operator"
2458 (match_operand:VF_128_256 1 "register_operand")
2459 (match_operand:VF_128_256 2 "vector_operand"))
2461 (match_operand:VF_128_256 3 "vector_operand")
2462 (match_operand:VF_128_256 4 "vector_operand"))
2463 (match_operand 5 "const_int_operand")]))]
2465 && can_create_pseudo_p ()
2466 && ((rtx_equal_p (operands[1], operands[3])
2467 && rtx_equal_p (operands[2], operands[4]))
2468 || (rtx_equal_p (operands[1], operands[4])
2469 && rtx_equal_p (operands[2], operands[3])))"
2471 (vec_merge:VF_128_256
2472 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2473 (plus:VF_128_256 (match_dup 1) (match_dup 2))
;; Split 2: same as split 1 but with PLUS and MINUS swapped in the
;; source, so the merge-mask immediate must be complemented (see the
;; GEN_INT below).
2477 [(set (match_operand:VF_128_256 0 "register_operand")
2478 (match_operator:VF_128_256 6 "addsub_vm_operator"
2480 (match_operand:VF_128_256 1 "vector_operand")
2481 (match_operand:VF_128_256 2 "vector_operand"))
2483 (match_operand:VF_128_256 3 "register_operand")
2484 (match_operand:VF_128_256 4 "vector_operand"))
2485 (match_operand 5 "const_int_operand")]))]
2487 && can_create_pseudo_p ()
2488 && ((rtx_equal_p (operands[1], operands[3])
2489 && rtx_equal_p (operands[2], operands[4]))
2490 || (rtx_equal_p (operands[1], operands[4])
2491 && rtx_equal_p (operands[2], operands[3])))"
2493 (vec_merge:VF_128_256
2494 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2495 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2498 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
2500 = GEN_INT (~INTVAL (operands[5])
2501 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
;; Split 3: addsub expressed as a vec_select over a vec_concat of the
;; minus and plus halves; the selection parallel (operand 5) is folded
;; into a merge-mask immediate: bit i set when element i comes from the
;; first (minus) half.
2505 [(set (match_operand:VF_128_256 0 "register_operand")
2506 (match_operator:VF_128_256 7 "addsub_vs_operator"
2507 [(vec_concat:<ssedoublemode>
2509 (match_operand:VF_128_256 1 "register_operand")
2510 (match_operand:VF_128_256 2 "vector_operand"))
2512 (match_operand:VF_128_256 3 "vector_operand")
2513 (match_operand:VF_128_256 4 "vector_operand")))
2514 (match_parallel 5 "addsub_vs_parallel"
2515 [(match_operand 6 "const_int_operand")])]))]
2517 && can_create_pseudo_p ()
2518 && ((rtx_equal_p (operands[1], operands[3])
2519 && rtx_equal_p (operands[2], operands[4]))
2520 || (rtx_equal_p (operands[1], operands[4])
2521 && rtx_equal_p (operands[2], operands[3])))"
2523 (vec_merge:VF_128_256
2524 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2525 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2528 int i, nelt = XVECLEN (operands[5], 0);
2529 HOST_WIDE_INT ival = 0;
2531 for (i = 0; i < nelt; i++)
2532 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2533 ival |= HOST_WIDE_INT_1 << i;
2535 operands[5] = GEN_INT (ival);
;; Split 4: as split 3 with plus/minus halves swapped in the concat, so
;; the mask-bit test is inverted (>= NUNITS selects the second half).
2539 [(set (match_operand:VF_128_256 0 "register_operand")
2540 (match_operator:VF_128_256 7 "addsub_vs_operator"
2541 [(vec_concat:<ssedoublemode>
2543 (match_operand:VF_128_256 1 "vector_operand")
2544 (match_operand:VF_128_256 2 "vector_operand"))
2546 (match_operand:VF_128_256 3 "register_operand")
2547 (match_operand:VF_128_256 4 "vector_operand")))
2548 (match_parallel 5 "addsub_vs_parallel"
2549 [(match_operand 6 "const_int_operand")])]))]
2551 && can_create_pseudo_p ()
2552 && ((rtx_equal_p (operands[1], operands[3])
2553 && rtx_equal_p (operands[2], operands[4]))
2554 || (rtx_equal_p (operands[1], operands[4])
2555 && rtx_equal_p (operands[2], operands[3])))"
2557 (vec_merge:VF_128_256
2558 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2559 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2562 int i, nelt = XVECLEN (operands[5], 0);
2563 HOST_WIDE_INT ival = 0;
2565 for (i = 0; i < nelt; i++)
2566 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2567 ival |= HOST_WIDE_INT_1 << i;
2569 operands[5] = GEN_INT (ival);
;; Horizontal add/subtract patterns (HADDPD/HSUBPD, HADDPS/HSUBPS and
;; their VEX forms).  The RTL spells out each output element as a
;; plus/minus of adjacent vec_select'ed input elements; AVX 256-bit
;; forms operate per 128-bit lane.
;; 256-bit VHADDPD/VHSUBPD (<insn> iterates over add/sub).
2572 (define_insn "avx_h<insn>v4df3"
2573 [(set (match_operand:V4DF 0 "register_operand" "=x")
2578 (match_operand:V4DF 1 "register_operand" "x")
2579 (parallel [(const_int 0)]))
2580 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2583 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2584 (parallel [(const_int 0)]))
2585 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2588 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2589 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2591 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2592 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2594 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2595 [(set_attr "type" "sseadd")
2596 (set_attr "prefix" "vex")
2597 (set_attr "mode" "V4DF")])
;; Expander for 128-bit HADDPD with canonical element order (0 then 1).
2599 (define_expand "sse3_haddv2df3"
2600 [(set (match_operand:V2DF 0 "register_operand")
2604 (match_operand:V2DF 1 "register_operand")
2605 (parallel [(const_int 0)]))
2606 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2609 (match_operand:V2DF 2 "vector_operand")
2610 (parallel [(const_int 0)]))
2611 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; Recognizer for HADDPD that accepts either element order per input,
;; exploiting commutativity of addition (indices only need to differ).
2614 (define_insn "*sse3_haddv2df3"
2615 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2619 (match_operand:V2DF 1 "register_operand" "0,x")
2620 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2623 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2626 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2627 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2630 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2632 && INTVAL (operands[3]) != INTVAL (operands[4])
2633 && INTVAL (operands[5]) != INTVAL (operands[6])"
2635 haddpd\t{%2, %0|%0, %2}
2636 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2637 [(set_attr "isa" "noavx,avx")
2638 (set_attr "type" "sseadd")
2639 (set_attr "prefix" "orig,vex")
2640 (set_attr "mode" "V2DF")])
;; HSUBPD: element order is fixed (0 minus 1) since subtraction is not
;; commutative.
2642 (define_insn "sse3_hsubv2df3"
2643 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2647 (match_operand:V2DF 1 "register_operand" "0,x")
2648 (parallel [(const_int 0)]))
2649 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2652 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2653 (parallel [(const_int 0)]))
2654 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2657 hsubpd\t{%2, %0|%0, %2}
2658 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2659 [(set_attr "isa" "noavx,avx")
2660 (set_attr "type" "sseadd")
2661 (set_attr "prefix" "orig,vex")
2662 (set_attr "mode" "V2DF")])
;; DF-result variant: only the low half of the HADDPD result is used,
;; so both instruction inputs are the same register.
2664 (define_insn "*sse3_haddv2df3_low"
2665 [(set (match_operand:DF 0 "register_operand" "=x,x")
2668 (match_operand:V2DF 1 "register_operand" "0,x")
2669 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2672 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2674 && INTVAL (operands[2]) != INTVAL (operands[3])"
2676 haddpd\t{%0, %0|%0, %0}
2677 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2678 [(set_attr "isa" "noavx,avx")
2679 (set_attr "type" "sseadd1")
2680 (set_attr "prefix" "orig,vex")
2681 (set_attr "mode" "V2DF")])
;; DF-result variant of HSUBPD (low element only, fixed 0-1 order).
2683 (define_insn "*sse3_hsubv2df3_low"
2684 [(set (match_operand:DF 0 "register_operand" "=x,x")
2687 (match_operand:V2DF 1 "register_operand" "0,x")
2688 (parallel [(const_int 0)]))
2691 (parallel [(const_int 1)]))))]
2694 hsubpd\t{%0, %0|%0, %0}
2695 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2696 [(set_attr "isa" "noavx,avx")
2697 (set_attr "type" "sseadd1")
2698 (set_attr "prefix" "orig,vex")
2699 (set_attr "mode" "V2DF")])
;; 256-bit VHADDPS/VHSUBPS: pairwise op over elements 0-3 of each input
;; in the low lane, elements 4-7 in the high lane.
2701 (define_insn "avx_h<insn>v8sf3"
2702 [(set (match_operand:V8SF 0 "register_operand" "=x")
2708 (match_operand:V8SF 1 "register_operand" "x")
2709 (parallel [(const_int 0)]))
2710 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2712 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2713 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2717 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2718 (parallel [(const_int 0)]))
2719 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2721 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2722 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2726 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2727 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2729 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2730 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2733 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2734 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2736 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2737 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2739 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2740 [(set_attr "type" "sseadd")
2741 (set_attr "prefix" "vex")
2742 (set_attr "mode" "V8SF")])
;; 128-bit HADDPS/HSUBPS / VEX three-operand forms.
2744 (define_insn "sse3_h<insn>v4sf3"
2745 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2750 (match_operand:V4SF 1 "register_operand" "0,x")
2751 (parallel [(const_int 0)]))
2752 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2754 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2755 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2759 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2760 (parallel [(const_int 0)]))
2761 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2763 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2764 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2767 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2768 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2769 [(set_attr "isa" "noavx,avx")
2770 (set_attr "type" "sseadd")
2771 (set_attr "atom_unit" "complex")
2772 (set_attr "prefix" "orig,vex")
2773 (set_attr "prefix_rep" "1,*")
2774 (set_attr "mode" "V4SF")])
;; Vector-to-scalar reduction expanders.  128-bit modes reduce in place
;; via ix86_expand_reduc and extract element 0; wider modes split into
;; halves, combine them, then recurse into the half-width reduction.
2776 (define_mode_iterator REDUC_SSE_PLUS_MODE
2777 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
;; 128-bit FP plus-reduction: reduce into tmp, extract lane 0.
2779 (define_expand "reduc_plus_scal_<mode>"
2780 [(plus:REDUC_SSE_PLUS_MODE
2781 (match_operand:<ssescalarmode> 0 "register_operand")
2782 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2785 rtx tmp = gen_reg_rtx (<MODE>mode);
2786 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2787 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
;; V16QI byte-sum reduction: fold high half onto low half with a V1TI
;; shift + add, then let PSADBW (sum of absolute differences against
;; zero) produce the byte sum, and extract byte 0.
2792 (define_expand "reduc_plus_scal_v16qi"
2794 (match_operand:QI 0 "register_operand")
2795 (match_operand:V16QI 1 "register_operand"))]
2798 rtx tmp = gen_reg_rtx (V1TImode);
2799 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
2801 rtx tmp2 = gen_reg_rtx (V16QImode);
2802 emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
2803 rtx tmp3 = gen_reg_rtx (V16QImode);
2804 emit_move_insn (tmp3, CONST0_RTX (V16QImode));
2805 rtx tmp4 = gen_reg_rtx (V2DImode);
2806 emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
2807 tmp4 = gen_lowpart (V16QImode, tmp4);
2808 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
2812 (define_mode_iterator REDUC_PLUS_MODE
2813 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2814 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2815 (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
;; 256/512-bit plus-reduction: extract high half, add to low half,
;; recurse into the half-width reduc_plus_scal pattern.
2817 (define_expand "reduc_plus_scal_<mode>"
2818 [(plus:REDUC_PLUS_MODE
2819 (match_operand:<ssescalarmode> 0 "register_operand")
2820 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2823 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2824 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2825 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2826 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
2827 emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
2828 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2832 ;; Modes handled by reduc_sm{in,ax}* patterns.
2833 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2834 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2835 (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
2836 (V2DI "TARGET_SSE4_2")])
;; 128-bit signed min/max reduction (smaxmin iterates smax/smin).
2838 (define_expand "reduc_<code>_scal_<mode>"
2839 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2840 (match_operand:<ssescalarmode> 0 "register_operand")
2841 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2844 rtx tmp = gen_reg_rtx (<MODE>mode);
2845 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2846 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2851 (define_mode_iterator REDUC_SMINMAX_MODE
2852 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2853 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2854 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2855 (V64QI "TARGET_AVX512BW")
2856 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2857 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2858 (V8DF "TARGET_AVX512F")])
;; Wide signed min/max reduction: halve and recurse.
2860 (define_expand "reduc_<code>_scal_<mode>"
2861 [(smaxmin:REDUC_SMINMAX_MODE
2862 (match_operand:<ssescalarmode> 0 "register_operand")
2863 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2866 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2867 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2868 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2869 emit_insn (gen_<code><ssehalfvecmodelower>3
2870 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2871 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; Wide unsigned min/max reduction (AVX-512BW integer modes): same
;; halve-and-recurse scheme.
2875 (define_expand "reduc_<code>_scal_<mode>"
2876 [(umaxmin:VI_AVX512BW
2877 (match_operand:<ssescalarmode> 0 "register_operand")
2878 (match_operand:VI_AVX512BW 1 "register_operand"))]
2881 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2882 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2883 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2884 emit_insn (gen_<code><ssehalfvecmodelower>3
2885 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2886 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; 256-bit integer reduction: halve once, finish with the in-register
;; 128-bit reduction and extract element 0 (no further recursion).
2890 (define_expand "reduc_<code>_scal_<mode>"
2892 (match_operand:<ssescalarmode> 0 "register_operand")
2893 (match_operand:VI_256 1 "register_operand"))]
2896 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2897 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2898 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2899 emit_insn (gen_<code><ssehalfvecmodelower>3
2900 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2901 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2902 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2903 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2904 (operands[0], tmp3, const0_rtx));
;; Dedicated V8HI unsigned-min reduction.
2908 (define_expand "reduc_umin_scal_v8hi"
2910 (match_operand:HI 0 "register_operand")
2911 (match_operand:V8HI 1 "register_operand"))]
2914 rtx tmp = gen_reg_rtx (V8HImode);
2915 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2916 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
;; AVX-512DQ VREDUCEPS/PD: packed range-reduction with an 8-bit control
;; immediate (operand 2), optional masking and SAE.
2920 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
2921 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2923 [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2924 (match_operand:SI 2 "const_0_to_255_operand")]
2927 "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
2928 [(set_attr "type" "sse")
2929 (set_attr "prefix" "evex")
2930 (set_attr "mode" "<MODE>")])
;; Scalar VREDUCESS/SD: operand 1 supplies the pass-through upper
;; elements, operand 3 is the control immediate.
2932 (define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
2933 [(set (match_operand:VF_128 0 "register_operand" "=v")
2936 [(match_operand:VF_128 1 "register_operand" "v")
2937 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
2938 (match_operand:SI 3 "const_0_to_255_operand")]
2943 "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
2944 [(set_attr "type" "sse")
2945 (set_attr "prefix" "evex")
2946 (set_attr "mode" "<MODE>")])
2948 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2950 ;; Parallel floating point comparisons
2952 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2954 (define_insn "avx_cmp<mode>3"
2955 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2957 [(match_operand:VF_128_256 1 "register_operand" "x")
2958 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2959 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2962 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2963 [(set_attr "type" "ssecmp")
2964 (set_attr "length_immediate" "1")
2965 (set_attr "prefix" "vex")
2966 (set_attr "mode" "<MODE>")])
2968 (define_insn "avx_vmcmp<mode>3"
2969 [(set (match_operand:VF_128 0 "register_operand" "=x")
2972 [(match_operand:VF_128 1 "register_operand" "x")
2973 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2974 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2979 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2980 [(set_attr "type" "ssecmp")
2981 (set_attr "length_immediate" "1")
2982 (set_attr "prefix" "vex")
2983 (set_attr "mode" "<ssescalarmode>")])
2985 (define_insn "*<sse>_maskcmp<mode>3_comm"
2986 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2987 (match_operator:VF_128_256 3 "sse_comparison_operator"
2988 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2989 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2991 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2993 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2994 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2995 [(set_attr "isa" "noavx,avx")
2996 (set_attr "type" "ssecmp")
2997 (set_attr "length_immediate" "1")
2998 (set_attr "prefix" "orig,vex")
2999 (set_attr "mode" "<MODE>")])
3001 (define_insn "<sse>_maskcmp<mode>3"
3002 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3003 (match_operator:VF_128_256 3 "sse_comparison_operator"
3004 [(match_operand:VF_128_256 1 "register_operand" "0,x")
3005 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3008 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3009 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3010 [(set_attr "isa" "noavx,avx")
3011 (set_attr "type" "ssecmp")
3012 (set_attr "length_immediate" "1")
3013 (set_attr "prefix" "orig,vex")
3014 (set_attr "mode" "<MODE>")])
3016 (define_insn "<sse>_vmmaskcmp<mode>3"
3017 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3019 (match_operator:VF_128 3 "sse_comparison_operator"
3020 [(match_operand:VF_128 1 "register_operand" "0,x")
3021 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
3026 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
3027 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
3028 [(set_attr "isa" "noavx,avx")
3029 (set_attr "type" "ssecmp")
3030 (set_attr "length_immediate" "1,*")
3031 (set_attr "prefix" "orig,vex")
3032 (set_attr "mode" "<ssescalarmode>")])
3034 (define_mode_attr cmp_imm_predicate
3035 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
3036 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
3037 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
3038 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
3039 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
3040 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
3041 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
3042 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
3043 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
3045 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
3046 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3047 (unspec:<avx512fmaskmode>
3048 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
3049 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
3050 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3052 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
3053 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
3054 [(set_attr "type" "ssecmp")
3055 (set_attr "length_immediate" "1")
3056 (set_attr "prefix" "evex")
3057 (set_attr "mode" "<sseinsnmode>")])
;; Signed compare of byte/word-element vectors into a k-mask (vpcmpb/vpcmpw).
;; NOTE(review): the enabling condition line is missing from this extract —
;; presumably it requires AVX512BW; confirm against the full source.
3059 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
3060 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3061 (unspec:<avx512fmaskmode>
3062 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3063 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3064 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3067 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3068 [(set_attr "type" "ssecmp")
3069 (set_attr "length_immediate" "1")
3070 (set_attr "prefix" "evex")
3071 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned compare of byte/word-element vectors into a k-mask
;; (vpcmpub/vpcmpuw); operand 3 is the 0..7 predicate immediate.
3073 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3074 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3075 (unspec:<avx512fmaskmode>
3076 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3077 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3078 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3079 UNSPEC_UNSIGNED_PCMP))]
3081 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3082 [(set_attr "type" "ssecmp")
3083 (set_attr "length_immediate" "1")
3084 (set_attr "prefix" "evex")
3085 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned compare of dword/qword-element vectors into a k-mask
;; (vpcmpud/vpcmpuq); same shape as the VI12 variant above.
3087 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3088 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3089 (unspec:<avx512fmaskmode>
3090 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
3091 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
3092 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3093 UNSPEC_UNSIGNED_PCMP))]
3095 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3096 [(set_attr "type" "ssecmp")
3097 (set_attr "length_immediate" "1")
3098 (set_attr "prefix" "evex")
3099 (set_attr "mode" "<sseinsnmode>")])
;; Iterate patterns over both the signed and unsigned compare unspecs.
3101 (define_int_iterator UNSPEC_PCMP_ITER
3102 [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
;; Bit-mask of predicate-immediate bits that must be clear for the
;; PR96906 split below: signed (mask 3) admits only EQ(0)/NEQ(4);
;; unsigned (mask 1) also admits LE(2)/NLE(6).
3104 (define_int_attr pcmp_signed_mask
3105 [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")])
3107 ;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw.
3108 ;; For signed comparisons, handle only EQ (0) and NEQ (4);
3109 ;; for unsigned comparisons, additionally handle LE (2) and NLE (6),
;; which are equivalent to EQ and NEQ respectively.
;; Body of the PR96906 split (its define_insn_and_split header line is
;; not visible in this extract): matches a signed/unsigned compare of
;; a saturating subtract (x -us y) against zero and rewrites it as a
;; direct unsigned compare of x and y:
;;   (x -us y) == 0  <=>  x LEU y   -> predicate 2 (LE),
;;   or predicate 5 (NLT/GE) with operands swapped when operand 1 is
;;   a memory (so the memory ends up in the second, "vm" slot);
;; XOR-ing the predicate with 4 inverts it for the NEQ/NLE cases.
3112 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3113 (unspec:<avx512fmaskmode>
3114 [(us_minus:VI12_AVX512VL
3115 (match_operand:VI12_AVX512VL 1 "vector_operand")
3116 (match_operand:VI12_AVX512VL 2 "vector_operand"))
3117 (match_operand:VI12_AVX512VL 3 "const0_operand")
3118 (match_operand:SI 4 "const_0_to_7_operand")]
3121 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)
3122 && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0"
3125 /* LE: 2, NLT: 5, NLE: 6, LT: 1 */
3126 int cmp_predicate = 2; /* LE */
3127 if (MEM_P (operands[1]))
3129 std::swap (operands[1], operands[2]);
3130 cmp_predicate = 5; /* NLT (GE) */
3132 if ((INTVAL (operands[4]) & 4) != 0)
3133 cmp_predicate ^= 4; /* Invert the comparison to NLE (GT) or LT. */
3134 emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],operands[2],
3135 GEN_INT (cmp_predicate)));
;; Scalar (element-0 only) FP compare into a k-mask, SAE-capable.
;; The result unspec is ANDed — presumably with const1 to keep only
;; the low mask bit (the AND's second arm is missing from this extract).
3139 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
3140 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3141 (and:<avx512fmaskmode>
3142 (unspec:<avx512fmaskmode>
3143 [(match_operand:VF_128 1 "register_operand" "v")
3144 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3145 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3149 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
3150 [(set_attr "type" "ssecmp")
3151 (set_attr "length_immediate" "1")
3152 (set_attr "prefix" "evex")
3153 (set_attr "mode" "<ssescalarmode>")])
;; Masked variant of the scalar compare above: operand 4 is the write
;; mask (%{%4%} in the template), ANDed into the compare result.
3155 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
3156 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3157 (and:<avx512fmaskmode>
3158 (unspec:<avx512fmaskmode>
3159 [(match_operand:VF_128 1 "register_operand" "v")
3160 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3161 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3163 (and:<avx512fmaskmode>
3164 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
3167 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
3168 [(set_attr "type" "ssecmp")
3169 (set_attr "length_immediate" "1")
3170 (set_attr "prefix" "evex")
3171 (set_attr "mode" "<ssescalarmode>")])
;; Ordered/unordered scalar compare ([u]comiss/[u]comisd) of element 0
;; of two vectors, setting the FP condition codes in FLAGS_REG.
3173 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
3174 [(set (reg:CCFP FLAGS_REG)
3177 (match_operand:<ssevecmode> 0 "register_operand" "v")
3178 (parallel [(const_int 0)]))
3180 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3181 (parallel [(const_int 0)]))))]
3182 "SSE_FLOAT_MODE_P (<MODE>mode)"
3183 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
3184 [(set_attr "type" "ssecomi")
3185 (set_attr "prefix" "maybe_vex")
3186 (set_attr "prefix_rep" "0")
3187 (set (attr "prefix_data16")
3188 (if_then_else (eq_attr "mode" "DF")
3190 (const_string "0")))
3191 (set_attr "mode" "<MODE>")])
;; Expand a vector comparison of 32/64-bit-element vectors into a k-mask
;; via ix86_expand_mask_vec_cmp.
3193 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3194 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3195 (match_operator:<avx512fmaskmode> 1 ""
3196 [(match_operand:V48_AVX512VL 2 "register_operand")
3197 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
3200 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3201 operands[2], operands[3]);
;; Same as above for byte/word-element vectors.
3206 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3207 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3208 (match_operator:<avx512fmaskmode> 1 ""
3209 [(match_operand:VI12_AVX512VL 2 "register_operand")
3210 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3213 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3214 operands[2], operands[3]);
;; Signed compare of 256-bit integer vectors, result as a same-width
;; all-ones/all-zeros integer vector (pre-AVX512 style).
3219 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3220 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3221 (match_operator:<sseintvecmode> 1 ""
3222 [(match_operand:VI_256 2 "register_operand")
3223 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3226 bool ok = ix86_expand_int_vec_cmp (operands);
;; Signed compare of 128-bit byte/word/dword vectors.
3231 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3232 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3233 (match_operator:<sseintvecmode> 1 ""
3234 [(match_operand:VI124_128 2 "register_operand")
3235 (match_operand:VI124_128 3 "vector_operand")]))]
3238 bool ok = ix86_expand_int_vec_cmp (operands);
;; Signed compare of V2DI (qword) vectors — separate pattern because
;; 64-bit element compares have their own ISA requirements.
3243 (define_expand "vec_cmpv2div2di"
3244 [(set (match_operand:V2DI 0 "register_operand")
3245 (match_operator:V2DI 1 ""
3246 [(match_operand:V2DI 2 "register_operand")
3247 (match_operand:V2DI 3 "vector_operand")]))]
3250 bool ok = ix86_expand_int_vec_cmp (operands);
;; FP compare of 256-bit float vectors, integer-vector-mask result.
3255 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3256 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3257 (match_operator:<sseintvecmode> 1 ""
3258 [(match_operand:VF_256 2 "register_operand")
3259 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3262 bool ok = ix86_expand_fp_vec_cmp (operands);
;; FP compare of 128-bit float vectors, integer-vector-mask result.
3267 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3268 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3269 (match_operator:<sseintvecmode> 1 ""
3270 [(match_operand:VF_128 2 "register_operand")
3271 (match_operand:VF_128 3 "vector_operand")]))]
3274 bool ok = ix86_expand_fp_vec_cmp (operands);
;; Unsigned compare of dword/qword vectors into a k-mask.
3279 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3280 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3281 (match_operator:<avx512fmaskmode> 1 ""
3282 [(match_operand:VI48_AVX512VL 2 "register_operand")
3283 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3286 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3287 operands[2], operands[3]);
;; Unsigned compare of byte/word vectors into a k-mask.
3292 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3293 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3294 (match_operator:<avx512fmaskmode> 1 ""
3295 [(match_operand:VI12_AVX512VL 2 "register_operand")
3296 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3299 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3300 operands[2], operands[3]);
;; Unsigned compare of 256-bit integer vectors, vector-mask result.
3305 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3306 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3307 (match_operator:<sseintvecmode> 1 ""
3308 [(match_operand:VI_256 2 "register_operand")
3309 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3312 bool ok = ix86_expand_int_vec_cmp (operands);
;; Unsigned compare of 128-bit byte/word/dword vectors.
3317 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3318 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3319 (match_operator:<sseintvecmode> 1 ""
3320 [(match_operand:VI124_128 2 "register_operand")
3321 (match_operand:VI124_128 3 "vector_operand")]))]
3324 bool ok = ix86_expand_int_vec_cmp (operands);
;; Unsigned compare of V2DI vectors.
3329 (define_expand "vec_cmpuv2div2di"
3330 [(set (match_operand:V2DI 0 "register_operand")
3331 (match_operator:V2DI 1 ""
3332 [(match_operand:V2DI 2 "register_operand")
3333 (match_operand:V2DI 3 "vector_operand")]))]
3336 bool ok = ix86_expand_int_vec_cmp (operands);
;; Equality-only compare of V2DI vectors (EQ needs less ISA support
;; than ordered compares for 64-bit elements).
3341 (define_expand "vec_cmpeqv2div2di"
3342 [(set (match_operand:V2DI 0 "register_operand")
3343 (match_operator:V2DI 1 ""
3344 [(match_operand:V2DI 2 "register_operand")
3345 (match_operand:V2DI 3 "vector_operand")]))]
3348 bool ok = ix86_expand_int_vec_cmp (operands);
;; vcond: select between operands 1/2 according to an FP comparison of
;; operands 4/5; data and compare modes must have equal element counts.
3353 (define_expand "vcond<V_512:mode><VF_512:mode>"
3354 [(set (match_operand:V_512 0 "register_operand")
3356 (match_operator 3 ""
3357 [(match_operand:VF_512 4 "nonimmediate_operand")
3358 (match_operand:VF_512 5 "nonimmediate_operand")])
3359 (match_operand:V_512 1 "general_operand")
3360 (match_operand:V_512 2 "general_operand")))]
3362 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3363 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3365 bool ok = ix86_expand_fp_vcond (operands);
;; 256-bit variant of the FP vcond expander above.
3370 (define_expand "vcond<V_256:mode><VF_256:mode>"
3371 [(set (match_operand:V_256 0 "register_operand")
3373 (match_operator 3 ""
3374 [(match_operand:VF_256 4 "nonimmediate_operand")
3375 (match_operand:VF_256 5 "nonimmediate_operand")])
3376 (match_operand:V_256 1 "general_operand")
3377 (match_operand:V_256 2 "general_operand")))]
3379 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3380 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3382 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit variant of the FP vcond expander above.
3387 (define_expand "vcond<V_128:mode><VF_128:mode>"
3388 [(set (match_operand:V_128 0 "register_operand")
3390 (match_operator 3 ""
3391 [(match_operand:VF_128 4 "vector_operand")
3392 (match_operand:VF_128 5 "vector_operand")])
3393 (match_operand:V_128 1 "general_operand")
3394 (match_operand:V_128 2 "general_operand")))]
3396 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3397 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3399 bool ok = ix86_expand_fp_vcond (operands);
;; Mask-driven blend: result = operand3 ? operand1 : operand2, with the
;; selector in a k-mask register (vec_merge maps to vmovdq*/vblendm*).
3404 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3405 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3406 (vec_merge:V48_AVX512VL
3407 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3408 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3409 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
;; Same mask-driven blend for byte/word-element vectors.
3412 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3413 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3414 (vec_merge:VI12_AVX512VL
3415 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3416 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3417 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3420 ;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
3421 ;; and their condition can be folded late into a constant, we need to
3422 ;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
;; 256-bit integer modes gated on AVX2 (continuation of this iterator
;; is missing from this extract — V8SI/V4DI presumably follow).
3423 (define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
;; Blend 256-bit integer vectors with a same-width vector condition
;; via ix86_expand_sse_movcc (pre-AVX512 mask representation).
3426 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3427 [(set (match_operand:VI_256_AVX2 0 "register_operand")
3428 (vec_merge:VI_256_AVX2
3429 (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
3430 (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
3431 (match_operand:<sseintvecmode> 3 "register_operand")))]
3434 ix86_expand_sse_movcc (operands[0], operands[3],
3435 operands[1], operands[2]);
;; 128-bit integer blend via ix86_expand_sse_movcc.
3439 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3440 [(set (match_operand:VI124_128 0 "register_operand")
3441 (vec_merge:VI124_128
3442 (match_operand:VI124_128 1 "vector_operand")
3443 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3444 (match_operand:<sseintvecmode> 3 "register_operand")))]
3447 ix86_expand_sse_movcc (operands[0], operands[3],
3448 operands[1], operands[2]);
;; V2DI blend via ix86_expand_sse_movcc.
3452 (define_expand "vcond_mask_v2div2di"
3453 [(set (match_operand:V2DI 0 "register_operand")
3455 (match_operand:V2DI 1 "vector_operand")
3456 (match_operand:V2DI 2 "nonimm_or_0_operand")
3457 (match_operand:V2DI 3 "register_operand")))]
3460 ix86_expand_sse_movcc (operands[0], operands[3],
3461 operands[1], operands[2]);
;; 256-bit FP blend with an integer-vector condition.
3465 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3466 [(set (match_operand:VF_256 0 "register_operand")
3468 (match_operand:VF_256 1 "nonimmediate_operand")
3469 (match_operand:VF_256 2 "nonimm_or_0_operand")
3470 (match_operand:<sseintvecmode> 3 "register_operand")))]
3473 ix86_expand_sse_movcc (operands[0], operands[3],
3474 operands[1], operands[2]);
;; 128-bit FP blend with an integer-vector condition.
3478 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3479 [(set (match_operand:VF_128 0 "register_operand")
3481 (match_operand:VF_128 1 "vector_operand")
3482 (match_operand:VF_128 2 "nonimm_or_0_operand")
3483 (match_operand:<sseintvecmode> 3 "register_operand")))]
3486 ix86_expand_sse_movcc (operands[0], operands[3],
3487 operands[1], operands[2]);
3491 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3493 ;; Parallel floating point logical operations
3495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; dst = ~op1 & op2 for 128/256-bit FP vectors.  Four alternatives:
;; legacy SSE (two-operand), VEX, and two EVEX forms; when AVX512DQ is
;; unavailable the EVEX form falls back to integer vpandn[dq] because
;; plain AVX512F has no vandnp[sd] encoding.
3497 (define_insn "<sse>_andnot<mode>3<mask_name>"
3498 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3501 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3502 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3503 "TARGET_SSE && <mask_avx512vl_condition>"
3509 switch (which_alternative)
3512 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3517 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3523 switch (get_attr_mode (insn))
3531 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3532 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3533 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3536 suffix = "<ssemodesuffix>";
3539 snprintf (buf, sizeof (buf), ops, suffix);
3540 output_asm_insn (buf, operands);
3543 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3544 (set_attr "type" "sselog")
3545 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3547 (cond [(and (match_test "<mask_applied>")
3548 (and (eq_attr "alternative" "1")
3549 (match_test "!TARGET_AVX512DQ")))
3550 (const_string "<sseintvecmode2>")
3551 (eq_attr "alternative" "3")
3552 (const_string "<sseintvecmode2>")
3553 (match_test "TARGET_AVX")
3554 (const_string "<MODE>")
3555 (match_test "optimize_function_for_size_p (cfun)")
3556 (const_string "V4SF")
3557 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3558 (const_string "V4SF")
3560 (const_string "<MODE>")))])
;; 512-bit andnot; without AVX512DQ there is no vandnp[sd], so fall
;; back to vpandn[qd] (and report XI mode in the attributes).
3562 (define_insn "<sse>_andnot<mode>3<mask_name>"
3563 [(set (match_operand:VF_512 0 "register_operand" "=v")
3566 (match_operand:VF_512 1 "register_operand" "v"))
3567 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3574 suffix = "<ssemodesuffix>";
3577 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3578 if (!TARGET_AVX512DQ)
3580 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3584 snprintf (buf, sizeof (buf),
3585 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3587 output_asm_insn (buf, operands);
3590 [(set_attr "type" "sselog")
3591 (set_attr "prefix" "evex")
3593 (if_then_else (match_test "TARGET_AVX512DQ")
3594 (const_string "<sseinsnmode>")
3595 (const_string "XI")))])
;; Expander for FP and/ior/xor on 128/256-bit vectors; canonicalizes
;; operands (e.g. memory position) before the insn pattern matches.
3597 (define_expand "<code><mode>3<mask_name>"
3598 [(set (match_operand:VF_128_256 0 "register_operand")
3599 (any_logic:VF_128_256
3600 (match_operand:VF_128_256 1 "vector_operand")
3601 (match_operand:VF_128_256 2 "vector_operand")))]
3602 "TARGET_SSE && <mask_avx512vl_condition>"
3603 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit variant of the FP logic expander above.
3605 (define_expand "<code><mode>3<mask_name>"
3606 [(set (match_operand:VF_512 0 "register_operand")
3608 (match_operand:VF_512 1 "nonimmediate_operand")
3609 (match_operand:VF_512 2 "nonimmediate_operand")))]
3611 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; FP and/ior/xor insn for 128/256-bit vectors; alternative layout
;; mirrors the andnot pattern above (SSE, VEX, EVEX-DQ, EVEX-F with
;; vp<logic>[qd] fallback when AVX512DQ is unavailable).
3613 (define_insn "*<code><mode>3<mask_name>"
3614 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3615 (any_logic:VF_128_256
3616 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3617 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3618 "TARGET_SSE && <mask_avx512vl_condition>
3619 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3625 switch (which_alternative)
3628 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3633 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3639 switch (get_attr_mode (insn))
3647 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3648 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3649 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3652 suffix = "<ssemodesuffix>";
3655 snprintf (buf, sizeof (buf), ops, suffix);
3656 output_asm_insn (buf, operands);
3659 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3660 (set_attr "type" "sselog")
3661 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3663 (cond [(and (match_test "<mask_applied>")
3664 (and (eq_attr "alternative" "1")
3665 (match_test "!TARGET_AVX512DQ")))
3666 (const_string "<sseintvecmode2>")
3667 (eq_attr "alternative" "3")
3668 (const_string "<sseintvecmode2>")
3669 (match_test "TARGET_AVX")
3670 (const_string "<MODE>")
3671 (match_test "optimize_function_for_size_p (cfun)")
3672 (const_string "V4SF")
3673 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3674 (const_string "V4SF")
3676 (const_string "<MODE>")))])
;; 512-bit FP logic insn; vp<logic>[qd] fallback without AVX512DQ.
3678 (define_insn "*<code><mode>3<mask_name>"
3679 [(set (match_operand:VF_512 0 "register_operand" "=v")
3681 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3682 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3683 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3689 suffix = "<ssemodesuffix>";
3692 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3693 if (!TARGET_AVX512DQ)
3695 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3699 snprintf (buf, sizeof (buf),
3700 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3702 output_asm_insn (buf, operands);
3705 [(set_attr "type" "sselog")
3706 (set_attr "prefix" "evex")
3708 (if_then_else (match_test "TARGET_AVX512DQ")
3709 (const_string "<sseinsnmode>")
3710 (const_string "XI")))])
;; copysign(x, y) = (x & ~signmask) | (y & signmask); operand 3 is the
;; sign-bit mask constant, 4 and 5 are fresh temporaries.
3712 (define_expand "copysign<mode>3"
3715 (not:VF (match_dup 3))
3716 (match_operand:VF 1 "vector_operand")))
3718 (and:VF (match_dup 3)
3719 (match_operand:VF 2 "vector_operand")))
3720 (set (match_operand:VF 0 "register_operand")
3721 (ior:VF (match_dup 4) (match_dup 5)))]
3724 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3726 operands[4] = gen_reg_rtx (<MODE>mode);
3727 operands[5] = gen_reg_rtx (<MODE>mode);
;; xorsign(x, y) = x ^ (y & signmask): flips the sign of x when y is
;; negative.  Operand 3 is the sign-bit mask, 4 a temporary.
3730 (define_expand "xorsign<mode>3"
3732 (and:VF (match_dup 3)
3733 (match_operand:VF 2 "vector_operand")))
3734 (set (match_operand:VF 0 "register_operand")
3735 (xor:VF (match_dup 4)
3736 (match_operand:VF 1 "vector_operand")))]
3739 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3741 operands[4] = gen_reg_rtx (<MODE>mode);
;; signbit: reinterpret the float vector as integers and logical-shift
;; each element right by (element bits - 1), leaving 0 or 1.
3744 (define_expand "signbit<mode>2"
3745 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3746 (lshiftrt:<sseintvecmode>
3747 (subreg:<sseintvecmode>
3748 (match_operand:VF1_AVX2 1 "register_operand") 0)
3751 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
3753 ;; Also define scalar versions. These are used for abs, neg, and
3754 ;; conditional move. Using subregs into vector modes causes register
3755 ;; allocation lossage. These patterns do not allow memory operands
3756 ;; because the native instructions read the full 128-bits.
;; Scalar-float (SF/DF) andnot performed in full-vector registers;
;; register-only operands because the underlying vector instructions
;; read all 128 bits.  EVEX alternatives use %g (zmm) operand forms and
;; fall back to vpandn[qd] without AVX512DQ.
3758 (define_insn "*andnot<mode>3"
3759 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3762 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3763 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3764 "SSE_FLOAT_MODE_P (<MODE>mode)"
3769 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3771 switch (which_alternative)
3774 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3777 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3780 if (TARGET_AVX512DQ)
3781 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3784 suffix = <MODE>mode == DFmode ? "q" : "d";
3785 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3789 if (TARGET_AVX512DQ)
3790 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3793 suffix = <MODE>mode == DFmode ? "q" : "d";
3794 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3801 snprintf (buf, sizeof (buf), ops, suffix);
3802 output_asm_insn (buf, operands);
3805 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3806 (set_attr "type" "sselog")
3807 (set_attr "prefix" "orig,vex,evex,evex")
3809 (cond [(eq_attr "alternative" "2")
3810 (if_then_else (match_test "TARGET_AVX512DQ")
3811 (const_string "<ssevecmode>")
3812 (const_string "TI"))
3813 (eq_attr "alternative" "3")
3814 (if_then_else (match_test "TARGET_AVX512DQ")
3815 (const_string "<avx512fvecmode>")
3816 (const_string "XI"))
3817 (match_test "TARGET_AVX")
3818 (const_string "<ssevecmode>")
3819 (match_test "optimize_function_for_size_p (cfun)")
3820 (const_string "V4SF")
3821 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3822 (const_string "V4SF")
3824 (const_string "<ssevecmode>")))])
;; TFmode (128-bit) andnot; EVEX alternatives always use pandnq, and
;; alternative 3 widens to zmm operands (%g) for plain AVX512F.
3826 (define_insn "*andnottf3"
3827 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3829 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3830 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3836 = (which_alternative >= 2 ? "pandnq"
3837 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3839 switch (which_alternative)
3842 ops = "%s\t{%%2, %%0|%%0, %%2}";
3846 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3849 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3855 snprintf (buf, sizeof (buf), ops, tmp);
3856 output_asm_insn (buf, operands);
3859 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3860 (set_attr "type" "sselog")
3861 (set (attr "prefix_data16")
3863 (and (eq_attr "alternative" "0")
3864 (eq_attr "mode" "TI"))
3866 (const_string "*")))
3867 (set_attr "prefix" "orig,vex,evex,evex")
3869 (cond [(eq_attr "alternative" "2")
3871 (eq_attr "alternative" "3")
3873 (match_test "TARGET_AVX")
3875 (ior (not (match_test "TARGET_SSE2"))
3876 (match_test "optimize_function_for_size_p (cfun)"))
3877 (const_string "V4SF")
3878 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3879 (const_string "V4SF")
3881 (const_string "TI")))])
;; Scalar-float and/ior/xor in vector registers; same alternative
;; structure and AVX512DQ fallbacks as *andnot<mode>3 above.
3883 (define_insn "*<code><mode>3"
3884 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3886 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3887 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3888 "SSE_FLOAT_MODE_P (<MODE>mode)"
3893 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3895 switch (which_alternative)
3898 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3901 if (!TARGET_AVX512DQ)
3903 suffix = <MODE>mode == DFmode ? "q" : "d";
3904 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3909 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3912 if (TARGET_AVX512DQ)
3913 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3916 suffix = <MODE>mode == DFmode ? "q" : "d";
3917 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3924 snprintf (buf, sizeof (buf), ops, suffix);
3925 output_asm_insn (buf, operands);
3928 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3929 (set_attr "type" "sselog")
3930 (set_attr "prefix" "orig,vex,evex,evex")
3932 (cond [(eq_attr "alternative" "2")
3933 (if_then_else (match_test "TARGET_AVX512DQ")
3934 (const_string "<ssevecmode>")
3935 (const_string "TI"))
3936 (eq_attr "alternative" "3")
3937 (if_then_else (match_test "TARGET_AVX512DQ")
3938 (const_string "<avx512fvecmode>")
3939 (const_string "XI"))
3940 (match_test "TARGET_AVX")
3941 (const_string "<ssevecmode>")
3942 (match_test "optimize_function_for_size_p (cfun)")
3943 (const_string "V4SF")
3944 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3945 (const_string "V4SF")
3947 (const_string "<ssevecmode>")))])
;; Expander for TFmode and/ior/xor; canonicalizes operands first.
3949 (define_expand "<code>tf3"
3950 [(set (match_operand:TF 0 "register_operand")
3952 (match_operand:TF 1 "vector_operand")
3953 (match_operand:TF 2 "vector_operand")))]
3955 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode and/ior/xor insn; EVEX alternatives use p<logic>q, and
;; alternative 3 widens to zmm (%g) operands for plain AVX512F.
3957 (define_insn "*<code>tf3"
3958 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3960 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3961 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3962 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3967 = (which_alternative >= 2 ? "p<logic>q"
3968 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3970 switch (which_alternative)
3973 ops = "%s\t{%%2, %%0|%%0, %%2}";
3977 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3980 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3986 snprintf (buf, sizeof (buf), ops, tmp);
3987 output_asm_insn (buf, operands);
3990 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3991 (set_attr "type" "sselog")
3992 (set (attr "prefix_data16")
3994 (and (eq_attr "alternative" "0")
3995 (eq_attr "mode" "TI"))
3997 (const_string "*")))
3998 (set_attr "prefix" "orig,vex,evex,evex")
4000 (cond [(eq_attr "alternative" "2")
4002 (eq_attr "alternative" "3")
4004 (match_test "TARGET_AVX")
4006 (ior (not (match_test "TARGET_SSE2"))
4007 (match_test "optimize_function_for_size_p (cfun)"))
4008 (const_string "V4SF")
4009 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4010 (const_string "V4SF")
4012 (const_string "TI")))])
4014 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4016 ;; FMA floating point multiply/accumulate instructions. These include
4017 ;; scalar versions of the instructions as well as vector versions.
4019 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4021 ;; The standard names for scalar FMA are only available with SSE math enabled.
4022 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
4023 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
4024 ;; and TARGET_FMA4 are both false.
4025 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
4026 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
4027 ;; GAS to allow proper prefix selection. However, for the moment all hardware
4028 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named FMA patterns; scalar modes additionally
;; require SSE math to be enabled.
4029 (define_mode_iterator FMAMODEM
4030 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4031 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4032 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4033 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4034 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4035 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4036 (V16SF "TARGET_AVX512F")
4037 (V8DF "TARGET_AVX512F")])
;; Standard fma: op0 = op1 * op2 + op3.
4039 (define_expand "fma<mode>4"
4040 [(set (match_operand:FMAMODEM 0 "register_operand")
4042 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4043 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4044 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; fms: op0 = op1 * op2 - op3.
4046 (define_expand "fms<mode>4"
4047 [(set (match_operand:FMAMODEM 0 "register_operand")
4049 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4050 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4051 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; fnma: op0 = -op1 * op2 + op3.
4053 (define_expand "fnma<mode>4"
4054 [(set (match_operand:FMAMODEM 0 "register_operand")
4056 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4057 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4058 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; fnms: op0 = -op1 * op2 - op3.
4060 (define_expand "fnms<mode>4"
4061 [(set (match_operand:FMAMODEM 0 "register_operand")
4063 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4064 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4065 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
4067 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same modes as FMAMODEM but without the SSE-math requirement on the
;; scalar modes — used by the intrinsic-builtin patterns.
4068 (define_mode_iterator FMAMODE_AVX512
4069 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4070 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4071 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4072 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4073 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4074 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4075 (V16SF "TARGET_AVX512F")
4076 (V8DF "TARGET_AVX512F")])
;; Unconditional FMA modes (no per-mode target gating).
4078 (define_mode_iterator FMAMODE
4079 [SF DF V4SF V2DF V8SF V4DF])
;; Builtin entry point for fmadd (a*b+c), independent of SSE math.
4081 (define_expand "fma4i_fmadd_<mode>"
4082 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4084 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4085 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4086 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
;; Builtin entry point for fmsub (a*b-c).
4088 (define_expand "fma4i_fmsub_<mode>"
4089 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4091 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4092 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4094 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
;; Builtin entry point for fnmadd (-a*b+c).
4096 (define_expand "fma4i_fnmadd_<mode>"
4097 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4100 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4101 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4102 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
;; Builtin entry point for fnmsub (-a*b-c).
4104 (define_expand "fma4i_fnmsub_<mode>"
4105 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4108 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4109 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4111 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
;; Zero-masked fmadd: forwards to the _maskz_1 pattern with a zero
;; vector as the merge source, so masked-off lanes become 0.
4113 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
4114 [(match_operand:VF_AVX512VL 0 "register_operand")
4115 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4116 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4117 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4118 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4119 "TARGET_AVX512F && <round_mode512bit_condition>"
4121 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
4122 operands[0], operands[1], operands[2], operands[3],
4123 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA3/FMA4 fmadd.  FMA3 is destructive, so the 132/213/231 forms
;; cover whichever operand is tied to the destination; FMA4 is
;; non-destructive four-operand (last two alternatives).
4127 (define_insn "*fma_fmadd_<mode>"
4128 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4130 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4131 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4132 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4133 "TARGET_FMA || TARGET_FMA4"
4135 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4136 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4137 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4138 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4139 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4140 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4141 (set_attr "type" "ssemuladd")
4142 (set_attr "mode" "<MODE>")])
4144 ;; Suppose AVX-512F as baseline
;; FP modes for EVEX-encoded FMA: scalars and 512-bit unconditionally
;; (AVX512F baseline), 128/256-bit gated on AVX512VL.
4145 (define_mode_iterator VF_SF_AVX512VL
4146 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
4147 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX-512 fmadd with optional zero-masking (<sd_maskz_name>) and embedded
;; rounding (<round_name>) subst variants; broadcast memory operands allowed.
4149 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
4150 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4152 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4153 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4154 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4155 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4157 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4158 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4159 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4160 [(set_attr "type" "ssemuladd")
4161 (set_attr "mode" "<MODE>")])
;; Merge-masked fmadd: where the mask bit is clear the element keeps op1
;; (tied to the destination), hence only the 132/213 forms are usable.
4163 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
4164 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4165 (vec_merge:VF_AVX512VL
4167 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4168 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4169 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4171 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4172 "TARGET_AVX512F && <round_mode512bit_condition>"
4174 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4175 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4176 [(set_attr "type" "ssemuladd")
4177 (set_attr "mode" "<MODE>")])
;; Merge-masked fmadd, addend (op3) tied to the destination -> 231 form.
4179 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
4180 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4181 (vec_merge:VF_AVX512VL
4183 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4184 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4185 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4187 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4189 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4190 [(set_attr "type" "ssemuladd")
4191 (set_attr "mode" "<MODE>")])
;; op0 = op1 * op2 - op3 (the (neg op3) wrapper line is not visible in
;; this excerpt).  Alternatives 0-2 FMA3, 3-4 FMA4.
4193 (define_insn "*fma_fmsub_<mode>"
4194 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4196 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4197 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4199 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4200 "TARGET_FMA || TARGET_FMA4"
4202 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4203 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4204 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4205 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4206 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4207 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4208 (set_attr "type" "ssemuladd")
4209 (set_attr "mode" "<MODE>")])
;; Zero-masked fmsub expander: forwards to fma_fmsub_<mode>_maskz_1 with a
;; CONST0 merge vector.
4211 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4212 [(match_operand:VF_AVX512VL 0 "register_operand")
4213 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4214 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4215 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4216 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4217 "TARGET_AVX512F && <round_mode512bit_condition>"
4219 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4220 operands[0], operands[1], operands[2], operands[3],
4221 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; AVX-512 fmsub with optional zero-masking and embedded-rounding variants.
4225 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4226 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4228 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4229 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4231 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4232 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4234 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4235 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4236 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4237 [(set_attr "type" "ssemuladd")
4238 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub: masked-off elements keep op1 (tied to destination).
4240 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4241 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4242 (vec_merge:VF_AVX512VL
4244 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4245 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4247 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4249 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4252 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4253 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4254 [(set_attr "type" "ssemuladd")
4255 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub, subtrahend (op3) tied to the destination -> 231 form.
4257 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4258 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4259 (vec_merge:VF_AVX512VL
4261 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4262 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4264 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4266 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4267 "TARGET_AVX512F && <round_mode512bit_condition>"
4268 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4269 [(set_attr "type" "ssemuladd")
4270 (set_attr "mode" "<MODE>")])
;; op0 = -(op1 * op2) + op3 (the (neg op1) wrapper line is not visible in
;; this excerpt).  Alternatives 0-2 FMA3, 3-4 FMA4.
4272 (define_insn "*fma_fnmadd_<mode>"
4273 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4276 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4277 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4278 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4279 "TARGET_FMA || TARGET_FMA4"
4281 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4282 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4283 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4284 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4285 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4286 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4287 (set_attr "type" "ssemuladd")
4288 (set_attr "mode" "<MODE>")])
;; Zero-masked fnmadd expander: forwards to fma_fnmadd_<mode>_maskz_1 with
;; a CONST0 merge vector.
4290 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4291 [(match_operand:VF_AVX512VL 0 "register_operand")
4292 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4293 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4294 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4295 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4296 "TARGET_AVX512F && <round_mode512bit_condition>"
4298 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4299 operands[0], operands[1], operands[2], operands[3],
4300 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; AVX-512 fnmadd with optional zero-masking and embedded-rounding variants.
4304 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4305 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4308 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4309 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4310 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4311 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4313 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4314 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4315 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4316 [(set_attr "type" "ssemuladd")
4317 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd: masked-off elements keep op1 (tied to destination).
4319 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4320 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4321 (vec_merge:VF_AVX512VL
4324 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4325 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4326 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4328 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4329 "TARGET_AVX512F && <round_mode512bit_condition>"
4331 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4332 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4333 [(set_attr "type" "ssemuladd")
4334 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd, addend (op3) tied to the destination -> 231 form.
4336 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4337 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4338 (vec_merge:VF_AVX512VL
4341 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4342 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4343 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4345 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4346 "TARGET_AVX512F && <round_mode512bit_condition>"
4347 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4348 [(set_attr "type" "ssemuladd")
4349 (set_attr "mode" "<MODE>")])
;; op0 = -(op1 * op2) - op3.  Alternatives 0-2 FMA3, 3-4 FMA4.
;; FIX: the first three templates wrongly contained the AVX-512-only
;; <round_sd_mask_op4>/<sd_mask_op4> subst operators — those belong to the
;; <sd_mask_codefor>fma_* AVX-512 variants, not to this
;; "TARGET_FMA || TARGET_FMA4" pattern.  Restored the plain operand forms
;; used by the sibling *fma_fmsub_<mode> / *fma_fnmadd_<mode> patterns.
4351 (define_insn "*fma_fnmsub_<mode>"
4352 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4355 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4356 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4358 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4359 "TARGET_FMA || TARGET_FMA4"
4361 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4362 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4363 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4364 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4365 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4366 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4367 (set_attr "type" "ssemuladd")
4368 (set_attr "mode" "<MODE>")])
;; Zero-masked fnmsub expander: forwards to fma_fnmsub_<mode>_maskz_1 with
;; a CONST0 merge vector.
4370 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4371 [(match_operand:VF_AVX512VL 0 "register_operand")
4372 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4373 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4374 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4375 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4376 "TARGET_AVX512F && <round_mode512bit_condition>"
4378 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4379 operands[0], operands[1], operands[2], operands[3],
4380 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; AVX-512 fnmsub with optional zero-masking and embedded-rounding variants.
4384 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4385 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4388 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4389 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4391 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4392 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4394 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4395 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4396 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4397 [(set_attr "type" "ssemuladd")
4398 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub: masked-off elements keep op1 (tied to destination).
4400 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4401 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4402 (vec_merge:VF_AVX512VL
4405 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4406 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4408 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4410 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4411 "TARGET_AVX512F && <round_mode512bit_condition>"
4413 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4414 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4415 [(set_attr "type" "ssemuladd")
4416 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub, op3 tied to the destination -> 231 form.
4418 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4419 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4420 (vec_merge:VF_AVX512VL
4423 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4424 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4426 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4428 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4430 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4431 [(set_attr "type" "ssemuladd")
4432 (set_attr "mode" "<MODE>")])
4434 ;; FMA parallel floating point multiply addsub and subadd operations.
4436 ;; It would be possible to represent these without the UNSPEC as
4439 ;; (fma op1 op2 op3)
4440 ;; (fma op1 op2 (neg op3))
4443 ;; But this doesn't seem useful in practice.
;; Named expander for the alternating add/sub FMA (represented via an
;; unspec; see the comment above).
4445 (define_expand "fmaddsub_<mode>"
4446 [(set (match_operand:VF 0 "register_operand")
4448 [(match_operand:VF 1 "nonimmediate_operand")
4449 (match_operand:VF 2 "nonimmediate_operand")
4450 (match_operand:VF 3 "nonimmediate_operand")]
4452 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masked fmaddsub expander: forwards to fma_fmaddsub_<mode>_maskz_1
;; with a CONST0 merge vector.
4454 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4455 [(match_operand:VF_AVX512VL 0 "register_operand")
4456 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4457 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4458 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4459 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4462 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4463 operands[0], operands[1], operands[2], operands[3],
4464 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub (odd elements add, even subtract — via unspec) for FMA3
;; (alternatives 0-2) and FMA4 (3-4), 128/256-bit only.
4468 (define_insn "*fma_fmaddsub_<mode>"
4469 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4471 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4472 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4473 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4475 "TARGET_FMA || TARGET_FMA4"
4477 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4478 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4479 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4480 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4481 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4482 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4483 (set_attr "type" "ssemuladd")
4484 (set_attr "mode" "<MODE>")])
;; AVX-512 fmaddsub with optional zero-masking / embedded-rounding variants.
4486 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4487 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4488 (unspec:VF_SF_AVX512VL
4489 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4490 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4491 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4493 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4495 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4496 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4497 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4498 [(set_attr "type" "ssemuladd")
4499 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub: masked-off elements keep op1 (tied to dest).
4501 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4502 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4503 (vec_merge:VF_AVX512VL
4505 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4506 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4507 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4510 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4513 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4514 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4515 [(set_attr "type" "ssemuladd")
4516 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub, op3 tied to the destination -> 231 form.
4518 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4519 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4520 (vec_merge:VF_AVX512VL
4522 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4523 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4524 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4527 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4529 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4530 [(set_attr "type" "ssemuladd")
4531 (set_attr "mode" "<MODE>")])
;; fmsubadd (the complementary alternating pattern to fmaddsub) for FMA3
;; (alternatives 0-2) and FMA4 (3-4), 128/256-bit only.
4533 (define_insn "*fma_fmsubadd_<mode>"
4534 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4536 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4537 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4539 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4541 "TARGET_FMA || TARGET_FMA4"
4543 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4544 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4545 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4546 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4547 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4548 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4549 (set_attr "type" "ssemuladd")
4550 (set_attr "mode" "<MODE>")])
;; AVX-512 fmsubadd with optional zero-masking / embedded-rounding variants.
4552 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4553 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4554 (unspec:VF_SF_AVX512VL
4555 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4556 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4558 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4560 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4562 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4563 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4564 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4565 [(set_attr "type" "ssemuladd")
4566 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd: masked-off elements keep op1 (tied to dest).
4568 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4569 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4570 (vec_merge:VF_AVX512VL
4572 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4573 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4575 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
4578 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4581 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4582 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4583 [(set_attr "type" "ssemuladd")
4584 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd, op3 tied to the destination -> 231 form.
4586 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4587 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4588 (vec_merge:VF_AVX512VL
4590 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4591 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4593 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4596 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4598 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4599 [(set_attr "type" "ssemuladd")
4600 (set_attr "mode" "<MODE>")])
4602 ;; FMA3 floating point scalar intrinsics. These merge result with
4603 ;; high-order elements from the destination register.
;; Scalar intrinsic expander: low element = op1*op2+op3, upper elements
;; from op1 (the vec_merge wrapper lines are not visible in this excerpt).
4605 (define_expand "fmai_vmfmadd_<mode><round_name>"
4606 [(set (match_operand:VF_128 0 "register_operand")
4609 (match_operand:VF_128 1 "register_operand")
4610 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
4611 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
;; Scalar intrinsic expander: low element = op1*op2-op3.
4616 (define_expand "fmai_vmfmsub_<mode><round_name>"
4617 [(set (match_operand:VF_128 0 "register_operand")
4620 (match_operand:VF_128 1 "register_operand")
4621 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
4623 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
;; Scalar intrinsic expander: low element = -(op1*op2)+op3.
4628 (define_expand "fmai_vmfnmadd_<mode><round_name>"
4629 [(set (match_operand:VF_128 0 "register_operand")
4633 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
4634 (match_operand:VF_128 1 "register_operand")
4635 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
;; Scalar intrinsic expander: low element = -(op1*op2)-op3.
4640 (define_expand "fmai_vmfnmsub_<mode><round_name>"
4641 [(set (match_operand:VF_128 0 "register_operand")
4645 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
4646 (match_operand:VF_128 1 "register_operand")
4648 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
;; Scalar fmadd insn; op1 is tied to the destination (supplies the upper
;; elements), so only the 132/213 forms apply.
4653 (define_insn "*fmai_fmadd_<mode>"
4654 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4657 (match_operand:VF_128 1 "register_operand" "0,0")
4658 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
4659 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4662 "TARGET_FMA || TARGET_AVX512F"
4664 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4665 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4666 [(set_attr "type" "ssemuladd")
4667 (set_attr "mode" "<MODE>")])
;; Scalar fmsub insn; op1 tied to the destination -> 132/213 forms.
4669 (define_insn "*fmai_fmsub_<mode>"
4670 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4673 (match_operand:VF_128 1 "register_operand" "0,0")
4674 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4676 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4679 "TARGET_FMA || TARGET_AVX512F"
4681 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4682 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4683 [(set_attr "type" "ssemuladd")
4684 (set_attr "mode" "<MODE>")])
;; Scalar fnmadd insn; op1 tied to the destination -> 132/213 forms.
4686 (define_insn "*fmai_fnmadd_<mode><round_name>"
4687 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4691 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4692 (match_operand:VF_128 1 "register_operand" "0,0")
4693 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4696 "TARGET_FMA || TARGET_AVX512F"
4698 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4699 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4700 [(set_attr "type" "ssemuladd")
4701 (set_attr "mode" "<MODE>")])
;; Scalar fnmsub insn; op1 tied to the destination -> 132/213 forms.
4703 (define_insn "*fmai_fnmsub_<mode><round_name>"
4704 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4708 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4709 (match_operand:VF_128 1 "register_operand" "0,0")
4711 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4714 "TARGET_FMA || TARGET_AVX512F"
4716 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4717 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4718 [(set_attr "type" "ssemuladd")
4719 (set_attr "mode" "<MODE>")])
;; Scalar merge-masked fmadd: op1 (tied to dest) survives where the mask
;; bit is clear -> 132/213 forms.
4721 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
4722 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4726 (match_operand:VF_128 1 "register_operand" "0,0")
4727 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4728 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4730 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4735 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4736 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4737 [(set_attr "type" "ssemuladd")
4738 (set_attr "mode" "<MODE>")])
;; Scalar merge-masked fmadd, op3 tied to the destination -> 231 form.
;; FIX: the Intel-syntax half of the template printed %<iptr>3 — but
;; operand 3 uses matching constraint "0", so that repeats the destination
;; and drops the multiplicand.  The AT&T half correctly prints %1; use
;; %<iptr>1, matching the vector <avx512>_fmadd_<mode>_mask3 pattern.
4740 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
4741 [(set (match_operand:VF_128 0 "register_operand" "=v")
4745 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4746 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
4747 (match_operand:VF_128 3 "register_operand" "0"))
4749 (match_operand:QI 4 "register_operand" "Yk"))
4753 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
4754 [(set_attr "type" "ssemuladd")
4755 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fmadd expander: forwards to the _maskz_1 pattern
;; with a CONST0 merge operand.
4757 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
4758 [(match_operand:VF_128 0 "register_operand")
4759 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
4760 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
4761 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
4762 (match_operand:QI 4 "register_operand")]
4765 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
4766 operands[0], operands[1], operands[2], operands[3],
4767 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Zero-masked scalar fmadd insn: op4 is the CONST0 merge vector, op5 the
;; mask; emits the {z} zeroing form.
4771 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
4772 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4776 (match_operand:VF_128 1 "register_operand" "0,0")
4777 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4778 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4779 (match_operand:VF_128 4 "const0_operand" "C,C")
4780 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4785 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4786 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4787 [(set_attr "type" "ssemuladd")
4788 (set_attr "mode" "<MODE>")])
;; Scalar merge-masked fmsub; op1 tied to dest -> 132/213 forms.
4790 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
4791 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4795 (match_operand:VF_128 1 "register_operand" "0,0")
4796 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4798 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4800 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4805 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4806 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4807 [(set_attr "type" "ssemuladd")
4808 (set_attr "mode" "<MODE>")])
;; Scalar merge-masked fmsub, op3 tied to the destination -> 231 form.
;; FIX: Intel-syntax half printed %<iptr>3 (operand 3 matches the
;; destination via "0"), repeating the destination instead of operand 1;
;; the AT&T half prints %1.  Corrected to %<iptr>1.
4810 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
4811 [(set (match_operand:VF_128 0 "register_operand" "=v")
4815 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4816 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
4818 (match_operand:VF_128 3 "register_operand" "0")))
4820 (match_operand:QI 4 "register_operand" "Yk"))
4824 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
4825 [(set_attr "type" "ssemuladd")
4826 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fmsub insn: op4 is the CONST0 merge vector, op5 the
;; mask; emits the {z} zeroing form.
4828 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
4829 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4833 (match_operand:VF_128 1 "register_operand" "0,0")
4834 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4836 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4837 (match_operand:VF_128 4 "const0_operand" "C,C")
4838 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4843 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4844 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4845 [(set_attr "type" "ssemuladd")
4846 (set_attr "mode" "<MODE>")])
;; Scalar merge-masked fnmadd; op1 tied to dest -> 132/213 forms.
4848 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
4849 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4854 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4855 (match_operand:VF_128 1 "register_operand" "0,0")
4856 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4858 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4863 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4864 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4865 [(set_attr "type" "ssemuladd")
4866 (set_attr "mode" "<MODE>")])
;; Scalar merge-masked fnmadd, op3 tied to the destination -> 231 form.
;; FIX: Intel-syntax half printed %<iptr>3 (operand 3 matches the
;; destination via "0"), repeating the destination instead of operand 1;
;; the AT&T half prints %1.  Corrected to %<iptr>1.
4868 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
4869 [(set (match_operand:VF_128 0 "register_operand" "=v")
4874 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
4875 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4876 (match_operand:VF_128 3 "register_operand" "0"))
4878 (match_operand:QI 4 "register_operand" "Yk"))
4882 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
4883 [(set_attr "type" "ssemuladd")
4884 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fnmadd insn: op4 is the CONST0 merge vector, op5 the
;; mask; emits the {z} zeroing form.
4886 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
4887 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4892 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4893 (match_operand:VF_128 1 "register_operand" "0,0")
4894 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4895 (match_operand:VF_128 4 "const0_operand" "C,C")
4896 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4901 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4902 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4903 [(set_attr "type" "ssemuladd")
4904 (set_attr "mode" "<MODE>")])
;; Scalar merge-masked fnmsub; op1 tied to dest -> 132/213 forms.
4906 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
4907 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4912 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4913 (match_operand:VF_128 1 "register_operand" "0,0")
4915 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4917 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4922 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4923 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4924 [(set_attr "type" "ssemuladd")
4925 (set_attr "mode" "<MODE>")])
4927 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
4928 [(set (match_operand:VF_128 0 "register_operand" "=v")
4933 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
4934 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4936 (match_operand:VF_128 3 "register_operand" "0")))
4938 (match_operand:QI 4 "register_operand" "Yk"))
4942 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4943 [(set_attr "type" "ssemuladd")
4944 (set_attr "mode" "<MODE>")])
4946 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
4947 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4952 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4953 (match_operand:VF_128 1 "register_operand" "0,0")
4955 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4956 (match_operand:VF_128 4 "const0_operand" "C,C")
4957 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4962 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4963 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4964 [(set_attr "type" "ssemuladd")
4965 (set_attr "mode" "<MODE>")])
4967 ;; FMA4 floating point scalar intrinsics.  These write the
4968 ;; entire destination register, with the high-order elements zeroed.
;; NOTE(review): operand 4 (const0) presumably supplies those zeroed high
;; elements via a vec_merge elided from this listing — confirm against the
;; full pattern text.

;; Expander: materializes the const0 high-part operand before matching
;; the *fma4i_vmfmadd_<mode> insn below.
4970 (define_expand "fma4i_vmfmadd_<mode>"
4971   [(set (match_operand:VF_128 0 "register_operand")
4974 	      (match_operand:VF_128 1 "nonimmediate_operand")
4975 	      (match_operand:VF_128 2 "nonimmediate_operand")
4976 	      (match_operand:VF_128 3 "nonimmediate_operand"))
4980   "operands[4] = CONST0_RTX (<MODE>mode);")

;; a*b + c; "%x" on operand 1 marks it commutative with operand 2, and the
;; two alternatives let either %2 or %3 be the memory operand (FMA4's
;; 4-operand encoding allows a memory source in either position).
4982 (define_insn "*fma4i_vmfmadd_<mode>"
4983   [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4986 	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4987 	      (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4988 	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4989 	  (match_operand:VF_128 4 "const0_operand")
4992   "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4993   [(set_attr "type" "ssemuladd")
4994    (set_attr "mode" "<MODE>")])

;; a*b - c.
4996 (define_insn "*fma4i_vmfmsub_<mode>"
4997   [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5000 	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5001 	      (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5003 	        (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5004 	  (match_operand:VF_128 4 "const0_operand")
5007   "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5008   [(set_attr "type" "ssemuladd")
5009    (set_attr "mode" "<MODE>")])

;; -(a*b) + c.
5011 (define_insn "*fma4i_vmfnmadd_<mode>"
5012   [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5016 	        (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5017 	      (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5018 	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5019 	  (match_operand:VF_128 4 "const0_operand")
5022   "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5023   [(set_attr "type" "ssemuladd")
5024    (set_attr "mode" "<MODE>")])

;; -(a*b) - c.
5026 (define_insn "*fma4i_vmfnmsub_<mode>"
5027   [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5031 	        (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5032 	      (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5034 	        (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5035 	  (match_operand:VF_128 4 "const0_operand")
5038   "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5039   [(set_attr "type" "ssemuladd")
5040    (set_attr "mode" "<MODE>")])
5042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5044 ;; Parallel single-precision floating point conversion operations
5046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; cvtpi2ps: convert V2SI (MMX) to the low two SF lanes of a V4SF,
;; keeping the high lanes of operand 1.  When the input ends up in an SSE
;; register (alternative using %2 in SSE), split after reload into a pure
;; SSE2 sequence: cvtdq2ps into a scratch, then merge the low half back.
5048 (define_insn_and_split "sse_cvtpi2ps"
5049   [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
5052 	    (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5053 	  (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5055    (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5056   "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5058    cvtpi2ps\t{%2, %0|%0, %2}
5061   "TARGET_SSE2 && reload_completed
5062    && SSE_REG_P (operands[2])"
5065   rtx op2 = lowpart_subreg (V4SImode, operands[2],
5066 			    GET_MODE (operands[2]));
5067   /* Generate SSE2 cvtdq2ps.  */
5068   emit_insn (gen_floatv4siv4sf2 (operands[3], op2));
5070   /* Merge operands[3] with operands[0].  */
5074       mask = gen_rtx_PARALLEL (VOIDmode,
5075 			       gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5076 					  GEN_INT (6), GEN_INT (7)));
5077       op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5078       op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5079       emit_insn (gen_rtx_SET (operands[0], op2));
5083       /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
5084       mask = gen_rtx_PARALLEL (VOIDmode,
5085 			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5086 					  GEN_INT (4), GEN_INT (5)));
5087       op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5088       op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5089       emit_insn (gen_rtx_SET (operands[0], op2));
5091       /* Swap bits 0:63 with bits 64:127.  */
5092       mask = gen_rtx_PARALLEL (VOIDmode,
5093 			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5094 					  GEN_INT (0), GEN_INT (1)));
5095       rtx dest = lowpart_subreg (V4SImode, operands[0],
5096 				 GET_MODE (operands[0]));
5097       op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5098       emit_insn (gen_rtx_SET (dest, op1));
5102   [(set_attr "mmx_isa" "native,sse_noavx,avx")
5103    (set_attr "type" "ssecvt")
5104    (set_attr "mode" "V4SF")])
;; cvtps2pi: round-convert the low two SF lanes of a V4SF to V2SI.
;; When the destination is an SSE register, split after reload into
;; zero-extended V4SF + SSE2 cvtps2dq (fix_notrunc) — avoids MMX entirely.
5106 (define_insn_and_split "sse_cvtps2pi"
5107   [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5109 	    (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
5111 	  (parallel [(const_int 0) (const_int 1)])))]
5112   "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5114    cvtps2pi\t{%1, %0|%0, %q1}
5116   "TARGET_SSE2 && reload_completed
5117    && SSE_REG_P (operands[0])"
5120   rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5121 			    GET_MODE (operands[1]));
5122   rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5123 			    GET_MODE (operands[0]));
5125   op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5126   emit_insn (gen_rtx_SET (tmp, op1));
5128   rtx dest = lowpart_subreg (V4SImode, operands[0],
5129 			     GET_MODE (operands[0]));
5130   emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
5133   [(set_attr "isa" "*,sse2")
5134    (set_attr "mmx_isa" "native,*")
5135    (set_attr "type" "ssecvt")
5136    (set_attr "unit" "mmx,*")
5137    (set_attr "mode" "DI")])

;; cvttps2pi: same as above but truncating (fix: instead of the
;; UNSPEC_FIX_NOTRUNC round-to-nearest form); splits to cvttps2dq.
5139 (define_insn_and_split "sse_cvttps2pi"
5140   [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5142 	    (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
5143 	  (parallel [(const_int 0) (const_int 1)])))]
5144   "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5146    cvttps2pi\t{%1, %0|%0, %q1}
5148   "TARGET_SSE2 && reload_completed
5149    && SSE_REG_P (operands[0])"
5152   rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5153 			    GET_MODE (operands[1]));
5154   rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5155 			    GET_MODE (operands[0]));
5157   op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5158   emit_insn (gen_rtx_SET (tmp, op1));
5160   rtx dest = lowpart_subreg (V4SImode, operands[0],
5161 			     GET_MODE (operands[0]));
5162   emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
5165   [(set_attr "isa" "*,sse2")
5166    (set_attr "mmx_isa" "native,*")
5167    (set_attr "type" "ssecvt")
5168    (set_attr "unit" "mmx,*")
5169    (set_attr "prefix_rep" "0")
5170    (set_attr "mode" "SF")])
;; cvtsi2ss: scalar SI/DI -> SF into the low lane of a V4SF; alternatives
;; are SSE (destructive, %0 = %1) vs AVX/EVEX 3-operand with optional
;; embedded rounding.  length_vex/prefix_rex attributes account for the
;; REX.W byte needed when <MODE> is DImode.
5172 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5173   [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5176 	    (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5177 	  (match_operand:V4SF 1 "register_operand" "0,0,v")
5181    cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5182    cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5183    vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5184   [(set_attr "isa" "noavx,noavx,avx")
5185    (set_attr "type" "sseicvt")
5186    (set_attr "athlon_decode" "vector,double,*")
5187    (set_attr "amdfam10_decode" "vector,double,*")
5188    (set_attr "bdver1_decode" "double,direct,*")
5189    (set_attr "btver2_decode" "double,double,double")
5190    (set_attr "znver1_decode" "double,double,double")
5191    (set (attr "length_vex")
5193 	   (and (match_test "<MODE>mode == DImode")
5194 		(eq_attr "alternative" "2"))
5196        (const_string "*")))
5197    (set (attr "prefix_rex")
5199 	   (and (match_test "<MODE>mode == DImode")
5200 		(eq_attr "alternative" "0,1"))
5202        (const_string "*")))
5203    (set_attr "prefix" "orig,orig,maybe_evex")
5204    (set_attr "mode" "SF")])

;; cvtss2si: low SF lane of a V4SF -> SI/DI with rounding
;; (UNSPEC_FIX_NOTRUNC, i.e. not truncation).
5206 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5207   [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5210 	      (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5211 	      (parallel [(const_int 0)]))]
5212 	  UNSPEC_FIX_NOTRUNC))]
5214   "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5215   [(set_attr "type" "sseicvt")
5216    (set_attr "athlon_decode" "double,vector")
5217    (set_attr "bdver1_decode" "double,double")
5218    (set_attr "prefix_rep" "1")
5219    (set_attr "prefix" "maybe_vex")
5220    (set_attr "mode" "<MODE>")])

;; _2 variant: input is a bare SF (possibly in memory) rather than a
;; V4SF vec_select — matched when the scalar is not in a vector.
5222 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5223   [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5224 	(unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5225 		      UNSPEC_FIX_NOTRUNC))]
5227   "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
5228   [(set_attr "type" "sseicvt")
5229    (set_attr "athlon_decode" "double,vector")
5230    (set_attr "amdfam10_decode" "double,double")
5231    (set_attr "bdver1_decode" "double,double")
5232    (set_attr "prefix_rep" "1")
5233    (set_attr "prefix" "maybe_vex")
5234    (set_attr "mode" "<MODE>")])

;; cvttss2si: truncating version (plain fix:) with optional SAE
;; (suppress-all-exceptions) round control.
5236 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5237   [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5240 	      (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5241 	      (parallel [(const_int 0)]))))]
5243   "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5244   [(set_attr "type" "sseicvt")
5245    (set_attr "athlon_decode" "double,vector")
5246    (set_attr "amdfam10_decode" "double,double")
5247    (set_attr "bdver1_decode" "double,double")
5248    (set_attr "prefix_rep" "1")
5249    (set_attr "prefix" "maybe_vex")
5250    (set_attr "mode" "<MODE>")])
;; vcvtusi2ss/sd with a 32-bit unsigned source ({l} suffix disambiguates
;; operand size in AT&T syntax); low lane of %1 is replaced, high lanes kept.
5252 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5253   [(set (match_operand:VF_128 0 "register_operand" "=v")
5255 	  (vec_duplicate:VF_128
5256 	    (unsigned_float:<ssescalarmode>
5257 	      (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5258 	  (match_operand:VF_128 1 "register_operand" "v")
5260   "TARGET_AVX512F && <round_modev4sf_condition>"
5261   "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5262   [(set_attr "type" "sseicvt")
5263    (set_attr "prefix" "evex")
5264    (set_attr "mode" "<ssescalarmode>")])

;; 64-bit unsigned source variant ({q} suffix); requires TARGET_64BIT.
5266 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5267   [(set (match_operand:VF_128 0 "register_operand" "=v")
5269 	  (vec_duplicate:VF_128
5270 	    (unsigned_float:<ssescalarmode>
5271 	      (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5272 	  (match_operand:VF_128 1 "register_operand" "v")
5274   "TARGET_AVX512F && TARGET_64BIT"
5275   "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5276   [(set_attr "type" "sseicvt")
5277    (set_attr "prefix" "evex")
5278    (set_attr "mode" "<ssescalarmode>")])

;; Vector signed int -> float (cvtdq2ps), SSE2 2-operand vs AVX/EVEX form
;; with optional masking and embedded rounding.
5280 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5281   [(set (match_operand:VF1 0 "register_operand" "=x,v")
5283 	  (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5284   "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5286    cvtdq2ps\t{%1, %0|%0, %1}
5287    vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5288   [(set_attr "isa" "noavx,avx")
5289    (set_attr "type" "ssecvt")
5290    (set_attr "prefix" "maybe_vex")
5291    (set_attr "mode" "<sseinsnmode>")])

;; Vector unsigned int -> float (vcvtudq2ps, AVX512VL iterator).
5293 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5294   [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5295 	(unsigned_float:VF1_AVX512VL
5296 	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5298   "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5299   [(set_attr "type" "ssecvt")
5300    (set_attr "prefix" "evex")
5301    (set_attr "mode" "<MODE>")])

;; Expander for unsigned int -> float: use the native AVX512 instructions
;; when available, otherwise fall back to the signed-convert fixup helper.
5303 (define_expand "floatuns<sseintvecmodelower><mode>2"
5304   [(match_operand:VF1 0 "register_operand")
5305    (match_operand:<sseintvecmode> 1 "register_operand")]
5306   "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5308   if (<MODE>mode == V16SFmode)
5309     emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5311     if (TARGET_AVX512VL)
5313 	if (<MODE>mode == V4SFmode)
5314 	  emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5316 	  emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5319       ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5325 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
5326 (define_mode_attr sf2simodelower
5327   [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])

;; cvtps2dq: float -> int with current rounding mode (not truncation),
;; SSE2 through AVX512 widths, optional masking.
5329 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5330   [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5332 	  [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5333 	  UNSPEC_FIX_NOTRUNC))]
5334   "TARGET_SSE2 && <mask_mode512bit_condition>"
5335   "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5336   [(set_attr "type" "ssecvt")
5337    (set (attr "prefix_data16")
5339 	   (match_test "TARGET_AVX")
5341        (const_string "1")))
5342    (set_attr "prefix" "maybe_vex")
5343    (set_attr "mode" "<sseinsnmode>")])

;; 512-bit cvtps2dq with embedded-rounding support.
5345 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5346   [(set (match_operand:V16SI 0 "register_operand" "=v")
5348 	  [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5349 	  UNSPEC_FIX_NOTRUNC))]
5351   "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5352   [(set_attr "type" "ssecvt")
5353    (set_attr "prefix" "evex")
5354    (set_attr "mode" "XI")])

;; vcvtps2udq: float -> unsigned int, non-truncating.
5356 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5357   [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5358 	(unspec:VI4_AVX512VL
5359 	  [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5360 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5362   "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5363   [(set_attr "type" "ssecvt")
5364    (set_attr "prefix" "evex")
5365    (set_attr "mode" "<sseinsnmode>")])

;; vcvtps2qq: float -> signed 64-bit int (AVX512DQ), 256/512-bit widths.
5367 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5368   [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5369 	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5370 		     UNSPEC_FIX_NOTRUNC))]
5371   "TARGET_AVX512DQ && <round_mode512bit_condition>"
5372   "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5373   [(set_attr "type" "ssecvt")
5374    (set_attr "prefix" "evex")
5375    (set_attr "mode" "<sseinsnmode>")])

;; 128-bit special case: only the low two SF lanes feed the two DI results
;; (%q1 prints the 64-bit memory reference).
5377 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5378   [(set (match_operand:V2DI 0 "register_operand" "=v")
5381 	      (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5382 	      (parallel [(const_int 0) (const_int 1)]))]
5383 	  UNSPEC_FIX_NOTRUNC))]
5384   "TARGET_AVX512DQ && TARGET_AVX512VL"
5385   "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5386   [(set_attr "type" "ssecvt")
5387    (set_attr "prefix" "evex")
5388    (set_attr "mode" "TI")])

;; vcvtps2uqq: unsigned counterpart of cvtps2qq.
5390 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5391   [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5392 	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5393 		     UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5394   "TARGET_AVX512DQ && <round_mode512bit_condition>"
5395   "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5396   [(set_attr "type" "ssecvt")
5397    (set_attr "prefix" "evex")
5398    (set_attr "mode" "<sseinsnmode>")])

;; 128-bit unsigned special case, as above.
5400 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5401   [(set (match_operand:V2DI 0 "register_operand" "=v")
5404 	      (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5405 	      (parallel [(const_int 0) (const_int 1)]))]
5406 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5407   "TARGET_AVX512DQ && TARGET_AVX512VL"
5408   "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5409   [(set_attr "type" "ssecvt")
5410    (set_attr "prefix" "evex")
5411    (set_attr "mode" "TI")])
;; Truncating float -> (un)signed int, 512-bit: vcvttps2dq / vcvttps2udq
;; selected by <fixsuffix>; SAE-only round control.
5413 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5414   [(set (match_operand:V16SI 0 "register_operand" "=v")
5416 	  (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5418   "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5419   [(set_attr "type" "ssecvt")
5420    (set_attr "prefix" "evex")
5421    (set_attr "mode" "XI")])

;; 256-bit truncating convert (AVX; masking only with AVX512VL).
5423 (define_insn "fix_truncv8sfv8si2<mask_name>"
5424   [(set (match_operand:V8SI 0 "register_operand" "=v")
5425 	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5426   "TARGET_AVX && <mask_avx512vl_condition>"
5427   "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5428   [(set_attr "type" "ssecvt")
5429    (set_attr "prefix" "<mask_prefix>")
5430    (set_attr "mode" "OI")])

;; 128-bit truncating convert (SSE2 baseline).
;; NOTE(review): "prefix_data16" is set twice — once conditionally below and
;; once unconditionally to "0"; the second setting looks redundant. Confirm
;; against genattr semantics before removing.
5432 (define_insn "fix_truncv4sfv4si2<mask_name>"
5433   [(set (match_operand:V4SI 0 "register_operand" "=v")
5434 	(fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5435   "TARGET_SSE2 && <mask_avx512vl_condition>"
5436   "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5437   [(set_attr "type" "ssecvt")
5438    (set (attr "prefix_rep")
5440 	   (match_test "TARGET_AVX")
5442        (const_string "1")))
5443    (set (attr "prefix_data16")
5445 	   (match_test "TARGET_AVX")
5447        (const_string "0")))
5448    (set_attr "prefix_data16" "0")
5449    (set_attr "prefix" "<mask_prefix2>")
5450    (set_attr "mode" "TI")])

;; Unsigned truncating convert expander: native on AVX512F (V16SF),
;; otherwise adjust-then-signed-convert-then-xor fixup sequence.
5452 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5453   [(match_operand:<sseintvecmode> 0 "register_operand")
5454    (match_operand:VF1 1 "register_operand")]
5457   if (<MODE>mode == V16SFmode)
5458     emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5463       tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5464       tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5465       emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5466       emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5471 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5473 ;; Parallel double-precision floating point conversion operations
5475 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; V2SI -> V2DF; SSE register form uses cvtdq2pd, MMX register form the
;; legacy cvtpi2pd (the "?!x" alternative discourages the MMX path).
5477 (define_insn "sse2_cvtpi2pd"
5478   [(set (match_operand:V2DF 0 "register_operand" "=v,?!x")
5479 	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,yBm")))]
5482    %vcvtdq2pd\t{%1, %0|%0, %1}
5483    cvtpi2pd\t{%1, %0|%0, %1}"
5484   [(set_attr "mmx_isa" "*,native")
5485    (set_attr "type" "ssecvt")
5486    (set_attr "unit" "*,mmx")
5487    (set_attr "prefix_data16" "*,1")
5488    (set_attr "prefix" "maybe_vex")
5489    (set_attr "mode" "V2DF")])

;; Standard-named expander mapping straight onto the insn above when MMX
;; registers are emulated with SSE.
5491 (define_expand "floatv2siv2df2"
5492   [(set (match_operand:V2DF 0 "register_operand")
5493 	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand")))]
5494   "TARGET_MMX_WITH_SSE")

;; Unsigned V2SI -> V2DF via AVX512VL vcvtudq2pd.
5496 (define_insn "floatunsv2siv2df2"
5497   [(set (match_operand:V2DF 0 "register_operand" "=v")
5498 	(unsigned_float:V2DF
5499 	  (match_operand:V2SI 1 "nonimmediate_operand" "vm")))]
5500   "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5501   "vcvtudq2pd\t{%1, %0|%0, %1}"
5502   [(set_attr "type" "ssecvt")
5503    (set_attr "prefix" "evex")
5504    (set_attr "mode" "V2DF")])

;; V2DF -> V2SI, round-to-nearest (FIX_NOTRUNC).  The {x} suffix forces the
;; 128-bit form of vcvtpd2dq, whose operand size is otherwise ambiguous.
5506 (define_insn "sse2_cvtpd2pi"
5507   [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5508 	(unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
5509 		     UNSPEC_FIX_NOTRUNC))]
5512    * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5513    cvtpd2pi\t{%1, %0|%0, %1}"
5514   [(set_attr "mmx_isa" "*,native")
5515    (set_attr "type" "ssecvt")
5516    (set_attr "unit" "*,mmx")
5517    (set_attr "amdfam10_decode" "double")
5518    (set_attr "athlon_decode" "vector")
5519    (set_attr "bdver1_decode" "double")
5520    (set_attr "prefix_data16" "*,1")
5521    (set_attr "prefix" "maybe_vex")
5522    (set_attr "mode" "TI")])

;; Truncating variant of the above (cvttpd2dq / cvttpd2pi).
5524 (define_insn "sse2_cvttpd2pi"
5525   [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5526 	(fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
5529    * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5530    cvttpd2pi\t{%1, %0|%0, %1}"
5531   [(set_attr "mmx_isa" "*,native")
5532    (set_attr "type" "ssecvt")
5533    (set_attr "unit" "*,mmx")
5534    (set_attr "amdfam10_decode" "double")
5535    (set_attr "athlon_decode" "vector")
5536    (set_attr "bdver1_decode" "double")
5537    (set_attr "prefix_data16" "*,1")
5538    (set_attr "prefix" "maybe_vex")
5539    (set_attr "mode" "TI")])

;; Standard-named truncating expander for MMX-with-SSE.
5541 (define_expand "fix_truncv2dfv2si2"
5542   [(set (match_operand:V2SI 0 "register_operand")
5543 	(fix:V2SI (match_operand:V2DF 1 "vector_operand")))]
5544   "TARGET_MMX_WITH_SSE")

;; Unsigned truncating V2DF -> V2SI via AVX512VL vcvttpd2udq ({x} = 128-bit).
5546 (define_insn "fixuns_truncv2dfv2si2"
5547   [(set (match_operand:V2SI 0 "register_operand" "=v")
5549 	  (match_operand:V2DF 1 "nonimmediate_operand" "vm")))]
5550   "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5551   "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
5552   [(set_attr "type" "ssecvt")
5553    (set_attr "prefix" "evex")
5554    (set_attr "mode" "TI")])
;; Scalar SI -> DF into the low lane of a V2DF ({l} = 32-bit source);
;; SSE destructive form vs AVX 3-operand form.
5556 (define_insn "sse2_cvtsi2sd"
5557   [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5560 	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5561 	  (match_operand:V2DF 1 "register_operand" "0,0,v")
5565    cvtsi2sd{l}\t{%2, %0|%0, %2}
5566    cvtsi2sd{l}\t{%2, %0|%0, %2}
5567    vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
5568   [(set_attr "isa" "noavx,noavx,avx")
5569    (set_attr "type" "sseicvt")
5570    (set_attr "athlon_decode" "double,direct,*")
5571    (set_attr "amdfam10_decode" "vector,double,*")
5572    (set_attr "bdver1_decode" "double,direct,*")
5573    (set_attr "btver2_decode" "double,double,double")
5574    (set_attr "znver1_decode" "double,double,double")
5575    (set_attr "prefix" "orig,orig,maybe_evex")
5576    (set_attr "mode" "DF")])

;; 64-bit source variant ({q}); supports embedded rounding on the AVX
;; alternative.  No 32-bit round variant exists since DI->DF is exact... 
;; NOTE(review): rationale inferred; confirm why only the {q} form is round_name'd.
5578 (define_insn "sse2_cvtsi2sdq<round_name>"
5579   [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5582 	    (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5583 	  (match_operand:V2DF 1 "register_operand" "0,0,v")
5585   "TARGET_SSE2 && TARGET_64BIT"
5587    cvtsi2sd{q}\t{%2, %0|%0, %2}
5588    cvtsi2sd{q}\t{%2, %0|%0, %2}
5589    vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5590   [(set_attr "isa" "noavx,noavx,avx")
5591    (set_attr "type" "sseicvt")
5592    (set_attr "athlon_decode" "double,direct,*")
5593    (set_attr "amdfam10_decode" "vector,double,*")
5594    (set_attr "bdver1_decode" "double,direct,*")
5595    (set_attr "length_vex" "*,*,4")
5596    (set_attr "prefix_rex" "1,1,*")
5597    (set_attr "prefix" "orig,orig,maybe_evex")
5598    (set_attr "mode" "DF")])

;; Low SF lane -> unsigned SI/DI, round-to-nearest (AVX512F vcvtss2usi).
5600 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
5601   [(set (match_operand:SWI48 0 "register_operand" "=r")
5604 	      (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
5605 	      (parallel [(const_int 0)]))]
5606 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5608   "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5609   [(set_attr "type" "sseicvt")
5610    (set_attr "prefix" "evex")
5611    (set_attr "mode" "<MODE>")])

;; Truncating unsigned version (SAE round control only).
5613 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
5614   [(set (match_operand:SWI48 0 "register_operand" "=r")
5617 	      (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5618 	      (parallel [(const_int 0)]))))]
5620   "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5621   [(set_attr "type" "sseicvt")
5622    (set_attr "prefix" "evex")
5623    (set_attr "mode" "<MODE>")])

;; Low DF lane -> unsigned SI/DI, round-to-nearest (%q1 prints a 64-bit ref).
5625 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
5626   [(set (match_operand:SWI48 0 "register_operand" "=r")
5629 	      (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
5630 	      (parallel [(const_int 0)]))]
5631 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5633   "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5634   [(set_attr "type" "sseicvt")
5635    (set_attr "prefix" "evex")
5636    (set_attr "mode" "<MODE>")])

;; Truncating unsigned DF version (SAE only).
5638 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
5639   [(set (match_operand:SWI48 0 "register_operand" "=r")
5642 	      (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5643 	      (parallel [(const_int 0)]))))]
5645   "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5646   [(set_attr "type" "sseicvt")
5647    (set_attr "prefix" "evex")
5648    (set_attr "mode" "<MODE>")])
;; Low DF lane of a V2DF -> signed SI/DI, round-to-nearest.
5650 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
5651   [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5654 	      (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5655 	      (parallel [(const_int 0)]))]
5656 	  UNSPEC_FIX_NOTRUNC))]
5658   "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5659   [(set_attr "type" "sseicvt")
5660    (set_attr "athlon_decode" "double,vector")
5661    (set_attr "bdver1_decode" "double,double")
5662    (set_attr "btver2_decode" "double,double")
5663    (set_attr "prefix_rep" "1")
5664    (set_attr "prefix" "maybe_vex")
5665    (set_attr "mode" "<MODE>")])

;; _2 variant: input is a bare DF rather than a V2DF vec_select.
5667 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
5668   [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5669 	(unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
5670 		      UNSPEC_FIX_NOTRUNC))]
5672   "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
5673   [(set_attr "type" "sseicvt")
5674    (set_attr "athlon_decode" "double,vector")
5675    (set_attr "amdfam10_decode" "double,double")
5676    (set_attr "bdver1_decode" "double,double")
5677    (set_attr "prefix_rep" "1")
5678    (set_attr "prefix" "maybe_vex")
5679    (set_attr "mode" "<MODE>")])

;; Truncating version (fix:), SAE-only round control.
5681 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
5682   [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5685 	      (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
5686 	      (parallel [(const_int 0)]))))]
5688   "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5689   [(set_attr "type" "sseicvt")
5690    (set_attr "athlon_decode" "double,vector")
5691    (set_attr "amdfam10_decode" "double,double")
5692    (set_attr "bdver1_decode" "double,double")
5693    (set_attr "btver2_decode" "double,double")
5694    (set_attr "prefix_rep" "1")
5695    (set_attr "prefix" "maybe_vex")
5696    (set_attr "mode" "<MODE>")])
5698 ;; For float<si2dfmode><mode>2 insn pattern
5699 (define_mode_attr si2dfmode
5700   [(V8DF "V8SI") (V4DF "V4SI")])
5701 (define_mode_attr si2dfmodelower
5702   [(V8DF "v8si") (V4DF "v4si")])

;; Widening int -> double: vcvtdq2pd for 256/512-bit results, optional mask.
5704 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
5705   [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5706 	(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5707   "TARGET_AVX && <mask_mode512bit_condition>"
5708   "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5709   [(set_attr "type" "ssecvt")
5710    (set_attr "prefix" "maybe_vex")
5711    (set_attr "mode" "<MODE>")])

;; (Un)signed 64-bit int -> double: vcvtqq2pd / vcvtuqq2pd (AVX512DQ),
;; any_float covers both signednesses via <floatunssuffix>/<floatsuffix>.
5713 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
5714   [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5715 	(any_float:VF2_AVX512VL
5716 	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5718   "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5719   [(set_attr "type" "ssecvt")
5720    (set_attr "prefix" "evex")
5721    (set_attr "mode" "<MODE>")])

5723 ;; For float<floatunssuffix><sselondveclower><mode> insn patterns
;; {y} suffix disambiguates the 256-bit-source form of vcvtqq2ps in AT&T syntax.
5724 (define_mode_attr qq2pssuff
5725   [(V8SF "") (V4SF "{y}")])

5727 (define_mode_attr sselongvecmode
5728   [(V8SF "V8DI") (V4SF "V4DI")])

5730 (define_mode_attr sselongvecmodelower
5731   [(V8SF "v8di") (V4SF "v4di")])

5733 (define_mode_attr sseintvecmode3
5734   [(V8SF "XI") (V4SF "OI")
5735    (V8DF "OI") (V4DF "TI")])

;; Narrowing (un)signed 64-bit int -> float: vcvt[u]qq2ps (AVX512DQ).
5737 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5738   [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5739 	(any_float:VF1_128_256VL
5740 	  (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5741   "TARGET_AVX512DQ && <round_modev8sf_condition>"
5742   "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5743   [(set_attr "type" "ssecvt")
5744    (set_attr "prefix" "evex")
5745    (set_attr "mode" "<MODE>")])
;; V2DI -> V2SF lives in the low half of a V4SF (high half zeroed by
;; operand 2).  Expander materializes the const0 operand.
5747 (define_expand "avx512dq_float<floatunssuffix>v2div2sf2"
5748   [(set (match_operand:V4SF 0 "register_operand" "=v")
5750 	  (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5752   "TARGET_AVX512DQ && TARGET_AVX512VL"
5753   "operands[2] = CONST0_RTX (V2SFmode);")

;; Matching insn: vcvt[u]qq2ps{x} ({x} forces the 128-bit encoding).
5755 (define_insn "*avx512dq_float<floatunssuffix>v2div2sf2"
5756   [(set (match_operand:V4SF 0 "register_operand" "=v")
5758 	  (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5759 	  (match_operand:V2SF 2 "const0_operand" "C")))]
5760   "TARGET_AVX512DQ && TARGET_AVX512VL"
5761   "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5762   [(set_attr "type" "ssecvt")
5763    (set_attr "prefix" "evex")
5764    (set_attr "mode" "V4SF")])

;; Standard-named expander: retarget the V2SF destination as the low half
;; of a V4SF subreg and emit the pattern above.
5766 (define_expand "float<floatunssuffix>v2div2sf2"
5767   [(set (match_operand:V2SF 0 "register_operand")
5768 	(any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))]
5769   "TARGET_AVX512DQ && TARGET_AVX512VL"
5771   operands[0] = simplify_gen_subreg (V4SFmode, operands[0], V2SFmode, 0);
5772   emit_insn (gen_avx512dq_float<floatunssuffix>v2div2sf2
5773 	     (operands[0], operands[1]));

;; Helper mode attrs for vec_pack<floatprefix>_float_<mode> below.
5777 (define_mode_attr vpckfloat_concat_mode
5778   [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5779 (define_mode_attr vpckfloat_temp_mode
5780   [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5781 (define_mode_attr vpckfloat_op_mode
5782   [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])

;; Pack two VI8 vectors converted to float into one PS vector: convert each
;; half separately, then concatenate (movlhps for the V2DI case, where each
;; half occupies only the low 64 bits of its V4SF temporary).
5784 (define_expand "vec_pack<floatprefix>_float_<mode>"
5785   [(match_operand:<ssePSmode> 0 "register_operand")
5786    (any_float:<ssePSmode>
5787      (match_operand:VI8_AVX512VL 1 "register_operand"))
5788    (match_operand:VI8_AVX512VL 2 "register_operand")]
5791   rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5792   rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5793   rtx (*gen) (rtx, rtx);
5795   if (<MODE>mode == V2DImode)
5796     gen = gen_avx512dq_float<floatunssuffix>v2div2sf2;
5798     gen = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5799   emit_insn (gen (r1, operands[1]));
5800   emit_insn (gen (r2, operands[2]));
5801   if (<MODE>mode == V2DImode)
5802     emit_insn (gen_sse_movlhps (operands[0], r1, r2));
5804     emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
5805 

;; Merge-masked V2DI -> V2SF; operand 2 supplies the pass-through lanes,
;; operand 3 the mask; operand 4 (const0) zeroes the upper half.
5809 (define_expand "float<floatunssuffix>v2div2sf2_mask"
5810   [(set (match_operand:V4SF 0 "register_operand" "=v")
5813 	      (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5815 		(match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5816 		(parallel [(const_int 0) (const_int 1)]))
5817 	      (match_operand:QI 3 "register_operand" "Yk"))
5819   "TARGET_AVX512DQ && TARGET_AVX512VL"
5820   "operands[4] = CONST0_RTX (V2SFmode);")

;; Insn for the merge-masked expander; %N2 prints nothing when operand 2
;; is zero (selecting zero-masking encoding).
5822 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
5823   [(set (match_operand:V4SF 0 "register_operand" "=v")
5826 	      (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5828 		(match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5829 		(parallel [(const_int 0) (const_int 1)]))
5830 	      (match_operand:QI 3 "register_operand" "Yk"))
5831 	  (match_operand:V2SF 4 "const0_operand" "C")))]
5832   "TARGET_AVX512DQ && TARGET_AVX512VL"
5833   "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5834   [(set_attr "type" "ssecvt")
5835    (set_attr "prefix" "evex")
5836    (set_attr "mode" "V4SF")])

;; Zero-masking form: both merge sources are const0, so emit %{z%}.
5838 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5839   [(set (match_operand:V4SF 0 "register_operand" "=v")
5842 	      (any_float:V2SF (match_operand:V2DI 1
5843 				  "nonimmediate_operand" "vm"))
5844 	      (match_operand:V2SF 3 "const0_operand" "C")
5845 	      (match_operand:QI 2 "register_operand" "Yk"))
5846 	  (match_operand:V2SF 4 "const0_operand" "C")))]
5847   "TARGET_AVX512DQ && TARGET_AVX512VL"
5848   "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5849   [(set_attr "type" "ssecvt")
5850    (set_attr "prefix" "evex")
5851    (set_attr "mode" "V4SF")])
;; Unsigned SI-vector -> DF-vector conversion (vcvtudq2pd) for 512/256-bit
;; modes.  NOTE(review): the insn condition line appears missing from this
;; extraction — confirm against upstream sse.md.
5853 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5854 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5855 (unsigned_float:VF2_512_256VL
5856 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5858 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5859 [(set_attr "type" "ssecvt")
5860 (set_attr "prefix" "evex")
5861 (set_attr "mode" "<MODE>")])
;; 128-bit variant: low two elements of a V4SI converted to V2DF.
;; %q1 prints the memory operand as a quadword (only 64 bits are read).
5863 (define_insn "ufloatv2siv2df2<mask_name>"
5864 [(set (match_operand:V2DF 0 "register_operand" "=v")
5865 (unsigned_float:V2DF
5867 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5868 (parallel [(const_int 0) (const_int 1)]))))]
5870 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5871 [(set_attr "type" "ssecvt")
5872 (set_attr "prefix" "evex")
5873 (set_attr "mode" "V2DF")])
;; Signed conversion of the low 8 elements of a V16SI to V8DF; %t1 prints
;; the 256-bit (ymm) view of the source.
5875 (define_insn "avx512f_cvtdq2pd512_2"
5876 [(set (match_operand:V8DF 0 "register_operand" "=v")
5879 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5880 (parallel [(const_int 0) (const_int 1)
5881 (const_int 2) (const_int 3)
5882 (const_int 4) (const_int 5)
5883 (const_int 6) (const_int 7)]))))]
5885 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5886 [(set_attr "type" "ssecvt")
5887 (set_attr "prefix" "evex")
5888 (set_attr "mode" "V8DF")])
;; Low half of a V8SI converted to V4DF; %x1 prints the xmm view of the
;; source.  NOTE(review): some interior lines appear missing from this
;; extraction — confirm against upstream sse.md.
5890 (define_insn "avx_cvtdq2pd256_2"
5891 [(set (match_operand:V4DF 0 "register_operand" "=v")
5894 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5895 (parallel [(const_int 0) (const_int 1)
5896 (const_int 2) (const_int 3)]))))]
5898 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5899 [(set_attr "type" "ssecvt")
5900 (set_attr "prefix" "maybe_evex")
5901 (set_attr "mode" "V4DF")])
;; SSE2 low-half V4SI -> V2DF; %v emits the v-prefix when AVX is enabled,
;; %q1 reads only 64 bits of a memory source.
5903 (define_insn "sse2_cvtdq2pd<mask_name>"
5904 [(set (match_operand:V2DF 0 "register_operand" "=v")
5907 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5908 (parallel [(const_int 0) (const_int 1)]))))]
5909 "TARGET_SSE2 && <mask_avx512vl_condition>"
5910 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5911 [(set_attr "type" "ssecvt")
5912 (set_attr "prefix" "maybe_vex")
5913 (set_attr "mode" "V2DF")])
;; V8DF -> V8SI with rounding (not truncation), expressed as an unspec
;; because the rounding mode is dynamic; supports embedded rounding.
5915 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5916 [(set (match_operand:V8SI 0 "register_operand" "=v")
5918 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5919 UNSPEC_FIX_NOTRUNC))]
5921 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5922 [(set_attr "type" "ssecvt")
5923 (set_attr "prefix" "evex")
5924 (set_attr "mode" "OI")])
;; V4DF -> V4SI rounding conversion; {y} disambiguates the 256-bit form
;; of vcvtpd2dq in AT&T syntax.
5926 (define_insn "avx_cvtpd2dq256<mask_name>"
5927 [(set (match_operand:V4SI 0 "register_operand" "=v")
5928 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5929 UNSPEC_FIX_NOTRUNC))]
5930 "TARGET_AVX && <mask_avx512vl_condition>"
5931 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5932 [(set_attr "type" "ssecvt")
5933 (set_attr "prefix" "<mask_prefix>")
5934 (set_attr "mode" "OI")])
;; Expander variant producing a V8SI whose upper half is zero.
;; NOTE(review): interior lines appear missing here — confirm upstream.
5936 (define_expand "avx_cvtpd2dq256_2"
5937 [(set (match_operand:V8SI 0 "register_operand")
5939 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5943 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn; %x0 writes the xmm view, upper lanes zeroed by hardware.
5945 (define_insn "*avx_cvtpd2dq256_2"
5946 [(set (match_operand:V8SI 0 "register_operand" "=v")
5948 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5950 (match_operand:V4SI 2 "const0_operand")))]
5952 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5953 [(set_attr "type" "ssecvt")
5954 (set_attr "prefix" "vex")
5955 (set_attr "btver2_decode" "vector")
5956 (set_attr "mode" "OI")])
;; SSE2 V2DF -> V2SI (zero-extended into V4SI); C body picks the VEX or
;; legacy encoding at output time.
5958 (define_insn "sse2_cvtpd2dq"
5959 [(set (match_operand:V4SI 0 "register_operand" "=v")
5961 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5963 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5967 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
5969 return "cvtpd2dq\t{%1, %0|%0, %1}";
5971 [(set_attr "type" "ssecvt")
5972 (set_attr "prefix_rep" "1")
5973 (set_attr "prefix_data16" "0")
5974 (set_attr "prefix" "maybe_vex")
5975 (set_attr "mode" "TI")
5976 (set_attr "amdfam10_decode" "double")
5977 (set_attr "athlon_decode" "vector")
5978 (set_attr "bdver1_decode" "double")])
;; Merge-masked V2DF -> V2SI conversion (upper V4SI lanes zero).
;; NOTE(review): vec_merge/vec_concat wrapper lines appear missing from
;; this extraction — confirm against upstream sse.md.
5980 (define_insn "sse2_cvtpd2dq_mask"
5981 [(set (match_operand:V4SI 0 "register_operand" "=v")
5984 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5987 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
5988 (parallel [(const_int 0) (const_int 1)]))
5989 (match_operand:QI 3 "register_operand" "Yk"))
5990 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5992 "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5993 [(set_attr "type" "ssecvt")
5994 (set_attr "prefix" "evex")
5995 (set_attr "mode" "TI")])
;; Zero-masking ({z}) variant of the above.
5997 (define_insn "*sse2_cvtpd2dq_mask_1"
5998 [(set (match_operand:V4SI 0 "register_operand" "=v")
6001 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6003 (const_vector:V2SI [(const_int 0) (const_int 0)])
6004 (match_operand:QI 2 "register_operand" "Yk"))
6005 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6007 "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6008 [(set_attr "type" "ssecvt")
6009 (set_attr "prefix" "evex")
6010 (set_attr "mode" "TI")])
;; Suffix attr: 512-bit form needs no size tag, 256-bit form needs {y}.
6012 ;; For ufix_notrunc* insn patterns
6013 (define_mode_attr pd2udqsuff
6014 [(V8DF "") (V4DF "{y}")])
;; Unsigned DF-vector -> SI-vector, rounded (vcvtpd2udq); supports
;; masking and embedded rounding.  NOTE(review): interior lines appear
;; missing throughout this group — confirm against upstream sse.md.
6016 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
6017 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
6019 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
6020 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6022 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6023 [(set_attr "type" "ssecvt")
6024 (set_attr "prefix" "evex")
6025 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit unmasked form; upper V4SI lanes become zero.
6027 (define_insn "ufix_notruncv2dfv2si2"
6028 [(set (match_operand:V4SI 0 "register_operand" "=v")
6031 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6032 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6033 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6035 "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
6036 [(set_attr "type" "ssecvt")
6037 (set_attr "prefix" "evex")
6038 (set_attr "mode" "TI")])
;; Merge-masked 128-bit form.
6040 (define_insn "ufix_notruncv2dfv2si2_mask"
6041 [(set (match_operand:V4SI 0 "register_operand" "=v")
6045 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6046 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6048 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6049 (parallel [(const_int 0) (const_int 1)]))
6050 (match_operand:QI 3 "register_operand" "Yk"))
6051 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6053 "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6054 [(set_attr "type" "ssecvt")
6055 (set_attr "prefix" "evex")
6056 (set_attr "mode" "TI")])
;; Zero-masking ({z}) 128-bit form.
6058 (define_insn "*ufix_notruncv2dfv2si2_mask_1"
6059 [(set (match_operand:V4SI 0 "register_operand" "=v")
6063 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6064 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6065 (const_vector:V2SI [(const_int 0) (const_int 0)])
6066 (match_operand:QI 2 "register_operand" "Yk"))
6067 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6069 "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6070 [(set_attr "type" "ssecvt")
6071 (set_attr "prefix" "evex")
6072 (set_attr "mode" "TI")])
;; Truncating (round-toward-zero) V8DF -> V8SI, signed or unsigned via
;; any_fix; supports SAE.  NOTE(review): interior lines appear missing in
;; this group — confirm against upstream sse.md.
6074 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
6075 [(set (match_operand:V8SI 0 "register_operand" "=v")
6077 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6079 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6080 [(set_attr "type" "ssecvt")
6081 (set_attr "prefix" "evex")
6082 (set_attr "mode" "OI")])
;; Unsigned truncating V2DF -> V2SI (upper lanes zero), unmasked.
6084 (define_insn "ufix_truncv2dfv2si2"
6085 [(set (match_operand:V4SI 0 "register_operand" "=v")
6087 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6088 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6090 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
6091 [(set_attr "type" "ssecvt")
6092 (set_attr "prefix" "evex")
6093 (set_attr "mode" "TI")])
;; Merge-masked variant.
6095 (define_insn "ufix_truncv2dfv2si2_mask"
6096 [(set (match_operand:V4SI 0 "register_operand" "=v")
6099 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6101 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6102 (parallel [(const_int 0) (const_int 1)]))
6103 (match_operand:QI 3 "register_operand" "Yk"))
6104 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6106 "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6107 [(set_attr "type" "ssecvt")
6108 (set_attr "prefix" "evex")
6109 (set_attr "mode" "TI")])
;; Zero-masking ({z}) variant.
6111 (define_insn "*ufix_truncv2dfv2si2_mask_1"
6112 [(set (match_operand:V4SI 0 "register_operand" "=v")
6115 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6116 (const_vector:V2SI [(const_int 0) (const_int 0)])
6117 (match_operand:QI 2 "register_operand" "Yk"))
6118 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6120 "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6121 [(set_attr "type" "ssecvt")
6122 (set_attr "prefix" "evex")
6123 (set_attr "mode" "TI")])
;; Signed truncating V4DF -> V4SI; available on plain AVX, masking needs
;; AVX512VL.
6125 (define_insn "fix_truncv4dfv4si2<mask_name>"
6126 [(set (match_operand:V4SI 0 "register_operand" "=v")
6127 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6128 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
6129 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6130 [(set_attr "type" "ssecvt")
6131 (set_attr "prefix" "maybe_evex")
6132 (set_attr "mode" "OI")])
;; Unsigned counterpart; AVX512VL-only (no pre-AVX512 encoding exists).
6134 (define_insn "ufix_truncv4dfv4si2<mask_name>"
6135 [(set (match_operand:V4SI 0 "register_operand" "=v")
6136 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6137 "TARGET_AVX512VL && TARGET_AVX512F"
6138 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6139 [(set_attr "type" "ssecvt")
6140 (set_attr "prefix" "maybe_evex")
6141 (set_attr "mode" "OI")])
;; AVX512DQ: truncating DF-vector -> 64-bit-int vector (vcvttpd2[u]qq),
;; signed/unsigned via any_fix, with masking and SAE support.
6143 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
6144 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6145 (any_fix:<sseintvecmode>
6146 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6147 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
6148 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6149 [(set_attr "type" "ssecvt")
6150 (set_attr "prefix" "evex")
6151 (set_attr "mode" "<sseintvecmode2>")])
;; Rounded (non-truncating) signed DF -> QQ conversion.
6153 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6154 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6155 (unspec:<sseintvecmode>
6156 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
6157 UNSPEC_FIX_NOTRUNC))]
6158 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6159 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6160 [(set_attr "type" "ssecvt")
6161 (set_attr "prefix" "evex")
6162 (set_attr "mode" "<sseintvecmode2>")])
;; Rounded unsigned DF -> UQQ conversion.
6164 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6165 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6166 (unspec:<sseintvecmode>
6167 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
6168 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6169 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6170 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6171 [(set_attr "type" "ssecvt")
6172 (set_attr "prefix" "evex")
6173 (set_attr "mode" "<sseintvecmode2>")])
;; Truncating SF-vector -> doubled-width 64-bit-int vector (vcvttps2[u]qq).
6175 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
6176 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
6177 (any_fix:<sselongvecmode>
6178 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6179 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
6180 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6181 [(set_attr "type" "ssecvt")
6182 (set_attr "prefix" "evex")
6183 (set_attr "mode" "<sseintvecmode3>")])
;; Low two SF elements -> V2DI; %q1 reads 64 bits of a memory source.
;; NOTE(review): interior lines appear missing — confirm upstream.
6185 (define_insn "avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
6186 [(set (match_operand:V2DI 0 "register_operand" "=v")
6189 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6190 (parallel [(const_int 0) (const_int 1)]))))]
6191 "TARGET_AVX512DQ && TARGET_AVX512VL"
6192 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6193 [(set_attr "type" "ssecvt")
6194 (set_attr "prefix" "evex")
6195 (set_attr "mode" "TI")])
;; V2SF -> V2DI expander: widens the source subreg to V4SF and defers to
;; the insn above.
6197 (define_expand "fix<fixunssuffix>_truncv2sfv2di2"
6198 [(set (match_operand:V2DI 0 "register_operand")
6200 (match_operand:V2SF 1 "register_operand")))]
6201 "TARGET_AVX512DQ && TARGET_AVX512VL"
6203 operands[1] = simplify_gen_subreg (V4SFmode, operands[1], V2SFmode, 0);
6204 emit_insn (gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2
6205 (operands[0], operands[1]));
;; Mode attrs for vec_unpack…fix_trunc: result DI-vector mode, its
;; lowercase pattern name, and the mode used for half extraction.
6209 (define_mode_attr vunpckfixt_mode
6210 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
6211 (define_mode_attr vunpckfixt_model
6212 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
6213 (define_mode_attr vunpckfixt_extract_mode
6214 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
;; Convert the LOW half of an SF vector to a full-width DI vector.
;; For V4SF the dedicated v2sfv2di pattern is used directly; wider modes
;; extract the low half first.  NOTE(review): condition/brace lines appear
;; missing from this extraction — confirm against upstream sse.md.
6216 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
6217 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6218 (any_fix:<vunpckfixt_mode>
6219 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6222 rtx tem = operands[1];
6223 rtx (*gen) (rtx, rtx);
6225 if (<MODE>mode != V4SFmode)
6227 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6228 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
6230 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6233 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6235 emit_insn (gen (operands[0], tem));
;; HIGH-half counterpart; the V4SF case uses vpermil (0x4e swaps the two
;; 64-bit halves) to move the high pair into the low position first.
6239 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6240 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6241 (any_fix:<vunpckfixt_mode>
6242 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6246 rtx (*gen) (rtx, rtx);
6248 if (<MODE>mode != V4SFmode)
6250 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6251 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6253 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6257 tem = gen_reg_rtx (V4SFmode);
6258 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6259 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6262 emit_insn (gen (operands[0], tem));
;; Unsigned truncating SF-vector -> SI-vector (vcvttps2udq), 128/256-bit.
;; NOTE(review): the insn condition line appears missing — confirm upstream.
6266 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6267 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6268 (unsigned_fix:<sseintvecmode>
6269 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6271 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6272 [(set_attr "type" "ssecvt")
6273 (set_attr "prefix" "evex")
6274 (set_attr "mode" "<sseintvecmode2>")])
;; Expander: truncating V4DF -> V4SI widened into a zero-topped V8SI.
6276 (define_expand "avx_cvttpd2dq256_2"
6277 [(set (match_operand:V8SI 0 "register_operand")
6279 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6282 "operands[2] = CONST0_RTX (V4SImode);")
;; SSE2 truncating V2DF -> V2SI (upper lanes zero); VEX vs. legacy
;; encoding chosen in the C body.
6284 (define_insn "sse2_cvttpd2dq"
6285 [(set (match_operand:V4SI 0 "register_operand" "=v")
6287 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6288 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6292 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
6294 return "cvttpd2dq\t{%1, %0|%0, %1}";
6296 [(set_attr "type" "ssecvt")
6297 (set_attr "amdfam10_decode" "double")
6298 (set_attr "athlon_decode" "vector")
6299 (set_attr "bdver1_decode" "double")
6300 (set_attr "prefix" "maybe_vex")
6301 (set_attr "mode" "TI")])
;; Merge-masked variant.
6303 (define_insn "sse2_cvttpd2dq_mask"
6304 [(set (match_operand:V4SI 0 "register_operand" "=v")
6307 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6309 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6310 (parallel [(const_int 0) (const_int 1)]))
6311 (match_operand:QI 3 "register_operand" "Yk"))
6312 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6314 "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6315 [(set_attr "type" "ssecvt")
6316 (set_attr "prefix" "evex")
6317 (set_attr "mode" "TI")])
;; Zero-masking ({z}) variant.
6319 (define_insn "*sse2_cvttpd2dq_mask_1"
6320 [(set (match_operand:V4SI 0 "register_operand" "=v")
6323 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6324 (const_vector:V2SI [(const_int 0) (const_int 0)])
6325 (match_operand:QI 2 "register_operand" "Yk"))
6326 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6328 "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6329 [(set_attr "type" "ssecvt")
6330 (set_attr "prefix" "evex")
6331 (set_attr "mode" "TI")])
;; Scalar DF -> SF in the low lane, upper lanes from operand 1.
;; Three alternatives: legacy reg, legacy mem, VEX/EVEX 3-operand.
;; NOTE(review): interior lines appear missing in this group — confirm
;; against upstream sse.md.
6333 (define_insn "sse2_cvtsd2ss<mask_name><round_name>"
6334 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6337 (float_truncate:V2SF
6338 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6339 (match_operand:V4SF 1 "register_operand" "0,0,v")
6343 cvtsd2ss\t{%2, %0|%0, %2}
6344 cvtsd2ss\t{%2, %0|%0, %q2}
6345 vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}"
6346 [(set_attr "isa" "noavx,noavx,avx")
6347 (set_attr "type" "ssecvt")
6348 (set_attr "athlon_decode" "vector,double,*")
6349 (set_attr "amdfam10_decode" "vector,double,*")
6350 (set_attr "bdver1_decode" "direct,direct,*")
6351 (set_attr "btver2_decode" "double,double,double")
6352 (set_attr "prefix" "orig,orig,<round_prefix>")
6353 (set_attr "mode" "SF")])
;; Variant matching a scalar DF operand directly (no vec_select wrapper).
6355 (define_insn "*sse2_vd_cvtsd2ss"
6356 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6359 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6360 (match_operand:V4SF 1 "register_operand" "0,0,v")
6364 cvtsd2ss\t{%2, %0|%0, %2}
6365 cvtsd2ss\t{%2, %0|%0, %2}
6366 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6367 [(set_attr "isa" "noavx,noavx,avx")
6368 (set_attr "type" "ssecvt")
6369 (set_attr "athlon_decode" "vector,double,*")
6370 (set_attr "amdfam10_decode" "vector,double,*")
6371 (set_attr "bdver1_decode" "direct,direct,*")
6372 (set_attr "btver2_decode" "double,double,double")
6373 (set_attr "prefix" "orig,orig,vex")
6374 (set_attr "mode" "SF")])
;; Scalar SF -> DF widening in the low lane; SAE-capable EVEX alternative.
;; %k2 reads only 32 bits of a memory source.
6376 (define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>"
6377 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6381 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6382 (parallel [(const_int 0) (const_int 1)])))
6383 (match_operand:V2DF 1 "register_operand" "0,0,v")
6387 cvtss2sd\t{%2, %0|%0, %2}
6388 cvtss2sd\t{%2, %0|%0, %k2}
6389 vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}"
6390 [(set_attr "isa" "noavx,noavx,avx")
6391 (set_attr "type" "ssecvt")
6392 (set_attr "amdfam10_decode" "vector,double,*")
6393 (set_attr "athlon_decode" "direct,direct,*")
6394 (set_attr "bdver1_decode" "direct,direct,*")
6395 (set_attr "btver2_decode" "double,double,double")
6396 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6397 (set_attr "mode" "DF")])
;; Variant matching a scalar SF operand directly.
6399 (define_insn "*sse2_vd_cvtss2sd"
6400 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6403 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6404 (match_operand:V2DF 1 "register_operand" "0,0,v")
6408 cvtss2sd\t{%2, %0|%0, %2}
6409 cvtss2sd\t{%2, %0|%0, %2}
6410 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6411 [(set_attr "isa" "noavx,noavx,avx")
6412 (set_attr "type" "ssecvt")
6413 (set_attr "amdfam10_decode" "vector,double,*")
6414 (set_attr "athlon_decode" "direct,direct,*")
6415 (set_attr "bdver1_decode" "direct,direct,*")
6416 (set_attr "btver2_decode" "double,double,double")
6417 (set_attr "prefix" "orig,orig,vex")
6418 (set_attr "mode" "DF")])
;; V8DF -> V8SF narrowing with masking and embedded rounding.
;; NOTE(review): some condition lines appear missing in this group —
;; confirm against upstream sse.md.
6420 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6421 [(set (match_operand:V8SF 0 "register_operand" "=v")
6422 (float_truncate:V8SF
6423 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6425 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6426 [(set_attr "type" "ssecvt")
6427 (set_attr "prefix" "evex")
6428 (set_attr "mode" "V8SF")])
;; V4DF -> V4SF; {y} selects the 256-bit form in AT&T syntax.
6430 (define_insn "avx_cvtpd2ps256<mask_name>"
6431 [(set (match_operand:V4SF 0 "register_operand" "=v")
6432 (float_truncate:V4SF
6433 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6434 "TARGET_AVX && <mask_avx512vl_condition>"
6435 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6436 [(set_attr "type" "ssecvt")
6437 (set_attr "prefix" "maybe_evex")
6438 (set_attr "btver2_decode" "vector")
6439 (set_attr "mode" "V4SF")])
;; Expander: V2DF -> V2SF widened into a zero-topped V4SF.
6441 (define_expand "sse2_cvtpd2ps"
6442 [(set (match_operand:V4SF 0 "register_operand")
6444 (float_truncate:V2SF
6445 (match_operand:V2DF 1 "vector_operand"))
6448 "operands[2] = CONST0_RTX (V2SFmode);")
;; Masked expander counterpart.
6450 (define_expand "sse2_cvtpd2ps_mask"
6451 [(set (match_operand:V4SF 0 "register_operand")
6454 (float_truncate:V2SF
6455 (match_operand:V2DF 1 "vector_operand"))
6457 (match_operand:V4SF 2 "nonimm_or_0_operand")
6458 (parallel [(const_int 0) (const_int 1)]))
6459 (match_operand:QI 3 "register_operand"))
6462 "operands[4] = CONST0_RTX (V2SFmode);")
;; Matching insn; VEX vs. legacy encoding chosen in the C body.
6464 (define_insn "*sse2_cvtpd2ps"
6465 [(set (match_operand:V4SF 0 "register_operand" "=v")
6467 (float_truncate:V2SF
6468 (match_operand:V2DF 1 "vector_operand" "vBm"))
6469 (match_operand:V2SF 2 "const0_operand" "C")))]
6473 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6475 return "cvtpd2ps\t{%1, %0|%0, %1}";
6477 [(set_attr "type" "ssecvt")
6478 (set_attr "amdfam10_decode" "double")
6479 (set_attr "athlon_decode" "vector")
6480 (set_attr "bdver1_decode" "double")
6481 (set_attr "prefix_data16" "1")
6482 (set_attr "prefix" "maybe_vex")
6483 (set_attr "mode" "V4SF")])
;; Bare V2DF -> V2SF for MMX-in-SSE; upper result lanes are don't-care.
6485 (define_insn "truncv2dfv2sf2"
6486 [(set (match_operand:V2SF 0 "register_operand" "=v")
6487 (float_truncate:V2SF
6488 (match_operand:V2DF 1 "vector_operand" "vBm")))]
6489 "TARGET_MMX_WITH_SSE"
6492 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6494 return "cvtpd2ps\t{%1, %0|%0, %1}";
6496 [(set_attr "type" "ssecvt")
6497 (set_attr "amdfam10_decode" "double")
6498 (set_attr "athlon_decode" "vector")
6499 (set_attr "bdver1_decode" "double")
6500 (set_attr "prefix_data16" "1")
6501 (set_attr "prefix" "maybe_vex")
6502 (set_attr "mode" "V4SF")])
;; Merge-masked V2DF -> V2SF (into zero-topped V4SF).  NOTE(review):
;; vec_merge/vec_concat wrapper lines appear missing from this extraction
;; — confirm against upstream sse.md.
6504 (define_insn "*sse2_cvtpd2ps_mask"
6505 [(set (match_operand:V4SF 0 "register_operand" "=v")
6508 (float_truncate:V2SF
6509 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6511 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6512 (parallel [(const_int 0) (const_int 1)]))
6513 (match_operand:QI 3 "register_operand" "Yk"))
6514 (match_operand:V2SF 4 "const0_operand" "C")))]
6516 "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6517 [(set_attr "type" "ssecvt")
6518 (set_attr "prefix" "evex")
6519 (set_attr "mode" "V4SF")])
;; Zero-masking ({z}) variant.
6521 (define_insn "*sse2_cvtpd2ps_mask_1"
6522 [(set (match_operand:V4SF 0 "register_operand" "=v")
6525 (float_truncate:V2SF
6526 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6527 (match_operand:V2SF 3 "const0_operand" "C")
6528 (match_operand:QI 2 "register_operand" "Yk"))
6529 (match_operand:V2SF 4 "const0_operand" "C")))]
6531 "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6532 [(set_attr "type" "ssecvt")
6533 (set_attr "prefix" "evex")
6534 (set_attr "mode" "V4SF")])
;; Attrs mapping a DF-vector mode to the same-element-count SF-vector mode
;; (uppercase and lowercase forms).
6536 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
6537 (define_mode_attr sf2dfmode
6538 [(V8DF "V8SF") (V4DF "V4SF")])
6539 (define_mode_attr sf2dfmode_lower
6540 [(V8DF "v8sf") (V4DF "v4sf")])
;; Standard-named expanders for DF<->SF vector narrowing/widening; they
;; only exist to expose the named pattern (no C body visible here).
;; NOTE(review): the condition lines appear missing — confirm upstream.
6542 (define_expand "trunc<mode><sf2dfmode_lower>2"
6543 [(set (match_operand:<sf2dfmode> 0 "register_operand")
6544 (float_truncate:<sf2dfmode>
6545 (match_operand:VF2_512_256 1 "vector_operand")))]
6548 (define_expand "extend<sf2dfmode_lower><mode>2"
6549 [(set (match_operand:VF2_512_256 0 "register_operand")
6550 (float_extend:VF2_512_256
6551 (match_operand:<sf2dfmode> 1 "vector_operand")))]
;; SF -> DF widening insn with masking and SAE support.
6554 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6555 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6556 (float_extend:VF2_512_256
6557 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6558 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6559 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6560 [(set_attr "type" "ssecvt")
6561 (set_attr "prefix" "maybe_vex")
6562 (set_attr "mode" "<MODE>")])
;; Low half of a V8SF widened to V4DF; %x1 prints the xmm source view.
6564 (define_insn "*avx_cvtps2pd256_2"
6565 [(set (match_operand:V4DF 0 "register_operand" "=v")
6568 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6569 (parallel [(const_int 0) (const_int 1)
6570 (const_int 2) (const_int 3)]))))]
6572 "vcvtps2pd\t{%x1, %0|%0, %x1}"
6573 [(set_attr "type" "ssecvt")
6574 (set_attr "prefix" "vex")
6575 (set_attr "mode" "V4DF")])
;; Low 8 lanes of a V16SF widened to V8DF; %t1 prints the ymm source view.
6577 (define_insn "vec_unpacks_lo_v16sf"
6578 [(set (match_operand:V8DF 0 "register_operand" "=v")
6581 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6582 (parallel [(const_int 0) (const_int 1)
6583 (const_int 2) (const_int 3)
6584 (const_int 4) (const_int 5)
6585 (const_int 6) (const_int 7)]))))]
6587 "vcvtps2pd\t{%t1, %0|%0, %t1}"
6588 [(set_attr "type" "ssecvt")
6589 (set_attr "prefix" "evex")
6590 (set_attr "mode" "V8DF")])
;; Integer-vector -> mask-register conversion (vpmov*2m), byte/word
;; element sizes.  NOTE(review): some condition lines appear missing in
;; this group — confirm against upstream sse.md.
6592 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6593 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6594 (unspec:<avx512fmaskmode>
6595 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6596 UNSPEC_CVTINT2MASK))]
6598 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6599 [(set_attr "prefix" "evex")
6600 (set_attr "mode" "<sseinsnmode>")])
;; Same for dword/qword element sizes.
6602 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6603 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6604 (unspec:<avx512fmaskmode>
6605 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6606 UNSPEC_CVTINT2MASK))]
6608 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6609 [(set_attr "prefix" "evex")
6610 (set_attr "mode" "<sseinsnmode>")])
;; Mask -> integer-vector expander: materializes the all-ones/all-zeros
;; merge constants for the matching insn below.
6612 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6613 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6614 (vec_merge:VI12_AVX512VL
6617 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6620 operands[2] = CONSTM1_RTX (<MODE>mode);
6621 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn (vpmovm2b/w): mask bit set -> all-ones element.
6624 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6625 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6626 (vec_merge:VI12_AVX512VL
6627 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6628 (match_operand:VI12_AVX512VL 3 "const0_operand")
6629 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6631 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6632 [(set_attr "prefix" "evex")
6633 (set_attr "mode" "<sseinsnmode>")])
;; Dword/qword expander counterpart.
6635 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6636 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6637 (vec_merge:VI48_AVX512VL
6640 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6643 operands[2] = CONSTM1_RTX (<MODE>mode);
6644 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn; second alternative emulates vpmovm2d/q via a
;; zero-masked vpternlog (imm 0x81) when AVX512DQ is unavailable.
6647 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6648 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
6649 (vec_merge:VI48_AVX512VL
6650 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6651 (match_operand:VI48_AVX512VL 3 "const0_operand")
6652 (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
6655 vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
6656 vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
6657 [(set_attr "isa" "avx512dq,*")
6658 (set_attr "length_immediate" "0,1")
6659 (set_attr "prefix" "evex")
6660 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 low-half V4SF -> V2DF; %q1 reads 64 bits of a memory source.
6662 (define_insn "sse2_cvtps2pd<mask_name>"
6663 [(set (match_operand:V2DF 0 "register_operand" "=v")
6666 (match_operand:V4SF 1 "vector_operand" "vm")
6667 (parallel [(const_int 0) (const_int 1)]))))]
6668 "TARGET_SSE2 && <mask_avx512vl_condition>"
6669 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6670 [(set_attr "type" "ssecvt")
6671 (set_attr "amdfam10_decode" "direct")
6672 (set_attr "athlon_decode" "double")
6673 (set_attr "bdver1_decode" "double")
6674 (set_attr "prefix_data16" "0")
6675 (set_attr "prefix" "maybe_vex")
6676 (set_attr "mode" "V2DF")])
;; V2SF -> V2DF for MMX-in-SSE.
6678 (define_insn "extendv2sfv2df2"
6679 [(set (match_operand:V2DF 0 "register_operand" "=v")
6681 (match_operand:V2SF 1 "register_operand" "v")))]
6682 "TARGET_MMX_WITH_SSE"
6683 "%vcvtps2pd\t{%1, %0|%0, %1}"
6684 [(set_attr "type" "ssecvt")
6685 (set_attr "amdfam10_decode" "direct")
6686 (set_attr "athlon_decode" "double")
6687 (set_attr "bdver1_decode" "double")
6688 (set_attr "prefix_data16" "0")
6689 (set_attr "prefix" "maybe_vex")
6690 (set_attr "mode" "V2DF")])
;; Widen the HIGH pair of a V4SF: shuffle it down into a temporary, then
;; convert the low pair.  NOTE(review): interior lines appear missing —
;; confirm against upstream sse.md.
6692 (define_expand "vec_unpacks_hi_v4sf"
6697 (match_operand:V4SF 1 "vector_operand"))
6698 (parallel [(const_int 6) (const_int 7)
6699 (const_int 2) (const_int 3)])))
6700 (set (match_operand:V2DF 0 "register_operand")
6704 (parallel [(const_int 0) (const_int 1)]))))]
6706 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the high half of a V8SF to V4DF via an extracted temporary.
;; NOTE(review): interior lines appear missing in this group — confirm
;; against upstream sse.md.
6708 (define_expand "vec_unpacks_hi_v8sf"
6711 (match_operand:V8SF 1 "register_operand")
6712 (parallel [(const_int 4) (const_int 5)
6713 (const_int 6) (const_int 7)])))
6714 (set (match_operand:V4DF 0 "register_operand")
6718 "operands[2] = gen_reg_rtx (V4SFmode);")
;; High half of a V16SF widened to V8DF.
6720 (define_expand "vec_unpacks_hi_v16sf"
6723 (match_operand:V16SF 1 "register_operand")
6724 (parallel [(const_int 8) (const_int 9)
6725 (const_int 10) (const_int 11)
6726 (const_int 12) (const_int 13)
6727 (const_int 14) (const_int 15)])))
6728 (set (match_operand:V8DF 0 "register_operand")
6732 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Low-half widening expanders — the insn patterns above match directly,
;; so no C body is needed.
6734 (define_expand "vec_unpacks_lo_v4sf"
6735 [(set (match_operand:V2DF 0 "register_operand")
6738 (match_operand:V4SF 1 "vector_operand")
6739 (parallel [(const_int 0) (const_int 1)]))))]
6742 (define_expand "vec_unpacks_lo_v8sf"
6743 [(set (match_operand:V4DF 0 "register_operand")
6746 (match_operand:V8SF 1 "nonimmediate_operand")
6747 (parallel [(const_int 0) (const_int 1)
6748 (const_int 2) (const_int 3)]))))]
;; FP mode produced when float-converting the unpacked half of each
;; integer vector mode.
6751 (define_mode_attr sseunpackfltmode
6752 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
6753 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Generic signed/unsigned unpack-then-float expanders for HI/SI element
;; vectors: widen the requested half into a temporary integer vector,
;; then emit a FLOAT of it.  NOTE(review): condition/brace lines appear
;; missing from this extraction — confirm against upstream sse.md.
6755 (define_expand "vec_unpacks_float_hi_<mode>"
6756 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6757 (match_operand:VI2_AVX512F 1 "register_operand")]
6760 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6762 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6763 emit_insn (gen_rtx_SET (operands[0],
6764 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed low-half variant.
6768 (define_expand "vec_unpacks_float_lo_<mode>"
6769 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6770 (match_operand:VI2_AVX512F 1 "register_operand")]
6773 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6775 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6776 emit_insn (gen_rtx_SET (operands[0],
6777 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned high-half variant (zero-extending unpack).
6781 (define_expand "vec_unpacku_float_hi_<mode>"
6782 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6783 (match_operand:VI2_AVX512F 1 "register_operand")]
6786 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6788 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6789 emit_insn (gen_rtx_SET (operands[0],
6790 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned low-half variant.
6794 (define_expand "vec_unpacku_float_lo_<mode>"
6795 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6796 (match_operand:VI2_AVX512F 1 "register_operand")]
6799 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6801 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6802 emit_insn (gen_rtx_SET (operands[0],
6803 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed int -> double widening conversions (cvtdq2pd family).
;; vec_unpacks_float_hi_v4si: copy the high pair of operand 1 into the
;; low half of a V4SI temp (operand 2), then convert its low two ints
;; to V2DF.  The (2 3 2 3) selector duplicates the high pair.
6807 (define_expand "vec_unpacks_float_hi_v4si"
6810 (match_operand:V4SI 1 "vector_operand")
6811 (parallel [(const_int 2) (const_int 3)
6812 (const_int 2) (const_int 3)])))
6813 (set (match_operand:V2DF 0 "register_operand")
6817 (parallel [(const_int 0) (const_int 1)]))))]
6819 "operands[2] = gen_reg_rtx (V4SImode);")
;; vec_unpacks_float_lo_v4si: convert the low two ints directly.
6821 (define_expand "vec_unpacks_float_lo_v4si"
6822 [(set (match_operand:V2DF 0 "register_operand")
6825 (match_operand:V4SI 1 "vector_operand")
6826 (parallel [(const_int 0) (const_int 1)]))))]
;; vec_unpacks_float_hi_v8si: extract ints 4..7 into a V4SI temp
;; (operand 2) and convert to V4DF.
6829 (define_expand "vec_unpacks_float_hi_v8si"
6832 (match_operand:V8SI 1 "vector_operand")
6833 (parallel [(const_int 4) (const_int 5)
6834 (const_int 6) (const_int 7)])))
6835 (set (match_operand:V4DF 0 "register_operand")
6839 "operands[2] = gen_reg_rtx (V4SImode);")
;; vec_unpacks_float_lo_v8si: low four ints converted to V4DF in place.
6841 (define_expand "vec_unpacks_float_lo_v8si"
6842 [(set (match_operand:V4DF 0 "register_operand")
6845 (match_operand:V8SI 1 "nonimmediate_operand")
6846 (parallel [(const_int 0) (const_int 1)
6847 (const_int 2) (const_int 3)]))))]
;; vec_unpacks_float_hi_v16si: AVX-512 width — ints 8..15 via a V8SI
;; temp (operand 2), converted to V8DF.
6850 (define_expand "vec_unpacks_float_hi_v16si"
6853 (match_operand:V16SI 1 "nonimmediate_operand")
6854 (parallel [(const_int 8) (const_int 9)
6855 (const_int 10) (const_int 11)
6856 (const_int 12) (const_int 13)
6857 (const_int 14) (const_int 15)])))
6858 (set (match_operand:V8DF 0 "register_operand")
6862 "operands[2] = gen_reg_rtx (V8SImode);")
;; vec_unpacks_float_lo_v16si: low eight ints converted to V8DF.
6864 (define_expand "vec_unpacks_float_lo_v16si"
6865 [(set (match_operand:V8DF 0 "register_operand")
6868 (match_operand:V16SI 1 "nonimmediate_operand")
6869 (parallel [(const_int 0) (const_int 1)
6870 (const_int 2) (const_int 3)
6871 (const_int 4) (const_int 5)
6872 (const_int 6) (const_int 7)]))))]
;; Unsigned int -> double conversion without a native instruction:
;; convert as signed, then fix up lanes that came out negative by adding
;; 2^32 (TWO32r below is ldexp(1.0, 32)).  The fixup is
;;   mask = (result < 0.0); result += mask & 2^32.
;; vec_unpacku_float_hi_v4si: high pair of operand 1, via V4SI temp
;; (operand 5); operands 6..8 are V2DF temporaries for the fixup chain.
6875 (define_expand "vec_unpacku_float_hi_v4si"
6878 (match_operand:V4SI 1 "vector_operand")
6879 (parallel [(const_int 2) (const_int 3)
6880 (const_int 2) (const_int 3)])))
6885 (parallel [(const_int 0) (const_int 1)]))))
6887 (lt:V2DF (match_dup 6) (match_dup 3)))
6889 (and:V2DF (match_dup 7) (match_dup 4)))
6890 (set (match_operand:V2DF 0 "register_operand")
6891 (plus:V2DF (match_dup 6) (match_dup 8)))]
6894 REAL_VALUE_TYPE TWO32r;
6898 real_ldexp (&TWO32r, &dconst1, 32);
6899 x = const_double_from_real_value (TWO32r, DFmode);
6901 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6902 operands[4] = force_reg (V2DFmode,
6903 ix86_build_const_vector (V2DFmode, 1, x));
6905 operands[5] = gen_reg_rtx (V4SImode);
6907 for (i = 6; i < 9; i++)
6908 operands[i] = gen_reg_rtx (V2DFmode);
;; vec_unpacku_float_lo_v4si: same fixup for the low pair; no V4SI temp
;; is needed, so the temporaries shift down to operands 5..7.
6911 (define_expand "vec_unpacku_float_lo_v4si"
6915 (match_operand:V4SI 1 "vector_operand")
6916 (parallel [(const_int 0) (const_int 1)]))))
6918 (lt:V2DF (match_dup 5) (match_dup 3)))
6920 (and:V2DF (match_dup 6) (match_dup 4)))
6921 (set (match_operand:V2DF 0 "register_operand")
6922 (plus:V2DF (match_dup 5) (match_dup 7)))]
6925 REAL_VALUE_TYPE TWO32r;
6929 real_ldexp (&TWO32r, &dconst1, 32);
6930 x = const_double_from_real_value (TWO32r, DFmode);
6932 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6933 operands[4] = force_reg (V2DFmode,
6934 ix86_build_const_vector (V2DFmode, 1, x));
6936 for (i = 5; i < 8; i++)
6937 operands[i] = gen_reg_rtx (V2DFmode);
;; vec_unpacku_float_hi_v8si: unsigned high-half V8SI -> V4DF.  Extract
;; the high V4SI, convert as signed, then add 2^32 to lanes that compare
;; below 0.0 (signed-to-unsigned fixup, all emitted from C code here).
6940 (define_expand "vec_unpacku_float_hi_v8si"
6941 [(match_operand:V4DF 0 "register_operand")
6942 (match_operand:V8SI 1 "register_operand")]
6945 REAL_VALUE_TYPE TWO32r;
6949 real_ldexp (&TWO32r, &dconst1, 32);
6950 x = const_double_from_real_value (TWO32r, DFmode);
6952 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6953 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6954 tmp[5] = gen_reg_rtx (V4SImode);
6956 for (i = 2; i < 5; i++)
6957 tmp[i] = gen_reg_rtx (V4DFmode);
6958 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
6959 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
6960 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6961 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]))
6962 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; vec_unpacku_float_hi_v16si: AVX-512 variant — the negative-lane fixup
;; uses a mask register (k) and a masked add instead of and/add.
6966 (define_expand "vec_unpacku_float_hi_v16si"
6967 [(match_operand:V8DF 0 "register_operand")
6968 (match_operand:V16SI 1 "register_operand")]
6971 REAL_VALUE_TYPE TWO32r;
6974 real_ldexp (&TWO32r, &dconst1, 32);
6975 x = const_double_from_real_value (TWO32r, DFmode);
6977 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6978 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6979 tmp[2] = gen_reg_rtx (V8DFmode);
6980 tmp[3] = gen_reg_rtx (V8SImode);
6981 k = gen_reg_rtx (QImode);
6983 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
6984 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
6985 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
6986 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6987 emit_move_insn (operands[0], tmp[2]);
;; vec_unpacku_float_lo_v8si: unsigned low-half V8SI -> V4DF.  Converts
;; the low four ints with cvtdq2pd, then adds 2^32 to lanes whose signed
;; conversion came out negative (compare < 0, mask with 2^32, add).
6991 (define_expand "vec_unpacku_float_lo_v8si"
6992 [(match_operand:V4DF 0 "register_operand")
6993 (match_operand:V8SI 1 "nonimmediate_operand")]
6996 REAL_VALUE_TYPE TWO32r;
7000 real_ldexp (&TWO32r, &dconst1, 32);
7001 x = const_double_from_real_value (TWO32r, DFmode);
7003 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7004 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7006 for (i = 2; i < 5; i++)
7007 tmp[i] = gen_reg_rtx (V4DFmode);
7008 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
7009 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7010 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7011 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; vec_unpacku_float_lo_v16si: AVX-512 variant — masked add (via mask
;; register k) performs the 2^32 fixup only on the negative lanes.
7015 (define_expand "vec_unpacku_float_lo_v16si"
7016 [(match_operand:V8DF 0 "register_operand")
7017 (match_operand:V16SI 1 "nonimmediate_operand")]
7020 REAL_VALUE_TYPE TWO32r;
7023 real_ldexp (&TWO32r, &dconst1, 32);
7024 x = const_double_from_real_value (TWO32r, DFmode);
7026 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7027 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7028 tmp[2] = gen_reg_rtx (V8DFmode);
7029 k = gen_reg_rtx (QImode);
7031 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
7032 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7033 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7034 emit_move_insn (operands[0], tmp[2]);
;; vec_pack_trunc_<mode>: narrow two DFmode vectors (operands 1 and 2)
;; to SFmode halves (temps 3 and 4) and concatenate them into operand 0.
7038 (define_expand "vec_pack_trunc_<mode>"
7040 (float_truncate:<sf2dfmode>
7041 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
7043 (float_truncate:<sf2dfmode>
7044 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
7045 (set (match_operand:<ssePSmode> 0 "register_operand")
7046 (vec_concat:<ssePSmode>
7051 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
7052 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; vec_pack_trunc_v2df: pack two V2DF into one V4SF.  With AVX (and no
;; 128-bit preference) concatenate to V4DF and do one cvtpd2ps256;
;; otherwise convert each half with cvtpd2ps and join with movlhps.
7055 (define_expand "vec_pack_trunc_v2df"
7056 [(match_operand:V4SF 0 "register_operand")
7057 (match_operand:V2DF 1 "vector_operand")
7058 (match_operand:V2DF 2 "vector_operand")]
7063 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7065 tmp0 = gen_reg_rtx (V4DFmode);
7066 tmp1 = force_reg (V2DFmode, operands[1]);
7068 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7069 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
7073 tmp0 = gen_reg_rtx (V4SFmode);
7074 tmp1 = gen_reg_rtx (V4SFmode);
7076 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
7077 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
7078 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; vec_pack_sfix_trunc_*: truncating (round-toward-zero) double -> signed
;; int conversion of two source vectors, results concatenated.
;; vec_pack_sfix_trunc_v8df: two V8DF -> V16SI via cvttpd2dq + concat.
7083 (define_expand "vec_pack_sfix_trunc_v8df"
7084 [(match_operand:V16SI 0 "register_operand")
7085 (match_operand:V8DF 1 "nonimmediate_operand")
7086 (match_operand:V8DF 2 "nonimmediate_operand")]
7091 r1 = gen_reg_rtx (V8SImode);
7092 r2 = gen_reg_rtx (V8SImode);
7094 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
7095 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
7096 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; vec_pack_sfix_trunc_v4df: two V4DF -> V8SI.
7100 (define_expand "vec_pack_sfix_trunc_v4df"
7101 [(match_operand:V8SI 0 "register_operand")
7102 (match_operand:V4DF 1 "nonimmediate_operand")
7103 (match_operand:V4DF 2 "nonimmediate_operand")]
7108 r1 = gen_reg_rtx (V4SImode);
7109 r2 = gen_reg_rtx (V4SImode);
7111 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
7112 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
7113 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; vec_pack_sfix_trunc_v2df: two V2DF -> V4SI.  AVX path: concat to
;; V4DF, one truncating convert.  SSE2 path: convert each half (results
;; land in the low 64 bits) and interleave the low quadwords.
7117 (define_expand "vec_pack_sfix_trunc_v2df"
7118 [(match_operand:V4SI 0 "register_operand")
7119 (match_operand:V2DF 1 "vector_operand")
7120 (match_operand:V2DF 2 "vector_operand")]
7123 rtx tmp0, tmp1, tmp2;
7125 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7127 tmp0 = gen_reg_rtx (V4DFmode);
7128 tmp1 = force_reg (V2DFmode, operands[1]);
7130 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7131 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
7135 tmp0 = gen_reg_rtx (V4SImode);
7136 tmp1 = gen_reg_rtx (V4SImode);
7137 tmp2 = gen_reg_rtx (V2DImode);
7139 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
7140 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
7141 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7142 gen_lowpart (V2DImode, tmp0),
7143 gen_lowpart (V2DImode, tmp1)));
7144 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; ssepackfltmode: SImode vector mode holding the packed results of
;; converting two DFmode vectors of the given mode.
7149 (define_mode_attr ssepackfltmode
7150 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; vec_pack_ufix_trunc_<mode>: unsigned truncating double -> int pack.
;; V8DF has native unsigned converts (fixuns_trunc + concat).  Smaller
;; modes adjust each input into signed range first
;; (ix86_expand_adjust_ufix_to_sfix_si), do a signed pack, then XOR in
;; the sign-bit correction extracted from the adjustment (even/odd
;; extract; routed through V8SF lowparts pre-AVX2 for 256-bit).
7152 (define_expand "vec_pack_ufix_trunc_<mode>"
7153 [(match_operand:<ssepackfltmode> 0 "register_operand")
7154 (match_operand:VF2 1 "register_operand")
7155 (match_operand:VF2 2 "register_operand")]
7158 if (<MODE>mode == V8DFmode)
7162 r1 = gen_reg_rtx (V8SImode);
7163 r2 = gen_reg_rtx (V8SImode);
7165 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
7166 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
7167 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7172 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
7173 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
7174 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
7175 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
7176 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
7178 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
7179 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
7183 tmp[5] = gen_reg_rtx (V8SFmode);
7184 ix86_expand_vec_extract_even_odd (tmp[5],
7185 gen_lowpart (V8SFmode, tmp[2]),
7186 gen_lowpart (V8SFmode, tmp[3]), 0);
7187 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
7189 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
7190 operands[0], 0, OPTAB_DIRECT);
7191 if (tmp[6] != operands[0])
7192 emit_move_insn (operands[0], tmp[6]);
;; vec_pack_sfix_*: double -> signed int conversion using the current
;; rounding mode (cvtpd2dq, not the truncating cvttpd2dq), two sources
;; packed into one destination.
;; avx512f_vec_pack_sfix_v8df: two V8DF -> V16SI.
7198 (define_expand "avx512f_vec_pack_sfix_v8df"
7199 [(match_operand:V16SI 0 "register_operand")
7200 (match_operand:V8DF 1 "nonimmediate_operand")
7201 (match_operand:V8DF 2 "nonimmediate_operand")]
7206 r1 = gen_reg_rtx (V8SImode);
7207 r2 = gen_reg_rtx (V8SImode);
7209 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
7210 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
7211 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; vec_pack_sfix_v4df: two V4DF -> V8SI.
7215 (define_expand "vec_pack_sfix_v4df"
7216 [(match_operand:V8SI 0 "register_operand")
7217 (match_operand:V4DF 1 "nonimmediate_operand")
7218 (match_operand:V4DF 2 "nonimmediate_operand")]
7223 r1 = gen_reg_rtx (V4SImode);
7224 r2 = gen_reg_rtx (V4SImode);
7226 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
7227 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
7228 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; vec_pack_sfix_v2df: two V2DF -> V4SI; AVX concat-and-convert fast
;; path, else per-half cvtpd2dq with a low-quadword interleave.
7232 (define_expand "vec_pack_sfix_v2df"
7233 [(match_operand:V4SI 0 "register_operand")
7234 (match_operand:V2DF 1 "vector_operand")
7235 (match_operand:V2DF 2 "vector_operand")]
7238 rtx tmp0, tmp1, tmp2;
7240 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7242 tmp0 = gen_reg_rtx (V4DFmode);
7243 tmp1 = force_reg (V2DFmode, operands[1]);
7245 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7246 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
7250 tmp0 = gen_reg_rtx (V4SImode);
7251 tmp1 = gen_reg_rtx (V4SImode);
7252 tmp2 = gen_reg_rtx (V2DImode);
7254 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
7255 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
7256 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7257 gen_lowpart (V2DImode, tmp0),
7258 gen_lowpart (V2DImode, tmp1)));
7259 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7264 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7266 ;; Parallel single-precision floating point element swizzling
7268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_movhlps_exp: wrapper expander that legitimizes the operands
;; (ix86_fixup_binary_operands) before emitting sse_movhlps, copying to
;; the real destination afterwards if a temporary was substituted.
7270 (define_expand "sse_movhlps_exp"
7271 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7274 (match_operand:V4SF 1 "nonimmediate_operand")
7275 (match_operand:V4SF 2 "nonimmediate_operand"))
7276 (parallel [(const_int 6)
7282 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7284 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
7286 /* Fix up the destination if needed. */
7287 if (dst != operands[0])
7288 emit_move_insn (operands[0], dst);
;; sse_movhlps: MOVHLPS — low half of the result comes from the high
;; half of operand 2, high half kept from operand 1.  Alternatives cover
;; SSE/AVX register forms, memory-source movlps forms, and a
;; memory-destination movhps store; at most one operand may be a MEM.
7293 (define_insn "sse_movhlps"
7294 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7297 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7298 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
7299 (parallel [(const_int 6)
7303 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7305 movhlps\t{%2, %0|%0, %2}
7306 vmovhlps\t{%2, %1, %0|%0, %1, %2}
7307 movlps\t{%H2, %0|%0, %H2}
7308 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
7309 %vmovhps\t{%2, %0|%q0, %2}"
7310 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7311 (set_attr "type" "ssemov")
7312 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7313 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; sse_movlhps_exp: operand-legitimizing wrapper around sse_movlhps,
;; mirroring sse_movhlps_exp above.
7315 (define_expand "sse_movlhps_exp"
7316 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7319 (match_operand:V4SF 1 "nonimmediate_operand")
7320 (match_operand:V4SF 2 "nonimmediate_operand"))
7321 (parallel [(const_int 0)
7327 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7329 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
7331 /* Fix up the destination if needed. */
7332 if (dst != operands[0])
7333 emit_move_insn (operands[0], dst);
;; sse_movlhps: MOVLHPS — low half kept from operand 1, high half of the
;; result comes from the low half of operand 2.  Memory alternatives map
;; onto movhps loads and a movlps store into the high quadword.
7338 (define_insn "sse_movlhps"
7339 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7342 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7343 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
7344 (parallel [(const_int 0)
7348 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
7350 movlhps\t{%2, %0|%0, %2}
7351 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7352 movhps\t{%2, %0|%0, %q2}
7353 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7354 %vmovlps\t{%2, %H0|%H0, %2}"
7355 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7356 (set_attr "type" "ssemov")
7357 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7358 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; avx512f_unpckhps512: VUNPCKHPS on V16SF (optionally masked) —
;; interleaves the high pair of each 128-bit lane of operands 1 and 2,
;; as the vec_select indices (2,18 3,19 6,22 ...) spell out per lane.
7360 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
7361 [(set (match_operand:V16SF 0 "register_operand" "=v")
7364 (match_operand:V16SF 1 "register_operand" "v")
7365 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7366 (parallel [(const_int 2) (const_int 18)
7367 (const_int 3) (const_int 19)
7368 (const_int 6) (const_int 22)
7369 (const_int 7) (const_int 23)
7370 (const_int 10) (const_int 26)
7371 (const_int 11) (const_int 27)
7372 (const_int 14) (const_int 30)
7373 (const_int 15) (const_int 31)])))]
7375 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7376 [(set_attr "type" "sselog")
7377 (set_attr "prefix" "evex")
7378 (set_attr "mode" "V16SF")])
7380 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhps256: VUNPCKHPS on V8SF — per-128-bit-lane high interleave
;; (indices 2,10 3,11 | 6,14 7,15), optionally masked under AVX512VL.
7381 (define_insn "avx_unpckhps256<mask_name>"
7382 [(set (match_operand:V8SF 0 "register_operand" "=v")
7385 (match_operand:V8SF 1 "register_operand" "v")
7386 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7387 (parallel [(const_int 2) (const_int 10)
7388 (const_int 3) (const_int 11)
7389 (const_int 6) (const_int 14)
7390 (const_int 7) (const_int 15)])))]
7391 "TARGET_AVX && <mask_avx512vl_condition>"
7392 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7393 [(set_attr "type" "sselog")
7394 (set_attr "prefix" "vex")
7395 (set_attr "mode" "V8SF")])
;; vec_interleave_highv8sf: true cross-lane high interleave for V8SF.
;; Because 256-bit vunpck{l,h}ps work within 128-bit lanes only, this
;; expands to an unpcklps (temp 3) and an unpckhps (temp 4) followed by
;; a permute selecting the halves that together form the high interleave.
7397 (define_expand "vec_interleave_highv8sf"
7401 (match_operand:V8SF 1 "register_operand")
7402 (match_operand:V8SF 2 "nonimmediate_operand"))
7403 (parallel [(const_int 0) (const_int 8)
7404 (const_int 1) (const_int 9)
7405 (const_int 4) (const_int 12)
7406 (const_int 5) (const_int 13)])))
7412 (parallel [(const_int 2) (const_int 10)
7413 (const_int 3) (const_int 11)
7414 (const_int 6) (const_int 14)
7415 (const_int 7) (const_int 15)])))
7416 (set (match_operand:V8SF 0 "register_operand")
7421 (parallel [(const_int 4) (const_int 5)
7422 (const_int 6) (const_int 7)
7423 (const_int 12) (const_int 13)
7424 (const_int 14) (const_int 15)])))]
7427 operands[3] = gen_reg_rtx (V8SFmode);
7428 operands[4] = gen_reg_rtx (V8SFmode);
;; vec_interleave_highv4sf: UNPCKHPS on V4SF — interleave the high pairs
;; of operands 1 and 2 (indices 2,6 3,7); optionally masked.
7431 (define_insn "vec_interleave_highv4sf<mask_name>"
7432 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7435 (match_operand:V4SF 1 "register_operand" "0,v")
7436 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7437 (parallel [(const_int 2) (const_int 6)
7438 (const_int 3) (const_int 7)])))]
7439 "TARGET_SSE && <mask_avx512vl_condition>"
7441 unpckhps\t{%2, %0|%0, %2}
7442 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7443 [(set_attr "isa" "noavx,avx")
7444 (set_attr "type" "sselog")
7445 (set_attr "prefix" "orig,vex")
7446 (set_attr "mode" "V4SF")])
;; avx512f_unpcklps512: VUNPCKLPS on V16SF (optionally masked) — per
;; 128-bit-lane low interleave, per the (0,16 1,17 4,20 ...) indices.
7448 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7449 [(set (match_operand:V16SF 0 "register_operand" "=v")
7452 (match_operand:V16SF 1 "register_operand" "v")
7453 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7454 (parallel [(const_int 0) (const_int 16)
7455 (const_int 1) (const_int 17)
7456 (const_int 4) (const_int 20)
7457 (const_int 5) (const_int 21)
7458 (const_int 8) (const_int 24)
7459 (const_int 9) (const_int 25)
7460 (const_int 12) (const_int 28)
7461 (const_int 13) (const_int 29)])))]
7463 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7464 [(set_attr "type" "sselog")
7465 (set_attr "prefix" "evex")
7466 (set_attr "mode" "V16SF")])
7468 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpcklps256: VUNPCKLPS on V8SF — per-lane low interleave
;; (indices 0,8 1,9 | 4,12 5,13), optionally masked under AVX512VL.
7469 (define_insn "avx_unpcklps256<mask_name>"
7470 [(set (match_operand:V8SF 0 "register_operand" "=v")
7473 (match_operand:V8SF 1 "register_operand" "v")
7474 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7475 (parallel [(const_int 0) (const_int 8)
7476 (const_int 1) (const_int 9)
7477 (const_int 4) (const_int 12)
7478 (const_int 5) (const_int 13)])))]
7479 "TARGET_AVX && <mask_avx512vl_condition>"
7480 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7481 [(set_attr "type" "sselog")
7482 (set_attr "prefix" "vex")
7483 (set_attr "mode" "V8SF")])
;; unpcklps128_mask: explicitly-masked 128-bit VUNPCKLPS — vec_merge of
;; the low interleave with operand 3 (pass-through or zero, "0C") under
;; mask register operand 4 ("Yk").
7485 (define_insn "unpcklps128_mask"
7486 [(set (match_operand:V4SF 0 "register_operand" "=v")
7490 (match_operand:V4SF 1 "register_operand" "v")
7491 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7492 (parallel [(const_int 0) (const_int 4)
7493 (const_int 1) (const_int 5)]))
7494 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7495 (match_operand:QI 4 "register_operand" "Yk")))]
7497 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7498 [(set_attr "type" "sselog")
7499 (set_attr "prefix" "evex")
7500 (set_attr "mode" "V4SF")])
;; vec_interleave_lowv8sf: true cross-lane low interleave for V8SF,
;; built like vec_interleave_highv8sf — per-lane unpcklps (temp 3) and
;; unpckhps (temp 4), then a permute picking the low-interleave halves.
7502 (define_expand "vec_interleave_lowv8sf"
7506 (match_operand:V8SF 1 "register_operand")
7507 (match_operand:V8SF 2 "nonimmediate_operand"))
7508 (parallel [(const_int 0) (const_int 8)
7509 (const_int 1) (const_int 9)
7510 (const_int 4) (const_int 12)
7511 (const_int 5) (const_int 13)])))
7517 (parallel [(const_int 2) (const_int 10)
7518 (const_int 3) (const_int 11)
7519 (const_int 6) (const_int 14)
7520 (const_int 7) (const_int 15)])))
7521 (set (match_operand:V8SF 0 "register_operand")
7526 (parallel [(const_int 0) (const_int 1)
7527 (const_int 2) (const_int 3)
7528 (const_int 8) (const_int 9)
7529 (const_int 10) (const_int 11)])))]
7532 operands[3] = gen_reg_rtx (V8SFmode);
7533 operands[4] = gen_reg_rtx (V8SFmode);
;; vec_interleave_lowv4sf: UNPCKLPS on V4SF — interleave the low pairs
;; of operands 1 and 2 (indices 0,4 1,5).
7536 (define_insn "vec_interleave_lowv4sf"
7537 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7540 (match_operand:V4SF 1 "register_operand" "0,v")
7541 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7542 (parallel [(const_int 0) (const_int 4)
7543 (const_int 1) (const_int 5)])))]
7546 unpcklps\t{%2, %0|%0, %2}
7547 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7548 [(set_attr "isa" "noavx,avx")
7549 (set_attr "type" "sselog")
7550 (set_attr "prefix" "orig,maybe_evex")
7551 (set_attr "mode" "V4SF")])
7553 ;; These are modeled with the same vec_concat as the others so that we
7554 ;; capture users of shufps that can use the new instructions
;; avx_movshdup256: VMOVSHDUP on V8SF — duplicate each odd-indexed
;; element into the even slot below it (1,1 3,3 5,5 7,7).
7555 (define_insn "avx_movshdup256<mask_name>"
7556 [(set (match_operand:V8SF 0 "register_operand" "=v")
7559 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7561 (parallel [(const_int 1) (const_int 1)
7562 (const_int 3) (const_int 3)
7563 (const_int 5) (const_int 5)
7564 (const_int 7) (const_int 7)])))]
7565 "TARGET_AVX && <mask_avx512vl_condition>"
7566 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7567 [(set_attr "type" "sse")
7568 (set_attr "prefix" "vex")
7569 (set_attr "mode" "V8SF")])
;; sse3_movshdup: 128-bit MOVSHDUP (SSE3), same odd-element duplication
;; on V4SF; "%v" emits the VEX form when AVX is enabled.
7571 (define_insn "sse3_movshdup<mask_name>"
7572 [(set (match_operand:V4SF 0 "register_operand" "=v")
7575 (match_operand:V4SF 1 "vector_operand" "vBm")
7577 (parallel [(const_int 1)
7581 "TARGET_SSE3 && <mask_avx512vl_condition>"
7582 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7583 [(set_attr "type" "sse")
7584 (set_attr "prefix_rep" "1")
7585 (set_attr "prefix" "maybe_vex")
7586 (set_attr "mode" "V4SF")])
;; avx512f_movshdup512: 512-bit VMOVSHDUP on V16SF, optionally masked.
7588 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7589 [(set (match_operand:V16SF 0 "register_operand" "=v")
7592 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7594 (parallel [(const_int 1) (const_int 1)
7595 (const_int 3) (const_int 3)
7596 (const_int 5) (const_int 5)
7597 (const_int 7) (const_int 7)
7598 (const_int 9) (const_int 9)
7599 (const_int 11) (const_int 11)
7600 (const_int 13) (const_int 13)
7601 (const_int 15) (const_int 15)])))]
7603 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7604 [(set_attr "type" "sse")
7605 (set_attr "prefix" "evex")
7606 (set_attr "mode" "V16SF")])
;; avx_movsldup256: VMOVSLDUP on V8SF — duplicate each even-indexed
;; element into the odd slot above it (0,0 2,2 4,4 6,6).
7608 (define_insn "avx_movsldup256<mask_name>"
7609 [(set (match_operand:V8SF 0 "register_operand" "=v")
7612 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7614 (parallel [(const_int 0) (const_int 0)
7615 (const_int 2) (const_int 2)
7616 (const_int 4) (const_int 4)
7617 (const_int 6) (const_int 6)])))]
7618 "TARGET_AVX && <mask_avx512vl_condition>"
7619 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7620 [(set_attr "type" "sse")
7621 (set_attr "prefix" "vex")
7622 (set_attr "mode" "V8SF")])
;; sse3_movsldup: 128-bit MOVSLDUP (SSE3), even-element duplication on
;; V4SF; "%v" selects the VEX-encoded form under AVX.
7624 (define_insn "sse3_movsldup<mask_name>"
7625 [(set (match_operand:V4SF 0 "register_operand" "=v")
7628 (match_operand:V4SF 1 "vector_operand" "vBm")
7630 (parallel [(const_int 0)
7634 "TARGET_SSE3 && <mask_avx512vl_condition>"
7635 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7636 [(set_attr "type" "sse")
7637 (set_attr "prefix_rep" "1")
7638 (set_attr "prefix" "maybe_vex")
7639 (set_attr "mode" "V4SF")])
;; avx512f_movsldup512: 512-bit VMOVSLDUP on V16SF, optionally masked.
7641 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7642 [(set (match_operand:V16SF 0 "register_operand" "=v")
7645 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7647 (parallel [(const_int 0) (const_int 0)
7648 (const_int 2) (const_int 2)
7649 (const_int 4) (const_int 4)
7650 (const_int 6) (const_int 6)
7651 (const_int 8) (const_int 8)
7652 (const_int 10) (const_int 10)
7653 (const_int 12) (const_int 12)
7654 (const_int 14) (const_int 14)])))]
7656 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7657 [(set_attr "type" "sse")
7658 (set_attr "prefix" "evex")
7659 (set_attr "mode" "V16SF")])
;; avx_shufps256: expander for the 8-bit VSHUFPS immediate on V8SF.
;; Decodes the immediate into eight per-lane element indices (the same
;; 2-bit field selects the matching element in both 128-bit lanes, hence
;; the +4/+8/+12 offsets) and emits avx_shufps256_1.
7661 (define_expand "avx_shufps256<mask_expand4_name>"
7662 [(match_operand:V8SF 0 "register_operand")
7663 (match_operand:V8SF 1 "register_operand")
7664 (match_operand:V8SF 2 "nonimmediate_operand")
7665 (match_operand:SI 3 "const_int_operand")]
7668 int mask = INTVAL (operands[3]);
7669 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7672 GEN_INT ((mask >> 0) & 3),
7673 GEN_INT ((mask >> 2) & 3),
7674 GEN_INT (((mask >> 4) & 3) + 8),
7675 GEN_INT (((mask >> 6) & 3) + 8),
7676 GEN_INT (((mask >> 0) & 3) + 4),
7677 GEN_INT (((mask >> 2) & 3) + 4),
7678 GEN_INT (((mask >> 4) & 3) + 12),
7679 GEN_INT (((mask >> 6) & 3) + 12)
7680 <mask_expand4_args>));
7684 ;; One bit in mask selects 2 elements.
;; avx_shufps256_1: VSHUFPS insn pattern for V8SF.  The condition checks
;; that the high-lane indices (operands 7-10) mirror the low-lane ones,
;; and the output template re-encodes operands 3-6 into the immediate.
7685 (define_insn "avx_shufps256_1<mask_name>"
7686 [(set (match_operand:V8SF 0 "register_operand" "=v")
7689 (match_operand:V8SF 1 "register_operand" "v")
7690 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7691 (parallel [(match_operand 3 "const_0_to_3_operand" )
7692 (match_operand 4 "const_0_to_3_operand" )
7693 (match_operand 5 "const_8_to_11_operand" )
7694 (match_operand 6 "const_8_to_11_operand" )
7695 (match_operand 7 "const_4_to_7_operand" )
7696 (match_operand 8 "const_4_to_7_operand" )
7697 (match_operand 9 "const_12_to_15_operand")
7698 (match_operand 10 "const_12_to_15_operand")])))]
7700 && <mask_avx512vl_condition>
7701 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7702 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7703 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7704 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7707 mask = INTVAL (operands[3]);
7708 mask |= INTVAL (operands[4]) << 2;
7709 mask |= (INTVAL (operands[5]) - 8) << 4;
7710 mask |= (INTVAL (operands[6]) - 8) << 6;
7711 operands[3] = GEN_INT (mask);
7713 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7715 [(set_attr "type" "sseshuf")
7716 (set_attr "length_immediate" "1")
7717 (set_attr "prefix" "<mask_prefix>")
7718 (set_attr "mode" "V8SF")])
;; sse_shufps: expander for the 8-bit SHUFPS immediate on V4SF.  Fields
;; 0-1 of the immediate pick from operand 1, fields 2-3 from operand 2
;; (hence the +4 offsets); emits sse_shufps_v4sf.
7720 (define_expand "sse_shufps<mask_expand4_name>"
7721 [(match_operand:V4SF 0 "register_operand")
7722 (match_operand:V4SF 1 "register_operand")
7723 (match_operand:V4SF 2 "vector_operand")
7724 (match_operand:SI 3 "const_int_operand")]
7727 int mask = INTVAL (operands[3]);
7728 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
7731 GEN_INT ((mask >> 0) & 3),
7732 GEN_INT ((mask >> 2) & 3),
7733 GEN_INT (((mask >> 4) & 3) + 4),
7734 GEN_INT (((mask >> 6) & 3) + 4)
7735 <mask_expand4_args>));
;; sse_shufps_v4sf_mask: masked VSHUFPS — vec_merge of the shuffle with
;; operand 7 (pass-through or zero, "0C") under mask register operand 8
;; ("Yk"); the template re-encodes operands 3-6 into the immediate.
7739 (define_insn "sse_shufps_v4sf_mask"
7740 [(set (match_operand:V4SF 0 "register_operand" "=v")
7744 (match_operand:V4SF 1 "register_operand" "v")
7745 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7746 (parallel [(match_operand 3 "const_0_to_3_operand")
7747 (match_operand 4 "const_0_to_3_operand")
7748 (match_operand 5 "const_4_to_7_operand")
7749 (match_operand 6 "const_4_to_7_operand")]))
7750 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
7751 (match_operand:QI 8 "register_operand" "Yk")))]
7755 mask |= INTVAL (operands[3]) << 0;
7756 mask |= INTVAL (operands[4]) << 2;
7757 mask |= (INTVAL (operands[5]) - 4) << 4;
7758 mask |= (INTVAL (operands[6]) - 4) << 6;
7759 operands[3] = GEN_INT (mask);
7761 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
7763 [(set_attr "type" "sseshuf")
7764 (set_attr "length_immediate" "1")
7765 (set_attr "prefix" "evex")
7766 (set_attr "mode" "V4SF")])
;; sse_shufps_<mode>: SHUFPS/VSHUFPS on any 128-bit 4-element mode
;; (VI4F_128 covers V4SF and V4SI).  Operands 3-4 index operand 1,
;; operands 5-6 index operand 2 (offset by 4); the output code packs
;; them back into the instruction's 8-bit immediate.
7768 (define_insn "sse_shufps_<mode>"
7769 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
7770 (vec_select:VI4F_128
7771 (vec_concat:<ssedoublevecmode>
7772 (match_operand:VI4F_128 1 "register_operand" "0,v")
7773 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
7774 (parallel [(match_operand 3 "const_0_to_3_operand")
7775 (match_operand 4 "const_0_to_3_operand")
7776 (match_operand 5 "const_4_to_7_operand")
7777 (match_operand 6 "const_4_to_7_operand")])))]
7781 mask |= INTVAL (operands[3]) << 0;
7782 mask |= INTVAL (operands[4]) << 2;
7783 mask |= (INTVAL (operands[5]) - 4) << 4;
7784 mask |= (INTVAL (operands[6]) - 4) << 6;
7785 operands[3] = GEN_INT (mask);
7787 switch (which_alternative)
7790 return "shufps\t{%3, %2, %0|%0, %2, %3}";
7792 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7797 [(set_attr "isa" "noavx,avx")
7798 (set_attr "type" "sseshuf")
7799 (set_attr "length_immediate" "1")
7800 (set_attr "prefix" "orig,maybe_evex")
7801 (set_attr "mode" "V4SF")])
;; sse_storehps: extract the high two floats of a V4SF (indices 2,3)
;; into a V2SF destination — movhps store, movhlps reg-to-reg, or a
;; movlps load of the high quadword; not both operands may be MEMs.
7803 (define_insn "sse_storehps"
7804 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7806 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7807 (parallel [(const_int 2) (const_int 3)])))]
7808 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7810 %vmovhps\t{%1, %0|%q0, %1}
7811 %vmovhlps\t{%1, %d0|%d0, %1}
7812 %vmovlps\t{%H1, %d0|%d0, %H1}"
7813 [(set_attr "type" "ssemov")
7814 (set_attr "prefix" "maybe_vex")
7815 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps_exp: operand-legitimizing wrapper around sse_loadhps.
7817 (define_expand "sse_loadhps_exp"
7818 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7821 (match_operand:V4SF 1 "nonimmediate_operand")
7822 (parallel [(const_int 0) (const_int 1)]))
7823 (match_operand:V2SF 2 "nonimmediate_operand")))]
7826 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7828 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7830 /* Fix up the destination if needed. */
7831 if (dst != operands[0])
7832 emit_move_insn (operands[0], dst);
;; sse_loadhps: keep the low half of operand 1, replace the high half
;; with the V2SF operand 2 — movhps load, movlhps reg form, or a movlps
;; store into the high quadword of a memory destination.
7837 (define_insn "sse_loadhps"
7838 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7841 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7842 (parallel [(const_int 0) (const_int 1)]))
7843 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
7846 movhps\t{%2, %0|%0, %q2}
7847 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7848 movlhps\t{%2, %0|%0, %2}
7849 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7850 %vmovlps\t{%2, %H0|%H0, %2}"
7851 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7852 (set_attr "type" "ssemov")
7853 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7854 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; sse_storelps: extract the low two floats of a V4SF (indices 0,1)
;; into a V2SF destination — movlps store, full movaps reg copy, or a
;; movlps load; not both operands may be MEMs.
7856 (define_insn "sse_storelps"
7857 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7859 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
7860 (parallel [(const_int 0) (const_int 1)])))]
7861 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7863 %vmovlps\t{%1, %0|%q0, %1}
7864 %vmovaps\t{%1, %0|%0, %1}
7865 %vmovlps\t{%1, %d0|%d0, %q1}"
7866 [(set_attr "type" "ssemov")
7867 (set_attr "prefix" "maybe_vex")
7868 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps_exp: operand-legitimizing wrapper around sse_loadlps.
7870 (define_expand "sse_loadlps_exp"
7871 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7873 (match_operand:V2SF 2 "nonimmediate_operand")
7875 (match_operand:V4SF 1 "nonimmediate_operand")
7876 (parallel [(const_int 2) (const_int 3)]))))]
7879 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7881 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
7883 /* Fix up the destination if needed. */
7884 if (dst != operands[0])
7885 emit_move_insn (operands[0], dst);
;; sse_loadlps: replace the low half of operand 1 with the V2SF operand
;; 2, keeping the high half — shufps 0xe4 for reg sources, movlps for a
;; memory source, or a movlps store of the low quadword.
7890 (define_insn "sse_loadlps"
7891 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7893 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
7895 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
7896 (parallel [(const_int 2) (const_int 3)]))))]
7899 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
7900 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
7901 movlps\t{%2, %0|%0, %q2}
7902 vmovlps\t{%2, %1, %0|%0, %1, %q2}
7903 %vmovlps\t{%2, %0|%q0, %2}
7904 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7905 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
7906 (set (attr "length_immediate")
7907 (if_then_else (eq_attr "alternative" "0,1")
7909 (const_string "*")))
7910 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7911 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7913 (define_insn "sse_movss"
7914 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7916 (match_operand:V4SF 2 "register_operand" " x,v")
7917 (match_operand:V4SF 1 "register_operand" " 0,v")
7921 movss\t{%2, %0|%0, %2}
7922 vmovss\t{%2, %1, %0|%0, %1, %2}"
7923 [(set_attr "isa" "noavx,avx")
7924 (set_attr "type" "ssemov")
7925 (set_attr "prefix" "orig,maybe_evex")
7926 (set_attr "mode" "SF")])
7928 (define_insn "avx2_vec_dup<mode>"
7929 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
7930 (vec_duplicate:VF1_128_256
7932 (match_operand:V4SF 1 "register_operand" "v")
7933 (parallel [(const_int 0)]))))]
7935 "vbroadcastss\t{%1, %0|%0, %1}"
7936 [(set_attr "type" "sselog1")
7937 (set_attr "prefix" "maybe_evex")
7938 (set_attr "mode" "<MODE>")])
7940 (define_insn "avx2_vec_dupv8sf_1"
7941 [(set (match_operand:V8SF 0 "register_operand" "=v")
7944 (match_operand:V8SF 1 "register_operand" "v")
7945 (parallel [(const_int 0)]))))]
7947 "vbroadcastss\t{%x1, %0|%0, %x1}"
7948 [(set_attr "type" "sselog1")
7949 (set_attr "prefix" "maybe_evex")
7950 (set_attr "mode" "V8SF")])
7952 (define_insn "avx512f_vec_dup<mode>_1"
7953 [(set (match_operand:VF_512 0 "register_operand" "=v")
7954 (vec_duplicate:VF_512
7955 (vec_select:<ssescalarmode>
7956 (match_operand:VF_512 1 "register_operand" "v")
7957 (parallel [(const_int 0)]))))]
7959 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
7960 [(set_attr "type" "sselog1")
7961 (set_attr "prefix" "evex")
7962 (set_attr "mode" "<MODE>")])
7964 ;; Although insertps takes register source, we prefer
7965 ;; unpcklps with register source since it is shorter.
7966 (define_insn "*vec_concatv2sf_sse4_1"
7967 [(set (match_operand:V2SF 0 "register_operand"
7968 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
7970 (match_operand:SF 1 "nonimmediate_operand"
7971 " 0, 0,Yv, 0,0, v,m, 0 , m")
7972 (match_operand:SF 2 "nonimm_or_0_operand"
7973 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
7974 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7976 unpcklps\t{%2, %0|%0, %2}
7977 unpcklps\t{%2, %0|%0, %2}
7978 vunpcklps\t{%2, %1, %0|%0, %1, %2}
7979 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7980 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7981 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
7982 %vmovss\t{%1, %0|%0, %1}
7983 punpckldq\t{%2, %0|%0, %2}
7984 movd\t{%1, %0|%0, %1}"
7986 (cond [(eq_attr "alternative" "0,1,3,4")
7987 (const_string "noavx")
7988 (eq_attr "alternative" "2,5")
7989 (const_string "avx")
7991 (const_string "*")))
7993 (cond [(eq_attr "alternative" "6")
7994 (const_string "ssemov")
7995 (eq_attr "alternative" "7")
7996 (const_string "mmxcvt")
7997 (eq_attr "alternative" "8")
7998 (const_string "mmxmov")
8000 (const_string "sselog")))
8001 (set (attr "mmx_isa")
8002 (if_then_else (eq_attr "alternative" "7,8")
8003 (const_string "native")
8004 (const_string "*")))
8005 (set (attr "prefix_data16")
8006 (if_then_else (eq_attr "alternative" "3,4")
8008 (const_string "*")))
8009 (set (attr "prefix_extra")
8010 (if_then_else (eq_attr "alternative" "3,4,5")
8012 (const_string "*")))
8013 (set (attr "length_immediate")
8014 (if_then_else (eq_attr "alternative" "3,4,5")
8016 (const_string "*")))
8017 (set (attr "prefix")
8018 (cond [(eq_attr "alternative" "2,5")
8019 (const_string "maybe_evex")
8020 (eq_attr "alternative" "6")
8021 (const_string "maybe_vex")
8023 (const_string "orig")))
8024 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
8026 ;; ??? In theory we can match memory for the MMX alternative, but allowing
8027 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
8028 ;; alternatives pretty much forces the MMX alternative to be chosen.
8029 (define_insn "*vec_concatv2sf_sse"
8030 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
8032 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
8033 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
8036 unpcklps\t{%2, %0|%0, %2}
8037 movss\t{%1, %0|%0, %1}
8038 punpckldq\t{%2, %0|%0, %2}
8039 movd\t{%1, %0|%0, %1}"
8040 [(set_attr "mmx_isa" "*,*,native,native")
8041 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
8042 (set_attr "mode" "V4SF,SF,DI,DI")])
8044 (define_insn "*vec_concatv4sf"
8045 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
8047 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
8048 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
8051 movlhps\t{%2, %0|%0, %2}
8052 vmovlhps\t{%2, %1, %0|%0, %1, %2}
8053 movhps\t{%2, %0|%0, %q2}
8054 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
8055 [(set_attr "isa" "noavx,avx,noavx,avx")
8056 (set_attr "type" "ssemov")
8057 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
8058 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
8060 (define_insn "*vec_concatv4sf_0"
8061 [(set (match_operand:V4SF 0 "register_operand" "=v")
8063 (match_operand:V2SF 1 "nonimmediate_operand" "vm")
8064 (match_operand:V2SF 2 "const0_operand" " C")))]
8066 "%vmovq\t{%1, %0|%0, %1}"
8067 [(set_attr "type" "ssemov")
8068 (set_attr "prefix" "maybe_vex")
8069 (set_attr "mode" "DF")])
8071 ;; Avoid combining registers from different units in a single alternative,
8072 ;; see comment above inline_secondary_memory_needed function in i386.c
8073 (define_insn "vec_set<mode>_0"
8074 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
8075 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
8077 (vec_duplicate:VI4F_128
8078 (match_operand:<ssescalarmode> 2 "general_operand"
8079 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
8080 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
8081 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
8085 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8086 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8087 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
8088 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
8089 %vmovd\t{%2, %0|%0, %2}
8090 movss\t{%2, %0|%0, %2}
8091 movss\t{%2, %0|%0, %2}
8092 vmovss\t{%2, %1, %0|%0, %1, %2}
8093 pinsrd\t{$0, %2, %0|%0, %2, 0}
8094 pinsrd\t{$0, %2, %0|%0, %2, 0}
8095 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
8100 (cond [(eq_attr "alternative" "0,1,8,9")
8101 (const_string "sse4_noavx")
8102 (eq_attr "alternative" "2,7,10")
8103 (const_string "avx")
8104 (eq_attr "alternative" "3,4")
8105 (const_string "sse2")
8106 (eq_attr "alternative" "5,6")
8107 (const_string "noavx")
8109 (const_string "*")))
8111 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
8112 (const_string "sselog")
8113 (eq_attr "alternative" "12")
8114 (const_string "imov")
8115 (eq_attr "alternative" "13")
8116 (const_string "fmov")
8118 (const_string "ssemov")))
8119 (set (attr "prefix_extra")
8120 (if_then_else (eq_attr "alternative" "8,9,10")
8122 (const_string "*")))
8123 (set (attr "length_immediate")
8124 (if_then_else (eq_attr "alternative" "8,9,10")
8126 (const_string "*")))
8127 (set (attr "prefix")
8128 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
8129 (const_string "orig")
8130 (eq_attr "alternative" "2")
8131 (const_string "maybe_evex")
8132 (eq_attr "alternative" "3,4")
8133 (const_string "maybe_vex")
8134 (eq_attr "alternative" "7,10")
8135 (const_string "vex")
8137 (const_string "*")))
8138 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
8139 (set (attr "preferred_for_speed")
8140 (cond [(eq_attr "alternative" "4")
8141 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8143 (symbol_ref "true")))])
8145 ;; A subset is vec_setv4sf.
8146 (define_insn "*vec_setv4sf_sse4_1"
8147 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8150 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
8151 (match_operand:V4SF 1 "register_operand" "0,0,v")
8152 (match_operand:SI 3 "const_int_operand")))]
8154 && ((unsigned) exact_log2 (INTVAL (operands[3]))
8155 < GET_MODE_NUNITS (V4SFmode))"
8157 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
8158 switch (which_alternative)
8162 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8164 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8169 [(set_attr "isa" "noavx,noavx,avx")
8170 (set_attr "type" "sselog")
8171 (set_attr "prefix_data16" "1,1,*")
8172 (set_attr "prefix_extra" "1")
8173 (set_attr "length_immediate" "1")
8174 (set_attr "prefix" "orig,orig,maybe_evex")
8175 (set_attr "mode" "V4SF")])
8177 ;; All of vinsertps, vmovss, vmovd clear also the higher bits.
8178 (define_insn "vec_set<mode>_0"
8179 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
8180 (vec_merge:VI4F_256_512
8181 (vec_duplicate:VI4F_256_512
8182 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
8183 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
8187 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
8188 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
8189 vmovd\t{%2, %x0|%x0, %2}"
8191 (if_then_else (eq_attr "alternative" "0")
8192 (const_string "sselog")
8193 (const_string "ssemov")))
8194 (set_attr "prefix" "maybe_evex")
8195 (set_attr "mode" "SF,<ssescalarmode>,SI")
8196 (set (attr "preferred_for_speed")
8197 (cond [(eq_attr "alternative" "2")
8198 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8200 (symbol_ref "true")))])
8202 (define_insn "sse4_1_insertps"
8203 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8204 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
8205 (match_operand:V4SF 1 "register_operand" "0,0,v")
8206 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
8210 if (MEM_P (operands[2]))
8212 unsigned count_s = INTVAL (operands[3]) >> 6;
8214 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
8215 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
8217 switch (which_alternative)
8221 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8223 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8228 [(set_attr "isa" "noavx,noavx,avx")
8229 (set_attr "type" "sselog")
8230 (set_attr "prefix_data16" "1,1,*")
8231 (set_attr "prefix_extra" "1")
8232 (set_attr "length_immediate" "1")
8233 (set_attr "prefix" "orig,orig,maybe_evex")
8234 (set_attr "mode" "V4SF")])
8237 [(set (match_operand:VI4F_128 0 "memory_operand")
8239 (vec_duplicate:VI4F_128
8240 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
8243 "TARGET_SSE && reload_completed"
8244 [(set (match_dup 0) (match_dup 1))]
8245 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8247 ;; Standard scalar operation patterns which preserve the rest of the
8248 ;; vector for combiner.
8249 (define_insn "vec_setv2df_0"
8250 [(set (match_operand:V2DF 0 "register_operand" "=x,v,x,v")
8253 (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
8254 (match_operand:V2DF 1 "register_operand" " 0,v,0,v")
8258 movsd\t{%2, %0|%0, %2}
8259 vmovsd\t{%2, %1, %0|%0, %1, %2}
8260 movlpd\t{%2, %0|%0, %2}
8261 vmovlpd\t{%2, %1, %0|%0, %1, %2}"
8262 [(set_attr "isa" "noavx,avx,noavx,avx")
8263 (set_attr "type" "ssemov")
8264 (set_attr "mode" "DF")])
8266 (define_expand "vec_set<mode>"
8267 [(match_operand:V 0 "register_operand")
8268 (match_operand:<ssescalarmode> 1 "register_operand")
8269 (match_operand 2 "vec_setm_operand")]
8272 if (CONST_INT_P (operands[2]))
8273 ix86_expand_vector_set (false, operands[0], operands[1],
8274 INTVAL (operands[2]));
8276 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
8280 (define_insn_and_split "*vec_extractv4sf_0"
8281 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
8283 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
8284 (parallel [(const_int 0)])))]
8285 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8287 "&& reload_completed"
8288 [(set (match_dup 0) (match_dup 1))]
8289 "operands[1] = gen_lowpart (SFmode, operands[1]);")
8291 (define_insn_and_split "*sse4_1_extractps"
8292 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
8294 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
8295 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
8298 extractps\t{%2, %1, %0|%0, %1, %2}
8299 extractps\t{%2, %1, %0|%0, %1, %2}
8300 vextractps\t{%2, %1, %0|%0, %1, %2}
8303 "&& reload_completed && SSE_REG_P (operands[0])"
8306 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
8307 switch (INTVAL (operands[2]))
8311 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
8312 operands[2], operands[2],
8313 GEN_INT (INTVAL (operands[2]) + 4),
8314 GEN_INT (INTVAL (operands[2]) + 4)));
8317 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
8320 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
8325 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
8326 (set_attr "type" "sselog,sselog,sselog,*,*")
8327 (set_attr "prefix_data16" "1,1,1,*,*")
8328 (set_attr "prefix_extra" "1,1,1,*,*")
8329 (set_attr "length_immediate" "1,1,1,*,*")
8330 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
8331 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
8333 (define_insn_and_split "*vec_extractv4sf_mem"
8334 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
8336 (match_operand:V4SF 1 "memory_operand" "o,o,o")
8337 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
8340 "&& reload_completed"
8341 [(set (match_dup 0) (match_dup 1))]
8343 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
8346 (define_mode_attr extract_type
8347 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
8349 (define_mode_attr extract_suf
8350 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
8352 (define_mode_iterator AVX512_VEC
8353 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
8355 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
8356 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
8357 (match_operand:AVX512_VEC 1 "register_operand")
8358 (match_operand:SI 2 "const_0_to_3_operand")
8359 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
8360 (match_operand:QI 4 "register_operand")]
8364 mask = INTVAL (operands[2]);
8365 rtx dest = operands[0];
8367 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
8368 dest = gen_reg_rtx (<ssequartermode>mode);
8370 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
8371 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
8372 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
8373 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
8376 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
8377 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
8379 if (dest != operands[0])
8380 emit_move_insn (operands[0], dest);
8384 (define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
8385 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8386 (vec_merge:<ssequartermode>
8387 (vec_select:<ssequartermode>
8388 (match_operand:V8FI 1 "register_operand" "v,v")
8389 (parallel [(match_operand 2 "const_0_to_7_operand")
8390 (match_operand 3 "const_0_to_7_operand")]))
8391 (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
8392 (match_operand:QI 5 "register_operand" "Yk,Yk")))]
8394 && INTVAL (operands[2]) % 2 == 0
8395 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8396 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
8398 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8399 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2}";
8401 [(set_attr "type" "sselog1")
8402 (set_attr "prefix_extra" "1")
8403 (set_attr "length_immediate" "1")
8404 (set_attr "prefix" "evex")
8405 (set_attr "mode" "<sseinsnmode>")])
8407 (define_insn "*avx512dq_vextract<shuffletype>64x2_1"
8408 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8409 (vec_select:<ssequartermode>
8410 (match_operand:V8FI 1 "register_operand" "v")
8411 (parallel [(match_operand 2 "const_0_to_7_operand")
8412 (match_operand 3 "const_0_to_7_operand")])))]
8414 && INTVAL (operands[2]) % 2 == 0
8415 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8417 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8418 return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
8420 [(set_attr "type" "sselog1")
8421 (set_attr "prefix_extra" "1")
8422 (set_attr "length_immediate" "1")
8423 (set_attr "prefix" "evex")
8424 (set_attr "mode" "<sseinsnmode>")])
8427 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8428 (vec_select:<ssequartermode>
8429 (match_operand:V8FI 1 "register_operand")
8430 (parallel [(const_int 0) (const_int 1)])))]
8434 || REG_P (operands[0])
8435 || !EXT_REX_SSE_REG_P (operands[1]))"
8436 [(set (match_dup 0) (match_dup 1))]
8438 if (!TARGET_AVX512VL
8439 && REG_P (operands[0])
8440 && EXT_REX_SSE_REG_P (operands[1]))
8442 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8444 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8447 (define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
8448 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8449 (vec_merge:<ssequartermode>
8450 (vec_select:<ssequartermode>
8451 (match_operand:V16FI 1 "register_operand" "v,v")
8452 (parallel [(match_operand 2 "const_0_to_15_operand")
8453 (match_operand 3 "const_0_to_15_operand")
8454 (match_operand 4 "const_0_to_15_operand")
8455 (match_operand 5 "const_0_to_15_operand")]))
8456 (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
8457 (match_operand:QI 7 "register_operand" "Yk,Yk")))]
8459 && INTVAL (operands[2]) % 4 == 0
8460 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8461 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8462 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8463 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
8465 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8466 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, %2}";
8468 [(set_attr "type" "sselog1")
8469 (set_attr "prefix_extra" "1")
8470 (set_attr "length_immediate" "1")
8471 (set_attr "prefix" "evex")
8472 (set_attr "mode" "<sseinsnmode>")])
8474 (define_insn "*avx512f_vextract<shuffletype>32x4_1"
8475 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8476 (vec_select:<ssequartermode>
8477 (match_operand:V16FI 1 "register_operand" "v")
8478 (parallel [(match_operand 2 "const_0_to_15_operand")
8479 (match_operand 3 "const_0_to_15_operand")
8480 (match_operand 4 "const_0_to_15_operand")
8481 (match_operand 5 "const_0_to_15_operand")])))]
8483 && INTVAL (operands[2]) % 4 == 0
8484 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8485 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8486 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8488 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8489 return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
8491 [(set_attr "type" "sselog1")
8492 (set_attr "prefix_extra" "1")
8493 (set_attr "length_immediate" "1")
8494 (set_attr "prefix" "evex")
8495 (set_attr "mode" "<sseinsnmode>")])
8498 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8499 (vec_select:<ssequartermode>
8500 (match_operand:V16FI 1 "register_operand")
8501 (parallel [(const_int 0) (const_int 1)
8502 (const_int 2) (const_int 3)])))]
8506 || REG_P (operands[0])
8507 || !EXT_REX_SSE_REG_P (operands[1]))"
8508 [(set (match_dup 0) (match_dup 1))]
8510 if (!TARGET_AVX512VL
8511 && REG_P (operands[0])
8512 && EXT_REX_SSE_REG_P (operands[1]))
8514 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8516 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8519 (define_mode_attr extract_type_2
8520 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8522 (define_mode_attr extract_suf_2
8523 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8525 (define_mode_iterator AVX512_VEC_2
8526 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
8528 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8529 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8530 (match_operand:AVX512_VEC_2 1 "register_operand")
8531 (match_operand:SI 2 "const_0_to_1_operand")
8532 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8533 (match_operand:QI 4 "register_operand")]
8536 rtx (*insn)(rtx, rtx, rtx, rtx);
8537 rtx dest = operands[0];
8539 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8540 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8542 switch (INTVAL (operands[2]))
8545 insn = gen_vec_extract_lo_<mode>_mask;
8548 insn = gen_vec_extract_hi_<mode>_mask;
8554 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8555 if (dest != operands[0])
8556 emit_move_insn (operands[0], dest);
8561 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8562 (vec_select:<ssehalfvecmode>
8563 (match_operand:V8FI 1 "nonimmediate_operand")
8564 (parallel [(const_int 0) (const_int 1)
8565 (const_int 2) (const_int 3)])))]
8566 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8569 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8570 [(set (match_dup 0) (match_dup 1))]
8571 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8573 (define_insn "vec_extract_lo_<mode>_mask"
8574 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8575 (vec_merge:<ssehalfvecmode>
8576 (vec_select:<ssehalfvecmode>
8577 (match_operand:V8FI 1 "register_operand" "v,v")
8578 (parallel [(const_int 0) (const_int 1)
8579 (const_int 2) (const_int 3)]))
8580 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8581 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8583 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8584 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8585 [(set_attr "type" "sselog1")
8586 (set_attr "prefix_extra" "1")
8587 (set_attr "length_immediate" "1")
8588 (set_attr "memory" "none,store")
8589 (set_attr "prefix" "evex")
8590 (set_attr "mode" "<sseinsnmode>")])
8592 (define_insn "vec_extract_lo_<mode>"
8593 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
8594 (vec_select:<ssehalfvecmode>
8595 (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
8596 (parallel [(const_int 0) (const_int 1)
8597 (const_int 2) (const_int 3)])))]
8598 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8600 if (!TARGET_AVX512VL && !MEM_P (operands[1]))
8601 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8605 [(set_attr "type" "sselog1")
8606 (set_attr "prefix_extra" "1")
8607 (set_attr "length_immediate" "1")
8608 (set_attr "memory" "none,store,load")
8609 (set_attr "prefix" "evex")
8610 (set_attr "mode" "<sseinsnmode>")])
8612 (define_insn "vec_extract_hi_<mode>_mask"
8613 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8614 (vec_merge:<ssehalfvecmode>
8615 (vec_select:<ssehalfvecmode>
8616 (match_operand:V8FI 1 "register_operand" "v,v")
8617 (parallel [(const_int 4) (const_int 5)
8618 (const_int 6) (const_int 7)]))
8619 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8620 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8622 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8623 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8624 [(set_attr "type" "sselog1")
8625 (set_attr "prefix_extra" "1")
8626 (set_attr "length_immediate" "1")
8627 (set_attr "prefix" "evex")
8628 (set_attr "mode" "<sseinsnmode>")])
8630 (define_insn "vec_extract_hi_<mode>"
8631 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
8632 (vec_select:<ssehalfvecmode>
8633 (match_operand:V8FI 1 "register_operand" "v")
8634 (parallel [(const_int 4) (const_int 5)
8635 (const_int 6) (const_int 7)])))]
8637 "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8638 [(set_attr "type" "sselog1")
8639 (set_attr "prefix_extra" "1")
8640 (set_attr "length_immediate" "1")
8641 (set_attr "prefix" "evex")
8642 (set_attr "mode" "<sseinsnmode>")])
8644 (define_insn "vec_extract_hi_<mode>_mask"
8645 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8646 (vec_merge:<ssehalfvecmode>
8647 (vec_select:<ssehalfvecmode>
8648 (match_operand:V16FI 1 "register_operand" "v,v")
8649 (parallel [(const_int 8) (const_int 9)
8650 (const_int 10) (const_int 11)
8651 (const_int 12) (const_int 13)
8652 (const_int 14) (const_int 15)]))
8653 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8654 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8656 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8657 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8658 [(set_attr "type" "sselog1")
8659 (set_attr "prefix_extra" "1")
8660 (set_attr "length_immediate" "1")
8661 (set_attr "prefix" "evex")
8662 (set_attr "mode" "<sseinsnmode>")])
8664 (define_insn "vec_extract_hi_<mode>"
8665 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
8666 (vec_select:<ssehalfvecmode>
8667 (match_operand:V16FI 1 "register_operand" "v,v")
8668 (parallel [(const_int 8) (const_int 9)
8669 (const_int 10) (const_int 11)
8670 (const_int 12) (const_int 13)
8671 (const_int 14) (const_int 15)])))]
8674 vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
8675 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8676 [(set_attr "type" "sselog1")
8677 (set_attr "prefix_extra" "1")
8678 (set_attr "isa" "avx512dq,noavx512dq")
8679 (set_attr "length_immediate" "1")
8680 (set_attr "prefix" "evex")
8681 (set_attr "mode" "<sseinsnmode>")])
8683 (define_mode_iterator VI48F_256_DQ
8684 [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
8686 (define_expand "avx512vl_vextractf128<mode>"
8687 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8688 (match_operand:VI48F_256_DQ 1 "register_operand")
8689 (match_operand:SI 2 "const_0_to_1_operand")
8690 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8691 (match_operand:QI 4 "register_operand")]
8694 rtx (*insn)(rtx, rtx, rtx, rtx);
8695 rtx dest = operands[0];
8698 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8699 /* For V8S[IF]mode there are maskm insns with =m and 0
8701 ? !rtx_equal_p (dest, operands[3])
8702 /* For V4D[IF]mode, hi insns don't allow memory, and
8703 lo insns have =m and 0C constraints. */
8704 : (operands[2] != const0_rtx
8705 || (!rtx_equal_p (dest, operands[3])
8706 && GET_CODE (operands[3]) != CONST_VECTOR))))
8707 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8708 switch (INTVAL (operands[2]))
8711 insn = gen_vec_extract_lo_<mode>_mask;
8714 insn = gen_vec_extract_hi_<mode>_mask;
8720 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8721 if (dest != operands[0])
8722 emit_move_insn (operands[0], dest);
8726 (define_expand "avx_vextractf128<mode>"
8727 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8728 (match_operand:V_256 1 "register_operand")
8729 (match_operand:SI 2 "const_0_to_1_operand")]
8732 rtx (*insn)(rtx, rtx);
8734 switch (INTVAL (operands[2]))
8737 insn = gen_vec_extract_lo_<mode>;
8740 insn = gen_vec_extract_hi_<mode>;
8746 emit_insn (insn (operands[0], operands[1]));
8750 (define_insn "vec_extract_lo_<mode>_mask"
8751 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8752 (vec_merge:<ssehalfvecmode>
8753 (vec_select:<ssehalfvecmode>
8754 (match_operand:V16FI 1 "register_operand" "v,v")
8755 (parallel [(const_int 0) (const_int 1)
8756 (const_int 2) (const_int 3)
8757 (const_int 4) (const_int 5)
8758 (const_int 6) (const_int 7)]))
8759 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8760 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8762 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8763 "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8764 [(set_attr "type" "sselog1")
8765 (set_attr "prefix_extra" "1")
8766 (set_attr "length_immediate" "1")
8767 (set_attr "memory" "none,store")
8768 (set_attr "prefix" "evex")
8769 (set_attr "mode" "<sseinsnmode>")])
8771 (define_insn "vec_extract_lo_<mode>"
8772 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
8773 (vec_select:<ssehalfvecmode>
8774 (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
8775 (parallel [(const_int 0) (const_int 1)
8776 (const_int 2) (const_int 3)
8777 (const_int 4) (const_int 5)
8778 (const_int 6) (const_int 7)])))]
8780 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8782 if (!TARGET_AVX512VL
8783 && !REG_P (operands[0])
8784 && EXT_REX_SSE_REG_P (operands[1]))
8786 if (TARGET_AVX512DQ)
8787 return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
8789 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8794 [(set_attr "type" "sselog1")
8795 (set_attr "prefix_extra" "1")
8796 (set_attr "length_immediate" "1")
8797 (set_attr "memory" "none,load,store")
8798 (set_attr "prefix" "evex")
8799 (set_attr "mode" "<sseinsnmode>")])
8802 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8803 (vec_select:<ssehalfvecmode>
8804 (match_operand:V16FI 1 "nonimmediate_operand")
8805 (parallel [(const_int 0) (const_int 1)
8806 (const_int 2) (const_int 3)
8807 (const_int 4) (const_int 5)
8808 (const_int 6) (const_int 7)])))]
8809 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8812 || REG_P (operands[0])
8813 || !EXT_REX_SSE_REG_P (operands[1]))"
8814 [(set (match_dup 0) (match_dup 1))]
8816 if (!TARGET_AVX512VL
8817 && REG_P (operands[0])
8818 && EXT_REX_SSE_REG_P (operands[1]))
8820 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8822 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
8825 (define_insn "vec_extract_lo_<mode>_mask"
8826 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8827 (vec_merge:<ssehalfvecmode>
8828 (vec_select:<ssehalfvecmode>
8829 (match_operand:VI8F_256 1 "register_operand" "v,v")
8830 (parallel [(const_int 0) (const_int 1)]))
8831 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8832 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8835 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8836 "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8837 [(set_attr "type" "sselog1")
8838 (set_attr "prefix_extra" "1")
8839 (set_attr "length_immediate" "1")
8840 (set_attr "memory" "none,store")
8841 (set_attr "prefix" "evex")
8842 (set_attr "mode" "XI")])
8844 (define_insn "vec_extract_lo_<mode>"
8845 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
8846 (vec_select:<ssehalfvecmode>
8847 (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
8848 (parallel [(const_int 0) (const_int 1)])))]
8850 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8854 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8855 (vec_select:<ssehalfvecmode>
8856 (match_operand:VI8F_256 1 "nonimmediate_operand")
8857 (parallel [(const_int 0) (const_int 1)])))]
8858 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8859 && reload_completed"
8860 [(set (match_dup 0) (match_dup 1))]
8861 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the high 128 bits of a V4DI/V4DF vector.  As with
;; the _lo variant above, a memory destination requires merge-masking
;; into the same location (operand 0 == operand 2).
8863 (define_insn "vec_extract_hi_<mode>_mask"
8864 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8865 (vec_merge:<ssehalfvecmode>
8866 (vec_select:<ssehalfvecmode>
8867 (match_operand:VI8F_256 1 "register_operand" "v,v")
8868 (parallel [(const_int 2) (const_int 3)]))
8869 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8870 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8873 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8874 "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8875 [(set_attr "type" "sselog1")
8876 (set_attr "prefix_extra" "1")
8877 (set_attr "length_immediate" "1")
;; Opmask-predicated forms exist only in the EVEX encoding; "vex" here
;; would mis-describe the emitted vextract*64x2 with a %{%3%} mask.
8878 (set_attr "prefix" "evex")
8879 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked extract of the high 128 bits of a V4DI/V4DF vector.  The
;; output routine picks the narrowest available encoding: EVEX
;; vextract*64x2 (DQ) or vextract*32x4 with AVX512VL, otherwise the
;; classic VEX vextract<i128> — hence the "vex" prefix attribute.
8881 (define_insn "vec_extract_hi_<mode>"
8882 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
8883 (vec_select:<ssehalfvecmode>
8884 (match_operand:VI8F_256 1 "register_operand" "v")
8885 (parallel [(const_int 2) (const_int 3)])))]
8888 if (TARGET_AVX512VL)
8890 if (TARGET_AVX512DQ)
8891 return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
8893 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
8896 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
8898 [(set_attr "type" "sselog1")
8899 (set_attr "prefix_extra" "1")
8900 (set_attr "length_immediate" "1")
8901 (set_attr "prefix" "vex")
8902 (set_attr "mode" "<sseinsnmode>")])
;; Post-reload split for the low-half extract of a V8SI/V8SF vector:
;; selecting elements 0..3 is just a lowpart move, so turn it into a
;; plain SET of the half-width lowpart.
8905 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8906 (vec_select:<ssehalfvecmode>
8907 (match_operand:VI4F_256 1 "nonimmediate_operand")
8908 (parallel [(const_int 0) (const_int 1)
8909 (const_int 2) (const_int 3)])))]
8910 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8911 && reload_completed"
8912 [(set (match_dup 0) (match_dup 1))]
8913 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the low 128 bits of a 256-bit vector of 4-byte
;; elements (VI4F_256 = V8SI/V8SF).  EVEX-only due to the Yk opmask;
;; a memory destination requires merge-masking into itself.
8915 (define_insn "vec_extract_lo_<mode>_mask"
8916 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8917 (vec_merge:<ssehalfvecmode>
8918 (vec_select:<ssehalfvecmode>
8919 (match_operand:VI4F_256 1 "register_operand" "v,v")
8920 (parallel [(const_int 0) (const_int 1)
8921 (const_int 2) (const_int 3)]))
8922 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8923 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8925 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8926 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8927 [(set_attr "type" "sselog1")
8928 (set_attr "prefix_extra" "1")
8929 (set_attr "length_immediate" "1")
8930 (set_attr "prefix" "evex")
8931 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked extract of the low 128 bits of a V8SI/V8SF vector; at most
;; one of the two operands may be memory.
8933 (define_insn "vec_extract_lo_<mode>"
8934 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
8935 (vec_select:<ssehalfvecmode>
8936 (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
8937 (parallel [(const_int 0) (const_int 1)
8938 (const_int 2) (const_int 3)])))]
8940 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8942 [(set_attr "type" "sselog1")
8943 (set_attr "prefix_extra" "1")
8944 (set_attr "length_immediate" "1")
8945 (set_attr "prefix" "evex")
8946 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of the high 128 bits (elements 4..7) of a V8SI/V8SF
;; vector.  <avx512fmaskmode> resolves to QI for these 8-element modes.
8948 (define_insn "vec_extract_hi_<mode>_mask"
;; Operand 0 has a memory alternative ("m"), so its predicate must be
;; nonimmediate_operand — register_operand contradicts the constraint
;; and is inconsistent with the three sibling masked-extract patterns.
8949 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8950 (vec_merge:<ssehalfvecmode>
8951 (vec_select:<ssehalfvecmode>
8952 (match_operand:VI4F_256 1 "register_operand" "v,v")
8953 (parallel [(const_int 4) (const_int 5)
8954 (const_int 6) (const_int 7)]))
8955 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8956 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8958 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8959 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8960 [(set_attr "type" "sselog1")
;; vextract*32x4 is a 0F3A-map insn; all sibling extract patterns carry
;; prefix_extra "1", restore it here for consistent length computation.
 (set_attr "prefix_extra" "1")
8961 (set_attr "length_immediate" "1")
8962 (set_attr "prefix" "evex")
8963 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked extract of the high 128 bits of a V8SI/V8SF vector.
;; Alternative 0 is the classic VEX vextract<i128>; alternative 1 is
;; the EVEX vextract*32x4 form enabled by AVX512VL.
8965 (define_insn "vec_extract_hi_<mode>"
8966 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
8967 (vec_select:<ssehalfvecmode>
8968 (match_operand:VI4F_256 1 "register_operand" "x, v")
8969 (parallel [(const_int 4) (const_int 5)
8970 (const_int 6) (const_int 7)])))]
8973 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
8974 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8975 [(set_attr "isa" "*, avx512vl")
8976 (set_attr "prefix" "vex, evex")
8977 (set_attr "type" "sselog1")
8978 (set_attr "length_immediate" "1")
8979 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low V16HI half of a V32HI vector.  Without AVX512VL a
;; 256-bit lowpart of an %xmm16+ register cannot be expressed directly,
;; so that case emits vextracti64x4; otherwise the insn is split into a
;; plain lowpart move after reload.
8981 (define_insn_and_split "vec_extract_lo_v32hi"
8982 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
8984 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
8985 (parallel [(const_int 0) (const_int 1)
8986 (const_int 2) (const_int 3)
8987 (const_int 4) (const_int 5)
8988 (const_int 6) (const_int 7)
8989 (const_int 8) (const_int 9)
8990 (const_int 10) (const_int 11)
8991 (const_int 12) (const_int 13)
8992 (const_int 14) (const_int 15)])))]
8993 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8996 || REG_P (operands[0])
8997 || !EXT_REX_SSE_REG_P (operands[1]))
9000 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9002 "&& reload_completed
9004 || REG_P (operands[0])
9005 || !EXT_REX_SSE_REG_P (operands[1]))"
9006 [(set (match_dup 0) (match_dup 1))]
9008 if (!TARGET_AVX512VL
9009 && REG_P (operands[0])
9010 && EXT_REX_SSE_REG_P (operands[1]))
;; No AVX512VL: widen the destination to V32HI so the move stays legal
;; for extended (%xmm16+) registers.
9011 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
9013 operands[1] = gen_lowpart (V16HImode, operands[1]);
9015 [(set_attr "type" "sselog1")
9016 (set_attr "prefix_extra" "1")
9017 (set_attr "length_immediate" "1")
9018 (set_attr "memory" "none,load,store")
9019 (set_attr "prefix" "evex")
9020 (set_attr "mode" "XI")])
;; Extract the high V16HI half (elements 16..31) of a V32HI vector
;; with the EVEX-encoded vextracti64x4.
9022 (define_insn "vec_extract_hi_v32hi"
9023 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
9025 (match_operand:V32HI 1 "register_operand" "v")
9026 (parallel [(const_int 16) (const_int 17)
9027 (const_int 18) (const_int 19)
9028 (const_int 20) (const_int 21)
9029 (const_int 22) (const_int 23)
9030 (const_int 24) (const_int 25)
9031 (const_int 26) (const_int 27)
9032 (const_int 28) (const_int 29)
9033 (const_int 30) (const_int 31)])))]
9035 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9036 [(set_attr "type" "sselog1")
9037 (set_attr "prefix_extra" "1")
9038 (set_attr "length_immediate" "1")
9039 (set_attr "prefix" "evex")
9040 (set_attr "mode" "XI")])
;; Extract the low V8HI half of a V16HI vector: always splittable into
;; a lowpart move after reload.
9042 (define_insn_and_split "vec_extract_lo_v16hi"
9043 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
9045 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
9046 (parallel [(const_int 0) (const_int 1)
9047 (const_int 2) (const_int 3)
9048 (const_int 4) (const_int 5)
9049 (const_int 6) (const_int 7)])))]
9050 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9052 "&& reload_completed"
9053 [(set (match_dup 0) (match_dup 1))]
9054 "operands[1] = gen_lowpart (V8HImode, operands[1]);")

;; Extract the high V8HI half of a V16HI vector.  Alternative 0: VEX
;; vextract*128; alternative 1: EVEX vextracti32x4 (AVX512DQ+VL);
;; alternative 2: AVX512F fallback operating on the zero-extended
;; 512-bit view (%g1) of the source register.
9056 (define_insn "vec_extract_hi_v16hi"
9057 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
9059 (match_operand:V16HI 1 "register_operand" "x,v,v")
9060 (parallel [(const_int 8) (const_int 9)
9061 (const_int 10) (const_int 11)
9062 (const_int 12) (const_int 13)
9063 (const_int 14) (const_int 15)])))]
9066 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9067 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9068 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9069 [(set_attr "type" "sselog1")
9070 (set_attr "prefix_extra" "1")
9071 (set_attr "length_immediate" "1")
9072 (set_attr "isa" "*,avx512dq,avx512f")
9073 (set_attr "prefix" "vex,evex,evex")
9074 (set_attr "mode" "OI")])
;; Extract the low V32QI half of a V64QI vector.  Mirrors
;; vec_extract_lo_v32hi: emit vextracti64x4 when the source lives in an
;; extended register without AVX512VL, otherwise split to a lowpart move.
9076 (define_insn_and_split "vec_extract_lo_v64qi"
9077 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
9079 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
9080 (parallel [(const_int 0) (const_int 1)
9081 (const_int 2) (const_int 3)
9082 (const_int 4) (const_int 5)
9083 (const_int 6) (const_int 7)
9084 (const_int 8) (const_int 9)
9085 (const_int 10) (const_int 11)
9086 (const_int 12) (const_int 13)
9087 (const_int 14) (const_int 15)
9088 (const_int 16) (const_int 17)
9089 (const_int 18) (const_int 19)
9090 (const_int 20) (const_int 21)
9091 (const_int 22) (const_int 23)
9092 (const_int 24) (const_int 25)
9093 (const_int 26) (const_int 27)
9094 (const_int 28) (const_int 29)
9095 (const_int 30) (const_int 31)])))]
9096 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9099 || REG_P (operands[0])
9100 || !EXT_REX_SSE_REG_P (operands[1]))
9103 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9105 "&& reload_completed
9107 || REG_P (operands[0])
9108 || !EXT_REX_SSE_REG_P (operands[1]))"
9109 [(set (match_dup 0) (match_dup 1))]
9111 if (!TARGET_AVX512VL
9112 && REG_P (operands[0])
9113 && EXT_REX_SSE_REG_P (operands[1]))
;; No AVX512VL: widen the destination to V64QI so the move stays legal
;; for extended (%xmm16+) registers.
9114 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
9116 operands[1] = gen_lowpart (V32QImode, operands[1]);
9118 [(set_attr "type" "sselog1")
9119 (set_attr "prefix_extra" "1")
9120 (set_attr "length_immediate" "1")
9121 (set_attr "memory" "none,load,store")
9122 (set_attr "prefix" "evex")
9123 (set_attr "mode" "XI")])
;; Extract the high V32QI half (elements 32..63) of a V64QI vector
;; with the EVEX-encoded vextracti64x4.
9125 (define_insn "vec_extract_hi_v64qi"
9126 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
9128 (match_operand:V64QI 1 "register_operand" "v")
9129 (parallel [(const_int 32) (const_int 33)
9130 (const_int 34) (const_int 35)
9131 (const_int 36) (const_int 37)
9132 (const_int 38) (const_int 39)
9133 (const_int 40) (const_int 41)
9134 (const_int 42) (const_int 43)
9135 (const_int 44) (const_int 45)
9136 (const_int 46) (const_int 47)
9137 (const_int 48) (const_int 49)
9138 (const_int 50) (const_int 51)
9139 (const_int 52) (const_int 53)
9140 (const_int 54) (const_int 55)
9141 (const_int 56) (const_int 57)
9142 (const_int 58) (const_int 59)
9143 (const_int 60) (const_int 61)
9144 (const_int 62) (const_int 63)])))]
9146 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9147 [(set_attr "type" "sselog1")
9148 (set_attr "prefix_extra" "1")
9149 (set_attr "length_immediate" "1")
9150 (set_attr "prefix" "evex")
9151 (set_attr "mode" "XI")])
;; Extract the low V16QI half of a V32QI vector: always splittable into
;; a lowpart move after reload.
9153 (define_insn_and_split "vec_extract_lo_v32qi"
9154 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
9156 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
9157 (parallel [(const_int 0) (const_int 1)
9158 (const_int 2) (const_int 3)
9159 (const_int 4) (const_int 5)
9160 (const_int 6) (const_int 7)
9161 (const_int 8) (const_int 9)
9162 (const_int 10) (const_int 11)
9163 (const_int 12) (const_int 13)
9164 (const_int 14) (const_int 15)])))]
9165 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9167 "&& reload_completed"
9168 [(set (match_dup 0) (match_dup 1))]
9169 "operands[1] = gen_lowpart (V16QImode, operands[1]);")

;; Extract the high V16QI half of a V32QI vector.  Alternatives mirror
;; vec_extract_hi_v16hi: VEX vextract*128, EVEX vextracti32x4 (DQ+VL),
;; and the AVX512F fallback on the zero-extended 512-bit view (%g1).
9171 (define_insn "vec_extract_hi_v32qi"
9172 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
9174 (match_operand:V32QI 1 "register_operand" "x,v,v")
9175 (parallel [(const_int 16) (const_int 17)
9176 (const_int 18) (const_int 19)
9177 (const_int 20) (const_int 21)
9178 (const_int 22) (const_int 23)
9179 (const_int 24) (const_int 25)
9180 (const_int 26) (const_int 27)
9181 (const_int 28) (const_int 29)
9182 (const_int 30) (const_int 31)])))]
9185 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9186 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9187 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9188 [(set_attr "type" "sselog1")
9189 (set_attr "prefix_extra" "1")
9190 (set_attr "length_immediate" "1")
9191 (set_attr "isa" "*,avx512dq,avx512f")
9192 (set_attr "prefix" "vex,evex,evex")
9193 (set_attr "mode" "OI")])
9195 ;; Modes handled by vec_extract patterns.
9196 (define_mode_iterator VEC_EXTRACT_MODE
9197 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
9198 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
9199 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
9200 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
9201 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
9202 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
9203 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])

;; Scalar element extract: defer to ix86_expand_vector_extract, which
;; picks the best instruction sequence for the given constant index.
9205 (define_expand "vec_extract<mode><ssescalarmodelower>"
9206 [(match_operand:<ssescalarmode> 0 "register_operand")
9207 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
9208 (match_operand 2 "const_int_operand")]
9211 ix86_expand_vector_extract (false, operands[0], operands[1],
9212 INTVAL (operands[2]));

;; Half-vector extract from a 256/512-bit vector: dispatch to the
;; matching vec_extract_lo_*/vec_extract_hi_* pattern above.
9216 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
9217 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9218 (match_operand:V_256_512 1 "register_operand")
9219 (match_operand 2 "const_0_to_1_operand")]
9222 if (INTVAL (operands[2]))
9223 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]))
;; index 0 selects the low half
9225 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
9229 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9231 ;; Parallel double-precision floating point element swizzling
9233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd: interleave the high DF element of each 128-bit
;; lane of operands 1 and 2 (indices 1/9, 3/11, 5/13, 7/15 of the
;; concatenated pair).
9235 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
9236 [(set (match_operand:V8DF 0 "register_operand" "=v")
9239 (match_operand:V8DF 1 "register_operand" "v")
9240 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9241 (parallel [(const_int 1) (const_int 9)
9242 (const_int 3) (const_int 11)
9243 (const_int 5) (const_int 13)
9244 (const_int 7) (const_int 15)])))]
9246 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9247 [(set_attr "type" "sselog")
9248 (set_attr "prefix" "evex")
9249 (set_attr "mode" "V8DF")])
9251 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9252 (define_insn "avx_unpckhpd256<mask_name>"
9253 [(set (match_operand:V4DF 0 "register_operand" "=v")
9256 (match_operand:V4DF 1 "register_operand" "v")
9257 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9258 (parallel [(const_int 1) (const_int 5)
9259 (const_int 3) (const_int 7)])))]
9260 "TARGET_AVX && <mask_avx512vl_condition>"
9261 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9262 [(set_attr "type" "sselog")
9263 (set_attr "prefix" "vex")
9264 (set_attr "mode" "V4DF")])

;; True cross-lane high interleave for V4DF: because vunpckhpd only
;; works within lanes, build it from two in-lane unpck results
;; (temporaries operands 3 and 4) followed by a lane permute.
9266 (define_expand "vec_interleave_highv4df"
9270 (match_operand:V4DF 1 "register_operand")
9271 (match_operand:V4DF 2 "nonimmediate_operand"))
9272 (parallel [(const_int 0) (const_int 4)
9273 (const_int 2) (const_int 6)])))
9279 (parallel [(const_int 1) (const_int 5)
9280 (const_int 3) (const_int 7)])))
9281 (set (match_operand:V4DF 0 "register_operand")
9286 (parallel [(const_int 2) (const_int 3)
9287 (const_int 6) (const_int 7)])))]
9290 operands[3] = gen_reg_rtx (V4DFmode);
9291 operands[4] = gen_reg_rtx (V4DFmode);

;; Masked 128-bit vunpckhpd (AVX512VL): merge the interleave result
;; with operand 3 under opmask operand 4.
9295 (define_insn "avx512vl_unpckhpd128_mask"
9296 [(set (match_operand:V2DF 0 "register_operand" "=v")
9300 (match_operand:V2DF 1 "register_operand" "v")
9301 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9302 (parallel [(const_int 1) (const_int 3)]))
9303 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9304 (match_operand:QI 4 "register_operand" "Yk")))]
9306 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9307 [(set_attr "type" "sselog")
9308 (set_attr "prefix" "evex")
9309 (set_attr "mode" "V2DF")])
;; V2DF high interleave.  The expander forces operand 2 into a register
;; when the operand combination is not directly representable.
9311 (define_expand "vec_interleave_highv2df"
9312 [(set (match_operand:V2DF 0 "register_operand")
9315 (match_operand:V2DF 1 "nonimmediate_operand")
9316 (match_operand:V2DF 2 "nonimmediate_operand"))
9317 (parallel [(const_int 1)
9321 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
9322 operands[2] = force_reg (V2DFmode, operands[2]);

;; Six alternatives: SSE2 unpckhpd, AVX vunpckhpd, SSE3 movddup from
;; the high half of a memory operand (%H1), and movlpd/movhpd forms
;; that move a single high DF element to/from memory.
9325 (define_insn "*vec_interleave_highv2df"
9326 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
9329 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
9330 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
9331 (parallel [(const_int 1)
9333 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
9335 unpckhpd\t{%2, %0|%0, %2}
9336 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
9337 %vmovddup\t{%H1, %0|%0, %H1}
9338 movlpd\t{%H1, %0|%0, %H1}
9339 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
9340 %vmovhpd\t{%1, %0|%q0, %1}"
9341 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9342 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9343 (set (attr "prefix_data16")
9344 (if_then_else (eq_attr "alternative" "3,5")
9346 (const_string "*")))
9347 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9348 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; 512-bit movddup: duplicate the even-indexed DF element of each
;; 128-bit lane (indices 0/8, 2/10, ... of the self-concatenation).
9350 (define_expand "avx512f_movddup512<mask_name>"
9351 [(set (match_operand:V8DF 0 "register_operand")
9354 (match_operand:V8DF 1 "nonimmediate_operand")
9356 (parallel [(const_int 0) (const_int 8)
9357 (const_int 2) (const_int 10)
9358 (const_int 4) (const_int 12)
9359 (const_int 6) (const_int 14)])))]

;; 512-bit vunpcklpd: interleave the low DF element of each 128-bit
;; lane of operands 1 and 2.
9362 (define_expand "avx512f_unpcklpd512<mask_name>"
9363 [(set (match_operand:V8DF 0 "register_operand")
9366 (match_operand:V8DF 1 "register_operand")
9367 (match_operand:V8DF 2 "nonimmediate_operand"))
9368 (parallel [(const_int 0) (const_int 8)
9369 (const_int 2) (const_int 10)
9370 (const_int 4) (const_int 12)
9371 (const_int 6) (const_int 14)])))]

;; Combined matcher for both expanders above: when operand 2 duplicates
;; operand 1 (constraint "1"), the cheaper vmovddup is emitted instead
;; of vunpcklpd.
9374 (define_insn "*avx512f_unpcklpd512<mask_name>"
9375 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
9378 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
9379 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
9380 (parallel [(const_int 0) (const_int 8)
9381 (const_int 2) (const_int 10)
9382 (const_int 4) (const_int 12)
9383 (const_int 6) (const_int 14)])))]
9386 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
9387 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9388 [(set_attr "type" "sselog")
9389 (set_attr "prefix" "evex")
9390 (set_attr "mode" "V8DF")])
9392 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit movddup: duplicate the even DF element of each lane.
9393 (define_expand "avx_movddup256<mask_name>"
9394 [(set (match_operand:V4DF 0 "register_operand")
9397 (match_operand:V4DF 1 "nonimmediate_operand")
9399 (parallel [(const_int 0) (const_int 4)
9400 (const_int 2) (const_int 6)])))]
9401 "TARGET_AVX && <mask_avx512vl_condition>")

;; 256-bit vunpcklpd: interleave the low DF element of each lane.
9403 (define_expand "avx_unpcklpd256<mask_name>"
9404 [(set (match_operand:V4DF 0 "register_operand")
9407 (match_operand:V4DF 1 "register_operand")
9408 (match_operand:V4DF 2 "nonimmediate_operand"))
9409 (parallel [(const_int 0) (const_int 4)
9410 (const_int 2) (const_int 6)])))]
9411 "TARGET_AVX && <mask_avx512vl_condition>")

;; Combined matcher for both expanders above; the "1" constraint on
;; operand 2 (self-interleave) selects the cheaper vmovddup form.
9413 (define_insn "*avx_unpcklpd256<mask_name>"
9414 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9417 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9418 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9419 (parallel [(const_int 0) (const_int 4)
9420 (const_int 2) (const_int 6)])))]
9421 "TARGET_AVX && <mask_avx512vl_condition>"
9423 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9424 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9425 [(set_attr "type" "sselog")
9426 (set_attr "prefix" "vex")
9427 (set_attr "mode" "V4DF")])
;; True cross-lane low interleave for V4DF, built like
;; vec_interleave_highv4df: two in-lane unpck results into temporaries
;; 3 and 4, then a lane permute selecting the low lanes.
9429 (define_expand "vec_interleave_lowv4df"
9433 (match_operand:V4DF 1 "register_operand")
9434 (match_operand:V4DF 2 "nonimmediate_operand"))
9435 (parallel [(const_int 0) (const_int 4)
9436 (const_int 2) (const_int 6)])))
9442 (parallel [(const_int 1) (const_int 5)
9443 (const_int 3) (const_int 7)])))
9444 (set (match_operand:V4DF 0 "register_operand")
9449 (parallel [(const_int 0) (const_int 1)
9450 (const_int 4) (const_int 5)])))]
9453 operands[3] = gen_reg_rtx (V4DFmode);
9454 operands[4] = gen_reg_rtx (V4DFmode);

;; Masked 128-bit vunpcklpd (AVX512VL): merge the interleave result
;; with operand 3 under opmask operand 4.
9457 (define_insn "avx512vl_unpcklpd128_mask"
9458 [(set (match_operand:V2DF 0 "register_operand" "=v")
9462 (match_operand:V2DF 1 "register_operand" "v")
9463 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9464 (parallel [(const_int 0) (const_int 2)]))
9465 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9466 (match_operand:QI 4 "register_operand" "Yk")))]
9468 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9469 [(set_attr "type" "sselog")
9470 (set_attr "prefix" "evex")
9471 (set_attr "mode" "V2DF")])
;; V2DF low interleave.  The expander forces operand 1 into a register
;; when the operand combination is not directly representable.
9473 (define_expand "vec_interleave_lowv2df"
9474 [(set (match_operand:V2DF 0 "register_operand")
9477 (match_operand:V2DF 1 "nonimmediate_operand")
9478 (match_operand:V2DF 2 "nonimmediate_operand"))
9479 (parallel [(const_int 0)
9483 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9484 operands[1] = force_reg (V2DFmode, operands[1]);

;; Six alternatives: SSE2 unpcklpd, AVX vunpcklpd, SSE3 movddup of the
;; low element (%q1), and movhpd/movlpd forms that move a single low DF
;; element to/from memory (%H0 = high half of a memory destination).
9487 (define_insn "*vec_interleave_lowv2df"
9488 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9491 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9492 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9493 (parallel [(const_int 0)
9495 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9497 unpcklpd\t{%2, %0|%0, %2}
9498 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9499 %vmovddup\t{%1, %0|%0, %q1}
9500 movhpd\t{%2, %0|%0, %q2}
9501 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9502 %vmovlpd\t{%2, %H0|%H0, %2}"
9503 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9504 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9505 (set (attr "prefix_data16")
9506 (if_then_else (eq_attr "alternative" "3,5")
9508 (const_string "*")))
9509 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9510 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split: storing a duplicated low element to memory is
;; done as two scalar DF stores of the same value.
9513 [(set (match_operand:V2DF 0 "memory_operand")
9516 (match_operand:V2DF 1 "register_operand")
9518 (parallel [(const_int 0)
9520 "TARGET_SSE3 && reload_completed"
9523 rtx low = gen_lowpart (DFmode, operands[1]);
9525 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9526 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);

;; Split: a same-element shuffle of a memory operand (indices i and
;; i+2, i.e. both halves read the same source element) becomes a
;; vec_duplicate load (movddup) from the selected element.
9531 [(set (match_operand:V2DF 0 "register_operand")
9534 (match_operand:V2DF 1 "memory_operand")
9536 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9537 (match_operand:SI 3 "const_int_operand")])))]
9538 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9539 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9541 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalefss/sd: scale the low element of operand 1 by operand 2,
;; upper elements passed through; supports embedded rounding.
9544 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9545 [(set (match_operand:VF_128 0 "register_operand" "=v")
9548 [(match_operand:VF_128 1 "register_operand" "v")
9549 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9554 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9555 [(set_attr "prefix" "evex")
9556 (set_attr "mode" "<ssescalarmode>")])

;; Packed vscalefps/pd over all AVX-512 vector float modes, with
;; optional masking and embedded rounding.
9558 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9559 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9561 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9562 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9565 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9566 [(set_attr "prefix" "evex")
9567 (set_attr "mode" "<MODE>")])
;; Zero-masked vpternlog: expand to the _maskz_1 pattern with a zero
;; merge operand.
9569 (define_expand "<avx512>_vternlog<mode>_maskz"
9570 [(match_operand:VI48_AVX512VL 0 "register_operand")
9571 (match_operand:VI48_AVX512VL 1 "register_operand")
9572 (match_operand:VI48_AVX512VL 2 "register_operand")
9573 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9574 (match_operand:SI 4 "const_0_to_255_operand")
9575 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9578 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9579 operands[0], operands[1], operands[2], operands[3],
9580 operands[4], CONST0_RTX (<MODE>mode), operands[5]));

;; vpternlog: three-input bitwise ternary logic selected by the 8-bit
;; truth-table immediate (operand 4).  Operand 1 is both input and
;; destination ("0").
9584 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9585 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9586 (unspec:VI48_AVX512VL
9587 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9588 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9589 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9590 (match_operand:SI 4 "const_0_to_255_operand")]
9593 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9594 [(set_attr "type" "sselog")
9595 (set_attr "prefix" "evex")
9596 (set_attr "mode" "<sseinsnmode>")])

;; Merge-masked vpternlog: result elements with a zero mask bit keep
;; the old destination value (operand 1, tied to operand 0).
9598 (define_insn "<avx512>_vternlog<mode>_mask"
9599 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9600 (vec_merge:VI48_AVX512VL
9601 (unspec:VI48_AVX512VL
9602 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9603 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9604 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9605 (match_operand:SI 4 "const_0_to_255_operand")]
9608 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9610 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9611 [(set_attr "type" "sselog")
9612 (set_attr "prefix" "evex")
9613 (set_attr "mode" "<sseinsnmode>")])
;; vgetexpps/pd: extract the biased exponent of each element as a float;
;; supports masking and SAE (suppress-all-exceptions).
9615 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9616 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9617 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9620 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9621 [(set_attr "prefix" "evex")
9622 (set_attr "mode" "<MODE>")])

;; Scalar vgetexpss/sd: exponent of the low element of operand 2,
;; upper elements taken from operand 1.
9624 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9625 [(set (match_operand:VF_128 0 "register_operand" "=v")
9628 [(match_operand:VF_128 1 "register_operand" "v")
9629 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9634 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9635 [(set_attr "prefix" "evex")
9636 (set_attr "mode" "<ssescalarmode>")])

;; valignd/q: concatenate operands 1 and 2 and shift right by the
;; element count in immediate operand 3.
9638 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9639 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9640 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9641 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9642 (match_operand:SI 3 "const_0_to_255_operand")]
9645 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9646 [(set_attr "prefix" "evex")
9647 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit shufps: decode the 8-bit shufps immediate (four 2-bit
;; element selectors) into the sixteen explicit vec_select indices used
;; by avx512f_shufps512_1_mask.  Per 128-bit lane at base b: elements
;; b+sel0, b+sel1 come from operand 1 and b+16+sel2, b+16+sel3 from
;; operand 2 of the concatenated pair — hence the +4/+8/+12 lane bases
;; and the +16 offsets into the second source.
9649 (define_expand "avx512f_shufps512_mask"
9650 [(match_operand:V16SF 0 "register_operand")
9651 (match_operand:V16SF 1 "register_operand")
9652 (match_operand:V16SF 2 "nonimmediate_operand")
9653 (match_operand:SI 3 "const_0_to_255_operand")
9654 (match_operand:V16SF 4 "register_operand")
9655 (match_operand:HI 5 "register_operand")]
9658 int mask = INTVAL (operands[3]);
9659 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9660 GEN_INT ((mask >> 0) & 3),
9661 GEN_INT ((mask >> 2) & 3),
9662 GEN_INT (((mask >> 4) & 3) + 16),
9663 GEN_INT (((mask >> 6) & 3) + 16),
9664 GEN_INT (((mask >> 0) & 3) + 4),
9665 GEN_INT (((mask >> 2) & 3) + 4),
9666 GEN_INT (((mask >> 4) & 3) + 20),
9667 GEN_INT (((mask >> 6) & 3) + 20),
9668 GEN_INT (((mask >> 0) & 3) + 8),
9669 GEN_INT (((mask >> 2) & 3) + 8),
9670 GEN_INT (((mask >> 4) & 3) + 24),
9671 GEN_INT (((mask >> 6) & 3) + 24),
9672 GEN_INT (((mask >> 0) & 3) + 12),
9673 GEN_INT (((mask >> 2) & 3) + 12),
9674 GEN_INT (((mask >> 4) & 3) + 28),
9675 GEN_INT (((mask >> 6) & 3) + 28),
9676 operands[4], operands[5]));
;; Zero-masked vfixupimm: expand to the _maskz_1 pattern with a zero
;; merge operand.
9681 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9682 [(match_operand:VF_AVX512VL 0 "register_operand")
9683 (match_operand:VF_AVX512VL 1 "register_operand")
9684 (match_operand:VF_AVX512VL 2 "register_operand")
9685 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9686 (match_operand:SI 4 "const_0_to_255_operand")
9687 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9690 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9691 operands[0], operands[1], operands[2], operands[3],
9692 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9693 <round_saeonly_expand_operand6>));

;; vfixupimmps/pd: fix up special values of operand 1 per the table in
;; integer operand 3 and immediate operand 4; operand 1 is tied to the
;; destination.  SAE supported.
9697 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9698 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9700 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9701 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9702 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9703 (match_operand:SI 4 "const_0_to_255_operand")]
9706 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9707 [(set_attr "prefix" "evex")
9708 (set_attr "mode" "<MODE>")])

;; Merge-masked vfixupimm under opmask operand 5.
9710 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9711 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9712 (vec_merge:VF_AVX512VL
9714 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9715 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9716 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9717 (match_operand:SI 4 "const_0_to_255_operand")]
9720 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9722 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9723 [(set_attr "prefix" "evex")
9724 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar vfixupimm: expand to the _maskz_1 pattern with a
;; zero merge operand.
9726 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9727 [(match_operand:VF_128 0 "register_operand")
9728 (match_operand:VF_128 1 "register_operand")
9729 (match_operand:VF_128 2 "register_operand")
9730 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9731 (match_operand:SI 4 "const_0_to_255_operand")
9732 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9735 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9736 operands[0], operands[1], operands[2], operands[3],
9737 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9738 <round_saeonly_expand_operand6>));

;; Scalar vfixupimmss/sd: fix up the low element only; upper elements
;; come from operand 1 (tied to the destination).  SAE supported.
9742 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9743 [(set (match_operand:VF_128 0 "register_operand" "=v")
9746 [(match_operand:VF_128 1 "register_operand" "0")
9747 (match_operand:VF_128 2 "register_operand" "v")
9748 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9749 (match_operand:SI 4 "const_0_to_255_operand")]
9754 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9755 [(set_attr "prefix" "evex")
9756 (set_attr "mode" "<ssescalarmode>")])

;; Merge-masked scalar vfixupimm under opmask operand 5.
9758 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9759 [(set (match_operand:VF_128 0 "register_operand" "=v")
9763 [(match_operand:VF_128 1 "register_operand" "0")
9764 (match_operand:VF_128 2 "register_operand" "v")
9765 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9766 (match_operand:SI 4 "const_0_to_255_operand")]
9771 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9773 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9774 [(set_attr "prefix" "evex")
9775 (set_attr "mode" "<ssescalarmode>")])
;; vrndscaleps/pd: round each element to the precision selected by
;; immediate operand 2; masking and SAE supported.
9777 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9778 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9780 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9781 (match_operand:SI 2 "const_0_to_255_operand")]
9784 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9785 [(set_attr "length_immediate" "1")
9786 (set_attr "prefix" "evex")
9787 (set_attr "mode" "<MODE>")])

;; Scalar vrndscaless/sd: round the low element of operand 2; upper
;; elements come from operand 1.
9789 (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
9790 [(set (match_operand:VF_128 0 "register_operand" "=v")
9793 [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
9794 (match_operand:SI 3 "const_0_to_255_operand")]
9796 (match_operand:VF_128 1 "register_operand" "v")
9799 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
9800 [(set_attr "length_immediate" "1")
9801 (set_attr "prefix" "evex")
9802 (set_attr "mode" "<MODE>")])

;; Combiner variant: the rounded scalar (operand 2) duplicated and then
;; merged into operand 1 still matches a single vrndscale insn.
9804 (define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
9805 [(set (match_operand:VF_128 0 "register_operand" "=v")
9807 (vec_duplicate:VF_128
9808 (unspec:<ssescalarmode>
9809 [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9810 (match_operand:SI 3 "const_0_to_255_operand")]
9812 (match_operand:VF_128 1 "register_operand" "v")
9815 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
9816 [(set_attr "length_immediate" "1")
9817 (set_attr "prefix" "evex")
9818 (set_attr "mode" "<MODE>")])
9820 ;; One bit in mask selects 2 elements.
;; VSHUFPS on 512-bit vectors, expressed as vec_select of the concat of the
;; two sources.  Operands 3-18 are the 16 selected lane indices; the insn
;; condition (line 9844 onward) requires the per-128-bit-lane pattern to
;; repeat (each quadruple offset by 4/8/12), which is exactly what a single
;; 8-bit VSHUFPS immediate can encode.  The output C code rebuilds that
;; immediate from operands 3-6.
9821 (define_insn "avx512f_shufps512_1<mask_name>"
9822 [(set (match_operand:V16SF 0 "register_operand" "=v")
9825 (match_operand:V16SF 1 "register_operand" "v")
9826 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9827 (parallel [(match_operand 3 "const_0_to_3_operand")
9828 (match_operand 4 "const_0_to_3_operand")
9829 (match_operand 5 "const_16_to_19_operand")
9830 (match_operand 6 "const_16_to_19_operand")
9831 (match_operand 7 "const_4_to_7_operand")
9832 (match_operand 8 "const_4_to_7_operand")
9833 (match_operand 9 "const_20_to_23_operand")
9834 (match_operand 10 "const_20_to_23_operand")
9835 (match_operand 11 "const_8_to_11_operand")
9836 (match_operand 12 "const_8_to_11_operand")
9837 (match_operand 13 "const_24_to_27_operand")
9838 (match_operand 14 "const_24_to_27_operand")
9839 (match_operand 15 "const_12_to_15_operand")
9840 (match_operand 16 "const_12_to_15_operand")
9841 (match_operand 17 "const_28_to_31_operand")
9842 (match_operand 18 "const_28_to_31_operand")])))]
9844 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
9845 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
9846 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
9847 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
9848 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
9849 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
9850 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
9851 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
9852 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
9853 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
9854 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
9855 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
;; Pack the four lane-0 selectors into the 2-bit fields of the immediate;
;; operands 5/6 index into operand 2 (concat offset 16), hence the -16.
9858 mask = INTVAL (operands[3]);
9859 mask |= INTVAL (operands[4]) << 2;
9860 mask |= (INTVAL (operands[5]) - 16) << 4;
9861 mask |= (INTVAL (operands[6]) - 16) << 6;
9862 operands[3] = GEN_INT (mask);
9864 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9866 [(set_attr "type" "sselog")
9867 (set_attr "length_immediate" "1")
9868 (set_attr "prefix" "evex")
9869 (set_attr "mode" "V16SF")])
;; Expander for the masked 512-bit VSHUFPD builtin: decodes the 8-bit
;; immediate (operand 3) into eight explicit lane indices and forwards to
;; avx512f_shufpd512_1_mask.  Even result lanes come from operand 1, odd
;; lanes from operand 2 (concat offsets 8..15).
;; NOTE(review): the first GEN_INT argument (original line 9882, for mask
;; bit 0) is missing from this extract.
9871 (define_expand "avx512f_shufpd512_mask"
9872 [(match_operand:V8DF 0 "register_operand")
9873 (match_operand:V8DF 1 "register_operand")
9874 (match_operand:V8DF 2 "nonimmediate_operand")
9875 (match_operand:SI 3 "const_0_to_255_operand")
9876 (match_operand:V8DF 4 "register_operand")
9877 (match_operand:QI 5 "register_operand")]
9880 int mask = INTVAL (operands[3]);
9881 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
9883 GEN_INT (mask & 2 ? 9 : 8),
9884 GEN_INT (mask & 4 ? 3 : 2),
9885 GEN_INT (mask & 8 ? 11 : 10),
9886 GEN_INT (mask & 16 ? 5 : 4),
9887 GEN_INT (mask & 32 ? 13 : 12),
9888 GEN_INT (mask & 64 ? 7 : 6),
9889 GEN_INT (mask & 128 ? 15 : 14),
9890 operands[4], operands[5]));

;; Matching insn: vec_select over the concat of the two V8DF sources; each
;; predicate pins its index to the 2-element window one VSHUFPD mask bit can
;; address.  Output code folds the indices back into the 8-bit immediate.
9894 (define_insn "avx512f_shufpd512_1<mask_name>"
9895 [(set (match_operand:V8DF 0 "register_operand" "=v")
9898 (match_operand:V8DF 1 "register_operand" "v")
9899 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9900 (parallel [(match_operand 3 "const_0_to_1_operand")
9901 (match_operand 4 "const_8_to_9_operand")
9902 (match_operand 5 "const_2_to_3_operand")
9903 (match_operand 6 "const_10_to_11_operand")
9904 (match_operand 7 "const_4_to_5_operand")
9905 (match_operand 8 "const_12_to_13_operand")
9906 (match_operand 9 "const_6_to_7_operand")
9907 (match_operand 10 "const_14_to_15_operand")])))]
9911 mask = INTVAL (operands[3]);
9912 mask |= (INTVAL (operands[4]) - 8) << 1;
9913 mask |= (INTVAL (operands[5]) - 2) << 2;
9914 mask |= (INTVAL (operands[6]) - 10) << 3;
9915 mask |= (INTVAL (operands[7]) - 4) << 4;
9916 mask |= (INTVAL (operands[8]) - 12) << 5;
9917 mask |= (INTVAL (operands[9]) - 6) << 6;
9918 mask |= (INTVAL (operands[10]) - 14) << 7;
9919 operands[3] = GEN_INT (mask);
9921 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9923 [(set_attr "type" "sselog")
9924 (set_attr "length_immediate" "1")
9925 (set_attr "prefix" "evex")
9926 (set_attr "mode" "V8DF")])
;; 256-bit VSHUFPD: expander decodes the 4-bit immediate into four lane
;; indices; the <mask_expand4_name> subst adds the AVX512VL masked variant.
;; NOTE(review): the GEN_INT for mask bit 0 (original lines 9937-9939) is
;; missing from this extract.
9928 (define_expand "avx_shufpd256<mask_expand4_name>"
9929 [(match_operand:V4DF 0 "register_operand")
9930 (match_operand:V4DF 1 "register_operand")
9931 (match_operand:V4DF 2 "nonimmediate_operand")
9932 (match_operand:SI 3 "const_int_operand")]
9935 int mask = INTVAL (operands[3]);
9936 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
9940 GEN_INT (mask & 2 ? 5 : 4),
9941 GEN_INT (mask & 4 ? 3 : 2),
9942 GEN_INT (mask & 8 ? 7 : 6)
9943 <mask_expand4_args>));

;; Matching insn: rebuilds the 4-bit VSHUFPD immediate from the explicit
;; indices (operands 4/6 address operand 2 via concat offsets 4 and 6).
9947 (define_insn "avx_shufpd256_1<mask_name>"
9948 [(set (match_operand:V4DF 0 "register_operand" "=v")
9951 (match_operand:V4DF 1 "register_operand" "v")
9952 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9953 (parallel [(match_operand 3 "const_0_to_1_operand")
9954 (match_operand 4 "const_4_to_5_operand")
9955 (match_operand 5 "const_2_to_3_operand")
9956 (match_operand 6 "const_6_to_7_operand")])))]
9957 "TARGET_AVX && <mask_avx512vl_condition>"
9960 mask = INTVAL (operands[3]);
9961 mask |= (INTVAL (operands[4]) - 4) << 1;
9962 mask |= (INTVAL (operands[5]) - 2) << 2;
9963 mask |= (INTVAL (operands[6]) - 6) << 3;
9964 operands[3] = GEN_INT (mask);
9966 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
9968 [(set_attr "type" "sseshuf")
9969 (set_attr "length_immediate" "1")
9970 (set_attr "prefix" "vex")
9971 (set_attr "mode" "V4DF")])
;; 128-bit SHUFPD expander: two mask bits select one element from each
;; source (index 0/1 from operand 1, 2/3 from operand 2 of the concat).
9973 (define_expand "sse2_shufpd<mask_expand4_name>"
9974 [(match_operand:V2DF 0 "register_operand")
9975 (match_operand:V2DF 1 "register_operand")
9976 (match_operand:V2DF 2 "vector_operand")
9977 (match_operand:SI 3 "const_int_operand")]
9980 int mask = INTVAL (operands[3]);
9981 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
9982 operands[2], GEN_INT (mask & 1),
9983 GEN_INT (mask & 2 ? 3 : 2)
9984 <mask_expand4_args>));

;; Merge-masked V2DF VSHUFPD (AVX512VL): operand 5 is the merge source
;; ("0C" also allows zero-masking, signalled by %N5 in the template),
;; operand 6 the k-mask register.
9988 (define_insn "sse2_shufpd_v2df_mask"
9989 [(set (match_operand:V2DF 0 "register_operand" "=v")
9993 (match_operand:V2DF 1 "register_operand" "v")
9994 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9995 (parallel [(match_operand 3 "const_0_to_1_operand")
9996 (match_operand 4 "const_2_to_3_operand")]))
9997 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
9998 (match_operand:QI 6 "register_operand" "Yk")))]
10002 mask = INTVAL (operands[3]);
10003 mask |= (INTVAL (operands[4]) - 2) << 1;
10004 operands[3] = GEN_INT (mask);
10006 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
10008 [(set_attr "type" "sseshuf")
10009 (set_attr "length_immediate" "1")
10010 (set_attr "prefix" "evex")
10011 (set_attr "mode" "V2DF")])
10013 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; DImode interleave (unpack) patterns for 128/256/512-bit vectors,
;; expressed as vec_select over the concat of the two sources.  "high"
;; takes the odd-numbered quadwords, "low" the even-numbered ones.
;; NOTE(review): the 256-bit and 128-bit patterns are missing interior
;; lines (e.g. 10021-10023) in this extract.
10014 (define_insn "avx2_interleave_highv4di<mask_name>"
10015 [(set (match_operand:V4DI 0 "register_operand" "=v")
10018 (match_operand:V4DI 1 "register_operand" "v")
10019 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10020 (parallel [(const_int 1)
10024 "TARGET_AVX2 && <mask_avx512vl_condition>"
10025 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10026 [(set_attr "type" "sselog")
10027 (set_attr "prefix" "vex")
10028 (set_attr "mode" "OI")])

;; 512-bit high interleave: lanes 1,9,3,11,5,13,7,15 of the concat.
10030 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
10031 [(set (match_operand:V8DI 0 "register_operand" "=v")
10034 (match_operand:V8DI 1 "register_operand" "v")
10035 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10036 (parallel [(const_int 1) (const_int 9)
10037 (const_int 3) (const_int 11)
10038 (const_int 5) (const_int 13)
10039 (const_int 7) (const_int 15)])))]
10041 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10042 [(set_attr "type" "sselog")
10043 (set_attr "prefix" "evex")
10044 (set_attr "mode" "XI")])

;; 128-bit high interleave; two alternatives: legacy SSE2 two-operand form
;; and the VEX/EVEX three-operand form.
10046 (define_insn "vec_interleave_highv2di<mask_name>"
10047 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10050 (match_operand:V2DI 1 "register_operand" "0,v")
10051 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10052 (parallel [(const_int 1)
10054 "TARGET_SSE2 && <mask_avx512vl_condition>"
10056 punpckhqdq\t{%2, %0|%0, %2}
10057 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10058 [(set_attr "isa" "noavx,avx")
10059 (set_attr "type" "sselog")
10060 (set_attr "prefix_data16" "1,*")
10061 (set_attr "prefix" "orig,<mask_prefix>")
10062 (set_attr "mode" "TI")])

;; Low-interleave mirror images of the three patterns above.
10064 (define_insn "avx2_interleave_lowv4di<mask_name>"
10065 [(set (match_operand:V4DI 0 "register_operand" "=v")
10068 (match_operand:V4DI 1 "register_operand" "v")
10069 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10070 (parallel [(const_int 0)
10074 "TARGET_AVX2 && <mask_avx512vl_condition>"
10075 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10076 [(set_attr "type" "sselog")
10077 (set_attr "prefix" "vex")
10078 (set_attr "mode" "OI")])

10080 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
10081 [(set (match_operand:V8DI 0 "register_operand" "=v")
10084 (match_operand:V8DI 1 "register_operand" "v")
10085 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10086 (parallel [(const_int 0) (const_int 8)
10087 (const_int 2) (const_int 10)
10088 (const_int 4) (const_int 12)
10089 (const_int 6) (const_int 14)])))]
10091 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10092 [(set_attr "type" "sselog")
10093 (set_attr "prefix" "evex")
10094 (set_attr "mode" "XI")])

10096 (define_insn "vec_interleave_lowv2di<mask_name>"
10097 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10100 (match_operand:V2DI 1 "register_operand" "0,v")
10101 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10102 (parallel [(const_int 0)
10104 "TARGET_SSE2 && <mask_avx512vl_condition>"
10106 punpcklqdq\t{%2, %0|%0, %2}
10107 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10108 [(set_attr "isa" "noavx,avx")
10109 (set_attr "type" "sselog")
10110 (set_attr "prefix_data16" "1,*")
10111 (set_attr "prefix" "orig,vex")
10112 (set_attr "mode" "TI")])
;; Unmasked 128-bit SHUFPD for both V2DF and V2DI (VI8F_128 iterator).
;; Alternative 0 is the legacy two-operand SSE2 form, alternative 1 the
;; three-operand VEX/EVEX form; the output switch picks the template and
;; first rebuilds the 2-bit immediate from the selected indices.
10114 (define_insn "sse2_shufpd_<mode>"
10115 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
10116 (vec_select:VI8F_128
10117 (vec_concat:<ssedoublevecmode>
10118 (match_operand:VI8F_128 1 "register_operand" "0,v")
10119 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
10120 (parallel [(match_operand 3 "const_0_to_1_operand")
10121 (match_operand 4 "const_2_to_3_operand")])))]
10125 mask = INTVAL (operands[3]);
10126 mask |= (INTVAL (operands[4]) - 2) << 1;
10127 operands[3] = GEN_INT (mask);
10129 switch (which_alternative)
10132 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
10134 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10136 gcc_unreachable ();
10139 [(set_attr "isa" "noavx,avx")
10140 (set_attr "type" "sseshuf")
10141 (set_attr "length_immediate" "1")
10142 (set_attr "prefix" "orig,maybe_evex")
10143 (set_attr "mode" "V2DF")])
10145 ;; Avoid combining registers from different units in a single alternative,
10146 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the HIGH double (element 1) of a V2DF: to memory via movhpd, to
;; an SSE register via unpckhpd/vunpckhpd, or from memory via x87/integer
;; moves (the "o" alternatives).
10147 (define_insn "sse2_storehpd"
10148 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
10150 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
10151 (parallel [(const_int 1)])))]
10152 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10154 %vmovhpd\t{%1, %0|%0, %1}
10156 vunpckhpd\t{%d1, %0|%0, %d1}
10160 [(set_attr "isa" "*,noavx,avx,*,*,*")
10161 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
10162 (set (attr "prefix_data16")
10164 (and (eq_attr "alternative" "0")
10165 (not (match_test "TARGET_AVX")))
10167 (const_string "*")))
10168 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
10169 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])

;; After reload, extracting element 1 from a V2DF in memory is just a DF
;; load at offset 8.
;; NOTE(review): the "(define_split" header line (orig. 10171) is missing
;; from this extract.
10172 [(set (match_operand:DF 0 "register_operand")
10174 (match_operand:V2DF 1 "memory_operand")
10175 (parallel [(const_int 1)])))]
10176 "TARGET_SSE2 && reload_completed"
10177 [(set (match_dup 0) (match_dup 1))]
10178 "operands[1] = adjust_address (operands[1], DFmode, 8);")

;; SSE1-only fallback for extracting the high double, using single-
;; precision moves that copy the same 64 bits.
10180 (define_insn "*vec_extractv2df_1_sse"
10181 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10183 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
10184 (parallel [(const_int 1)])))]
10185 "!TARGET_SSE2 && TARGET_SSE
10186 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10188 movhps\t{%1, %0|%0, %1}
10189 movhlps\t{%1, %0|%0, %1}
10190 movlps\t{%H1, %0|%0, %H1}"
10191 [(set_attr "type" "ssemov")
10192 (set_attr "mode" "V2SF,V4SF,V2SF")])
10194 ;; Avoid combining registers from different units in a single alternative,
10195 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the LOW double (element 0) of a V2DF.
10196 (define_insn "sse2_storelpd"
10197 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
10199 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
10200 (parallel [(const_int 0)])))]
10201 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10203 %vmovlpd\t{%1, %0|%0, %1}
10208 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
10209 (set (attr "prefix_data16")
10210 (if_then_else (eq_attr "alternative" "0")
10212 (const_string "*")))
10213 (set_attr "prefix" "maybe_vex")
10214 (set_attr "mode" "V1DF,DF,DF,DF,DF")])

;; After reload, element 0 of a V2DF is simply its low DF subword.
;; NOTE(review): the "(define_split" header line (orig. 10216) is missing
;; from this extract.
10217 [(set (match_operand:DF 0 "register_operand")
10219 (match_operand:V2DF 1 "nonimmediate_operand")
10220 (parallel [(const_int 0)])))]
10221 "TARGET_SSE2 && reload_completed"
10222 [(set (match_dup 0) (match_dup 1))]
10223 "operands[1] = gen_lowpart (DFmode, operands[1]);")

;; SSE1-only fallback for extracting the low double.
10225 (define_insn "*vec_extractv2df_0_sse"
10226 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10228 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
10229 (parallel [(const_int 0)])))]
10230 "!TARGET_SSE2 && TARGET_SSE
10231 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10233 movlps\t{%1, %0|%0, %1}
10234 movaps\t{%1, %0|%0, %1}
10235 movlps\t{%1, %0|%0, %q1}"
10236 [(set_attr "type" "ssemov")
10237 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Replace the HIGH double of a V2DF (keep element 0 of operand 1, insert
;; operand 2 as element 1).  The _exp expander legitimizes operands so at
;; most one is a MEM, then fixes up the destination.
10239 (define_expand "sse2_loadhpd_exp"
10240 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10243 (match_operand:V2DF 1 "nonimmediate_operand")
10244 (parallel [(const_int 0)]))
10245 (match_operand:DF 2 "nonimmediate_operand")))]
10248 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10250 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
10252 /* Fix up the destination if needed.  */
10253 if (dst != operands[0])
10254 emit_move_insn (operands[0], dst);

10259 ;; Avoid combining registers from different units in a single alternative,
10260 ;; see comment above inline_secondary_memory_needed function in i386.c
10261 (define_insn "sse2_loadhpd"
10262 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10263 "=x,v,x,v ,o,o ,o")
10266 (match_operand:V2DF 1 "nonimmediate_operand"
10267 " 0,v,0,v ,0,0 ,0")
10268 (parallel [(const_int 0)]))
10269 (match_operand:DF 2 "nonimmediate_operand"
10270 " m,m,x,Yv,x,*f,r")))]
10271 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10273 movhpd\t{%2, %0|%0, %2}
10274 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10275 unpcklpd\t{%2, %0|%0, %2}
10276 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10280 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10281 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
10282 (set (attr "prefix_data16")
10283 (if_then_else (eq_attr "alternative" "0")
10285 (const_string "*")))
10286 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
10287 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])

;; After reload, storing a register into the high half of an in-memory
;; V2DF is a plain DF store at offset 8.
;; NOTE(review): the "(define_split" header line (orig. 10289) is missing
;; from this extract.
10290 [(set (match_operand:V2DF 0 "memory_operand")
10292 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
10293 (match_operand:DF 1 "register_operand")))]
10294 "TARGET_SSE2 && reload_completed"
10295 [(set (match_dup 0) (match_dup 1))]
10296 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Replace the LOW double of a V2DF (insert operand 2 as element 0, keep
;; element 1 of operand 1).  Same expander/insn/split structure as loadhpd.
10298 (define_expand "sse2_loadlpd_exp"
10299 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10301 (match_operand:DF 2 "nonimmediate_operand")
10303 (match_operand:V2DF 1 "nonimmediate_operand")
10304 (parallel [(const_int 1)]))))]
10307 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10309 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
10311 /* Fix up the destination if needed.  */
10312 if (dst != operands[0])
10313 emit_move_insn (operands[0], dst);

10318 ;; Avoid combining registers from different units in a single alternative,
10319 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Alternative 0 ("C" zero upper half) becomes a zero-extending movq;
;; the shufpd alternative (5) encodes immediate 2 = {src1[0], dst[1]}.
10320 (define_insn "sse2_loadlpd"
10321 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10322 "=v,x,v,x,v,x,x,v,m,m ,m")
10324 (match_operand:DF 2 "nonimmediate_operand"
10325 "vm,m,m,x,v,0,0,v,x,*f,r")
10327 (match_operand:V2DF 1 "nonimm_or_0_operand"
10328 " C,0,v,0,v,x,o,o,0,0 ,0")
10329 (parallel [(const_int 1)]))))]
10330 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10332 %vmovq\t{%2, %0|%0, %2}
10333 movlpd\t{%2, %0|%0, %2}
10334 vmovlpd\t{%2, %1, %0|%0, %1, %2}
10335 movsd\t{%2, %0|%0, %2}
10336 vmovsd\t{%2, %1, %0|%0, %1, %2}
10337 shufpd\t{$2, %1, %0|%0, %1, 2}
10338 movhpd\t{%H1, %0|%0, %H1}
10339 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
10343 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
10345 (cond [(eq_attr "alternative" "5")
10346 (const_string "sselog")
10347 (eq_attr "alternative" "9")
10348 (const_string "fmov")
10349 (eq_attr "alternative" "10")
10350 (const_string "imov")
10352 (const_string "ssemov")))
10353 (set (attr "prefix_data16")
10354 (if_then_else (eq_attr "alternative" "1,6")
10356 (const_string "*")))
10357 (set (attr "length_immediate")
10358 (if_then_else (eq_attr "alternative" "5")
10360 (const_string "*")))
10361 (set (attr "prefix")
10362 (cond [(eq_attr "alternative" "0")
10363 (const_string "maybe_vex")
10364 (eq_attr "alternative" "1,3,5,6")
10365 (const_string "orig")
10366 (eq_attr "alternative" "2,4,7")
10367 (const_string "maybe_evex")
10369 (const_string "*")))
10370 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])

;; After reload, storing a register into the low half of an in-memory
;; V2DF is a plain DF store at offset 0.
;; NOTE(review): the "(define_split" header line (orig. 10372) is missing
;; from this extract.
10373 [(set (match_operand:V2DF 0 "memory_operand")
10375 (match_operand:DF 1 "register_operand")
10376 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
10377 "TARGET_SSE2 && reload_completed"
10378 [(set (match_dup 0) (match_dup 1))]
10379 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; MOVSD-style merge: result takes its low double from operand 2 and its
;; high double from operand 1, with many alternatives covering register,
;; load and store forms (movsd/movlpd/shufpd/movhps variants).
10381 (define_insn "sse2_movsd"
10382 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
10384 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
10385 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
10389 movsd\t{%2, %0|%0, %2}
10390 vmovsd\t{%2, %1, %0|%0, %1, %2}
10391 movlpd\t{%2, %0|%0, %q2}
10392 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
10393 %vmovlpd\t{%2, %0|%q0, %2}
10394 shufpd\t{$2, %1, %0|%0, %1, 2}
10395 movhps\t{%H1, %0|%0, %H1}
10396 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
10397 %vmovhps\t{%1, %H0|%H0, %1}"
10398 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
10401 (eq_attr "alternative" "5")
10402 (const_string "sselog")
10403 (const_string "ssemov")))
10404 (set (attr "prefix_data16")
10406 (and (eq_attr "alternative" "2,4")
10407 (not (match_test "TARGET_AVX")))
10409 (const_string "*")))
10410 (set (attr "length_immediate")
10411 (if_then_else (eq_attr "alternative" "5")
10413 (const_string "*")))
10414 (set (attr "prefix")
10415 (cond [(eq_attr "alternative" "1,3,7")
10416 (const_string "maybe_evex")
10417 (eq_attr "alternative" "4,8")
10418 (const_string "maybe_vex")
10420 (const_string "orig")))
10421 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Broadcast a DF scalar to both V2DF lanes.  Alternative 0 is the pre-SSE3
;; in-place form; alternatives 1/2 use (v)movddup (SSE3 / AVX512VL masked).
;; NOTE(review): the alternative-0 template line(s) (orig. 10428-10429)
;; are missing from this extract.
10423 (define_insn "vec_dupv2df<mask_name>"
10424 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
10425 (vec_duplicate:V2DF
10426 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10427 "TARGET_SSE2 && <mask_avx512vl_condition>"
10430 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10431 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10432 [(set_attr "isa" "noavx,sse3,avx512vl")
10433 (set_attr "type" "sselog1")
10434 (set_attr "prefix" "orig,maybe_vex,evex")
10435 (set_attr "mode" "V2DF,DF,DF")])
;; Build a V2DF from two DF halves: operand 1 goes to element 0, operand 2
;; to element 1.  Covers unpcklpd, movddup (when both halves are the same
;; memory operand, hence the "1" constraint and SSE3 condition), movhpd,
;; zero-extending movq ("C" zero), and SSE1 movlhps/movhps fallbacks.
10437 (define_insn "vec_concatv2df"
10438 [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x, v,x,x")
10440 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10441 (match_operand:DF 2 "nonimm_or_0_operand"  " x,x,v,1,1,m,m, C,x,m")))]
10443 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10444 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10446 unpcklpd\t{%2, %0|%0, %2}
10447 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10448 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10449 %vmovddup\t{%1, %0|%0, %1}
10450 vmovddup\t{%1, %0|%0, %1}
10451 movhpd\t{%2, %0|%0, %2}
10452 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10453 %vmovq\t{%1, %0|%0, %1}
10454 movlhps\t{%2, %0|%0, %2}
10455 movhps\t{%2, %0|%0, %2}"
10457 (cond [(eq_attr "alternative" "0,5")
10458 (const_string "sse2_noavx")
10459 (eq_attr "alternative" "1,6")
10460 (const_string "avx")
10461 (eq_attr "alternative" "2,4")
10462 (const_string "avx512vl")
10463 (eq_attr "alternative" "3")
10464 (const_string "sse3")
10465 (eq_attr "alternative" "7")
10466 (const_string "sse2")
10468 (const_string "noavx")))
10471 (eq_attr "alternative" "0,1,2,3,4")
10472 (const_string "sselog")
10473 (const_string "ssemov")))
10474 (set (attr "prefix_data16")
10475 (if_then_else (eq_attr "alternative" "5")
10477 (const_string "*")))
10478 (set (attr "prefix")
10479 (cond [(eq_attr "alternative" "1,6")
10480 (const_string "vex")
10481 (eq_attr "alternative" "2,4")
10482 (const_string "evex")
10483 (eq_attr "alternative" "3,7")
10484 (const_string "maybe_vex")
10486 (const_string "orig")))
10487 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10489 ;; vmovq clears also the higher bits.
;; Set element 0 of a 256/512-bit DF vector whose other elements are zero
;; (operand 1 is const0): a single vmovq to the low xmm (%x0) suffices
;; because vmovq zeroes everything above bit 63.
10490 (define_insn "vec_set<mode>_0"
10491 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10492 (vec_merge:VF2_512_256
10493 (vec_duplicate:VF2_512_256
10494 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
10495 (match_operand:VF2_512_256 1 "const0_operand" "C")
10498 "vmovq\t{%2, %x0|%x0, %2}"
10499 [(set_attr "type" "ssemov")
10500 (set_attr "prefix" "maybe_evex")
10501 (set_attr "mode" "DF")])
10503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10505 ;; Parallel integer down-conversion operations
10507 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes reachable from 512-bit sources with AVX512F alone,
;; plus the attribute maps: source mode, its lowercase name, and the
;; VPMOV suffix (e.g. V16SI -> V16QI is vpmovdb).
10509 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10510 (define_mode_attr pmov_src_mode
10511 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10512 (define_mode_attr pmov_src_lower
10513 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10514 (define_mode_attr pmov_suff_1
10515 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])

;; Standard-name truncation expander; the *-insn below does the work.
10517 (define_expand "trunc<pmov_src_lower><mode>2"
10518 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
10519 (truncate:PMOV_DST_MODE_1
10520 (match_operand:<pmov_src_mode> 1 "register_operand")))]

;; VPMOV{,S,US} down-conversion (any_truncate covers truncate and the
;; saturating unspecs); second alternative is the direct-to-memory form.
10523 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10524 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10525 (any_truncate:PMOV_DST_MODE_1
10526 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10528 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10529 [(set_attr "type" "ssemov")
10530 (set_attr "memory" "none,store")
10531 (set_attr "prefix" "evex")
10532 (set_attr "mode" "<sseinsnmode>")])

;; Masked variant: merge with operand 2 under k-mask operand 3
;; (%N2 emits {z} for the zero-masking "C" case).
10534 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10535 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10536 (vec_merge:PMOV_DST_MODE_1
10537 (any_truncate:PMOV_DST_MODE_1
10538 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10539 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10540 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10542 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10543 [(set_attr "type" "ssemov")
10544 (set_attr "memory" "none,store")
10545 (set_attr "prefix" "evex")
10546 (set_attr "mode" "<sseinsnmode>")])

;; Builtin expander for the masked store form.
10548 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10549 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10550 (vec_merge:PMOV_DST_MODE_1
10551 (any_truncate:PMOV_DST_MODE_1
10552 (match_operand:<pmov_src_mode> 1 "register_operand"))
10554 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; V32HI -> V32QI truncation (vpmov{,s,us}wb), requiring AVX512BW:
;; expander, plain insn, masked insn, and masked-store expander.
10557 (define_expand "truncv32hiv32qi2"
10558 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10560 (match_operand:V32HI 1 "register_operand")))]

10563 (define_insn "avx512bw_<code>v32hiv32qi2"
10564 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10565 (any_truncate:V32QI
10566 (match_operand:V32HI 1 "register_operand" "v,v")))]
10568 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
10569 [(set_attr "type" "ssemov")
10570 (set_attr "memory" "none,store")
10571 (set_attr "prefix" "evex")
10572 (set_attr "mode" "XI")])

;; Masked form: operand 3 is the SImode (32-bit) k-mask, one bit per byte.
10574 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
10575 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10577 (any_truncate:V32QI
10578 (match_operand:V32HI 1 "register_operand" "v,v"))
10579 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
10580 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
10582 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10583 [(set_attr "type" "ssemov")
10584 (set_attr "memory" "none,store")
10585 (set_attr "prefix" "evex")
10586 (set_attr "mode" "XI")])

10588 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
10589 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10591 (any_truncate:V32QI
10592 (match_operand:V32HI 1 "register_operand"))
10594 (match_operand:SI 2 "register_operand")))]
;; Half-width truncations with 128-bit destinations (AVX512VL), e.g.
;; V8SI -> V8HI.  pmov_suff_2 maps the destination mode to the VPMOV
;; suffix of the corresponding double-width source.
10597 (define_mode_iterator PMOV_DST_MODE_2
10598 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
10599 (define_mode_attr pmov_suff_2
10600 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])

10602 (define_expand "trunc<ssedoublemodelower><mode>2"
10603 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10604 (truncate:PMOV_DST_MODE_2
10605 (match_operand:<ssedoublemode> 1 "register_operand")))]

10608 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
10609 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10610 (any_truncate:PMOV_DST_MODE_2
10611 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
10613 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
10614 [(set_attr "type" "ssemov")
10615 (set_attr "memory" "none,store")
10616 (set_attr "prefix" "evex")
10617 (set_attr "mode" "<sseinsnmode>")])

;; Masked merge variant, structured like the MODE_1 masked pattern above.
10619 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
10620 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10621 (vec_merge:PMOV_DST_MODE_2
10622 (any_truncate:PMOV_DST_MODE_2
10623 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
10624 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
10625 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10627 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10628 [(set_attr "type" "ssemov")
10629 (set_attr "memory" "none,store")
10630 (set_attr "prefix" "evex")
10631 (set_attr "mode" "<sseinsnmode>")])

10633 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
10634 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10635 (vec_merge:PMOV_DST_MODE_2
10636 (any_truncate:PMOV_DST_MODE_2
10637 (match_operand:<ssedoublemode> 1 "register_operand"))
10639 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Truncations to sub-128-bit QI destinations (V2QI..V8QI).  The hardware
;; result register is a full V16QI whose unused tail is zero, so
;; pmov_dst_zeroed_3 gives the mode of that zero tail: dst + zeroed always
;; totals 16 bytes (V4QI+V12QI, V2QI+V14QI, V8QI+V8QI).
10642 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
10643 (define_mode_attr pmov_dst_3_lower
10644 [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
10645 (define_mode_attr pmov_dst_3
10646 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
10647 (define_mode_attr pmov_dst_zeroed_3
10648 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
10649 (define_mode_attr pmov_suff_3
10650 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])

;; Expander: view the small destination as the low part of a V16QI and
;; emit the concat-with-zeros insn below.
10652 (define_expand "trunc<mode><pmov_dst_3_lower>2"
10653 [(set (match_operand:<pmov_dst_3> 0 "register_operand")
10654 (truncate:<pmov_dst_3>
10655 (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
10658 operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0);
10659 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0],
10661 CONST0_RTX (<pmov_dst_zeroed_3>mode)));

;; The insn: truncated result in the low bytes, zeros (operand 2) above.
10665 (define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
10666 [(set (match_operand:V16QI 0 "register_operand" "=v")
10668 (any_truncate:<pmov_dst_3>
10669 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
10670 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
10672 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10673 [(set_attr "type" "ssemov")
10674 (set_attr "prefix" "evex")
10675 (set_attr "mode" "TI")])

;; V2DI -> V2QI direct store; the _2 variant matches the same store
;; wrapped in an HImode subreg and splits it back before reload.
10677 (define_insn "*avx512vl_<code>v2div2qi2_store_1"
10678 [(set (match_operand:V2QI 0 "memory_operand" "=m")
10680 (match_operand:V2DI 1 "register_operand" "v")))]
10682 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10683 [(set_attr "type" "ssemov")
10684 (set_attr "memory" "store")
10685 (set_attr "prefix" "evex")
10686 (set_attr "mode" "TI")])

10688 (define_insn_and_split "*avx512vl_<code>v2div2qi2_store_2"
10689 [(set (match_operand:HI 0 "memory_operand")
10692 (match_operand:V2DI 1 "register_operand")) 0))]
10693 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10696 [(set (match_dup 0)
10697 (any_truncate:V2QI (match_dup 1)))]
10698 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")

;; Masked V2DI -> V2QI: merge the 2-byte result with the low bytes of
;; operand 2 under k-mask operand 3; bytes 2..15 are explicit zeros.
10700 (define_insn "avx512vl_<code>v2div2qi2_mask"
10701 [(set (match_operand:V16QI 0 "register_operand" "=v")
10705 (match_operand:V2DI 1 "register_operand" "v"))
10707 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10708 (parallel [(const_int 0) (const_int 1)]))
10709 (match_operand:QI 3 "register_operand" "Yk"))
10710 (const_vector:V14QI [(const_int 0) (const_int 0)
10711 (const_int 0) (const_int 0)
10712 (const_int 0) (const_int 0)
10713 (const_int 0) (const_int 0)
10714 (const_int 0) (const_int 0)
10715 (const_int 0) (const_int 0)
10716 (const_int 0) (const_int 0)])))]
10718 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10719 [(set_attr "type" "ssemov")
10720 (set_attr "prefix" "evex")
10721 (set_attr "mode" "TI")])
;; Zero-masked variant of the above (merge source is an all-zero V2QI),
;; emitted with explicit {z} in the template.
10723 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
10724 [(set (match_operand:V16QI 0 "register_operand" "=v")
10728 (match_operand:V2DI 1 "register_operand" "v"))
10729 (const_vector:V2QI [(const_int 0) (const_int 0)])
10730 (match_operand:QI 2 "register_operand" "Yk"))
10731 (const_vector:V14QI [(const_int 0) (const_int 0)
10732 (const_int 0) (const_int 0)
10733 (const_int 0) (const_int 0)
10734 (const_int 0) (const_int 0)
10735 (const_int 0) (const_int 0)
10736 (const_int 0) (const_int 0)
10737 (const_int 0) (const_int 0)])))]
10739 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10740 [(set_attr "type" "ssemov")
10741 (set_attr "prefix" "evex")
10742 (set_attr "mode" "TI")])

;; Masked store to a V2QI memory destination; only bytes whose mask bit
;; is set are written.
10744 (define_insn "*avx512vl_<code>v2div2qi2_mask_store_1"
10745 [(set (match_operand:V2QI 0 "memory_operand" "=m")
10748 (match_operand:V2DI 1 "register_operand" "v"))
10750 (match_operand:QI 2 "register_operand" "Yk")))]
10752 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
10753 [(set_attr "type" "ssemov")
10754 (set_attr "memory" "store")
10755 (set_attr "prefix" "evex")
10756 (set_attr "mode" "TI")])

;; Same masked store matched through an HImode subreg; split back to the
;; V2QI form before reload.
10758 (define_insn_and_split "avx512vl_<code>v2div2qi2_mask_store_2"
10759 [(set (match_operand:HI 0 "memory_operand")
10763 (match_operand:V2DI 1 "register_operand"))
10769 (parallel [(const_int 0) (const_int 1)]))
10770 (match_operand:QI 2 "register_operand")) 0))]
10771 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10774 [(set (match_dup 0)
10776 (any_truncate:V2QI (match_dup 1))
10779 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
10781 (define_insn "*avx512vl_<code><mode>v4qi2_store_1"
10782 [(set (match_operand:V4QI 0 "memory_operand" "=m")
10784 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
10786 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10787 [(set_attr "type" "ssemov")
10788 (set_attr "memory" "store")
10789 (set_attr "prefix" "evex")
10790 (set_attr "mode" "TI")])
10792 (define_insn_and_split "*avx512vl_<code><mode>v4qi2_store_2"
10793 [(set (match_operand:SI 0 "memory_operand")
10796 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
10797 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10800 [(set (match_dup 0)
10801 (any_truncate:V4QI (match_dup 1)))]
10802 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
10804 (define_insn "avx512vl_<code><mode>v4qi2_mask"
10805 [(set (match_operand:V16QI 0 "register_operand" "=v")
10809 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10811 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10812 (parallel [(const_int 0) (const_int 1)
10813 (const_int 2) (const_int 3)]))
10814 (match_operand:QI 3 "register_operand" "Yk"))
10815 (const_vector:V12QI [(const_int 0) (const_int 0)
10816 (const_int 0) (const_int 0)
10817 (const_int 0) (const_int 0)
10818 (const_int 0) (const_int 0)
10819 (const_int 0) (const_int 0)
10820 (const_int 0) (const_int 0)])))]
10822 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10823 [(set_attr "type" "ssemov")
10824 (set_attr "prefix" "evex")
10825 (set_attr "mode" "TI")])
10827 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
10828 [(set (match_operand:V16QI 0 "register_operand" "=v")
10832 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10833 (const_vector:V4QI [(const_int 0) (const_int 0)
10834 (const_int 0) (const_int 0)])
10835 (match_operand:QI 2 "register_operand" "Yk"))
10836 (const_vector:V12QI [(const_int 0) (const_int 0)
10837 (const_int 0) (const_int 0)
10838 (const_int 0) (const_int 0)
10839 (const_int 0) (const_int 0)
10840 (const_int 0) (const_int 0)
10841 (const_int 0) (const_int 0)])))]
10843 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10844 [(set_attr "type" "ssemov")
10845 (set_attr "prefix" "evex")
10846 (set_attr "mode" "TI")])
10848 (define_insn "*avx512vl_<code><mode>v4qi2_mask_store_1"
10849 [(set (match_operand:V4QI 0 "memory_operand" "=m")
10852 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10854 (match_operand:QI 2 "register_operand" "Yk")))]
10856 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
10857 [(set_attr "type" "ssemov")
10858 (set_attr "memory" "store")
10859 (set_attr "prefix" "evex")
10860 (set_attr "mode" "TI")])
10862 (define_insn_and_split "avx512vl_<code><mode>v4qi2_mask_store_2"
10863 [(set (match_operand:SI 0 "memory_operand")
10867 (match_operand:VI4_128_8_256 1 "register_operand"))
10873 (parallel [(const_int 0) (const_int 1)
10874 (const_int 2) (const_int 3)]))
10875 (match_operand:QI 2 "register_operand")) 0))]
10876 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10879 [(set (match_dup 0)
10881 (any_truncate:V4QI (match_dup 1))
10884 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
;; Source modes whose truncation yields eight QImode elements, used by
;; the *avx512vl_<code><mode>v8qi2* patterns below: V8HI requires
;; AVX512BW (byte/word vpmov forms), V8SI is available without it.
10886 (define_mode_iterator VI2_128_BW_4_256
10887 [(V8HI "TARGET_AVX512BW") V8SI])
10889 (define_insn "*avx512vl_<code><mode>v8qi2_store_1"
10890 [(set (match_operand:V8QI 0 "memory_operand" "=m")
10892 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")))]
10894 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10895 [(set_attr "type" "ssemov")
10896 (set_attr "memory" "store")
10897 (set_attr "prefix" "evex")
10898 (set_attr "mode" "TI")])
10900 (define_insn_and_split "*avx512vl_<code><mode>v8qi2_store_2"
10901 [(set (match_operand:DI 0 "memory_operand" "=m")
10904 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) 0))]
10905 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10908 [(set (match_dup 0)
10909 (any_truncate:V8QI (match_dup 1)))]
10910 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
10912 (define_insn "avx512vl_<code><mode>v8qi2_mask"
10913 [(set (match_operand:V16QI 0 "register_operand" "=v")
10917 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10919 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10920 (parallel [(const_int 0) (const_int 1)
10921 (const_int 2) (const_int 3)
10922 (const_int 4) (const_int 5)
10923 (const_int 6) (const_int 7)]))
10924 (match_operand:QI 3 "register_operand" "Yk"))
10925 (const_vector:V8QI [(const_int 0) (const_int 0)
10926 (const_int 0) (const_int 0)
10927 (const_int 0) (const_int 0)
10928 (const_int 0) (const_int 0)])))]
10930 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10931 [(set_attr "type" "ssemov")
10932 (set_attr "prefix" "evex")
10933 (set_attr "mode" "TI")])
10935 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
10936 [(set (match_operand:V16QI 0 "register_operand" "=v")
10940 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10941 (const_vector:V8QI [(const_int 0) (const_int 0)
10942 (const_int 0) (const_int 0)
10943 (const_int 0) (const_int 0)
10944 (const_int 0) (const_int 0)])
10945 (match_operand:QI 2 "register_operand" "Yk"))
10946 (const_vector:V8QI [(const_int 0) (const_int 0)
10947 (const_int 0) (const_int 0)
10948 (const_int 0) (const_int 0)
10949 (const_int 0) (const_int 0)])))]
10951 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10952 [(set_attr "type" "ssemov")
10953 (set_attr "prefix" "evex")
10954 (set_attr "mode" "TI")])
10956 (define_insn "*avx512vl_<code><mode>v8qi2_mask_store_1"
10957 [(set (match_operand:V8QI 0 "memory_operand" "=m")
10960 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10962 (match_operand:QI 2 "register_operand" "Yk")))]
10964 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
10965 [(set_attr "type" "ssemov")
10966 (set_attr "memory" "store")
10967 (set_attr "prefix" "evex")
10968 (set_attr "mode" "TI")])
10970 (define_insn_and_split "avx512vl_<code><mode>v8qi2_mask_store_2"
10971 [(set (match_operand:DI 0 "memory_operand")
10975 (match_operand:VI2_128_BW_4_256 1 "register_operand"))
10981 (parallel [(const_int 0) (const_int 1)
10982 (const_int 2) (const_int 3)
10983 (const_int 4) (const_int 5)
10984 (const_int 6) (const_int 7)]))
10985 (match_operand:QI 2 "register_operand")) 0))]
10986 "TARGET_AVX512VL && ix86_pre_reload_split ()"
10989 [(set (match_dup 0)
10991 (any_truncate:V8QI (match_dup 1))
10994 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Truncations from DImode/SImode-element vectors down to HImode-element
;; vectors (vpmovq[w]/vpmovdw family).
;; pmov_dst_4:        natural destination mode of the truncation.
;; pmov_dst_zeroed_4: mode of the zero-filled tail that pads the result
;;   out to a full V8HI register; note V2DI maps to V6HI because a
;;   V2HI result plus six zero halfwords fills the 8-element register.
;; pmov_suff_4:       mnemonic suffix (qw = quad->word, dw = dword->word).
10996 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
10997 (define_mode_attr pmov_dst_4
10998 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
10999 (define_mode_attr pmov_dst_zeroed_4
11000 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
11001 (define_mode_attr pmov_suff_4
11002 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
11004 (define_expand "trunc<mode><pmov_dst_4>2"
11005 [(set (match_operand:<pmov_dst_4> 0 "register_operand")
11006 (truncate:<pmov_dst_4>
11007 (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
11010 operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0);
11011 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0],
11013 CONST0_RTX (<pmov_dst_zeroed_4>mode)));
11018 (define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
11019 [(set (match_operand:V8HI 0 "register_operand" "=v")
11021 (any_truncate:<pmov_dst_4>
11022 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
11023 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
11025 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11026 [(set_attr "type" "ssemov")
11027 (set_attr "prefix" "evex")
11028 (set_attr "mode" "TI")])
11030 (define_insn "*avx512vl_<code><mode>v4hi2_store_1"
11031 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11033 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
11035 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11036 [(set_attr "type" "ssemov")
11037 (set_attr "memory" "store")
11038 (set_attr "prefix" "evex")
11039 (set_attr "mode" "TI")])
11041 (define_insn_and_split "*avx512vl_<code><mode>v4hi2_store_2"
11042 [(set (match_operand:DI 0 "memory_operand")
11045 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
11046 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11049 [(set (match_dup 0)
11050 (any_truncate:V4HI (match_dup 1)))]
11051 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
11053 (define_insn "avx512vl_<code><mode>v4hi2_mask"
11054 [(set (match_operand:V8HI 0 "register_operand" "=v")
11058 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11060 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11061 (parallel [(const_int 0) (const_int 1)
11062 (const_int 2) (const_int 3)]))
11063 (match_operand:QI 3 "register_operand" "Yk"))
11064 (const_vector:V4HI [(const_int 0) (const_int 0)
11065 (const_int 0) (const_int 0)])))]
11067 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11068 [(set_attr "type" "ssemov")
11069 (set_attr "prefix" "evex")
11070 (set_attr "mode" "TI")])
11072 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
11073 [(set (match_operand:V8HI 0 "register_operand" "=v")
11077 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11078 (const_vector:V4HI [(const_int 0) (const_int 0)
11079 (const_int 0) (const_int 0)])
11080 (match_operand:QI 2 "register_operand" "Yk"))
11081 (const_vector:V4HI [(const_int 0) (const_int 0)
11082 (const_int 0) (const_int 0)])))]
11084 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11085 [(set_attr "type" "ssemov")
11086 (set_attr "prefix" "evex")
11087 (set_attr "mode" "TI")])
11089 (define_insn "*avx512vl_<code><mode>v4hi2_mask_store_1"
11090 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11093 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11095 (match_operand:QI 2 "register_operand" "Yk")))]
11098 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
11099 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
11100 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
11102 [(set_attr "type" "ssemov")
11103 (set_attr "memory" "store")
11104 (set_attr "prefix" "evex")
11105 (set_attr "mode" "TI")])
11107 (define_insn_and_split "avx512vl_<code><mode>v4hi2_mask_store_2"
11108 [(set (match_operand:DI 0 "memory_operand")
11112 (match_operand:VI4_128_8_256 1 "register_operand"))
11118 (parallel [(const_int 0) (const_int 1)
11119 (const_int 2) (const_int 3)]))
11120 (match_operand:QI 2 "register_operand")) 0))]
11121 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11124 [(set (match_dup 0)
11126 (any_truncate:V4HI (match_dup 1))
11129 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
11132 (define_insn "*avx512vl_<code>v2div2hi2_store_1"
11133 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11135 (match_operand:V2DI 1 "register_operand" "v")))]
11137 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
11138 [(set_attr "type" "ssemov")
11139 (set_attr "memory" "store")
11140 (set_attr "prefix" "evex")
11141 (set_attr "mode" "TI")])
11143 (define_insn_and_split "*avx512vl_<code>v2div2hi2_store_2"
11144 [(set (match_operand:SI 0 "memory_operand")
11147 (match_operand:V2DI 1 "register_operand")) 0))]
11148 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11151 [(set (match_dup 0)
11152 (any_truncate:V2HI (match_dup 1)))]
11153 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
11155 (define_insn "avx512vl_<code>v2div2hi2_mask"
11156 [(set (match_operand:V8HI 0 "register_operand" "=v")
11160 (match_operand:V2DI 1 "register_operand" "v"))
11162 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11163 (parallel [(const_int 0) (const_int 1)]))
11164 (match_operand:QI 3 "register_operand" "Yk"))
11165 (const_vector:V6HI [(const_int 0) (const_int 0)
11166 (const_int 0) (const_int 0)
11167 (const_int 0) (const_int 0)])))]
11169 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11170 [(set_attr "type" "ssemov")
11171 (set_attr "prefix" "evex")
11172 (set_attr "mode" "TI")])
11174 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
11175 [(set (match_operand:V8HI 0 "register_operand" "=v")
11179 (match_operand:V2DI 1 "register_operand" "v"))
11180 (const_vector:V2HI [(const_int 0) (const_int 0)])
11181 (match_operand:QI 2 "register_operand" "Yk"))
11182 (const_vector:V6HI [(const_int 0) (const_int 0)
11183 (const_int 0) (const_int 0)
11184 (const_int 0) (const_int 0)])))]
11186 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11187 [(set_attr "type" "ssemov")
11188 (set_attr "prefix" "evex")
11189 (set_attr "mode" "TI")])
11191 (define_insn "*avx512vl_<code>v2div2hi2_mask_store_1"
11192 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11195 (match_operand:V2DI 1 "register_operand" "v"))
11197 (match_operand:QI 2 "register_operand" "Yk")))]
11199 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
11200 [(set_attr "type" "ssemov")
11201 (set_attr "memory" "store")
11202 (set_attr "prefix" "evex")
11203 (set_attr "mode" "TI")])
11205 (define_insn_and_split "avx512vl_<code>v2div2hi2_mask_store_2"
11206 [(set (match_operand:SI 0 "memory_operand")
11210 (match_operand:V2DI 1 "register_operand"))
11216 (parallel [(const_int 0) (const_int 1)]))
11217 (match_operand:QI 2 "register_operand")) 0))]
11218 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11221 [(set (match_dup 0)
11223 (any_truncate:V2HI (match_dup 1))
11226 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
11228 (define_expand "truncv2div2si2"
11229 [(set (match_operand:V2SI 0 "register_operand")
11231 (match_operand:V2DI 1 "register_operand")))]
11234 operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0);
11235 emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0],
11237 CONST0_RTX (V2SImode)));
11241 (define_insn "avx512vl_<code>v2div2si2"
11242 [(set (match_operand:V4SI 0 "register_operand" "=v")
11245 (match_operand:V2DI 1 "register_operand" "v"))
11246 (match_operand:V2SI 2 "const0_operand")))]
11248 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11249 [(set_attr "type" "ssemov")
11250 (set_attr "prefix" "evex")
11251 (set_attr "mode" "TI")])
11253 (define_insn "*avx512vl_<code>v2div2si2_store_1"
11254 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11256 (match_operand:V2DI 1 "register_operand" "v")))]
11258 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11259 [(set_attr "type" "ssemov")
11260 (set_attr "memory" "store")
11261 (set_attr "prefix" "evex")
11262 (set_attr "mode" "TI")])
11264 (define_insn_and_split "*avx512vl_<code>v2div2si2_store_2"
11265 [(set (match_operand:DI 0 "memory_operand")
11268 (match_operand:V2DI 1 "register_operand")) 0))]
11269 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11272 [(set (match_dup 0)
11273 (any_truncate:V2SI (match_dup 1)))]
11274 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
11276 (define_insn "avx512vl_<code>v2div2si2_mask"
11277 [(set (match_operand:V4SI 0 "register_operand" "=v")
11281 (match_operand:V2DI 1 "register_operand" "v"))
11283 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
11284 (parallel [(const_int 0) (const_int 1)]))
11285 (match_operand:QI 3 "register_operand" "Yk"))
11286 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11288 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11289 [(set_attr "type" "ssemov")
11290 (set_attr "prefix" "evex")
11291 (set_attr "mode" "TI")])
11293 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
11294 [(set (match_operand:V4SI 0 "register_operand" "=v")
11298 (match_operand:V2DI 1 "register_operand" "v"))
11299 (const_vector:V2SI [(const_int 0) (const_int 0)])
11300 (match_operand:QI 2 "register_operand" "Yk"))
11301 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11303 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11304 [(set_attr "type" "ssemov")
11305 (set_attr "prefix" "evex")
11306 (set_attr "mode" "TI")])
11308 (define_insn "*avx512vl_<code>v2div2si2_mask_store_1"
11309 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11312 (match_operand:V2DI 1 "register_operand" "v"))
11314 (match_operand:QI 2 "register_operand" "Yk")))]
11316 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11317 [(set_attr "type" "ssemov")
11318 (set_attr "memory" "store")
11319 (set_attr "prefix" "evex")
11320 (set_attr "mode" "TI")])
11322 (define_insn_and_split "avx512vl_<code>v2div2si2_mask_store_2"
11323 [(set (match_operand:DI 0 "memory_operand")
11327 (match_operand:V2DI 1 "register_operand"))
11333 (parallel [(const_int 0) (const_int 1)]))
11334 (match_operand:QI 2 "register_operand")) 0))]
11335 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11338 [(set (match_dup 0)
11340 (any_truncate:V2SI (match_dup 1))
11343 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
11345 (define_expand "truncv8div8qi2"
11346 [(set (match_operand:V8QI 0 "register_operand")
11348 (match_operand:V8DI 1 "register_operand")))]
11351 operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0);
11352 emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1]));
11356 (define_insn "avx512f_<code>v8div16qi2"
11357 [(set (match_operand:V16QI 0 "register_operand" "=v")
11360 (match_operand:V8DI 1 "register_operand" "v"))
11361 (const_vector:V8QI [(const_int 0) (const_int 0)
11362 (const_int 0) (const_int 0)
11363 (const_int 0) (const_int 0)
11364 (const_int 0) (const_int 0)])))]
11366 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11367 [(set_attr "type" "ssemov")
11368 (set_attr "prefix" "evex")
11369 (set_attr "mode" "TI")])
11371 (define_insn "*avx512f_<code>v8div16qi2_store_1"
11372 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11374 (match_operand:V8DI 1 "register_operand" "v")))]
11376 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11377 [(set_attr "type" "ssemov")
11378 (set_attr "memory" "store")
11379 (set_attr "prefix" "evex")
11380 (set_attr "mode" "TI")])
11382 (define_insn_and_split "*avx512f_<code>v8div16qi2_store_2"
11383 [(set (match_operand:DI 0 "memory_operand")
11386 (match_operand:V8DI 1 "register_operand")) 0))]
11387 "TARGET_AVX512F && ix86_pre_reload_split ()"
11390 [(set (match_dup 0)
11391 (any_truncate:V8QI (match_dup 1)))]
11392 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11394 (define_insn "avx512f_<code>v8div16qi2_mask"
11395 [(set (match_operand:V16QI 0 "register_operand" "=v")
11399 (match_operand:V8DI 1 "register_operand" "v"))
11401 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11402 (parallel [(const_int 0) (const_int 1)
11403 (const_int 2) (const_int 3)
11404 (const_int 4) (const_int 5)
11405 (const_int 6) (const_int 7)]))
11406 (match_operand:QI 3 "register_operand" "Yk"))
11407 (const_vector:V8QI [(const_int 0) (const_int 0)
11408 (const_int 0) (const_int 0)
11409 (const_int 0) (const_int 0)
11410 (const_int 0) (const_int 0)])))]
11412 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11413 [(set_attr "type" "ssemov")
11414 (set_attr "prefix" "evex")
11415 (set_attr "mode" "TI")])
11417 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
11418 [(set (match_operand:V16QI 0 "register_operand" "=v")
11422 (match_operand:V8DI 1 "register_operand" "v"))
11423 (const_vector:V8QI [(const_int 0) (const_int 0)
11424 (const_int 0) (const_int 0)
11425 (const_int 0) (const_int 0)
11426 (const_int 0) (const_int 0)])
11427 (match_operand:QI 2 "register_operand" "Yk"))
11428 (const_vector:V8QI [(const_int 0) (const_int 0)
11429 (const_int 0) (const_int 0)
11430 (const_int 0) (const_int 0)
11431 (const_int 0) (const_int 0)])))]
11433 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11434 [(set_attr "type" "ssemov")
11435 (set_attr "prefix" "evex")
11436 (set_attr "mode" "TI")])
11438 (define_insn "*avx512f_<code>v8div16qi2_mask_store_1"
11439 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11442 (match_operand:V8DI 1 "register_operand" "v"))
11444 (match_operand:QI 2 "register_operand" "Yk")))]
11446 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11447 [(set_attr "type" "ssemov")
11448 (set_attr "memory" "store")
11449 (set_attr "prefix" "evex")
11450 (set_attr "mode" "TI")])
11452 (define_insn_and_split "avx512f_<code>v8div16qi2_mask_store_2"
11453 [(set (match_operand:DI 0 "memory_operand")
11457 (match_operand:V8DI 1 "register_operand"))
11463 (parallel [(const_int 0) (const_int 1)
11464 (const_int 2) (const_int 3)
11465 (const_int 4) (const_int 5)
11466 (const_int 6) (const_int 7)]))
11467 (match_operand:QI 2 "register_operand")) 0))]
11468 "TARGET_AVX512F && ix86_pre_reload_split ()"
11471 [(set (match_dup 0)
11473 (any_truncate:V8QI (match_dup 1))
11476 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11478 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11480 ;; Parallel integral arithmetic
11482 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11484 (define_expand "neg<mode>2"
11485 [(set (match_operand:VI_AVX2 0 "register_operand")
11488 (match_operand:VI_AVX2 1 "vector_operand")))]
11490 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
11492 (define_expand "<insn><mode>3"
11493 [(set (match_operand:VI_AVX2 0 "register_operand")
11495 (match_operand:VI_AVX2 1 "vector_operand")
11496 (match_operand:VI_AVX2 2 "vector_operand")))]
11498 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11500 (define_expand "<insn><mode>3_mask"
11501 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11502 (vec_merge:VI48_AVX512VL
11503 (plusminus:VI48_AVX512VL
11504 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11505 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11506 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11507 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11509 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11511 (define_expand "<insn><mode>3_mask"
11512 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11513 (vec_merge:VI12_AVX512VL
11514 (plusminus:VI12_AVX512VL
11515 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11516 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11517 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
11518 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11520 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11522 (define_insn "*<insn><mode>3"
11523 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
11525 (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
11526 (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
11527 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11529 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11530 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11531 [(set_attr "isa" "noavx,avx")
11532 (set_attr "type" "sseiadd")
11533 (set_attr "prefix_data16" "1,*")
11534 (set_attr "prefix" "orig,maybe_evex")
11535 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector add/subtract for 32/64-bit element modes (AVX512F;
;; AVX512VL supplies the 128/256-bit variants via VI48_AVX512VL).  The
;; plus/minus result is merged with operand 3 under the mask in
;; operand 4.  Constraint "0C" on operand 3 permits either the
;; destination register (merge-masking) or a zero constant
;; (zero-masking).
;; NOTE(review): the %N3 output modifier presumably emits the {z}
;; zeroing marker when operand 3 is const0 -- confirm against
;; ix86_print_operand in i386.c.
11537 (define_insn "*<insn><mode>3_mask"
11538 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11539 (vec_merge:VI48_AVX512VL
11540 (plusminus:VI48_AVX512VL
11541 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11542 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11543 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
11544 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11545 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11546 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11547 [(set_attr "type" "sseiadd")
11548 (set_attr "prefix" "evex")
11549 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector add/subtract for 8/16-bit element modes.  Identical in
;; shape to the VI48 pattern above, but byte/word masked arithmetic
;; requires AVX512BW rather than plain AVX512F.  Operand 3 ("0C") is
;; the merge source: the destination for merge-masking or a zero
;; constant for zero-masking, selected at output time by %N3.
11551 (define_insn "*<insn><mode>3_mask"
11552 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11553 (vec_merge:VI12_AVX512VL
11554 (plusminus:VI12_AVX512VL
11555 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11556 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
11557 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
11558 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11559 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11560 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11561 [(set_attr "type" "sseiadd")
11562 (set_attr "prefix" "evex")
11563 (set_attr "mode" "<sseinsnmode>")])
;; Expander for saturating vector add/subtract (padds/paddus/psubs/
;; psubus) on byte/word element modes, including the optional masked
;; forms generated through <mask_name>.  Gated on SSE2 plus the mask
;; applicability conditions for 512-bit / AVX512BW variants.  The
;; preparation statement legitimizes the two input operands for a
;; commutative-style binary pattern before the insn is matched.
11565 (define_expand "<sse2_avx2>_<insn><mode>3<mask_name>"
11566 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
11567 (sat_plusminus:VI12_AVX2_AVX512BW
11568 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
11569 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
11570 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11571 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11573 (define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
11574 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
11575 (sat_plusminus:VI12_AVX2_AVX512BW
11576 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
11577 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
11578 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
11579 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11581 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11582 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11583 [(set_attr "isa" "noavx,avx")
11584 (set_attr "type" "sseiadd")
11585 (set_attr "prefix_data16" "1,*")
11586 (set_attr "prefix" "orig,maybe_evex")
11587 (set_attr "mode" "TI")])
11589 ;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
11591 [(set (match_operand:VI12_AVX2 0 "register_operand")
11593 (us_minus:VI12_AVX2
11594 (match_operand:VI12_AVX2 1 "vector_operand")
11595 (match_operand:VI12_AVX2 2 "vector_operand"))
11596 (match_operand:VI12_AVX2 3 "const0_operand")))]
11598 && (<MODE>mode != V8HImode || TARGET_SSE4_1)
11599 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
11600 [(set (match_dup 4)
11601 (umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
11603 (eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
11604 "operands[4] = gen_reg_rtx (<MODE>mode);")
11606 (define_expand "mulv8qi3"
11607 [(set (match_operand:V8QI 0 "register_operand")
11608 (mult:V8QI (match_operand:V8QI 1 "register_operand")
11609 (match_operand:V8QI 2 "register_operand")))]
11610 "TARGET_AVX512VL && TARGET_AVX512BW"
11612 gcc_assert (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]));
11616 (define_expand "mul<mode>3"
11617 [(set (match_operand:VI1_AVX512 0 "register_operand")
11618 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
11619 (match_operand:VI1_AVX512 2 "register_operand")))]
11622 if (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]))
11624 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Expander for low-half vector multiply of 16-bit elements (pmullw /
;; vpmullw), including masked variants via <mask_name>.  Requires SSE2;
;; the mask conditions restrict the 512-bit and AVX512BW-only element
;; widths.  Operand legitimization is delegated to
;; ix86_fixup_binary_operands_no_copy as for the other binary expands
;; in this file.
11628 (define_expand "mul<mode>3<mask_name>"
11629 [(set (match_operand:VI2_AVX2 0 "register_operand")
11630 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
11631 (match_operand:VI2_AVX2 2 "vector_operand")))]
11632 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11633 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11635 (define_insn "*mul<mode>3<mask_name>"
11636 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11637 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11638 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11639 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11640 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11642 pmullw\t{%2, %0|%0, %2}
11643 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11644 [(set_attr "isa" "noavx,avx")
11645 (set_attr "type" "sseimul")
11646 (set_attr "prefix_data16" "1,*")
11647 (set_attr "prefix" "orig,vex")
11648 (set_attr "mode" "<sseinsnmode>")])
11650 (define_expand "<s>mul<mode>3_highpart<mask_name>"
11651 [(set (match_operand:VI2_AVX2 0 "register_operand")
11653 (lshiftrt:<ssedoublemode>
11654 (mult:<ssedoublemode>
11655 (any_extend:<ssedoublemode>
11656 (match_operand:VI2_AVX2 1 "vector_operand"))
11657 (any_extend:<ssedoublemode>
11658 (match_operand:VI2_AVX2 2 "vector_operand")))
11661 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11662 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11664 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
11665 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11667 (lshiftrt:<ssedoublemode>
11668 (mult:<ssedoublemode>
11669 (any_extend:<ssedoublemode>
11670 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11671 (any_extend:<ssedoublemode>
11672 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11674 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11675 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11677 pmulh<u>w\t{%2, %0|%0, %2}
11678 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11679 [(set_attr "isa" "noavx,avx")
11680 (set_attr "type" "sseimul")
11681 (set_attr "prefix_data16" "1,*")
11682 (set_attr "prefix" "orig,vex")
11683 (set_attr "mode" "<sseinsnmode>")])
11685 (define_expand "vec_widen_umult_even_v16si<mask_name>"
11686 [(set (match_operand:V8DI 0 "register_operand")
11690 (match_operand:V16SI 1 "nonimmediate_operand")
11691 (parallel [(const_int 0) (const_int 2)
11692 (const_int 4) (const_int 6)
11693 (const_int 8) (const_int 10)
11694 (const_int 12) (const_int 14)])))
11697 (match_operand:V16SI 2 "nonimmediate_operand")
11698 (parallel [(const_int 0) (const_int 2)
11699 (const_int 4) (const_int 6)
11700 (const_int 8) (const_int 10)
11701 (const_int 12) (const_int 14)])))))]
11703 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11705 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
11706 [(set (match_operand:V8DI 0 "register_operand" "=v")
11710 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11711 (parallel [(const_int 0) (const_int 2)
11712 (const_int 4) (const_int 6)
11713 (const_int 8) (const_int 10)
11714 (const_int 12) (const_int 14)])))
11717 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11718 (parallel [(const_int 0) (const_int 2)
11719 (const_int 4) (const_int 6)
11720 (const_int 8) (const_int 10)
11721 (const_int 12) (const_int 14)])))))]
11722 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11723 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11724 [(set_attr "type" "sseimul")
11725 (set_attr "prefix_extra" "1")
11726 (set_attr "prefix" "evex")
11727 (set_attr "mode" "XI")])
;; 256-bit widening unsigned multiply of even SI elements (vpmuludq).
;; The expand canonicalizes operands; masking needs AVX512VL
;; (<mask_avx512vl_condition>).
11729 (define_expand "vec_widen_umult_even_v8si<mask_name>"
11730 [(set (match_operand:V4DI 0 "register_operand")
11734 (match_operand:V8SI 1 "nonimmediate_operand")
11735 (parallel [(const_int 0) (const_int 2)
11736 (const_int 4) (const_int 6)])))
11739 (match_operand:V8SI 2 "nonimmediate_operand")
11740 (parallel [(const_int 0) (const_int 2)
11741 (const_int 4) (const_int 6)])))))]
11742 "TARGET_AVX2 && <mask_avx512vl_condition>"
;; Forces at most one operand into memory and honors commutativity.
11743 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11745 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
11746 [(set (match_operand:V4DI 0 "register_operand" "=v")
11750 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11751 (parallel [(const_int 0) (const_int 2)
11752 (const_int 4) (const_int 6)])))
11755 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11756 (parallel [(const_int 0) (const_int 2)
11757 (const_int 4) (const_int 6)])))))]
11758 "TARGET_AVX2 && <mask_avx512vl_condition>
11759 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11760 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11761 [(set_attr "type" "sseimul")
;; VEX normally, EVEX when the masked variant is instantiated.
11762 (set_attr "prefix" "maybe_evex")
11763 (set_attr "mode" "OI")])
;; 128-bit form: SSE2 pmuludq / AVX-512VL masked vpmuludq.
11765 (define_expand "vec_widen_umult_even_v4si<mask_name>"
11766 [(set (match_operand:V2DI 0 "register_operand")
11770 (match_operand:V4SI 1 "vector_operand")
11771 (parallel [(const_int 0) (const_int 2)])))
11774 (match_operand:V4SI 2 "vector_operand")
11775 (parallel [(const_int 0) (const_int 2)])))))]
11776 "TARGET_SSE2 && <mask_avx512vl_condition>"
11777 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11779 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
;; Alternative 0: legacy SSE2 two-operand form (dest matches op 1).
;; Alternative 1: VEX/EVEX three-operand form.
11780 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11784 (match_operand:V4SI 1 "vector_operand" "%0,v")
11785 (parallel [(const_int 0) (const_int 2)])))
11788 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11789 (parallel [(const_int 0) (const_int 2)])))))]
11790 "TARGET_SSE2 && <mask_avx512vl_condition>
11791 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11793 pmuludq\t{%2, %0|%0, %2}
11794 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11795 [(set_attr "isa" "noavx,avx")
11796 (set_attr "type" "sseimul")
11797 (set_attr "prefix_data16" "1,*")
11798 (set_attr "prefix" "orig,maybe_evex")
11799 (set_attr "mode" "TI")])
;; Signed counterparts: vpmuldq multiplies the sign-extended even SI
;; elements.  512-bit expand + insn, then the 256-bit expand.
11801 (define_expand "vec_widen_smult_even_v16si<mask_name>"
11802 [(set (match_operand:V8DI 0 "register_operand")
11806 (match_operand:V16SI 1 "nonimmediate_operand")
11807 (parallel [(const_int 0) (const_int 2)
11808 (const_int 4) (const_int 6)
11809 (const_int 8) (const_int 10)
11810 (const_int 12) (const_int 14)])))
11813 (match_operand:V16SI 2 "nonimmediate_operand")
11814 (parallel [(const_int 0) (const_int 2)
11815 (const_int 4) (const_int 6)
11816 (const_int 8) (const_int 10)
11817 (const_int 12) (const_int 14)])))))]
11819 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11821 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
11822 [(set (match_operand:V8DI 0 "register_operand" "=v")
;; Commutative ("%"); at most one memory operand (condition below).
11826 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11827 (parallel [(const_int 0) (const_int 2)
11828 (const_int 4) (const_int 6)
11829 (const_int 8) (const_int 10)
11830 (const_int 12) (const_int 14)])))
11833 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11834 (parallel [(const_int 0) (const_int 2)
11835 (const_int 4) (const_int 6)
11836 (const_int 8) (const_int 10)
11837 (const_int 12) (const_int 14)])))))]
11838 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11839 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11840 [(set_attr "type" "sseimul")
11841 (set_attr "prefix_extra" "1")
11842 (set_attr "prefix" "evex")
11843 (set_attr "mode" "XI")])
;; 256-bit signed even multiply; masked form requires AVX512VL.
11845 (define_expand "vec_widen_smult_even_v8si<mask_name>"
11846 [(set (match_operand:V4DI 0 "register_operand")
11850 (match_operand:V8SI 1 "nonimmediate_operand")
11851 (parallel [(const_int 0) (const_int 2)
11852 (const_int 4) (const_int 6)])))
11855 (match_operand:V8SI 2 "nonimmediate_operand")
11856 (parallel [(const_int 0) (const_int 2)
11857 (const_int 4) (const_int 6)])))))]
11858 "TARGET_AVX2 && <mask_avx512vl_condition>"
11859 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; 256-bit vpmuldq: signed widening multiply of even SI elements.
;; FIX(review): the insn condition previously lacked
;; <mask_avx512vl_condition> even though the pattern carries <mask_name>
;; and its own expand (line 11858) requires it — the masked variant of
;; vpmuldq on V4DI is EVEX-encoded and needs AVX512VL.  Condition and
;; prefix ("vex" -> "maybe_evex") now match the unsigned twin
;; *vec_widen_umult_even_v8si above.
11861 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
11862 [(set (match_operand:V4DI 0 "register_operand" "=v")
11866 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11867 (parallel [(const_int 0) (const_int 2)
11868 (const_int 4) (const_int 6)])))
11871 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11872 (parallel [(const_int 0) (const_int 2)
11873 (const_int 4) (const_int 6)])))))]
11874 "TARGET_AVX2 && <mask_avx512vl_condition>
 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11875 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11876 [(set_attr "type" "sseimul")
11877 (set_attr "prefix_extra" "1")
11878 (set_attr "prefix" "maybe_evex")
11879 (set_attr "mode" "OI")])
;; SSE4.1 pmuldq: 128-bit signed widening multiply of even SI elements
;; (V4SI x V4SI -> V2DI).  Masked form requires AVX512VL.
11881 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
11882 [(set (match_operand:V2DI 0 "register_operand")
11886 (match_operand:V4SI 1 "vector_operand")
11887 (parallel [(const_int 0) (const_int 2)])))
11890 (match_operand:V4SI 2 "vector_operand")
11891 (parallel [(const_int 0) (const_int 2)])))))]
11892 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
11893 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11895 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
;; Alternatives: Yr (non-REX SSE regs, shorter encoding), generic SSE,
;; and the VEX/EVEX three-operand form.
11896 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
11900 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
11901 (parallel [(const_int 0) (const_int 2)])))
11904 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
11905 (parallel [(const_int 0) (const_int 2)])))))]
11906 "TARGET_SSE4_1 && <mask_avx512vl_condition>
11907 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11909 pmuldq\t{%2, %0|%0, %2}
11910 pmuldq\t{%2, %0|%0, %2}
11911 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11912 [(set_attr "isa" "noavx,noavx,avx")
11913 (set_attr "type" "sseimul")
11914 (set_attr "prefix_data16" "1,1,*")
11915 (set_attr "prefix_extra" "1")
11916 (set_attr "prefix" "orig,orig,vex")
11917 (set_attr "mode" "TI")])
;; vpmaddwd (AVX512BW): multiply packed signed words and horizontally
;; add adjacent pairs into doubleword sums; modeled as an unspec.
;; FIX(review): dropped the stray ';' that followed the output-template
;; string — in md syntax ';' starts a comment, so it was harmless but
;; clearly unintended.
11919 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
11920 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
11921 (unspec:<sseunpackmode>
11922 [(match_operand:VI2_AVX2 1 "register_operand" "v")
11923 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
11924 UNSPEC_PMADDWD512))]
11925 "TARGET_AVX512BW && <mask_mode512bit_condition>"
11926 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11927 [(set_attr "type" "sseiadd")
11928 (set_attr "prefix" "evex")
11929 (set_attr "mode" "XI")])
;; pmaddwd expressed in explicit RTL: products of the even word pairs
;; plus products of the odd word pairs (vec_select 0,2,... and 1,3,...),
;; so the combiner can recognize it.  256-bit AVX2 version first.
11931 (define_expand "avx2_pmaddwd"
11932 [(set (match_operand:V8SI 0 "register_operand")
11937 (match_operand:V16HI 1 "nonimmediate_operand")
11938 (parallel [(const_int 0) (const_int 2)
11939 (const_int 4) (const_int 6)
11940 (const_int 8) (const_int 10)
11941 (const_int 12) (const_int 14)])))
11944 (match_operand:V16HI 2 "nonimmediate_operand")
11945 (parallel [(const_int 0) (const_int 2)
11946 (const_int 4) (const_int 6)
11947 (const_int 8) (const_int 10)
11948 (const_int 12) (const_int 14)]))))
;; Odd-indexed word lanes of the same two inputs.
11951 (vec_select:V8HI (match_dup 1)
11952 (parallel [(const_int 1) (const_int 3)
11953 (const_int 5) (const_int 7)
11954 (const_int 9) (const_int 11)
11955 (const_int 13) (const_int 15)])))
11957 (vec_select:V8HI (match_dup 2)
11958 (parallel [(const_int 1) (const_int 3)
11959 (const_int 5) (const_int 7)
11960 (const_int 9) (const_int 11)
11961 (const_int 13) (const_int 15)]))))))]
11963 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
11965 (define_insn "*avx2_pmaddwd"
11966 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
11971 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
11972 (parallel [(const_int 0) (const_int 2)
11973 (const_int 4) (const_int 6)
11974 (const_int 8) (const_int 10)
11975 (const_int 12) (const_int 14)])))
11978 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
11979 (parallel [(const_int 0) (const_int 2)
11980 (const_int 4) (const_int 6)
11981 (const_int 8) (const_int 10)
11982 (const_int 12) (const_int 14)]))))
11985 (vec_select:V8HI (match_dup 1)
11986 (parallel [(const_int 1) (const_int 3)
11987 (const_int 5) (const_int 7)
11988 (const_int 9) (const_int 11)
11989 (const_int 13) (const_int 15)])))
11991 (vec_select:V8HI (match_dup 2)
11992 (parallel [(const_int 1) (const_int 3)
11993 (const_int 5) (const_int 7)
11994 (const_int 9) (const_int 11)
11995 (const_int 13) (const_int 15)]))))))]
11996 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11997 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11998 [(set_attr "type" "sseiadd")
;; Second alternative uses extended (v) registers -> needs AVX512BW.
11999 (set_attr "isa" "*,avx512bw")
12000 (set_attr "prefix" "vex,evex")
12001 (set_attr "mode" "OI")])
;; 128-bit SSE2 version of the same even/odd madd decomposition.
12003 (define_expand "sse2_pmaddwd"
12004 [(set (match_operand:V4SI 0 "register_operand")
12009 (match_operand:V8HI 1 "vector_operand")
12010 (parallel [(const_int 0) (const_int 2)
12011 (const_int 4) (const_int 6)])))
12014 (match_operand:V8HI 2 "vector_operand")
12015 (parallel [(const_int 0) (const_int 2)
12016 (const_int 4) (const_int 6)]))))
12019 (vec_select:V4HI (match_dup 1)
12020 (parallel [(const_int 1) (const_int 3)
12021 (const_int 5) (const_int 7)])))
12023 (vec_select:V4HI (match_dup 2)
12024 (parallel [(const_int 1) (const_int 3)
12025 (const_int 5) (const_int 7)]))))))]
12027 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
12029 (define_insn "*sse2_pmaddwd"
12030 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
12035 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
12036 (parallel [(const_int 0) (const_int 2)
12037 (const_int 4) (const_int 6)])))
12040 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
12041 (parallel [(const_int 0) (const_int 2)
12042 (const_int 4) (const_int 6)]))))
12045 (vec_select:V4HI (match_dup 1)
12046 (parallel [(const_int 1) (const_int 3)
12047 (const_int 5) (const_int 7)])))
12049 (vec_select:V4HI (match_dup 2)
12050 (parallel [(const_int 1) (const_int 3)
12051 (const_int 5) (const_int 7)]))))))]
12052 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12054 pmaddwd\t{%2, %0|%0, %2}
12055 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
12056 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
12057 [(set_attr "isa" "noavx,avx,avx512bw")
12058 (set_attr "type" "sseiadd")
12059 (set_attr "atom_unit" "simul")
12060 (set_attr "prefix_data16" "1,*,*")
12061 (set_attr "prefix" "orig,vex,evex")
12062 (set_attr "mode" "TI")])
;; vpmullq (AVX512DQ): full 64-bit element-wise multiply; Br allows an
;; embedded-broadcast memory operand.
12064 (define_insn "avx512dq_mul<mode>3<mask_name>"
12065 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
12067 (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v")
12068 (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
12069 "TARGET_AVX512DQ && <mask_mode512bit_condition>
12070 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
12071 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12072 [(set_attr "type" "sseimul")
12073 (set_attr "prefix" "evex")
12074 (set_attr "mode" "<sseinsnmode>")])
;; 32-bit element multiply.  With SSE4.1+ this maps to (v)pmulld below;
;; otherwise (plain SSE2 V4SI) it is synthesized via
;; ix86_expand_sse2_mulv4si3.  Some interior lines (the branch between
;; the two cases) are elided in this excerpt.
12076 (define_expand "mul<mode>3<mask_name>"
12077 [(set (match_operand:VI4_AVX512F 0 "register_operand")
12079 (match_operand:VI4_AVX512F 1 "general_vector_operand")
12080 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
12081 "TARGET_SSE2 && <mask_mode512bit_condition>"
12085 if (!vector_operand (operands[1], <MODE>mode))
12086 operands[1] = force_reg (<MODE>mode, operands[1]);
12087 if (!vector_operand (operands[2], <MODE>mode))
12088 operands[2] = force_reg (<MODE>mode, operands[2]);
12089 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
12093 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
12098 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
12099 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
12101 (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v")
12102 (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))]
12103 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
12104 && <mask_mode512bit_condition>"
12106 pmulld\t{%2, %0|%0, %2}
12107 pmulld\t{%2, %0|%0, %2}
12108 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12109 [(set_attr "isa" "noavx,noavx,avx")
12110 (set_attr "type" "sseimul")
12111 (set_attr "prefix_extra" "1")
12112 (set_attr "prefix" "<bcst_mask_prefix4>")
;; pmulld is double-pumped / vector-decoded on btver2.
12113 (set_attr "btver2_decode" "vector,vector,vector")
12114 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit element multiply without AVX512DQ: always synthesized.
12116 (define_expand "mul<mode>3"
12117 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12118 (mult:VI8_AVX2_AVX512F
12119 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12120 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12123 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply of the high/low vector halves; <s> is "s"/"u" via
;; any_extend.  All real work happens in ix86_expand_mul_widen_hilo.
12127 (define_expand "vec_widen_<s>mult_hi_<mode>"
12128 [(match_operand:<sseunpackmode> 0 "register_operand")
12129 (any_extend:<sseunpackmode>
12130 (match_operand:VI124_AVX2 1 "register_operand"))
12131 (match_operand:VI124_AVX2 2 "register_operand")]
12134 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
12139 (define_expand "vec_widen_<s>mult_lo_<mode>"
12140 [(match_operand:<sseunpackmode> 0 "register_operand")
12141 (any_extend:<sseunpackmode>
12142 (match_operand:VI124_AVX2 1 "register_operand"))
12143 (match_operand:VI124_AVX2 2 "register_operand")]
12146 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
12151 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
12152 ;; named patterns, but signed V4SI needs special help for plain SSE2.
12153 (define_expand "vec_widen_smult_even_v4si"
12154 [(match_operand:V2DI 0 "register_operand")
12155 (match_operand:V4SI 1 "vector_operand")
12156 (match_operand:V4SI 2 "vector_operand")]
12159 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Odd-element widening multiply, likewise delegated to the helper.
12164 (define_expand "vec_widen_<s>mult_odd_<mode>"
12165 [(match_operand:<sseunpackmode> 0 "register_operand")
12166 (any_extend:<sseunpackmode>
12167 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
12168 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
12171 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix selecting the right pmaddwd gen function per mode
;; (512-bit needs the explicit "512v32hi" name).
12176 (define_mode_attr SDOT_PMADD_SUF
12177 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; sdot_prod: dot product of signed HI vectors = pmaddwd then add the
;; accumulator (operand 3).
12179 (define_expand "sdot_prod<mode>"
12180 [(match_operand:<sseunpackmode> 0 "register_operand")
12181 (match_operand:VI2_AVX2 1 "register_operand")
12182 (match_operand:VI2_AVX2 2 "register_operand")
12183 (match_operand:<sseunpackmode> 3 "register_operand")]
12186 rtx t = gen_reg_rtx (<sseunpackmode>mode);
12187 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
12188 emit_insn (gen_rtx_SET (operands[0],
12189 gen_rtx_PLUS (<sseunpackmode>mode,
12194 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
12195 ;; back together when madd is available.
;; XOP-only V4SI dot product using pmacsdqh/pmacsdql (multiply-accumulate
;; of high/low signed dword pairs).
12196 (define_expand "sdot_prodv4si"
12197 [(match_operand:V2DI 0 "register_operand")
12198 (match_operand:V4SI 1 "register_operand")
12199 (match_operand:V4SI 2 "register_operand")
12200 (match_operand:V2DI 3 "register_operand")]
12203 rtx t = gen_reg_rtx (V2DImode);
12204 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
12205 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Rounding unsigned average: (a + b + 1) >> 1 in a double-width mode,
;; truncated back — matches (v)pavgb/pavgw semantics.
12209 (define_expand "uavg<mode>3_ceil"
12210 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
12211 (truncate:VI12_AVX2_AVX512BW
12212 (lshiftrt:<ssedoublemode>
12213 (plus:<ssedoublemode>
12214 (plus:<ssedoublemode>
12215 (zero_extend:<ssedoublemode>
12216 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
12217 (zero_extend:<ssedoublemode>
12218 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
;; operand 3 is the "+1" rounding term.
12223 operands[3] = CONST1_RTX(<ssedoublemode>mode);
12224 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; usad: sum of absolute differences of unsigned bytes (psadbw),
;; widened to SI lanes and added to the accumulator (operand 3).
12227 (define_expand "usadv16qi"
12228 [(match_operand:V4SI 0 "register_operand")
12229 (match_operand:V16QI 1 "register_operand")
12230 (match_operand:V16QI 2 "vector_operand")
12231 (match_operand:V4SI 3 "vector_operand")]
12234 rtx t1 = gen_reg_rtx (V2DImode);
12235 rtx t2 = gen_reg_rtx (V4SImode);
12236 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
;; psadbw produces DI sums; reinterpret as V4SI before the add.
12237 convert_move (t2, t1, 0);
12238 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; 256-bit variant (AVX2 vpsadbw).
12242 (define_expand "usadv32qi"
12243 [(match_operand:V8SI 0 "register_operand")
12244 (match_operand:V32QI 1 "register_operand")
12245 (match_operand:V32QI 2 "nonimmediate_operand")
12246 (match_operand:V8SI 3 "nonimmediate_operand")]
12249 rtx t1 = gen_reg_rtx (V4DImode);
12250 rtx t2 = gen_reg_rtx (V8SImode);
12251 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
12252 convert_move (t2, t1, 0);
12253 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; 512-bit variant (AVX512F vpsadbw).
12257 (define_expand "usadv64qi"
12258 [(match_operand:V16SI 0 "register_operand")
12259 (match_operand:V64QI 1 "register_operand")
12260 (match_operand:V64QI 2 "nonimmediate_operand")
12261 (match_operand:V16SI 3 "nonimmediate_operand")]
12264 rtx t1 = gen_reg_rtx (V8DImode);
12265 rtx t2 = gen_reg_rtx (V16SImode);
12266 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
12267 convert_move (t2, t1, 0);
12268 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
;; Vector shifts by a scalar count (operand 2: either an xmm register
;; or an immediate N).  length_immediate is 1 only for the immediate
;; form; the elided line between 12281/12283 presumably holds the
;; (const_string "1") arm of the if_then_else.
12272 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
12273 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
12274 (ashiftrt:VI248_AVX512BW_1
12275 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
12276 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12278 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12279 [(set_attr "type" "sseishft")
12280 (set (attr "length_immediate")
12281 (if_then_else (match_operand 2 "const_int_operand")
12283 (const_string "0")))
12284 (set_attr "mode" "<sseinsnmode>")])
;; Legacy SSE/AVX arithmetic right shift (no masking).
12286 (define_insn "ashr<mode>3"
12287 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
12288 (ashiftrt:VI24_AVX2
12289 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
12290 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12293 psra<ssemodesuffix>\t{%2, %0|%0, %2}
12294 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12295 [(set_attr "isa" "noavx,avx")
12296 (set_attr "type" "sseishft")
12297 (set (attr "length_immediate")
12298 (if_then_else (match_operand 2 "const_int_operand")
12300 (const_string "0")))
12301 (set_attr "prefix_data16" "1,*")
12302 (set_attr "prefix" "orig,vex")
12303 (set_attr "mode" "<sseinsnmode>")])
;; AVX512(BW/VL) maskable arithmetic right shift.
12305 (define_insn "ashr<mode>3<mask_name>"
12306 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
12307 (ashiftrt:VI248_AVX512BW_AVX512VL
12308 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
12309 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12311 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12312 [(set_attr "type" "sseishft")
12313 (set (attr "length_immediate")
12314 (if_then_else (match_operand 2 "const_int_operand")
12316 (const_string "0")))
12317 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift = ashift/lshiftrt), AVX512 maskable form.
12319 (define_insn "<mask_codefor><insn><mode>3<mask_name>"
12320 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
12321 (any_lshift:VI248_AVX512BW_2
12322 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
12323 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12325 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12326 [(set_attr "type" "sseishft")
12327 (set (attr "length_immediate")
12328 (if_then_else (match_operand 2 "const_int_operand")
12330 (const_string "0")))
12331 (set_attr "mode" "<sseinsnmode>")])
;; Legacy SSE/AVX logical shifts.
12333 (define_insn "<insn><mode>3"
12334 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
12335 (any_lshift:VI248_AVX2
12336 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
12337 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12340 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
12341 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12342 [(set_attr "isa" "noavx,avx")
12343 (set_attr "type" "sseishft")
12344 (set (attr "length_immediate")
12345 (if_then_else (match_operand 2 "const_int_operand")
12347 (const_string "0")))
12348 (set_attr "prefix_data16" "1,*")
12349 (set_attr "prefix" "orig,vex")
12350 (set_attr "mode" "<sseinsnmode>")])
;; AVX512BW maskable logical shifts; the memory-source alternative
;; requires an immediate count.
12352 (define_insn "<insn><mode>3<mask_name>"
12353 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
12354 (any_lshift:VI248_AVX512BW
12355 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
12356 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
12358 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12359 [(set_attr "type" "sseishft")
12360 (set (attr "length_immediate")
12361 (if_then_else (match_operand 2 "const_int_operand")
12363 (const_string "0")))
12364 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shifts by a byte count: implemented as a V1TI shift
;; (pslldq/psrldq operate on the full 128-bit register), with lowpart
;; punning on input and output.  Shift amount is in bits, a multiple
;; of 8 (const_0_to_255_mul_8_operand).
12367 (define_expand "vec_shl_<mode>"
12368 [(set (match_dup 3)
12370 (match_operand:V_128 1 "register_operand")
12371 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12372 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12375 operands[1] = gen_lowpart (V1TImode, operands[1]);
12376 operands[3] = gen_reg_rtx (V1TImode);
12377 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
12380 (define_expand "vec_shr_<mode>"
12381 [(set (match_dup 3)
12383 (match_operand:V_128 1 "register_operand")
12384 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12385 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12388 operands[1] = gen_lowpart (V1TImode, operands[1]);
12389 operands[3] = gen_reg_rtx (V1TImode);
12390 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; vpslldq/vpsrldq with AVX512BW/VL; the immediate encodes bytes, so
;; the bit count is divided by 8 before printing.
12393 (define_insn "avx512bw_<insn><mode>3"
12394 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
12395 (any_lshift:VIMAX_AVX512VL
12396 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
12397 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
12400 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12401 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12403 [(set_attr "type" "sseishft")
12404 (set_attr "length_immediate" "1")
12405 (set_attr "prefix" "maybe_evex")
12406 (set_attr "mode" "<sseinsnmode>")])
;; SSE2/AVX2 pslldq/psrldq, two-operand vs three-operand alternatives.
12408 (define_insn "<sse2_avx2>_<insn><mode>3"
12409 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
12410 (any_lshift:VIMAX_AVX2
12411 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
12412 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
12415 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12417 switch (which_alternative)
12420 return "p<vshift>dq\t{%2, %0|%0, %2}";
12422 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12424 gcc_unreachable ();
12427 [(set_attr "isa" "noavx,avx")
12428 (set_attr "type" "sseishft")
12429 (set_attr "length_immediate" "1")
12430 (set_attr "atom_unit" "sishuf")
12431 (set_attr "prefix_data16" "1,*")
12432 (set_attr "prefix" "orig,vex")
12433 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 rotates: variable per-element count (vprolv/vprorv) and
;; immediate count (vprol/vpror), both maskable.
12435 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
12436 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12437 (any_rotate:VI48_AVX512VL
12438 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
12439 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12441 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12442 [(set_attr "prefix" "evex")
12443 (set_attr "mode" "<sseinsnmode>")])
12445 (define_insn "<avx512>_<rotate><mode><mask_name>"
12446 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12447 (any_rotate:VI48_AVX512VL
12448 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
12449 (match_operand:SI 2 "const_0_to_255_operand")))]
12451 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12452 [(set_attr "prefix" "evex")
12453 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max (maxmin = smax/smin/umax/umin) for the modes that map
;; directly to hardware vpmax*/vpmin*.
12455 (define_expand "<code><mode>3"
12456 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
12457 (maxmin:VI124_256_AVX512F_AVX512BW
12458 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
12459 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
12461 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12463 (define_insn "*avx2_<code><mode>3"
12464 [(set (match_operand:VI124_256 0 "register_operand" "=v")
12466 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
12467 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
12468 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12469 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12470 [(set_attr "type" "sseiadd")
12471 (set_attr "prefix_extra" "1")
12472 (set_attr "prefix" "vex")
12473 (set_attr "mode" "OI")])
;; Explicitly masked min/max expander (vec_merge with mask operand 4).
12475 (define_expand "<code><mode>3_mask"
12476 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12477 (vec_merge:VI48_AVX512VL
12478 (maxmin:VI48_AVX512VL
12479 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12480 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12481 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12482 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12484 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12486 (define_insn "*avx512f_<code><mode>3<mask_name>"
12487 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12488 (maxmin:VI48_AVX512VL
12489 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
12490 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12491 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12492 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12493 [(set_attr "type" "sseiadd")
12494 (set_attr "prefix_extra" "1")
12495 (set_attr "prefix" "maybe_evex")
12496 (set_attr "mode" "<sseinsnmode>")])
;; Byte/word min/max under AVX512BW/VL.
12498 (define_insn "<mask_codefor><code><mode>3<mask_name>"
12499 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
12500 (maxmin:VI12_AVX512VL
12501 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
12502 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
12504 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12505 [(set_attr "type" "sseiadd")
12506 (set_attr "prefix" "evex")
12507 (set_attr "mode" "<sseinsnmode>")])
;; DI-element min/max: real instruction only with AVX512F (+VL for
;; narrow modes); otherwise synthesized through a vector compare and
;; ix86_expand_int_vcond (xops[] holds the vcond arguments).
12509 (define_expand "<code><mode>3"
12510 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12511 (maxmin:VI8_AVX2_AVX512F
12512 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12513 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12517 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
12518 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12521 enum rtx_code code;
12526 xops[0] = operands[0];
;; max selects operand 1 on true, min selects operand 2.
12528 if (<CODE> == SMAX || <CODE> == UMAX)
12530 xops[1] = operands[1];
12531 xops[2] = operands[2];
12535 xops[1] = operands[2];
12536 xops[2] = operands[1];
12539 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
12541 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
12542 xops[4] = operands[1];
12543 xops[5] = operands[2];
12545 ok = ix86_expand_int_vcond (xops);
;; Signed 128-bit min/max.  SSE4.1 (or V8HI, which has native
;; pmaxsw/pminsw in SSE2) maps straight to an insn; otherwise it is
;; synthesized via a signed-GT vcond.
12551 (define_expand "<code><mode>3"
12552 [(set (match_operand:VI124_128 0 "register_operand")
12554 (match_operand:VI124_128 1 "vector_operand")
12555 (match_operand:VI124_128 2 "vector_operand")))]
12558 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
12559 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12565 xops[0] = operands[0];
12566 operands[1] = force_reg (<MODE>mode, operands[1]);
12567 operands[2] = force_reg (<MODE>mode, operands[2]);
12569 if (<CODE> == SMAX)
12571 xops[1] = operands[1];
12572 xops[2] = operands[2];
12576 xops[1] = operands[2];
12577 xops[2] = operands[1];
12580 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
12581 xops[4] = operands[1];
12582 xops[5] = operands[2];
12584 ok = ix86_expand_int_vcond (xops);
12590 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12591 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
12593 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
12594 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12596 && <mask_mode512bit_condition>
12597 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12599 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12600 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12601 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12602 [(set_attr "isa" "noavx,noavx,avx")
12603 (set_attr "type" "sseiadd")
12604 (set_attr "prefix_extra" "1,1,*")
12605 (set_attr "prefix" "orig,orig,vex")
12606 (set_attr "mode" "TI")])
;; V8HI signed min/max: native since SSE2 (pmaxsw/pminsw).
12608 (define_insn "*<code>v8hi3"
12609 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
12611 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
12612 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
12613 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12615 p<maxmin_int>w\t{%2, %0|%0, %2}
12616 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
12617 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
12618 [(set_attr "isa" "noavx,avx,avx512bw")
12619 (set_attr "type" "sseiadd")
12620 (set_attr "prefix_data16" "1,*,*")
12621 (set_attr "prefix_extra" "*,1,1")
12622 (set_attr "prefix" "orig,vex,evex")
12623 (set_attr "mode" "TI")])
;; Unsigned 128-bit min/max.  V16QI is native in SSE2 (pmaxub/pminub);
;; V8HI umax is synthesized as a+(b-sat-a) via psubusw+paddw; the rest
;; falls back to an unsigned-GTU vcond.
12625 (define_expand "<code><mode>3"
12626 [(set (match_operand:VI124_128 0 "register_operand")
12628 (match_operand:VI124_128 1 "vector_operand")
12629 (match_operand:VI124_128 2 "vector_operand")))]
12632 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
12633 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12634 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
12636 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
12637 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Avoid clobbering op2 when the destination aliases it.
12638 if (rtx_equal_p (op3, op2))
12639 op3 = gen_reg_rtx (V8HImode);
;; umax(a,b) = sat_sub(a,b) + b.
12640 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
12641 emit_insn (gen_addv8hi3 (op0, op3, op2));
12649 operands[1] = force_reg (<MODE>mode, operands[1]);
12650 operands[2] = force_reg (<MODE>mode, operands[2]);
12652 xops[0] = operands[0];
12654 if (<CODE> == UMAX)
12656 xops[1] = operands[1];
12657 xops[2] = operands[2];
12661 xops[1] = operands[2];
12662 xops[2] = operands[1];
12665 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
12666 xops[4] = operands[1];
12667 xops[5] = operands[2];
12669 ok = ix86_expand_int_vcond (xops);
12675 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12676 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
12678 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
12679 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12681 && <mask_mode512bit_condition>
12682 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12684 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12685 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12686 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12687 [(set_attr "isa" "noavx,noavx,avx")
12688 (set_attr "type" "sseiadd")
12689 (set_attr "prefix_extra" "1,1,*")
12690 (set_attr "prefix" "orig,orig,vex")
12691 (set_attr "mode" "TI")])
;; V16QI unsigned min/max: native since SSE2 (pmaxub/pminub).
12693 (define_insn "*<code>v16qi3"
12694 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
12696 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
12697 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
12698 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12700 p<maxmin_int>b\t{%2, %0|%0, %2}
12701 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
12702 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
12703 [(set_attr "isa" "noavx,avx,avx512bw")
12704 (set_attr "type" "sseiadd")
12705 (set_attr "prefix_data16" "1,*,*")
12706 (set_attr "prefix_extra" "*,1,1")
12707 (set_attr "prefix" "orig,vex,evex")
12708 (set_attr "mode" "TI")])
12710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12712 ;; Parallel integral comparisons
12714 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12716 (define_expand "avx2_eq<mode>3"
12717 [(set (match_operand:VI_256 0 "register_operand")
12719 (match_operand:VI_256 1 "nonimmediate_operand")
12720 (match_operand:VI_256 2 "nonimmediate_operand")))]
12722 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 256-bit integer equality compare insn (vpcmpeqb/w/d/q via
;; <ssemodesuffix>).  Operands are commutative; only one may be memory.
12724 (define_insn "*avx2_eq<mode>3"
12725 [(set (match_operand:VI_256 0 "register_operand" "=x")
12727 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
12728 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12729 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12730 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12731 [(set_attr "type" "ssecmp")
12732 (set_attr "prefix_extra" "1")
12733 (set_attr "prefix" "vex")
12734 (set_attr "mode" "OI")])
;; AVX512 masked equality compare, byte/word elements (result is a mask
;; register, hence the UNSPEC form).  Legitimizes operands before matching
;; the corresponding ..._1 insn below.
12736 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12737 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12738 (unspec:<avx512fmaskmode>
12739 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12740 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
12741 UNSPEC_MASKED_EQ))]
12743 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Same as the VI12 expander above, for dword/qword elements (AVX512F
;; rather than AVX512BW element sizes).
12745 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12746 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12747 (unspec:<avx512fmaskmode>
12748 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12749 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
12750 UNSPEC_MASKED_EQ))]
12752 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Masked byte/word equality insn.  Alternative 0 is the generic
;; vpcmpeq<b,w>; alternative 1 matches operand 2 == all-zeros ("C") and
;; emits vptestnm (x == 0  <=>  (x & x) == 0), saving a zero constant.
12754 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12755 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12756 (unspec:<avx512fmaskmode>
12757 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12758 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12759 UNSPEC_MASKED_EQ))]
12760 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12762 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12763 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12764 [(set_attr "type" "ssecmp")
12765 (set_attr "prefix_extra" "1")
12766 (set_attr "prefix" "evex")
12767 (set_attr "mode" "<sseinsnmode>")])
;; Masked dword/qword equality insn; mirrors the VI12 variant above but
;; only requires TARGET_AVX512F.  Alternative 1 (operand 2 == 0) emits
;; vptestnm instead of materializing a zero vector.
12769 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12770 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12771 (unspec:<avx512fmaskmode>
12772 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12773 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12774 UNSPEC_MASKED_EQ))]
12775 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12777 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12778 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12779 [(set_attr "type" "ssecmp")
12780 (set_attr "prefix_extra" "1")
12781 (set_attr "prefix" "evex")
12782 (set_attr "mode" "<sseinsnmode>")])
;; V2DI equality via SSE4.1 pcmpeqq / AVX vpcmpeqq (64-bit element compare
;; does not exist before SSE4.1).  "Yr" prefers non-REX registers.
12784 (define_insn "*sse4_1_eqv2di3"
12785 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12787 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
12788 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12789 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12791 pcmpeqq\t{%2, %0|%0, %2}
12792 pcmpeqq\t{%2, %0|%0, %2}
12793 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
12794 [(set_attr "isa" "noavx,noavx,avx")
12795 (set_attr "type" "ssecmp")
12796 (set_attr "prefix_extra" "1")
12797 (set_attr "prefix" "orig,orig,vex")
12798 (set_attr "mode" "TI")])
;; SSE2 byte/word/dword equality (pcmpeqb/w/d).  Excluded on XOP targets,
;; which have their own comparison patterns.
12800 (define_insn "*sse2_eq<mode>3"
12801 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12803 (match_operand:VI124_128 1 "vector_operand" "%0,x")
12804 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12805 "TARGET_SSE2 && !TARGET_XOP
12806 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12808 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
12809 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12810 [(set_attr "isa" "noavx,avx")
12811 (set_attr "type" "ssecmp")
12812 (set_attr "prefix_data16" "1,*")
12813 (set_attr "prefix" "orig,vex")
12814 (set_attr "mode" "TI")])
;; Public expander for SSE2 byte/word/dword equality; legitimizes the
;; commutative operands, then the *sse2_eq<mode>3 insn above matches.
;; XOP targets are excluded, matching the insn's condition.
;; (Fix: dropped a stray trailing space inside the condition string so it
;; is consistent with the *sse2_eq<mode>3 insn condition.)
12816 (define_expand "sse2_eq<mode>3"
12817 [(set (match_operand:VI124_128 0 "register_operand")
12819 (match_operand:VI124_128 1 "vector_operand")
12820 (match_operand:VI124_128 2 "vector_operand")))]
12821 "TARGET_SSE2 && !TARGET_XOP"
12822 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Public expander for V2DI equality (SSE4.1 pcmpeqq); legitimizes
;; operands before the *sse4_1_eqv2di3 insn matches.
12824 (define_expand "sse4_1_eqv2di3"
12825 [(set (match_operand:V2DI 0 "register_operand")
12827 (match_operand:V2DI 1 "vector_operand")
12828 (match_operand:V2DI 2 "vector_operand")))]
12830 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI signed greater-than via SSE4.2 pcmpgtq / AVX vpcmpgtq.  GT is not
;; commutative, so operand 1 must be a register (no "%").
12832 (define_insn "sse4_2_gtv2di3"
12833 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12835 (match_operand:V2DI 1 "register_operand" "0,0,x")
12836 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12839 pcmpgtq\t{%2, %0|%0, %2}
12840 pcmpgtq\t{%2, %0|%0, %2}
12841 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
12842 [(set_attr "isa" "noavx,noavx,avx")
12843 (set_attr "type" "ssecmp")
12844 (set_attr "prefix_extra" "1")
12845 (set_attr "prefix" "orig,orig,vex")
12846 (set_attr "mode" "TI")])
;; AVX2 256-bit signed greater-than compare (vpcmpgtb/w/d/q).
12848 (define_insn "avx2_gt<mode>3"
12849 [(set (match_operand:VI_256 0 "register_operand" "=x")
12851 (match_operand:VI_256 1 "register_operand" "x")
12852 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12854 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12855 [(set_attr "type" "ssecmp")
12856 (set_attr "prefix_extra" "1")
12857 (set_attr "prefix" "vex")
12858 (set_attr "mode" "OI")])
;; AVX512 masked signed greater-than, dword/qword elements; result goes
;; to a mask register (UNSPEC_MASKED_GT).
12860 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12861 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12862 (unspec:<avx512fmaskmode>
12863 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12864 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12866 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12867 [(set_attr "type" "ssecmp")
12868 (set_attr "prefix_extra" "1")
12869 (set_attr "prefix" "evex")
12870 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 masked signed greater-than, byte/word elements (AVX512BW sizes).
12872 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12873 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12874 (unspec:<avx512fmaskmode>
12875 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12876 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12878 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12879 [(set_attr "type" "ssecmp")
12880 (set_attr "prefix_extra" "1")
12881 (set_attr "prefix" "evex")
12882 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 byte/word/dword signed greater-than (pcmpgtb/w/d); excluded on
;; XOP targets, which have their own comparison patterns.
12884 (define_insn "sse2_gt<mode>3"
12885 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12887 (match_operand:VI124_128 1 "register_operand" "0,x")
12888 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12889 "TARGET_SSE2 && !TARGET_XOP"
12891 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
12892 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12893 [(set_attr "isa" "noavx,avx")
12894 (set_attr "type" "ssecmp")
12895 (set_attr "prefix_data16" "1,*")
12896 (set_attr "prefix" "orig,vex")
12897 (set_attr "mode" "TI")])
;; Signed vector-conditional (vcond) expander for 512-bit modes; requires
;; the data and comparison vectors to have equal element counts.  The C
;; body delegates to ix86_expand_int_vcond.
;; NOTE(review): trailing lines of the C body are elided in this excerpt.
12899 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
12900 [(set (match_operand:V_512 0 "register_operand")
12901 (if_then_else:V_512
12902 (match_operator 3 ""
12903 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12904 (match_operand:VI_AVX512BW 5 "general_operand")])
12905 (match_operand:V_512 1)
12906 (match_operand:V_512 2)))]
12908 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12909 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12911 bool ok = ix86_expand_int_vcond (operands);
;; Signed vcond expander for 256-bit modes; same shape as the 512-bit one.
12916 (define_expand "vcond<V_256:mode><VI_256:mode>"
12917 [(set (match_operand:V_256 0 "register_operand")
12918 (if_then_else:V_256
12919 (match_operator 3 ""
12920 [(match_operand:VI_256 4 "nonimmediate_operand")
12921 (match_operand:VI_256 5 "general_operand")])
12922 (match_operand:V_256 1)
12923 (match_operand:V_256 2)))]
12925 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12926 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12928 bool ok = ix86_expand_int_vcond (operands);
;; Signed vcond expander for 128-bit modes with 1/2/4-byte elements.
12933 (define_expand "vcond<V_128:mode><VI124_128:mode>"
12934 [(set (match_operand:V_128 0 "register_operand")
12935 (if_then_else:V_128
12936 (match_operator 3 ""
12937 [(match_operand:VI124_128 4 "vector_operand")
12938 (match_operand:VI124_128 5 "general_operand")])
12939 (match_operand:V_128 1)
12940 (match_operand:V_128 2)))]
12942 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12943 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12945 bool ok = ix86_expand_int_vcond (operands);
;; Signed vcond expander for V2DI comparisons selecting 128-bit
;; 8-byte-element (int or FP) data.
12950 (define_expand "vcond<VI8F_128:mode>v2di"
12951 [(set (match_operand:VI8F_128 0 "register_operand")
12952 (if_then_else:VI8F_128
12953 (match_operator 3 ""
12954 [(match_operand:V2DI 4 "vector_operand")
12955 (match_operand:V2DI 5 "general_operand")])
12956 (match_operand:VI8F_128 1)
12957 (match_operand:VI8F_128 2)))]
12960 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned vcond (vcondu) expander for 512-bit modes; like vcond but the
;; comparison operands are unsigned.
12965 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
12966 [(set (match_operand:V_512 0 "register_operand")
12967 (if_then_else:V_512
12968 (match_operator 3 ""
12969 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12970 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
12971 (match_operand:V_512 1 "general_operand")
12972 (match_operand:V_512 2 "general_operand")))]
12974 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12975 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12977 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned vcond expander for 256-bit modes.
12982 (define_expand "vcondu<V_256:mode><VI_256:mode>"
12983 [(set (match_operand:V_256 0 "register_operand")
12984 (if_then_else:V_256
12985 (match_operator 3 ""
12986 [(match_operand:VI_256 4 "nonimmediate_operand")
12987 (match_operand:VI_256 5 "nonimmediate_operand")])
12988 (match_operand:V_256 1 "general_operand")
12989 (match_operand:V_256 2 "general_operand")))]
12991 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12992 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12994 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned vcond expander for 128-bit modes with 1/2/4-byte elements.
12999 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
13000 [(set (match_operand:V_128 0 "register_operand")
13001 (if_then_else:V_128
13002 (match_operator 3 ""
13003 [(match_operand:VI124_128 4 "vector_operand")
13004 (match_operand:VI124_128 5 "vector_operand")])
13005 (match_operand:V_128 1 "general_operand")
13006 (match_operand:V_128 2 "general_operand")))]
13008 && (GET_MODE_NUNITS (<V_128:MODE>mode)
13009 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
13011 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned vcond expander for V2DI comparisons selecting 128-bit
;; 8-byte-element data.
13016 (define_expand "vcondu<VI8F_128:mode>v2di"
13017 [(set (match_operand:VI8F_128 0 "register_operand")
13018 (if_then_else:VI8F_128
13019 (match_operator 3 ""
13020 [(match_operand:V2DI 4 "vector_operand")
13021 (match_operand:V2DI 5 "vector_operand")])
13022 (match_operand:VI8F_128 1 "general_operand")
13023 (match_operand:VI8F_128 2 "general_operand")))]
13026 bool ok = ix86_expand_int_vcond (operands);
;; Equality-only vcond expander for V2DI (usable when only EQ/NE
;; comparisons are needed, e.g. without full pcmpgtq support).
13031 (define_expand "vcondeq<VI8F_128:mode>v2di"
13032 [(set (match_operand:VI8F_128 0 "register_operand")
13033 (if_then_else:VI8F_128
13034 (match_operator 3 ""
13035 [(match_operand:V2DI 4 "vector_operand")
13036 (match_operand:V2DI 5 "general_operand")])
13037 (match_operand:VI8F_128 1)
13038 (match_operand:VI8F_128 2)))]
13041 bool ok = ix86_expand_int_vcond (operands);
;; Modes supported by the generic vec_perm expander below; wider modes
;; are gated on the ISA level that can permute them.
13046 (define_mode_iterator VEC_PERM_AVX2
13047 [V16QI V8HI V4SI V2DI V4SF V2DF
13048 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
13049 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
13050 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
13051 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
13052 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
13053 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; Variable two-source permute: operand 0 = permute of operands 1/2 under
;; the integer selector in operand 3.  Entirely expanded in C by
;; ix86_expand_vec_perm.
13055 (define_expand "vec_perm<mode>"
13056 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
13057 (match_operand:VEC_PERM_AVX2 1 "register_operand")
13058 (match_operand:VEC_PERM_AVX2 2 "register_operand")
13059 (match_operand:<sseintvecmode> 3 "register_operand")]
13060 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
13062 ix86_expand_vec_perm (operands);
13066 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13068 ;; Parallel bitwise logical operations
13070 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector bitwise NOT, modeled as XOR with all-ones.  Pre-AVX512 the
;; all-ones mask must live in a register; with AVX512F the constant is
;; kept as-is so the vpternlog pattern below can absorb it.
13072 (define_expand "one_cmpl<mode>2"
13073 [(set (match_operand:VI 0 "register_operand")
13074 (xor:VI (match_operand:VI 1 "vector_operand")
13078 if (!TARGET_AVX512F)
13079 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
13081 operands[2] = CONSTM1_RTX (<MODE>mode);
;; Vector NOT implemented with vpternlog imm8 0x55 (= ~A truth table).
;; Without AVX512VL the operation is widened to the full 512-bit
;; register via the %g modifiers; the memory alternative is then only
;; enabled for 512-bit modes (see the "enabled" attribute).
13084 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
13085 [(set (match_operand:VI 0 "register_operand" "=v,v")
13086 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
13087 (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
13089 && (!<mask_applied>
13090 || <ssescalarmode>mode == SImode
13091 || <ssescalarmode>mode == DImode)"
13093 if (TARGET_AVX512VL)
13094 return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
13096 return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
13098 [(set_attr "type" "sselog")
13099 (set_attr "prefix" "evex")
13101 (if_then_else (match_test "TARGET_AVX512VL")
13102 (const_string "<sseinsnmode>")
13103 (const_string "XI")))
13104 (set (attr "enabled")
13105 (if_then_else (eq_attr "alternative" "1")
13106 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
;; Expander for pandn-style AND-NOT: dst = ~op1 & op2.
13109 (define_expand "<sse2_avx2>_andnot<mode>3"
13110 [(set (match_operand:VI_AVX2 0 "register_operand")
13112 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
13113 (match_operand:VI_AVX2 2 "vector_operand")))]
;; Masked (write-masked) AND-NOT expander, dword/qword elements: merge of
;; (~op1 & op2) with op3 under mask op4.
13116 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13117 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
13118 (vec_merge:VI48_AVX512VL
13121 (match_operand:VI48_AVX512VL 1 "register_operand"))
13122 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
13123 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
13124 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Masked AND-NOT expander, byte/word elements (AVX512BW sizes).
13127 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13128 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
13129 (vec_merge:VI12_AVX512VL
13132 (match_operand:VI12_AVX512VL 1 "register_operand"))
13133 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
13134 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
13135 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; AND-NOT insn for all integer vector modes.  The C body picks between
;; the integer (pandn*) and float (andnps/andnpd) mnemonic spellings and
;; chooses the element-size suffix: there is no vpandnb/vpandnw and no
;; 512-bit plain vpandn, so byte/word and 512-bit cases fall back to
;; vpandnq (bitwise ops are element-size agnostic).  The "mode" attribute
;; cond reflects which vector width/domain the chosen encoding uses.
;; NOTE(review): several interior C-body lines are elided in this
;; excerpt; kept byte-identical.
13138 (define_insn "*andnot<mode>3"
13139 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
13141 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
13142 (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13148 const char *ssesuffix;
13150 switch (get_attr_mode (insn))
13153 gcc_assert (TARGET_AVX512F);
13156 gcc_assert (TARGET_AVX2);
13159 gcc_assert (TARGET_SSE2);
13161 switch (<MODE>mode)
13165 /* There is no vpandnb or vpandnw instruction, nor vpandn for
13166 512-bit vectors. Use vpandnq instead. */
13171 ssesuffix = "<ssemodesuffix>";
13177 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
13178 ? "<ssemodesuffix>" : "");
13181 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13186 gcc_assert (TARGET_AVX512F);
13189 gcc_assert (TARGET_AVX);
13192 gcc_assert (TARGET_SSE);
13198 gcc_unreachable ();
13201 switch (which_alternative)
13204 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13208 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13211 gcc_unreachable ();
13214 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13215 output_asm_insn (buf, operands);
13218 [(set_attr "isa" "noavx,avx,avx")
13219 (set_attr "type" "sselog")
13220 (set (attr "prefix_data16")
13222 (and (eq_attr "alternative" "0")
13223 (eq_attr "mode" "TI"))
13225 (const_string "*")))
13226 (set_attr "prefix" "orig,vex,evex")
13228 (cond [(match_test "TARGET_AVX2")
13229 (const_string "<sseinsnmode>")
13230 (match_test "TARGET_AVX")
13232 (match_test "<MODE_SIZE> > 16")
13233 (const_string "V8SF")
13234 (const_string "<sseinsnmode>"))
13235 (ior (not (match_test "TARGET_SSE2"))
13236 (match_test "optimize_function_for_size_p (cfun)"))
13237 (const_string "V4SF")
13239 (const_string "<sseinsnmode>")))])
;; Write-masked AND-NOT insn (vpandnd/vpandnq with {%k}{z}/merge
;; semantics via %{%4%}%N3).  The trailing ';' after the template string
;; is harmless: ';' starts a comment in machine-description files.
13241 (define_insn "*andnot<mode>3_mask"
13242 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13243 (vec_merge:VI48_AVX512VL
13246 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
13247 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
13248 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
13249 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
13251 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
13252 [(set_attr "type" "sselog")
13253 (set_attr "prefix" "evex")
13254 (set_attr "mode" "<sseinsnmode>")])
;; Generic expander for vector AND/IOR/XOR (any_logic); allows constant
;; vector operands, which ix86_expand_vector_logical_operator legitimizes.
13256 (define_expand "<code><mode>3"
13257 [(set (match_operand:VI 0 "register_operand")
13259 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
13260 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
13263 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Maskable AND/IOR/XOR insn for dword/qword vectors.  Like *andnot<mode>3,
;; the C body chooses integer vs float mnemonic spelling and the EVEX
;; element-size suffix; when a write mask is applied the EVEX suffixed
;; form is mandatory.  Alternative 0 reuses the two-operand legacy SSE
;; encoding (operand 1 tied to 0).
;; NOTE(review): several interior C-body lines are elided in this
;; excerpt; kept byte-identical.
13267 (define_insn "<mask_codefor><code><mode>3<mask_name>"
13268 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
13269 (any_logic:VI48_AVX_AVX512F
13270 (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v")
13271 (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13272 "TARGET_SSE && <mask_mode512bit_condition>
13273 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
13278 const char *ssesuffix;
13280 switch (get_attr_mode (insn))
13283 gcc_assert (TARGET_AVX512F);
13286 gcc_assert (TARGET_AVX2);
13289 gcc_assert (TARGET_SSE2);
13291 switch (<MODE>mode)
13295 ssesuffix = "<ssemodesuffix>";
13301 ssesuffix = (TARGET_AVX512VL
13302 && (<mask_applied> || which_alternative == 2)
13303 ? "<ssemodesuffix>" : "");
13306 gcc_unreachable ();
13311 gcc_assert (TARGET_AVX);
13314 gcc_assert (TARGET_SSE);
13320 gcc_unreachable ();
13323 switch (which_alternative)
13326 if (<mask_applied>)
13327 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
13329 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13333 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
13336 gcc_unreachable ();
13339 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13340 output_asm_insn (buf, operands);
13343 [(set_attr "isa" "noavx,avx,avx")
13344 (set_attr "type" "sselog")
13345 (set (attr "prefix_data16")
13347 (and (eq_attr "alternative" "0")
13348 (eq_attr "mode" "TI"))
13350 (const_string "*")))
13351 (set_attr "prefix" "<mask_prefix3>,evex")
13353 (cond [(match_test "TARGET_AVX2")
13354 (const_string "<sseinsnmode>")
13355 (match_test "TARGET_AVX")
13357 (match_test "<MODE_SIZE> > 16")
13358 (const_string "V8SF")
13359 (const_string "<sseinsnmode>"))
13360 (ior (not (match_test "TARGET_SSE2"))
13361 (match_test "optimize_function_for_size_p (cfun)"))
13362 (const_string "V4SF")
13364 (const_string "<sseinsnmode>")))])
;; AND/IOR/XOR insn for byte/word vectors.  There are no vpandb/vpandw
;; etc., so under AVX512VL the "q" element-size suffix is used for the
;; EVEX alternative; otherwise the suffix-less legacy/VEX form is emitted.
;; NOTE(review): several interior C-body lines are elided in this
;; excerpt; kept byte-identical.
13366 (define_insn "*<code><mode>3"
13367 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
13368 (any_logic:VI12_AVX_AVX512F
13369 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
13370 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
13371 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13376 const char *ssesuffix;
13378 switch (get_attr_mode (insn))
13381 gcc_assert (TARGET_AVX512F);
13384 gcc_assert (TARGET_AVX2);
13387 gcc_assert (TARGET_SSE2);
13389 switch (<MODE>mode)
13399 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13402 gcc_unreachable ();
13407 gcc_assert (TARGET_AVX);
13410 gcc_assert (TARGET_SSE);
13416 gcc_unreachable ();
13419 switch (which_alternative)
13422 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13426 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13429 gcc_unreachable ();
13432 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13433 output_asm_insn (buf, operands);
13436 [(set_attr "isa" "noavx,avx,avx")
13437 (set_attr "type" "sselog")
13438 (set (attr "prefix_data16")
13440 (and (eq_attr "alternative" "0")
13441 (eq_attr "mode" "TI"))
13443 (const_string "*")))
13444 (set_attr "prefix" "orig,vex,evex")
13446 (cond [(match_test "TARGET_AVX2")
13447 (const_string "<sseinsnmode>")
13448 (match_test "TARGET_AVX")
13450 (match_test "<MODE_SIZE> > 16")
13451 (const_string "V8SF")
13452 (const_string "<sseinsnmode>"))
13453 (ior (not (match_test "TARGET_SSE2"))
13454 (match_test "optimize_function_for_size_p (cfun)"))
13455 (const_string "V4SF")
13457 (const_string "<sseinsnmode>")))])
;; All integer element sizes for the vptestm/vptestnm patterns below;
;; byte/word sizes additionally require AVX512BW, sub-512-bit widths
;; require AVX512VL.
13459 (define_mode_iterator VI1248_AVX512VLBW
13460 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
13461 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
13462 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
13463 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
13464 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
13465 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Scalar modes a mask result can be zero-extended into; SI/DI masks
;; need AVX512BW (k-registers wider than 16 bits).
13467 (define_mode_iterator AVX512ZEXTMASK
13468 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
;; vptestm: mask = (op1 & op2) != 0 per element (UNSPEC-modeled).
13470 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
13471 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13472 (unspec:<avx512fmaskmode>
13473 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13474 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13477 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13478 [(set_attr "prefix" "evex")
13479 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm: mask = (op1 & op2) == 0 per element (complement of vptestm).
13481 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
13482 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13483 (unspec:<avx512fmaskmode>
13484 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13485 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13488 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13489 [(set_attr "prefix" "evex")
13490 (set_attr "mode" "<sseinsnmode>")])
;; vptestm whose mask result is consumed zero-extended into a wider
;; scalar; only valid when the destination is strictly wider than the
;; natural mask mode (the upper mask bits are architecturally zero).
13492 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
13493 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13494 (zero_extend:AVX512ZEXTMASK
13495 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13496 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13497 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13500 && (<AVX512ZEXTMASK:MODE_SIZE>
13501 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13502 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13503 [(set_attr "prefix" "evex")
13504 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; Zero-extended vptestm additionally ANDed with mask operand 3, mapped
;; onto the instruction's own write mask (%{%3%}).
13506 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
13507 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13508 (zero_extend:AVX512ZEXTMASK
13509 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13510 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13511 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13512 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13514 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13516 && (<AVX512ZEXTMASK:MODE_SIZE>
13517 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13518 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13519 [(set_attr "prefix" "evex")
13520 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; vptestnm variant of the zero-extended test pattern above.
13522 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
13523 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13524 (zero_extend:AVX512ZEXTMASK
13525 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13526 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13527 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13530 && (<AVX512ZEXTMASK:MODE_SIZE>
13531 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13532 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13533 [(set_attr "prefix" "evex")
13534 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; vptestnm variant of the zero-extended, write-masked test pattern.
13536 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
13537 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13538 (zero_extend:AVX512ZEXTMASK
13539 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13540 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13541 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13542 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13544 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13546 && (<AVX512ZEXTMASK:MODE_SIZE>
13547 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13548 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13549 [(set_attr "prefix" "evex")
13550 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13552 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13554 ;; Parallel integral element swizzling
13556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two vectors into one with half-width elements,
;; implemented as an even/odd element extraction on the two sources
;; viewed in the packed mode.
13558 (define_expand "vec_pack_trunc_<mode>"
13559 [(match_operand:<ssepackmode> 0 "register_operand")
13560 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
13561 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
13564 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
13565 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
13566 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two QImode mask values into one HImode mask:
;; result = (zext(op2) << shift) | zext(op1).
13570 (define_expand "vec_pack_trunc_qi"
13571 [(set (match_operand:HI 0 "register_operand")
13572 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
13574 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
;; Pack two HI/SI mask values into the double-width mask mode; the shift
;; amount (operand 3) is the source mask's bit width.
13577 (define_expand "vec_pack_trunc_<mode>"
13578 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
13579 (ior:<DOUBLEMASKMODE>
13580 (ashift:<DOUBLEMASKMODE>
13581 (zero_extend:<DOUBLEMASKMODE>
13582 (match_operand:SWI24 2 "register_operand"))
13584 (zero_extend:<DOUBLEMASKMODE>
13585 (match_operand:SWI24 1 "register_operand"))))]
13588 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
;; Pack two sub-byte boolean masks (operand 3 = total element count, 8 or
;; 4) into one QImode mask: mask off op1's low half, shift op2 up by
;; nunits/2 (via kshiftl; without AVX512DQ only the HImode kshift exists,
;; so op2 is widened, shifted, then truncated), and OR the halves.
13591 (define_expand "vec_pack_sbool_trunc_qi"
13592 [(match_operand:QI 0 "register_operand")
13593 (match_operand:QI 1 "register_operand")
13594 (match_operand:QI 2 "register_operand")
13595 (match_operand:QI 3 "const_int_operand")]
13598 HOST_WIDE_INT nunits = INTVAL (operands[3]);
13599 rtx mask, tem1, tem2;
13600 if (nunits != 8 && nunits != 4)
13602 mask = gen_reg_rtx (QImode);
13603 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
13604 tem1 = gen_reg_rtx (QImode);
13605 emit_insn (gen_kandqi (tem1, operands[1], mask));
13606 if (TARGET_AVX512DQ)
13608 tem2 = gen_reg_rtx (QImode);
13609 emit_insn (gen_kashiftqi (tem2, operands[2],
13610 GEN_INT (nunits / 2)));
13614 tem2 = gen_reg_rtx (HImode);
13615 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
13617 GEN_INT (nunits / 2)));
13618 tem2 = lowpart_subreg (QImode, tem2, HImode);
13620 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
;; packsswb: signed-saturating word->byte pack, concatenating the two
;; truncated halves.  SSE2 / AVX / AVX512BW alternatives.
13624 (define_insn "<sse2_avx2>_packsswb<mask_name>"
13625 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13626 (vec_concat:VI1_AVX512
13627 (ss_truncate:<ssehalfvecmode>
13628 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13629 (ss_truncate:<ssehalfvecmode>
13630 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13631 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13633 packsswb\t{%2, %0|%0, %2}
13634 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13635 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13636 [(set_attr "isa" "noavx,avx,avx512bw")
13637 (set_attr "type" "sselog")
13638 (set_attr "prefix_data16" "1,*,*")
13639 (set_attr "prefix" "orig,<mask_prefix>,evex")
13640 (set_attr "mode" "<sseinsnmode>")])
;; packssdw: signed-saturating dword->word pack; same structure as
;; packsswb above.
13642 (define_insn "<sse2_avx2>_packssdw<mask_name>"
13643 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13644 (vec_concat:VI2_AVX2
13645 (ss_truncate:<ssehalfvecmode>
13646 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13647 (ss_truncate:<ssehalfvecmode>
13648 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13649 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13651 packssdw\t{%2, %0|%0, %2}
13652 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13653 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13654 [(set_attr "isa" "noavx,avx,avx512bw")
13655 (set_attr "type" "sselog")
13656 (set_attr "prefix_data16" "1,*,*")
13657 (set_attr "prefix" "orig,<mask_prefix>,evex")
13658 (set_attr "mode" "<sseinsnmode>")])
;; packuswb: unsigned-saturating word->byte pack (us_truncate rather than
;; ss_truncate); same alternatives as the signed packs above.
13660 (define_insn "<sse2_avx2>_packuswb<mask_name>"
13661 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13662 (vec_concat:VI1_AVX512
13663 (us_truncate:<ssehalfvecmode>
13664 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13665 (us_truncate:<ssehalfvecmode>
13666 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13667 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13669 packuswb\t{%2, %0|%0, %2}
13670 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13671 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13672 [(set_attr "isa" "noavx,avx,avx512bw")
13673 (set_attr "type" "sselog")
13674 (set_attr "prefix_data16" "1,*,*")
13675 (set_attr "prefix" "orig,<mask_prefix>,evex")
13676 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw, 512-bit: interleaves the high 8 bytes of each 128-bit lane
;; of operands 1 and 2 (the selector encodes the per-lane high-half
;; byte pairs for all four lanes).
13678 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
13679 [(set (match_operand:V64QI 0 "register_operand" "=v")
13682 (match_operand:V64QI 1 "register_operand" "v")
13683 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13684 (parallel [(const_int 8) (const_int 72)
13685 (const_int 9) (const_int 73)
13686 (const_int 10) (const_int 74)
13687 (const_int 11) (const_int 75)
13688 (const_int 12) (const_int 76)
13689 (const_int 13) (const_int 77)
13690 (const_int 14) (const_int 78)
13691 (const_int 15) (const_int 79)
13692 (const_int 24) (const_int 88)
13693 (const_int 25) (const_int 89)
13694 (const_int 26) (const_int 90)
13695 (const_int 27) (const_int 91)
13696 (const_int 28) (const_int 92)
13697 (const_int 29) (const_int 93)
13698 (const_int 30) (const_int 94)
13699 (const_int 31) (const_int 95)
13700 (const_int 40) (const_int 104)
13701 (const_int 41) (const_int 105)
13702 (const_int 42) (const_int 106)
13703 (const_int 43) (const_int 107)
13704 (const_int 44) (const_int 108)
13705 (const_int 45) (const_int 109)
13706 (const_int 46) (const_int 110)
13707 (const_int 47) (const_int 111)
13708 (const_int 56) (const_int 120)
13709 (const_int 57) (const_int 121)
13710 (const_int 58) (const_int 122)
13711 (const_int 59) (const_int 123)
13712 (const_int 60) (const_int 124)
13713 (const_int 61) (const_int 125)
13714 (const_int 62) (const_int 126)
13715 (const_int 63) (const_int 127)])))]
13717 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13718 [(set_attr "type" "sselog")
13719 (set_attr "prefix" "evex")
13720 (set_attr "mode" "XI")])
;; vpunpckhbw, 256-bit: per-128-bit-lane high-byte interleave of the two
;; source operands.
13722 (define_insn "avx2_interleave_highv32qi<mask_name>"
13723 [(set (match_operand:V32QI 0 "register_operand" "=v")
13726 (match_operand:V32QI 1 "register_operand" "v")
13727 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13728 (parallel [(const_int 8) (const_int 40)
13729 (const_int 9) (const_int 41)
13730 (const_int 10) (const_int 42)
13731 (const_int 11) (const_int 43)
13732 (const_int 12) (const_int 44)
13733 (const_int 13) (const_int 45)
13734 (const_int 14) (const_int 46)
13735 (const_int 15) (const_int 47)
13736 (const_int 24) (const_int 56)
13737 (const_int 25) (const_int 57)
13738 (const_int 26) (const_int 58)
13739 (const_int 27) (const_int 59)
13740 (const_int 28) (const_int 60)
13741 (const_int 29) (const_int 61)
13742 (const_int 30) (const_int 62)
13743 (const_int 31) (const_int 63)])))]
13744 "TARGET_AVX2 && <mask_avx512vl_condition>"
13745 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13746 [(set_attr "type" "sselog")
13747 (set_attr "prefix" "<mask_prefix>")
13748 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw, 128-bit: interleave the high 8 bytes of the
;; two sources.
13750 (define_insn "vec_interleave_highv16qi<mask_name>"
13751 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13754 (match_operand:V16QI 1 "register_operand" "0,v")
13755 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13756 (parallel [(const_int 8) (const_int 24)
13757 (const_int 9) (const_int 25)
13758 (const_int 10) (const_int 26)
13759 (const_int 11) (const_int 27)
13760 (const_int 12) (const_int 28)
13761 (const_int 13) (const_int 29)
13762 (const_int 14) (const_int 30)
13763 (const_int 15) (const_int 31)])))]
13764 "TARGET_SSE2 && <mask_avx512vl_condition>"
13766 punpckhbw\t{%2, %0|%0, %2}
13767 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13768 [(set_attr "isa" "noavx,avx")
13769 (set_attr "type" "sselog")
13770 (set_attr "prefix_data16" "1,*")
13771 (set_attr "prefix" "orig,<mask_prefix>")
13772 (set_attr "mode" "TI")])
;; 512-bit vpunpcklbw: interleave the low 8 bytes of each of the four
;; 128-bit lanes of operands 1 and 2 (0-7 with 64-71, 16-23 with 80-87,
;; 32-39 with 96-103, 48-55 with 112-119), with optional write-masking.
13774 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
13775 [(set (match_operand:V64QI 0 "register_operand" "=v")
13778 (match_operand:V64QI 1 "register_operand" "v")
13779 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13780 (parallel [(const_int 0) (const_int 64)
13781 (const_int 1) (const_int 65)
13782 (const_int 2) (const_int 66)
13783 (const_int 3) (const_int 67)
13784 (const_int 4) (const_int 68)
13785 (const_int 5) (const_int 69)
13786 (const_int 6) (const_int 70)
13787 (const_int 7) (const_int 71)
13788 (const_int 16) (const_int 80)
13789 (const_int 17) (const_int 81)
13790 (const_int 18) (const_int 82)
13791 (const_int 19) (const_int 83)
13792 (const_int 20) (const_int 84)
13793 (const_int 21) (const_int 85)
13794 (const_int 22) (const_int 86)
13795 (const_int 23) (const_int 87)
13796 (const_int 32) (const_int 96)
13797 (const_int 33) (const_int 97)
13798 (const_int 34) (const_int 98)
13799 (const_int 35) (const_int 99)
13800 (const_int 36) (const_int 100)
13801 (const_int 37) (const_int 101)
13802 (const_int 38) (const_int 102)
13803 (const_int 39) (const_int 103)
13804 (const_int 48) (const_int 112)
13805 (const_int 49) (const_int 113)
13806 (const_int 50) (const_int 114)
13807 (const_int 51) (const_int 115)
13808 (const_int 52) (const_int 116)
13809 (const_int 53) (const_int 117)
13810 (const_int 54) (const_int 118)
13811 (const_int 55) (const_int 119)])))]
13813 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13814 [(set_attr "type" "sselog")
13815 (set_attr "prefix" "evex")
13816 (set_attr "mode" "XI")])
;; 256-bit vpunpcklbw: interleave the low 8 bytes of each 128-bit lane of
;; operands 1 and 2 (0-7 with 32-39, 16-23 with 48-55), optionally masked.
13818 (define_insn "avx2_interleave_lowv32qi<mask_name>"
13819 [(set (match_operand:V32QI 0 "register_operand" "=v")
13822 (match_operand:V32QI 1 "register_operand" "v")
13823 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13824 (parallel [(const_int 0) (const_int 32)
13825 (const_int 1) (const_int 33)
13826 (const_int 2) (const_int 34)
13827 (const_int 3) (const_int 35)
13828 (const_int 4) (const_int 36)
13829 (const_int 5) (const_int 37)
13830 (const_int 6) (const_int 38)
13831 (const_int 7) (const_int 39)
13832 (const_int 16) (const_int 48)
13833 (const_int 17) (const_int 49)
13834 (const_int 18) (const_int 50)
13835 (const_int 19) (const_int 51)
13836 (const_int 20) (const_int 52)
13837 (const_int 21) (const_int 53)
13838 (const_int 22) (const_int 54)
13839 (const_int 23) (const_int 55)])))]
13840 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13841 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13842 [(set_attr "type" "sselog")
13843 (set_attr "prefix" "maybe_vex")
13844 (set_attr "mode" "OI")])
;; 128-bit punpcklbw/vpunpcklbw: interleave the low 8 bytes of operands
;; 1 and 2 (elements 0-7 with 16-23).  Two alternatives: legacy SSE2
;; two-operand form and AVX three-operand form (optionally masked).
13846 (define_insn "vec_interleave_lowv16qi<mask_name>"
13847 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13850 (match_operand:V16QI 1 "register_operand" "0,v")
13851 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13852 (parallel [(const_int 0) (const_int 16)
13853 (const_int 1) (const_int 17)
13854 (const_int 2) (const_int 18)
13855 (const_int 3) (const_int 19)
13856 (const_int 4) (const_int 20)
13857 (const_int 5) (const_int 21)
13858 (const_int 6) (const_int 22)
13859 (const_int 7) (const_int 23)])))]
13860 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13862 punpcklbw\t{%2, %0|%0, %2}
13863 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13864 [(set_attr "isa" "noavx,avx")
13865 (set_attr "type" "sselog")
13866 (set_attr "prefix_data16" "1,*")
13867 (set_attr "prefix" "orig,vex")
13868 (set_attr "mode" "TI")])
;; 512-bit vpunpckhwd: interleave the high 4 words of each of the four
;; 128-bit lanes of operands 1 and 2 (4-7 with 36-39, 12-15 with 44-47,
;; 20-23 with 52-55, 28-31 with 60-63), optionally write-masked.
13870 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
13871 [(set (match_operand:V32HI 0 "register_operand" "=v")
13874 (match_operand:V32HI 1 "register_operand" "v")
13875 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13876 (parallel [(const_int 4) (const_int 36)
13877 (const_int 5) (const_int 37)
13878 (const_int 6) (const_int 38)
13879 (const_int 7) (const_int 39)
13880 (const_int 12) (const_int 44)
13881 (const_int 13) (const_int 45)
13882 (const_int 14) (const_int 46)
13883 (const_int 15) (const_int 47)
13884 (const_int 20) (const_int 52)
13885 (const_int 21) (const_int 53)
13886 (const_int 22) (const_int 54)
13887 (const_int 23) (const_int 55)
13888 (const_int 28) (const_int 60)
13889 (const_int 29) (const_int 61)
13890 (const_int 30) (const_int 62)
13891 (const_int 31) (const_int 63)])))]
13893 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13894 [(set_attr "type" "sselog")
13895 (set_attr "prefix" "evex")
13896 (set_attr "mode" "XI")])
;; 256-bit vpunpckhwd: interleave the high 4 words of each 128-bit lane
;; of operands 1 and 2 (4-7 with 20-23, 12-15 with 28-31), optionally
;; masked (requires AVX512VL+AVX512BW for the masked form).
13898 (define_insn "avx2_interleave_highv16hi<mask_name>"
13899 [(set (match_operand:V16HI 0 "register_operand" "=v")
13902 (match_operand:V16HI 1 "register_operand" "v")
13903 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13904 (parallel [(const_int 4) (const_int 20)
13905 (const_int 5) (const_int 21)
13906 (const_int 6) (const_int 22)
13907 (const_int 7) (const_int 23)
13908 (const_int 12) (const_int 28)
13909 (const_int 13) (const_int 29)
13910 (const_int 14) (const_int 30)
13911 (const_int 15) (const_int 31)])))]
13912 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13913 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13914 [(set_attr "type" "sselog")
13915 (set_attr "prefix" "maybe_evex")
13916 (set_attr "mode" "OI")])
;; 128-bit punpckhwd/vpunpckhwd: interleave the high 4 words of operands
;; 1 and 2 (4-7 with 12-15).  Legacy two-operand and AVX three-operand
;; alternatives.
13918 (define_insn "vec_interleave_highv8hi<mask_name>"
13919 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13922 (match_operand:V8HI 1 "register_operand" "0,v")
13923 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13924 (parallel [(const_int 4) (const_int 12)
13925 (const_int 5) (const_int 13)
13926 (const_int 6) (const_int 14)
13927 (const_int 7) (const_int 15)])))]
13928 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13930 punpckhwd\t{%2, %0|%0, %2}
13931 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13932 [(set_attr "isa" "noavx,avx")
13933 (set_attr "type" "sselog")
13934 (set_attr "prefix_data16" "1,*")
13935 (set_attr "prefix" "orig,maybe_vex")
13936 (set_attr "mode" "TI")])
;; 512-bit vpunpcklwd: interleave the low 4 words of each of the four
;; 128-bit lanes of operands 1 and 2 (0-3 with 32-35, 8-11 with 40-43,
;; 16-19 with 48-51, 24-27 with 56-59), optionally write-masked.
13938 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
13939 [(set (match_operand:V32HI 0 "register_operand" "=v")
13942 (match_operand:V32HI 1 "register_operand" "v")
13943 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13944 (parallel [(const_int 0) (const_int 32)
13945 (const_int 1) (const_int 33)
13946 (const_int 2) (const_int 34)
13947 (const_int 3) (const_int 35)
13948 (const_int 8) (const_int 40)
13949 (const_int 9) (const_int 41)
13950 (const_int 10) (const_int 42)
13951 (const_int 11) (const_int 43)
13952 (const_int 16) (const_int 48)
13953 (const_int 17) (const_int 49)
13954 (const_int 18) (const_int 50)
13955 (const_int 19) (const_int 51)
13956 (const_int 24) (const_int 56)
13957 (const_int 25) (const_int 57)
13958 (const_int 26) (const_int 58)
13959 (const_int 27) (const_int 59)])))]
13961 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13962 [(set_attr "type" "sselog")
13963 (set_attr "prefix" "evex")
13964 (set_attr "mode" "XI")])
;; 256-bit vpunpcklwd: interleave the low 4 words of each 128-bit lane
;; of operands 1 and 2 (0-3 with 16-19, 8-11 with 24-27), optionally
;; masked.
13966 (define_insn "avx2_interleave_lowv16hi<mask_name>"
13967 [(set (match_operand:V16HI 0 "register_operand" "=v")
13970 (match_operand:V16HI 1 "register_operand" "v")
13971 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13972 (parallel [(const_int 0) (const_int 16)
13973 (const_int 1) (const_int 17)
13974 (const_int 2) (const_int 18)
13975 (const_int 3) (const_int 19)
13976 (const_int 8) (const_int 24)
13977 (const_int 9) (const_int 25)
13978 (const_int 10) (const_int 26)
13979 (const_int 11) (const_int 27)])))]
13980 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13981 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13982 [(set_attr "type" "sselog")
13983 (set_attr "prefix" "maybe_evex")
13984 (set_attr "mode" "OI")])
;; 128-bit punpcklwd/vpunpcklwd: interleave the low 4 words of operands
;; 1 and 2 (0-3 with 8-11).  Legacy two-operand and AVX three-operand
;; alternatives.
13986 (define_insn "vec_interleave_lowv8hi<mask_name>"
13987 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13990 (match_operand:V8HI 1 "register_operand" "0,v")
13991 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13992 (parallel [(const_int 0) (const_int 8)
13993 (const_int 1) (const_int 9)
13994 (const_int 2) (const_int 10)
13995 (const_int 3) (const_int 11)])))]
13996 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13998 punpcklwd\t{%2, %0|%0, %2}
13999 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14000 [(set_attr "isa" "noavx,avx")
14001 (set_attr "type" "sselog")
14002 (set_attr "prefix_data16" "1,*")
14003 (set_attr "prefix" "orig,maybe_evex")
14004 (set_attr "mode" "TI")])
;; 256-bit vpunpckhdq: interleave the high 2 dwords of each 128-bit lane
;; of operands 1 and 2 (2-3 with 10-11, 6-7 with 14-15), optionally
;; masked via AVX512VL.
14006 (define_insn "avx2_interleave_highv8si<mask_name>"
14007 [(set (match_operand:V8SI 0 "register_operand" "=v")
14010 (match_operand:V8SI 1 "register_operand" "v")
14011 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
14012 (parallel [(const_int 2) (const_int 10)
14013 (const_int 3) (const_int 11)
14014 (const_int 6) (const_int 14)
14015 (const_int 7) (const_int 15)])))]
14016 "TARGET_AVX2 && <mask_avx512vl_condition>"
14017 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14018 [(set_attr "type" "sselog")
14019 (set_attr "prefix" "maybe_evex")
14020 (set_attr "mode" "OI")])
;; 512-bit vpunpckhdq: interleave the high 2 dwords of each of the four
;; 128-bit lanes of operands 1 and 2, optionally write-masked.
14022 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
14023 [(set (match_operand:V16SI 0 "register_operand" "=v")
14026 (match_operand:V16SI 1 "register_operand" "v")
14027 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
14028 (parallel [(const_int 2) (const_int 18)
14029 (const_int 3) (const_int 19)
14030 (const_int 6) (const_int 22)
14031 (const_int 7) (const_int 23)
14032 (const_int 10) (const_int 26)
14033 (const_int 11) (const_int 27)
14034 (const_int 14) (const_int 30)
14035 (const_int 15) (const_int 31)])))]
14037 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14038 [(set_attr "type" "sselog")
14039 (set_attr "prefix" "evex")
14040 (set_attr "mode" "XI")])
;; 128-bit punpckhdq/vpunpckhdq: interleave the high 2 dwords of operands
;; 1 and 2 (2-3 with 6-7).  Legacy two-operand and AVX three-operand
;; alternatives.
14043 (define_insn "vec_interleave_highv4si<mask_name>"
14044 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
14047 (match_operand:V4SI 1 "register_operand" "0,v")
14048 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
14049 (parallel [(const_int 2) (const_int 6)
14050 (const_int 3) (const_int 7)])))]
14051 "TARGET_SSE2 && <mask_avx512vl_condition>"
14053 punpckhdq\t{%2, %0|%0, %2}
14054 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14055 [(set_attr "isa" "noavx,avx")
14056 (set_attr "type" "sselog")
14057 (set_attr "prefix_data16" "1,*")
14058 (set_attr "prefix" "orig,maybe_vex")
14059 (set_attr "mode" "TI")])
;; 256-bit vpunpckldq: interleave the low 2 dwords of each 128-bit lane
;; of operands 1 and 2 (0-1 with 8-9, 4-5 with 12-13), optionally masked.
14061 (define_insn "avx2_interleave_lowv8si<mask_name>"
14062 [(set (match_operand:V8SI 0 "register_operand" "=v")
14065 (match_operand:V8SI 1 "register_operand" "v")
14066 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
14067 (parallel [(const_int 0) (const_int 8)
14068 (const_int 1) (const_int 9)
14069 (const_int 4) (const_int 12)
14070 (const_int 5) (const_int 13)])))]
14071 "TARGET_AVX2 && <mask_avx512vl_condition>"
14072 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14073 [(set_attr "type" "sselog")
14074 (set_attr "prefix" "maybe_evex")
14075 (set_attr "mode" "OI")])
;; 512-bit vpunpckldq: interleave the low 2 dwords of each of the four
;; 128-bit lanes of operands 1 and 2, optionally write-masked.
14077 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
14078 [(set (match_operand:V16SI 0 "register_operand" "=v")
14081 (match_operand:V16SI 1 "register_operand" "v")
14082 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
14083 (parallel [(const_int 0) (const_int 16)
14084 (const_int 1) (const_int 17)
14085 (const_int 4) (const_int 20)
14086 (const_int 5) (const_int 21)
14087 (const_int 8) (const_int 24)
14088 (const_int 9) (const_int 25)
14089 (const_int 12) (const_int 28)
14090 (const_int 13) (const_int 29)])))]
14092 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14093 [(set_attr "type" "sselog")
14094 (set_attr "prefix" "evex")
14095 (set_attr "mode" "XI")])
;; 128-bit punpckldq/vpunpckldq: interleave the low 2 dwords of operands
;; 1 and 2 (0-1 with 4-5).  Legacy two-operand and AVX three-operand
;; alternatives.
14097 (define_insn "vec_interleave_lowv4si<mask_name>"
14098 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
14101 (match_operand:V4SI 1 "register_operand" "0,v")
14102 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
14103 (parallel [(const_int 0) (const_int 4)
14104 (const_int 1) (const_int 5)])))]
14105 "TARGET_SSE2 && <mask_avx512vl_condition>"
14107 punpckldq\t{%2, %0|%0, %2}
14108 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14109 [(set_attr "isa" "noavx,avx")
14110 (set_attr "type" "sselog")
14111 (set_attr "prefix_data16" "1,*")
14112 (set_attr "prefix" "orig,vex")
14113 (set_attr "mode" "TI")])
;; Full 256-bit "interleave high" for integer vectors.  The AVX2 punpck
;; patterns above interleave within 128-bit lanes only, so this expander
;; does both lane-local interleaves and then uses a 128-bit-lane permute
;; (imm 0x31 = 1 + (3 << 4): high lane of t1, high lane of t2) to build
;; the true cross-lane high-half interleave.
14115 (define_expand "vec_interleave_high<mode>"
14116 [(match_operand:VI_256 0 "register_operand")
14117 (match_operand:VI_256 1 "register_operand")
14118 (match_operand:VI_256 2 "nonimmediate_operand")]
14121 rtx t1 = gen_reg_rtx (<MODE>mode);
14122 rtx t2 = gen_reg_rtx (<MODE>mode);
14123 rtx t3 = gen_reg_rtx (V4DImode);
14124 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
14125 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
14126 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
14127 gen_lowpart (V4DImode, t2),
14128 GEN_INT (1 + (3 << 4))));
14129 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Full 256-bit "interleave low", counterpart of vec_interleave_high
;; above.  Same two lane-local interleaves, but the final lane permute
;; uses imm 0x20 = 0 + (2 << 4): low lane of t1, low lane of t2.
14133 (define_expand "vec_interleave_low<mode>"
14134 [(match_operand:VI_256 0 "register_operand")
14135 (match_operand:VI_256 1 "register_operand")
14136 (match_operand:VI_256 2 "nonimmediate_operand")]
14139 rtx t1 = gen_reg_rtx (<MODE>mode);
14140 rtx t2 = gen_reg_rtx (<MODE>mode);
14141 rtx t3 = gen_reg_rtx (V4DImode);
14142 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
14143 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
14144 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
14145 gen_lowpart (V4DImode, t2),
14146 GEN_INT (0 + (2 << 4))));
14147 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
14151 ;; Modes handled by pinsr patterns.
;; V8HI pinsrw exists since SSE2; the byte/dword/qword variants need
;; SSE4.1 (and 64-bit mode for pinsrq).
14152 (define_mode_iterator PINSR_MODE
14153 [(V16QI "TARGET_SSE4_1") V8HI
14154 (V4SI "TARGET_SSE4_1")
14155 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; ISA name used in the pattern name for each element mode.
14157 (define_mode_attr sse2p4_1
14158 [(V16QI "sse4_1") (V8HI "sse2")
14159 (V4SI "sse4_1") (V2DI "sse4_1")])
;; EVEX-encoded pinsr requires AVX512BW for byte/word, AVX512DQ for
;; dword/qword elements.
14161 (define_mode_attr pinsr_evex_isa
14162 [(V16QI "avx512bw") (V8HI "avx512bw")
14163 (V4SI "avx512dq") (V2DI "avx512dq")])
14165 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; pinsr{b,w,d,q}: insert scalar operand 2 into vector operand 1 at the
;; element selected by operand 3.  Operand 3 arrives as a single-bit
;; vec_merge mask and is converted to an element index via exact_log2.
;; Alternatives: 0-1 legacy SSE (reg/mem source), 2-3 VEX, 4-5 EVEX.
;; Sub-SImode elements are inserted from the low part of a GPR (%k2).
14166 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
14167 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
14168 (vec_merge:PINSR_MODE
14169 (vec_duplicate:PINSR_MODE
14170 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
14171 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
14172 (match_operand:SI 3 "const_int_operand")))]
14174 && ((unsigned) exact_log2 (INTVAL (operands[3]))
14175 < GET_MODE_NUNITS (<MODE>mode))"
14177 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
14179 switch (which_alternative)
14182 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
14183 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
14186 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
14189 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
14190 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
14194 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14196 gcc_unreachable ();
14199 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
14200 (set_attr "type" "sselog")
14201 (set (attr "prefix_rex")
14203 (and (not (match_test "TARGET_AVX"))
14204 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
14206 (const_string "*")))
14207 (set (attr "prefix_data16")
14209 (and (not (match_test "TARGET_AVX"))
14210 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14212 (const_string "*")))
14213 (set (attr "prefix_extra")
14215 (and (not (match_test "TARGET_AVX"))
14216 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14218 (const_string "1")))
14219 (set_attr "length_immediate" "1")
14220 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
14221 (set_attr "mode" "TI")])
;; Masked vinsert{f,i}{32x4,64x2} builtin expander: operand 3 selects
;; which quarter (0-3) of the destination receives operand 2.  The
;; quarter index is turned into a vec_merge element-selector bitmask
;; (4 bits per 32-bit quarter, 2 bits per 64-bit quarter) for the
;; _1_mask pattern.
14223 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
14224 [(match_operand:AVX512_VEC 0 "register_operand")
14225 (match_operand:AVX512_VEC 1 "register_operand")
14226 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
14227 (match_operand:SI 3 "const_0_to_3_operand")
14228 (match_operand:AVX512_VEC 4 "register_operand")
14229 (match_operand:<avx512fmaskmode> 5 "register_operand")]
14232 int mask, selector;
14233 mask = INTVAL (operands[3]);
14234 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
14235 ? 0xFFFF ^ (0x000F << mask * 4)
14236 : 0xFF ^ (0x03 << mask * 2));
14237 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
14238 (operands[0], operands[1], operands[2], GEN_INT (selector),
14239 operands[4], operands[5]));
;; Special case: inserting into quarter 0.  Alternative 0 keeps the rest
;; of operand 1 and emits vinsert with immediate 0; alternatives 1-2
;; (operand 1 is zero, "C") degenerate to a plain 128/256-bit vector
;; move of operand 2 into the low part of the destination, choosing the
;; EVEX-only (...64/...32) mnemonics for alternative 2.
14243 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
14244 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
14245 (vec_merge:AVX512_VEC
14246 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
14247 (vec_duplicate:AVX512_VEC
14248 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
14249 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
14251 && (INTVAL (operands[3])
14252 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
14254 if (which_alternative == 0)
14255 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
14256 switch (<MODE>mode)
14259 if (misaligned_operand (operands[2], <ssequartermode>mode))
14260 return "vmovupd\t{%2, %x0|%x0, %2}";
14262 return "vmovapd\t{%2, %x0|%x0, %2}";
14264 if (misaligned_operand (operands[2], <ssequartermode>mode))
14265 return "vmovups\t{%2, %x0|%x0, %2}";
14267 return "vmovaps\t{%2, %x0|%x0, %2}";
14269 if (misaligned_operand (operands[2], <ssequartermode>mode))
14270 return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
14271 : "vmovdqu\t{%2, %x0|%x0, %2}";
14273 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
14274 : "vmovdqa\t{%2, %x0|%x0, %2}";
14276 if (misaligned_operand (operands[2], <ssequartermode>mode))
14277 return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
14278 : "vmovdqu\t{%2, %x0|%x0, %2}";
14280 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
14281 : "vmovdqa\t{%2, %x0|%x0, %2}";
14283 gcc_unreachable ();
14286 [(set_attr "type" "sselog,ssemov,ssemov")
14287 (set_attr "length_immediate" "1,0,0")
14288 (set_attr "prefix" "evex,vex,evex")
14289 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
;; General quarter insert: operand 3 is the vec_merge selector bitmask;
;; the output routine maps it back to the 2-bit quarter immediate
;; (0xFFF0/0xFC -> 0, 0xFF0F/0xF3 -> 1, 0xF0FF/0xCF -> 2,
;; 0x0FFF/0x3F -> 3) and emits vinsert with that immediate.
14291 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
14292 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
14293 (vec_merge:AVX512_VEC
14294 (match_operand:AVX512_VEC 1 "register_operand" "v")
14295 (vec_duplicate:AVX512_VEC
14296 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
14297 (match_operand:SI 3 "const_int_operand" "n")))]
14301 int selector = INTVAL (operands[3]);
14303 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
14305 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
14307 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
14309 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
14312 gcc_unreachable ();
14314 operands[3] = GEN_INT (mask);
14316 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
14318 [(set_attr "type" "sselog")
14319 (set_attr "length_immediate" "1")
14320 (set_attr "prefix" "evex")
14321 (set_attr "mode" "<sseinsnmode>")])
;; Masked half-width vinsert builtin expander: operand 3 selects the low
;; (0) or high (1) half of the destination and dispatches to the
;; corresponding vec_set_lo/vec_set_hi masked pattern.
14323 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
14324 [(match_operand:AVX512_VEC_2 0 "register_operand")
14325 (match_operand:AVX512_VEC_2 1 "register_operand")
14326 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14327 (match_operand:SI 3 "const_0_to_1_operand")
14328 (match_operand:AVX512_VEC_2 4 "register_operand")
14329 (match_operand:<avx512fmaskmode> 5 "register_operand")]
14332 int mask = INTVAL (operands[3]);
14334 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
14335 operands[2], operands[4],
14338 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
14339 operands[2], operands[4],
;; vinsert{f,i}32x8 imm 0: replace the low 256-bit half of operand 1
;; with operand 2, keeping elements 8-15; optionally write-masked.
14344 (define_insn "vec_set_lo_<mode><mask_name>"
14345 [(set (match_operand:V16FI 0 "register_operand" "=v")
14347 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14348 (vec_select:<ssehalfvecmode>
14349 (match_operand:V16FI 1 "register_operand" "v")
14350 (parallel [(const_int 8) (const_int 9)
14351 (const_int 10) (const_int 11)
14352 (const_int 12) (const_int 13)
14353 (const_int 14) (const_int 15)]))))]
14355 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14356 [(set_attr "type" "sselog")
14357 (set_attr "length_immediate" "1")
14358 (set_attr "prefix" "evex")
14359 (set_attr "mode" "<sseinsnmode>")])
;; vinsert{f,i}32x8 imm 1: replace the high 256-bit half of operand 1
;; with operand 2, keeping elements 0-7; optionally write-masked.
14361 (define_insn "vec_set_hi_<mode><mask_name>"
14362 [(set (match_operand:V16FI 0 "register_operand" "=v")
14364 (vec_select:<ssehalfvecmode>
14365 (match_operand:V16FI 1 "register_operand" "v")
14366 (parallel [(const_int 0) (const_int 1)
14367 (const_int 2) (const_int 3)
14368 (const_int 4) (const_int 5)
14369 (const_int 6) (const_int 7)]))
14370 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14372 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14373 [(set_attr "type" "sselog")
14374 (set_attr "length_immediate" "1")
14375 (set_attr "prefix" "evex")
14376 (set_attr "mode" "XI")])
;; vinsert{f,i}64x4 imm 0: replace the low 256-bit half of operand 1
;; with operand 2, keeping elements 4-7; optionally write-masked.
14378 (define_insn "vec_set_lo_<mode><mask_name>"
14379 [(set (match_operand:V8FI 0 "register_operand" "=v")
14381 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14382 (vec_select:<ssehalfvecmode>
14383 (match_operand:V8FI 1 "register_operand" "v")
14384 (parallel [(const_int 4) (const_int 5)
14385 (const_int 6) (const_int 7)]))))]
14387 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14388 [(set_attr "type" "sselog")
14389 (set_attr "length_immediate" "1")
14390 (set_attr "prefix" "evex")
14391 (set_attr "mode" "XI")])
;; vinsert{f,i}64x4 imm 1: replace the high 256-bit half of operand 1
;; with operand 2, keeping elements 0-3; optionally write-masked.
14393 (define_insn "vec_set_hi_<mode><mask_name>"
14394 [(set (match_operand:V8FI 0 "register_operand" "=v")
14396 (vec_select:<ssehalfvecmode>
14397 (match_operand:V8FI 1 "register_operand" "v")
14398 (parallel [(const_int 0) (const_int 1)
14399 (const_int 2) (const_int 3)]))
14400 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14402 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14403 [(set_attr "type" "sselog")
14404 (set_attr "length_immediate" "1")
14405 (set_attr "prefix" "evex")
14406 (set_attr "mode" "XI")])
;; Masked 256-bit vshuf{f,i}64x2 builtin expander: bits 0 and 1 of
;; operand 3 each select a 128-bit (two-element) lane, taken from
;; operand 1 (element indices 0-3) and operand 2 (indices 4-7)
;; respectively; the bits are expanded into explicit element indices
;; for the _1_mask pattern.
14408 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
14409 [(match_operand:VI8F_256 0 "register_operand")
14410 (match_operand:VI8F_256 1 "register_operand")
14411 (match_operand:VI8F_256 2 "nonimmediate_operand")
14412 (match_operand:SI 3 "const_0_to_3_operand")
14413 (match_operand:VI8F_256 4 "register_operand")
14414 (match_operand:QI 5 "register_operand")]
14417 int mask = INTVAL (operands[3]);
14418 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
14419 (operands[0], operands[1], operands[2],
14420 GEN_INT (((mask >> 0) & 1) * 2 + 0),
14421 GEN_INT (((mask >> 0) & 1) * 2 + 1),
14422 GEN_INT (((mask >> 1) & 1) * 2 + 4),
14423 GEN_INT (((mask >> 1) & 1) * 2 + 5),
14424 operands[4], operands[5]));
;; 256-bit vshuf{f,i}64x2: select one aligned 128-bit pair from operand
;; 1 (operands 3-4, even/consecutive in 0-3) and one from operand 2
;; (operands 5-6, even/consecutive in 4-7); the element indices are
;; re-encoded into the instruction's 2-bit immediate.
14428 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
14429 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
14430 (vec_select:VI8F_256
14431 (vec_concat:<ssedoublemode>
14432 (match_operand:VI8F_256 1 "register_operand" "v")
14433 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
14434 (parallel [(match_operand 3 "const_0_to_3_operand")
14435 (match_operand 4 "const_0_to_3_operand")
14436 (match_operand 5 "const_4_to_7_operand")
14437 (match_operand 6 "const_4_to_7_operand")])))]
14439 && (INTVAL (operands[3]) & 1) == 0
14440 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14441 && (INTVAL (operands[5]) & 1) == 0
14442 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
14445 mask = INTVAL (operands[3]) / 2;
14446 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
14447 operands[3] = GEN_INT (mask);
14448 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
14450 [(set_attr "type" "sselog")
14451 (set_attr "length_immediate" "1")
14452 (set_attr "prefix" "evex")
14453 (set_attr "mode" "XI")])
;; Masked 512-bit vshuf{f,i}64x2 builtin expander: each 2-bit field of
;; operand 3 selects a 128-bit (two-element) lane; fields 0-1 index into
;; operand 1 (elements 0-7), fields 2-3 into operand 2 (elements 8-15).
;; Expanded into explicit element indices for the _1_mask pattern.
14455 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
14456 [(match_operand:V8FI 0 "register_operand")
14457 (match_operand:V8FI 1 "register_operand")
14458 (match_operand:V8FI 2 "nonimmediate_operand")
14459 (match_operand:SI 3 "const_0_to_255_operand")
14460 (match_operand:V8FI 4 "register_operand")
14461 (match_operand:QI 5 "register_operand")]
14464 int mask = INTVAL (operands[3]);
14465 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
14466 (operands[0], operands[1], operands[2],
14467 GEN_INT (((mask >> 0) & 3) * 2),
14468 GEN_INT (((mask >> 0) & 3) * 2 + 1),
14469 GEN_INT (((mask >> 2) & 3) * 2),
14470 GEN_INT (((mask >> 2) & 3) * 2 + 1),
14471 GEN_INT (((mask >> 4) & 3) * 2 + 8),
14472 GEN_INT (((mask >> 4) & 3) * 2 + 9),
14473 GEN_INT (((mask >> 6) & 3) * 2 + 8),
14474 GEN_INT (((mask >> 6) & 3) * 2 + 9),
14475 operands[4], operands[5]));
;; 512-bit vshuf{f,i}64x2: four aligned 128-bit pairs, two taken from
;; operand 1 (operand pairs 3-4 and 5-6, even/consecutive in 0-7) and
;; two from operand 2 (pairs 7-8 and 9-10, even/consecutive in 8-15);
;; the indices are re-encoded into the 8-bit immediate (2 bits per
;; selected lane).
14479 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
14480 [(set (match_operand:V8FI 0 "register_operand" "=v")
14482 (vec_concat:<ssedoublemode>
14483 (match_operand:V8FI 1 "register_operand" "v")
14484 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
14485 (parallel [(match_operand 3 "const_0_to_7_operand")
14486 (match_operand 4 "const_0_to_7_operand")
14487 (match_operand 5 "const_0_to_7_operand")
14488 (match_operand 6 "const_0_to_7_operand")
14489 (match_operand 7 "const_8_to_15_operand")
14490 (match_operand 8 "const_8_to_15_operand")
14491 (match_operand 9 "const_8_to_15_operand")
14492 (match_operand 10 "const_8_to_15_operand")])))]
14494 && (INTVAL (operands[3]) & 1) == 0
14495 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14496 && (INTVAL (operands[5]) & 1) == 0
14497 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
14498 && (INTVAL (operands[7]) & 1) == 0
14499 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14500 && (INTVAL (operands[9]) & 1) == 0
14501 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
14504 mask = INTVAL (operands[3]) / 2;
14505 mask |= INTVAL (operands[5]) / 2 << 2;
14506 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
14507 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
14508 operands[3] = GEN_INT (mask);
14510 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14512 [(set_attr "type" "sselog")
14513 (set_attr "length_immediate" "1")
14514 (set_attr "prefix" "evex")
14515 (set_attr "mode" "<sseinsnmode>")])
;; Masked 256-bit vshuf{f,i}32x4 builtin expander: bits 0 and 1 of
;; operand 3 each select a 128-bit (four-element) lane, from operand 1
;; (elements 0-7) and operand 2 (elements 8-15) respectively; expanded
;; into explicit element indices for the _1_mask pattern.
14517 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
14518 [(match_operand:VI4F_256 0 "register_operand")
14519 (match_operand:VI4F_256 1 "register_operand")
14520 (match_operand:VI4F_256 2 "nonimmediate_operand")
14521 (match_operand:SI 3 "const_0_to_3_operand")
14522 (match_operand:VI4F_256 4 "register_operand")
14523 (match_operand:QI 5 "register_operand")]
14526 int mask = INTVAL (operands[3]);
14527 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
14528 (operands[0], operands[1], operands[2],
14529 GEN_INT (((mask >> 0) & 1) * 4 + 0),
14530 GEN_INT (((mask >> 0) & 1) * 4 + 1),
14531 GEN_INT (((mask >> 0) & 1) * 4 + 2),
14532 GEN_INT (((mask >> 0) & 1) * 4 + 3),
14533 GEN_INT (((mask >> 1) & 1) * 4 + 8),
14534 GEN_INT (((mask >> 1) & 1) * 4 + 9),
14535 GEN_INT (((mask >> 1) & 1) * 4 + 10),
14536 GEN_INT (((mask >> 1) & 1) * 4 + 11),
14537 operands[4], operands[5]));
;; 256-bit vshuf{f,i}32x4: one aligned 128-bit quadruple from operand 1
;; (operands 3-6, multiple-of-4 start, consecutive) and one from
;; operand 2 (operands 7-10, in 8-15); the indices are re-encoded into
;; the 2-bit immediate.
14541 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
14542 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
14543 (vec_select:VI4F_256
14544 (vec_concat:<ssedoublemode>
14545 (match_operand:VI4F_256 1 "register_operand" "v")
14546 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
14547 (parallel [(match_operand 3 "const_0_to_7_operand")
14548 (match_operand 4 "const_0_to_7_operand")
14549 (match_operand 5 "const_0_to_7_operand")
14550 (match_operand 6 "const_0_to_7_operand")
14551 (match_operand 7 "const_8_to_15_operand")
14552 (match_operand 8 "const_8_to_15_operand")
14553 (match_operand 9 "const_8_to_15_operand")
14554 (match_operand 10 "const_8_to_15_operand")])))]
14556 && (INTVAL (operands[3]) & 3) == 0
14557 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14558 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14559 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14560 && (INTVAL (operands[7]) & 3) == 0
14561 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14562 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14563 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
14566 mask = INTVAL (operands[3]) / 4;
14567 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
14568 operands[3] = GEN_INT (mask);
14570 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14572 [(set_attr "type" "sselog")
14573 (set_attr "length_immediate" "1")
14574 (set_attr "prefix" "evex")
14575 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit vshuf{f,i}32x4 builtin expander: each 2-bit field of
;; operand 3 selects a 128-bit (four-element) lane; fields 0-1 index
;; into operand 1 (elements 0-15), fields 2-3 into operand 2 (elements
;; 16-31).  Expanded into explicit element indices for the _1_mask
;; pattern.
14577 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
14578 [(match_operand:V16FI 0 "register_operand")
14579 (match_operand:V16FI 1 "register_operand")
14580 (match_operand:V16FI 2 "nonimmediate_operand")
14581 (match_operand:SI 3 "const_0_to_255_operand")
14582 (match_operand:V16FI 4 "register_operand")
14583 (match_operand:HI 5 "register_operand")]
14586 int mask = INTVAL (operands[3]);
14587 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
14588 (operands[0], operands[1], operands[2],
14589 GEN_INT (((mask >> 0) & 3) * 4),
14590 GEN_INT (((mask >> 0) & 3) * 4 + 1),
14591 GEN_INT (((mask >> 0) & 3) * 4 + 2),
14592 GEN_INT (((mask >> 0) & 3) * 4 + 3),
14593 GEN_INT (((mask >> 2) & 3) * 4),
14594 GEN_INT (((mask >> 2) & 3) * 4 + 1),
14595 GEN_INT (((mask >> 2) & 3) * 4 + 2),
14596 GEN_INT (((mask >> 2) & 3) * 4 + 3),
14597 GEN_INT (((mask >> 4) & 3) * 4 + 16),
14598 GEN_INT (((mask >> 4) & 3) * 4 + 17),
14599 GEN_INT (((mask >> 4) & 3) * 4 + 18),
14600 GEN_INT (((mask >> 4) & 3) * 4 + 19),
14601 GEN_INT (((mask >> 6) & 3) * 4 + 16),
14602 GEN_INT (((mask >> 6) & 3) * 4 + 17),
14603 GEN_INT (((mask >> 6) & 3) * 4 + 18),
14604 GEN_INT (((mask >> 6) & 3) * 4 + 19),
14605 operands[4], operands[5]));
14609 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
14610 [(set (match_operand:V16FI 0 "register_operand" "=v")
14612 (vec_concat:<ssedoublemode>
14613 (match_operand:V16FI 1 "register_operand" "v")
14614 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
14615 (parallel [(match_operand 3 "const_0_to_15_operand")
14616 (match_operand 4 "const_0_to_15_operand")
14617 (match_operand 5 "const_0_to_15_operand")
14618 (match_operand 6 "const_0_to_15_operand")
14619 (match_operand 7 "const_0_to_15_operand")
14620 (match_operand 8 "const_0_to_15_operand")
14621 (match_operand 9 "const_0_to_15_operand")
14622 (match_operand 10 "const_0_to_15_operand")
14623 (match_operand 11 "const_16_to_31_operand")
14624 (match_operand 12 "const_16_to_31_operand")
14625 (match_operand 13 "const_16_to_31_operand")
14626 (match_operand 14 "const_16_to_31_operand")
14627 (match_operand 15 "const_16_to_31_operand")
14628 (match_operand 16 "const_16_to_31_operand")
14629 (match_operand 17 "const_16_to_31_operand")
14630 (match_operand 18 "const_16_to_31_operand")])))]
14632 && (INTVAL (operands[3]) & 3) == 0
14633 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14634 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14635 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14636 && (INTVAL (operands[7]) & 3) == 0
14637 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14638 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14639 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
14640 && (INTVAL (operands[11]) & 3) == 0
14641 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
14642 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
14643 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
14644 && (INTVAL (operands[15]) & 3) == 0
14645 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
14646 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
14647 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
14650 mask = INTVAL (operands[3]) / 4;
14651 mask |= INTVAL (operands[7]) / 4 << 2;
14652 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
14653 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
14654 operands[3] = GEN_INT (mask);
14656 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
14658 [(set_attr "type" "sselog")
14659 (set_attr "length_immediate" "1")
14660 (set_attr "prefix" "evex")
14661 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpshufd (V16SI) builtin expander: the imm8 in operands[2] is
;; replicated across all four 128-bit lanes (+0, +4, +8, +12 offsets) to
;; form the 16 explicit vec_select indices of avx512f_pshufd_1_mask.
14663 (define_expand "avx512f_pshufdv3_mask"
14664 [(match_operand:V16SI 0 "register_operand")
14665 (match_operand:V16SI 1 "nonimmediate_operand")
14666 (match_operand:SI 2 "const_0_to_255_operand")
14667 (match_operand:V16SI 3 "register_operand")
14668 (match_operand:HI 4 "register_operand")]
14671 int mask = INTVAL (operands[2]);
14672 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
14673 GEN_INT ((mask >> 0) & 3),
14674 GEN_INT ((mask >> 2) & 3),
14675 GEN_INT ((mask >> 4) & 3),
14676 GEN_INT ((mask >> 6) & 3),
14677 GEN_INT (((mask >> 0) & 3) + 4),
14678 GEN_INT (((mask >> 2) & 3) + 4),
14679 GEN_INT (((mask >> 4) & 3) + 4),
14680 GEN_INT (((mask >> 6) & 3) + 4),
14681 GEN_INT (((mask >> 0) & 3) + 8),
14682 GEN_INT (((mask >> 2) & 3) + 8),
14683 GEN_INT (((mask >> 4) & 3) + 8),
14684 GEN_INT (((mask >> 6) & 3) + 8),
14685 GEN_INT (((mask >> 0) & 3) + 12),
14686 GEN_INT (((mask >> 2) & 3) + 12),
14687 GEN_INT (((mask >> 4) & 3) + 12),
14688 GEN_INT (((mask >> 6) & 3) + 12),
14689 operands[3], operands[4]));
;; Matching insn: the condition enforces that lanes 1..3 repeat lane 0's
;; selection shifted by 4/8/12, so the whole select collapses back into a
;; single imm8 re-packed from operands[2..5] for vpshufd.
14693 (define_insn "avx512f_pshufd_1<mask_name>"
14694 [(set (match_operand:V16SI 0 "register_operand" "=v")
14696 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
14697 (parallel [(match_operand 2 "const_0_to_3_operand")
14698 (match_operand 3 "const_0_to_3_operand")
14699 (match_operand 4 "const_0_to_3_operand")
14700 (match_operand 5 "const_0_to_3_operand")
14701 (match_operand 6 "const_4_to_7_operand")
14702 (match_operand 7 "const_4_to_7_operand")
14703 (match_operand 8 "const_4_to_7_operand")
14704 (match_operand 9 "const_4_to_7_operand")
14705 (match_operand 10 "const_8_to_11_operand")
14706 (match_operand 11 "const_8_to_11_operand")
14707 (match_operand 12 "const_8_to_11_operand")
14708 (match_operand 13 "const_8_to_11_operand")
14709 (match_operand 14 "const_12_to_15_operand")
14710 (match_operand 15 "const_12_to_15_operand")
14711 (match_operand 16 "const_12_to_15_operand")
14712 (match_operand 17 "const_12_to_15_operand")])))]
14714 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14715 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14716 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14717 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
14718 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
14719 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
14720 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
14721 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
14722 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
14723 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
14724 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
14725 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
14728 mask |= INTVAL (operands[2]) << 0;
14729 mask |= INTVAL (operands[3]) << 2;
14730 mask |= INTVAL (operands[4]) << 4;
14731 mask |= INTVAL (operands[5]) << 6;
14732 operands[2] = GEN_INT (mask);
14734 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
14736 [(set_attr "type" "sselog1")
14737 (set_attr "prefix" "evex")
14738 (set_attr "length_immediate" "1")
14739 (set_attr "mode" "XI")])
;; Masked 256-bit vpshufd builtin expander (AVX512VL): replicates the imm8
;; selection across both 128-bit lanes (+0 and +4) and emits the masked
;; form of avx2_pshufd_1.
14741 (define_expand "avx512vl_pshufdv3_mask"
14742 [(match_operand:V8SI 0 "register_operand")
14743 (match_operand:V8SI 1 "nonimmediate_operand")
14744 (match_operand:SI 2 "const_0_to_255_operand")
14745 (match_operand:V8SI 3 "register_operand")
14746 (match_operand:QI 4 "register_operand")]
14749 int mask = INTVAL (operands[2]);
14750 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
14751 GEN_INT ((mask >> 0) & 3),
14752 GEN_INT ((mask >> 2) & 3),
14753 GEN_INT ((mask >> 4) & 3),
14754 GEN_INT ((mask >> 6) & 3),
14755 GEN_INT (((mask >> 0) & 3) + 4),
14756 GEN_INT (((mask >> 2) & 3) + 4),
14757 GEN_INT (((mask >> 4) & 3) + 4),
14758 GEN_INT (((mask >> 6) & 3) + 4),
14759 operands[3], operands[4]));
;; Unmasked 256-bit vpshufd builtin expander (AVX2): same decomposition,
;; without the merge operand and mask register.
14763 (define_expand "avx2_pshufdv3"
14764 [(match_operand:V8SI 0 "register_operand")
14765 (match_operand:V8SI 1 "nonimmediate_operand")
14766 (match_operand:SI 2 "const_0_to_255_operand")]
14769 int mask = INTVAL (operands[2]);
14770 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
14771 GEN_INT ((mask >> 0) & 3),
14772 GEN_INT ((mask >> 2) & 3),
14773 GEN_INT ((mask >> 4) & 3),
14774 GEN_INT ((mask >> 6) & 3),
14775 GEN_INT (((mask >> 0) & 3) + 4),
14776 GEN_INT (((mask >> 2) & 3) + 4),
14777 GEN_INT (((mask >> 4) & 3) + 4),
14778 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matching insn: requires the high lane's indices to equal the low lane's
;; plus 4, then re-packs operands[2..5] into the vpshufd imm8.
14782 (define_insn "avx2_pshufd_1<mask_name>"
14783 [(set (match_operand:V8SI 0 "register_operand" "=v")
14785 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
14786 (parallel [(match_operand 2 "const_0_to_3_operand")
14787 (match_operand 3 "const_0_to_3_operand")
14788 (match_operand 4 "const_0_to_3_operand")
14789 (match_operand 5 "const_0_to_3_operand")
14790 (match_operand 6 "const_4_to_7_operand")
14791 (match_operand 7 "const_4_to_7_operand")
14792 (match_operand 8 "const_4_to_7_operand")
14793 (match_operand 9 "const_4_to_7_operand")])))]
14795 && <mask_avx512vl_condition>
14796 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14797 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14798 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14799 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
14802 mask |= INTVAL (operands[2]) << 0;
14803 mask |= INTVAL (operands[3]) << 2;
14804 mask |= INTVAL (operands[4]) << 4;
14805 mask |= INTVAL (operands[5]) << 6;
14806 operands[2] = GEN_INT (mask);
14808 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14810 [(set_attr "type" "sselog1")
14811 (set_attr "prefix" "maybe_evex")
14812 (set_attr "length_immediate" "1")
14813 (set_attr "mode" "OI")])
;; Masked 128-bit vpshufd builtin expander (AVX512VL): four 2-bit fields of
;; the imm8 become four explicit vec_select indices.
14815 (define_expand "avx512vl_pshufd_mask"
14816 [(match_operand:V4SI 0 "register_operand")
14817 (match_operand:V4SI 1 "nonimmediate_operand")
14818 (match_operand:SI 2 "const_0_to_255_operand")
14819 (match_operand:V4SI 3 "register_operand")
14820 (match_operand:QI 4 "register_operand")]
14823 int mask = INTVAL (operands[2]);
14824 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
14825 GEN_INT ((mask >> 0) & 3),
14826 GEN_INT ((mask >> 2) & 3),
14827 GEN_INT ((mask >> 4) & 3),
14828 GEN_INT ((mask >> 6) & 3),
14829 operands[3], operands[4]));
;; Unmasked pshufd builtin expander (SSE2).
14833 (define_expand "sse2_pshufd"
14834 [(match_operand:V4SI 0 "register_operand")
14835 (match_operand:V4SI 1 "vector_operand")
14836 (match_operand:SI 2 "const_int_operand")]
14839 int mask = INTVAL (operands[2]);
14840 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
14841 GEN_INT ((mask >> 0) & 3),
14842 GEN_INT ((mask >> 2) & 3),
14843 GEN_INT ((mask >> 4) & 3),
14844 GEN_INT ((mask >> 6) & 3)));
;; Matching insn: re-packs the four indices into the imm8 and emits
;; (v)pshufd; the % in %vpshufd adds the VEX prefix when AVX is in use.
14848 (define_insn "sse2_pshufd_1<mask_name>"
14849 [(set (match_operand:V4SI 0 "register_operand" "=v")
14851 (match_operand:V4SI 1 "vector_operand" "vBm")
14852 (parallel [(match_operand 2 "const_0_to_3_operand")
14853 (match_operand 3 "const_0_to_3_operand")
14854 (match_operand 4 "const_0_to_3_operand")
14855 (match_operand 5 "const_0_to_3_operand")])))]
14856 "TARGET_SSE2 && <mask_avx512vl_condition>"
14859 mask |= INTVAL (operands[2]) << 0;
14860 mask |= INTVAL (operands[3]) << 2;
14861 mask |= INTVAL (operands[4]) << 4;
14862 mask |= INTVAL (operands[5]) << 6;
14863 operands[2] = GEN_INT (mask);
14865 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14867 [(set_attr "type" "sselog1")
14868 (set_attr "prefix_data16" "1")
14869 (set_attr "prefix" "<mask_prefix2>")
14870 (set_attr "length_immediate" "1")
14871 (set_attr "mode" "TI")])
;; 512-bit vpshuflw (AVX512BW): kept as an unspec because a 32-element
;; vec_select parallel would be unwieldy; the imm8 is passed through as-is.
14873 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
14874 [(set (match_operand:V32HI 0 "register_operand" "=v")
14876 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14877 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14880 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14881 [(set_attr "type" "sselog")
14882 (set_attr "prefix" "evex")
14883 (set_attr "mode" "XI")])
;; Masked 256-bit vpshuflw builtin expander: low-quadword word indices for
;; both lanes (second lane offset by +8); the high words stay in place.
14885 (define_expand "avx512vl_pshuflwv3_mask"
14886 [(match_operand:V16HI 0 "register_operand")
14887 (match_operand:V16HI 1 "nonimmediate_operand")
14888 (match_operand:SI 2 "const_0_to_255_operand")
14889 (match_operand:V16HI 3 "register_operand")
14890 (match_operand:HI 4 "register_operand")]
14891 "TARGET_AVX512VL && TARGET_AVX512BW"
14893 int mask = INTVAL (operands[2]);
14894 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
14895 GEN_INT ((mask >> 0) & 3),
14896 GEN_INT ((mask >> 2) & 3),
14897 GEN_INT ((mask >> 4) & 3),
14898 GEN_INT ((mask >> 6) & 3),
14899 GEN_INT (((mask >> 0) & 3) + 8),
14900 GEN_INT (((mask >> 2) & 3) + 8),
14901 GEN_INT (((mask >> 4) & 3) + 8),
14902 GEN_INT (((mask >> 6) & 3) + 8),
14903 operands[3], operands[4]));
;; Unmasked 256-bit vpshuflw builtin expander (AVX2).
14907 (define_expand "avx2_pshuflwv3"
14908 [(match_operand:V16HI 0 "register_operand")
14909 (match_operand:V16HI 1 "nonimmediate_operand")
14910 (match_operand:SI 2 "const_0_to_255_operand")]
14913 int mask = INTVAL (operands[2]);
14914 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
14915 GEN_INT ((mask >> 0) & 3),
14916 GEN_INT ((mask >> 2) & 3),
14917 GEN_INT ((mask >> 4) & 3),
14918 GEN_INT ((mask >> 6) & 3),
14919 GEN_INT (((mask >> 0) & 3) + 8),
14920 GEN_INT (((mask >> 2) & 3) + 8),
14921 GEN_INT (((mask >> 4) & 3) + 8),
14922 GEN_INT (((mask >> 6) & 3) + 8)));
;; Matching insn: high lane's indices must equal the low lane's plus 8;
;; re-packs operands[2..5] into the vpshuflw imm8.
;; NOTE(review): original numbering skips inside the parallel (14933 ->
;; 14938, 14941 -> 14945); the fixed high-word (const_int ...) entries
;; appear lost in extraction — verify against upstream sse.md.
14926 (define_insn "avx2_pshuflw_1<mask_name>"
14927 [(set (match_operand:V16HI 0 "register_operand" "=v")
14929 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14930 (parallel [(match_operand 2 "const_0_to_3_operand")
14931 (match_operand 3 "const_0_to_3_operand")
14932 (match_operand 4 "const_0_to_3_operand")
14933 (match_operand 5 "const_0_to_3_operand")
14938 (match_operand 6 "const_8_to_11_operand")
14939 (match_operand 7 "const_8_to_11_operand")
14940 (match_operand 8 "const_8_to_11_operand")
14941 (match_operand 9 "const_8_to_11_operand")
14945 (const_int 15)])))]
14947 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14948 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14949 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14950 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14951 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14954 mask |= INTVAL (operands[2]) << 0;
14955 mask |= INTVAL (operands[3]) << 2;
14956 mask |= INTVAL (operands[4]) << 4;
14957 mask |= INTVAL (operands[5]) << 6;
14958 operands[2] = GEN_INT (mask);
14960 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14962 [(set_attr "type" "sselog")
14963 (set_attr "prefix" "maybe_evex")
14964 (set_attr "length_immediate" "1")
14965 (set_attr "mode" "OI")])
;; Masked 128-bit vpshuflw builtin expander (AVX512VL+AVX512BW).
14967 (define_expand "avx512vl_pshuflw_mask"
14968 [(match_operand:V8HI 0 "register_operand")
14969 (match_operand:V8HI 1 "nonimmediate_operand")
14970 (match_operand:SI 2 "const_0_to_255_operand")
14971 (match_operand:V8HI 3 "register_operand")
14972 (match_operand:QI 4 "register_operand")]
14973 "TARGET_AVX512VL && TARGET_AVX512BW"
14975 int mask = INTVAL (operands[2]);
14976 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
14977 GEN_INT ((mask >> 0) & 3),
14978 GEN_INT ((mask >> 2) & 3),
14979 GEN_INT ((mask >> 4) & 3),
14980 GEN_INT ((mask >> 6) & 3),
14981 operands[3], operands[4]));
;; Unmasked pshuflw builtin expander (SSE2): shuffles the four low words,
;; high words untouched.
14985 (define_expand "sse2_pshuflw"
14986 [(match_operand:V8HI 0 "register_operand")
14987 (match_operand:V8HI 1 "vector_operand")
14988 (match_operand:SI 2 "const_int_operand")]
14991 int mask = INTVAL (operands[2]);
14992 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
14993 GEN_INT ((mask >> 0) & 3),
14994 GEN_INT ((mask >> 2) & 3),
14995 GEN_INT ((mask >> 4) & 3),
14996 GEN_INT ((mask >> 6) & 3)));
;; Matching insn: re-packs the low-word indices into the imm8.
;; NOTE(review): original numbering skips after 15007; the fixed
;; high-word (const_int ...) parallel entries appear lost in extraction.
15000 (define_insn "sse2_pshuflw_1<mask_name>"
15001 [(set (match_operand:V8HI 0 "register_operand" "=v")
15003 (match_operand:V8HI 1 "vector_operand" "vBm")
15004 (parallel [(match_operand 2 "const_0_to_3_operand")
15005 (match_operand 3 "const_0_to_3_operand")
15006 (match_operand 4 "const_0_to_3_operand")
15007 (match_operand 5 "const_0_to_3_operand")
15012 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15015 mask |= INTVAL (operands[2]) << 0;
15016 mask |= INTVAL (operands[3]) << 2;
15017 mask |= INTVAL (operands[4]) << 4;
15018 mask |= INTVAL (operands[5]) << 6;
15019 operands[2] = GEN_INT (mask);
15021 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15023 [(set_attr "type" "sselog")
15024 (set_attr "prefix_data16" "0")
15025 (set_attr "prefix_rep" "1")
15026 (set_attr "prefix" "maybe_vex")
15027 (set_attr "length_immediate" "1")
15028 (set_attr "mode" "TI")])
;; Unmasked 256-bit vpshufhw builtin expander (AVX2): high-quadword word
;; indices are biased by +4 (low lane) and +12 (high lane).
15030 (define_expand "avx2_pshufhwv3"
15031 [(match_operand:V16HI 0 "register_operand")
15032 (match_operand:V16HI 1 "nonimmediate_operand")
15033 (match_operand:SI 2 "const_0_to_255_operand")]
15036 int mask = INTVAL (operands[2]);
15037 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
15038 GEN_INT (((mask >> 0) & 3) + 4),
15039 GEN_INT (((mask >> 2) & 3) + 4),
15040 GEN_INT (((mask >> 4) & 3) + 4),
15041 GEN_INT (((mask >> 6) & 3) + 4),
15042 GEN_INT (((mask >> 0) & 3) + 12),
15043 GEN_INT (((mask >> 2) & 3) + 12),
15044 GEN_INT (((mask >> 4) & 3) + 12),
15045 GEN_INT (((mask >> 6) & 3) + 12)));
;; 512-bit vpshufhw (AVX512BW): unspec form, imm8 passed through.
15049 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
15050 [(set (match_operand:V32HI 0 "register_operand" "=v")
15052 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
15053 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15056 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15057 [(set_attr "type" "sselog")
15058 (set_attr "prefix" "evex")
15059 (set_attr "mode" "XI")])
;; Masked 256-bit vpshufhw builtin expander (AVX512VL+AVX512BW).
15061 (define_expand "avx512vl_pshufhwv3_mask"
15062 [(match_operand:V16HI 0 "register_operand")
15063 (match_operand:V16HI 1 "nonimmediate_operand")
15064 (match_operand:SI 2 "const_0_to_255_operand")
15065 (match_operand:V16HI 3 "register_operand")
15066 (match_operand:HI 4 "register_operand")]
15067 "TARGET_AVX512VL && TARGET_AVX512BW"
15069 int mask = INTVAL (operands[2]);
15070 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
15071 GEN_INT (((mask >> 0) & 3) + 4),
15072 GEN_INT (((mask >> 2) & 3) + 4),
15073 GEN_INT (((mask >> 4) & 3) + 4),
15074 GEN_INT (((mask >> 6) & 3) + 4),
15075 GEN_INT (((mask >> 0) & 3) + 12),
15076 GEN_INT (((mask >> 2) & 3) + 12),
15077 GEN_INT (((mask >> 4) & 3) + 12),
15078 GEN_INT (((mask >> 6) & 3) + 12),
15079 operands[3], operands[4]));
;; Matching insn: low words fixed in place; indices re-packed into the
;; imm8 after subtracting the +4 bias.
;; NOTE(review): original numbering skips inside the parallel (15087 ->
;; 15091, 15094 -> 15099); fixed-index entries appear lost in extraction.
15083 (define_insn "avx2_pshufhw_1<mask_name>"
15084 [(set (match_operand:V16HI 0 "register_operand" "=v")
15086 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
15087 (parallel [(const_int 0)
15091 (match_operand 2 "const_4_to_7_operand")
15092 (match_operand 3 "const_4_to_7_operand")
15093 (match_operand 4 "const_4_to_7_operand")
15094 (match_operand 5 "const_4_to_7_operand")
15099 (match_operand 6 "const_12_to_15_operand")
15100 (match_operand 7 "const_12_to_15_operand")
15101 (match_operand 8 "const_12_to_15_operand")
15102 (match_operand 9 "const_12_to_15_operand")])))]
15104 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
15105 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
15106 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
15107 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
15108 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
15111 mask |= (INTVAL (operands[2]) - 4) << 0;
15112 mask |= (INTVAL (operands[3]) - 4) << 2;
15113 mask |= (INTVAL (operands[4]) - 4) << 4;
15114 mask |= (INTVAL (operands[5]) - 4) << 6;
15115 operands[2] = GEN_INT (mask);
15117 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15119 [(set_attr "type" "sselog")
15120 (set_attr "prefix" "maybe_evex")
15121 (set_attr "length_immediate" "1")
15122 (set_attr "mode" "OI")])
;; Masked 128-bit vpshufhw builtin expander (AVX512VL+AVX512BW): indices
;; biased by +4 to address the high quadword.
15124 (define_expand "avx512vl_pshufhw_mask"
15125 [(match_operand:V8HI 0 "register_operand")
15126 (match_operand:V8HI 1 "nonimmediate_operand")
15127 (match_operand:SI 2 "const_0_to_255_operand")
15128 (match_operand:V8HI 3 "register_operand")
15129 (match_operand:QI 4 "register_operand")]
15130 "TARGET_AVX512VL && TARGET_AVX512BW"
15132 int mask = INTVAL (operands[2]);
15133 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
15134 GEN_INT (((mask >> 0) & 3) + 4),
15135 GEN_INT (((mask >> 2) & 3) + 4),
15136 GEN_INT (((mask >> 4) & 3) + 4),
15137 GEN_INT (((mask >> 6) & 3) + 4),
15138 operands[3], operands[4]));
;; Unmasked pshufhw builtin expander (SSE2).
15142 (define_expand "sse2_pshufhw"
15143 [(match_operand:V8HI 0 "register_operand")
15144 (match_operand:V8HI 1 "vector_operand")
15145 (match_operand:SI 2 "const_int_operand")]
15148 int mask = INTVAL (operands[2]);
15149 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
15150 GEN_INT (((mask >> 0) & 3) + 4),
15151 GEN_INT (((mask >> 2) & 3) + 4),
15152 GEN_INT (((mask >> 4) & 3) + 4),
15153 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matching insn: low words fixed; imm8 rebuilt from the biased indices.
;; NOTE(review): original numbering skips (15161 -> 15165); fixed-index
;; parallel entries appear lost in extraction.
15157 (define_insn "sse2_pshufhw_1<mask_name>"
15158 [(set (match_operand:V8HI 0 "register_operand" "=v")
15160 (match_operand:V8HI 1 "vector_operand" "vBm")
15161 (parallel [(const_int 0)
15165 (match_operand 2 "const_4_to_7_operand")
15166 (match_operand 3 "const_4_to_7_operand")
15167 (match_operand 4 "const_4_to_7_operand")
15168 (match_operand 5 "const_4_to_7_operand")])))]
15169 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15172 mask |= (INTVAL (operands[2]) - 4) << 0;
15173 mask |= (INTVAL (operands[3]) - 4) << 2;
15174 mask |= (INTVAL (operands[4]) - 4) << 4;
15175 mask |= (INTVAL (operands[5]) - 4) << 6;
15176 operands[2] = GEN_INT (mask);
15178 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15180 [(set_attr "type" "sselog")
15181 (set_attr "prefix_rep" "1")
15182 (set_attr "prefix_data16" "0")
15183 (set_attr "prefix" "maybe_vex")
15184 (set_attr "length_immediate" "1")
15185 (set_attr "mode" "TI")])
;; Load a scalar SI into element 0 of a V4SI, zeroing the upper elements;
;; the preparation statement supplies the zero vector operand.
15187 (define_expand "sse2_loadd"
15188 [(set (match_operand:V4SI 0 "register_operand")
15190 (vec_duplicate:V4SI
15191 (match_operand:SI 1 "nonimmediate_operand"))
15195 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form: movd from memory/GPR, or movss/vmovss to merge element 0
;; into an existing vector (alternatives 3-4).
15197 (define_insn "sse2_loadld"
15198 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
15200 (vec_duplicate:V4SI
15201 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
15202 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
15206 %vmovd\t{%2, %0|%0, %2}
15207 %vmovd\t{%2, %0|%0, %2}
15208 movss\t{%2, %0|%0, %2}
15209 movss\t{%2, %0|%0, %2}
15210 vmovss\t{%2, %1, %0|%0, %1, %2}"
15211 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
15212 (set_attr "type" "ssemov")
15213 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
15214 (set_attr "mode" "TI,TI,V4SF,SF,SF")
15215 (set (attr "preferred_for_speed")
15216 (cond [(eq_attr "alternative" "1")
15217 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15219 (symbol_ref "true")))])
15221 ;; QI and HI modes handled by pextr patterns.
;; V16QI requires SSE4.1 (pextrb); V8HI works with plain SSE2 pextrw.
15222 (define_mode_iterator PEXTR_MODE12
15223 [(V16QI "TARGET_SSE4_1") V8HI])
;; Extract one byte/word element to a GPR or memory via (v)pextrb/pextrw;
;; EVEX alternatives (2-3) allow the AVX512BW extended register file.
15225 (define_insn "*vec_extract<mode>"
15226 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
15227 (vec_select:<ssescalarmode>
15228 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
15230 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
15233 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15234 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
15235 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15236 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15237 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
15238 (set_attr "type" "sselog1")
15239 (set_attr "prefix_data16" "1")
15240 (set (attr "prefix_extra")
15242 (and (eq_attr "alternative" "0,2")
15243 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
15245 (const_string "1")))
15246 (set_attr "length_immediate" "1")
15247 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
15248 (set_attr "mode" "TI")])
;; Same extraction zero-extended into a 32/64-bit GPR (pextr already
;; zero-extends, so the zext is free).
15250 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
15251 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
15253 (vec_select:<PEXTR_MODE12:ssescalarmode>
15254 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
15256 [(match_operand:SI 2
15257 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
15260 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15261 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
15262 [(set_attr "isa" "*,avx512bw")
15263 (set_attr "type" "sselog1")
15264 (set_attr "prefix_data16" "1")
15265 (set (attr "prefix_extra")
15267 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
15269 (const_string "1")))
15270 (set_attr "length_immediate" "1")
15271 (set_attr "prefix" "maybe_vex")
15272 (set_attr "mode" "TI")])
;; Byte extraction zero-extended to HImode via (v)pextrb.
15274 (define_insn "*vec_extractv16qi_zext"
15275 [(set (match_operand:HI 0 "register_operand" "=r,r")
15278 (match_operand:V16QI 1 "register_operand" "x,v")
15280 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
15283 %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
15284 vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
15285 [(set_attr "isa" "*,avx512bw")
15286 (set_attr "type" "sselog1")
15287 (set_attr "prefix_data16" "1")
15288 (set_attr "prefix_extra" "1")
15289 (set_attr "length_immediate" "1")
15290 (set_attr "prefix" "maybe_vex")
15291 (set_attr "mode" "TI")])
;; Extract a byte/word element of an in-memory vector; split later (see
;; the VI_128 memory split below) into a plain scalar load.
15293 (define_insn "*vec_extract<mode>_mem"
15294 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
15295 (vec_select:<ssescalarmode>
15296 (match_operand:VI12_128 1 "memory_operand" "o")
15298 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Element-0 extraction of SI/DI from a vector: a simple scalar move once
;; one operand is not memory.
15302 (define_insn "*vec_extract<ssevecmodelower>_0"
15303 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
15305 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
15306 (parallel [(const_int 0)])))]
15307 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15309 [(set_attr "isa" "*,sse2,*,*")
15310 (set (attr "preferred_for_speed")
15311 (cond [(eq_attr "alternative" "1")
15312 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15314 (symbol_ref "true")))])
;; Element-0 DI extraction on 32-bit targets (no 64-bit GPR movq).
15316 (define_insn "*vec_extractv2di_0_sse"
15317 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
15319 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
15320 (parallel [(const_int 0)])))]
15321 "TARGET_SSE && !TARGET_64BIT
15322 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15324 [(set_attr "isa" "sse4,*,*")
15325 (set (attr "preferred_for_speed")
15326 (cond [(eq_attr "alternative" "0")
15327 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15329 (symbol_ref "true")))])
;; Split (header line lost in extraction — this is a define_split body):
;; on !TARGET_64BIT, split a DI element-0 extraction into two SI
;; extractions of the DImode register pair via split_double_mode.
15332 [(set (match_operand:DI 0 "general_reg_operand")
15334 (match_operand:V2DI 1 "register_operand")
15335 (parallel [(const_int 0)])))]
15336 "TARGET_SSE4_1 && !TARGET_64BIT
15337 && reload_completed"
15338 [(set (match_dup 2) (match_dup 4))
15342 (parallel [(const_int 1)])))]
15344 operands[4] = gen_lowpart (SImode, operands[1]);
15345 operands[5] = gen_lowpart (V4SImode, operands[1]);
15346 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
;; Split (header line lost in extraction): element-0 extraction from a
;; register becomes a plain scalar move of the vector's low part.
15350 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15352 (match_operand:<ssevecmode> 1 "register_operand")
15353 (parallel [(const_int 0)])))]
15354 "TARGET_SSE && reload_completed"
15355 [(set (match_dup 0) (match_dup 1))]
15356 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
;; Element-0 SI extraction zero-extended to DI (SSE4.1 forms).
15358 (define_insn "*vec_extractv4si_0_zext_sse4"
15359 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
15362 (match_operand:V4SI 1 "register_operand" "v,x,v")
15363 (parallel [(const_int 0)]))))]
15366 [(set_attr "isa" "x64,*,avx512f")
15367 (set (attr "preferred_for_speed")
15368 (cond [(eq_attr "alternative" "0")
15369 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15371 (symbol_ref "true")))])
;; Element-0 SI extraction zero-extended to DI via GPR movd (SSE2).
15373 (define_insn "*vec_extractv4si_0_zext"
15374 [(set (match_operand:DI 0 "register_operand" "=r")
15377 (match_operand:V4SI 1 "register_operand" "x")
15378 (parallel [(const_int 0)]))))]
15379 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
;; Split (header line lost in extraction): the zero-extended element-0
;; extraction becomes a zero_extend of the vector's SImode lowpart.
15383 [(set (match_operand:DI 0 "register_operand")
15386 (match_operand:V4SI 1 "register_operand")
15387 (parallel [(const_int 0)]))))]
15388 "TARGET_SSE2 && reload_completed"
15389 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15390 "operands[1] = gen_lowpart (SImode, operands[1]);")
;; Extract an arbitrary SI element: pextrd for alternatives 0-1, or a
;; byte shift (psrldq by 4*index) to bring the element into position.
15392 (define_insn "*vec_extractv4si"
15393 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
15395 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
15396 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
15399 switch (which_alternative)
15403 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
15407 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15408 return "psrldq\t{%2, %0|%0, %2}";
15412 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15413 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
15416 gcc_unreachable ();
15419 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
15420 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
15421 (set (attr "prefix_extra")
15422 (if_then_else (eq_attr "alternative" "0,1")
15424 (const_string "*")))
15425 (set_attr "length_immediate" "1")
15426 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
15427 (set_attr "mode" "TI")])
;; Same, zero-extended to DI: pextrd to a 32-bit GPR already zero-extends.
15429 (define_insn "*vec_extractv4si_zext"
15430 [(set (match_operand:DI 0 "register_operand" "=r,r")
15433 (match_operand:V4SI 1 "register_operand" "x,v")
15434 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15435 "TARGET_64BIT && TARGET_SSE4_1"
15436 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
15437 [(set_attr "isa" "*,avx512dq")
15438 (set_attr "type" "sselog1")
15439 (set_attr "prefix_extra" "1")
15440 (set_attr "length_immediate" "1")
15441 (set_attr "prefix" "maybe_vex")
15442 (set_attr "mode" "TI")])
;; SI element extraction from an in-memory vector (split elsewhere into a
;; scalar load at the element's offset).
15444 (define_insn "*vec_extractv4si_mem"
15445 [(set (match_operand:SI 0 "register_operand" "=x,r")
15447 (match_operand:V4SI 1 "memory_operand" "o,o")
15448 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Memory variant of the zero-extending extraction: after reload it is
;; rewritten as a zero_extend of an SImode load at offset 4*index.
15452 (define_insn_and_split "*vec_extractv4si_zext_mem"
15453 [(set (match_operand:DI 0 "register_operand" "=x,r")
15456 (match_operand:V4SI 1 "memory_operand" "o,o")
15457 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15458 "TARGET_64BIT && TARGET_SSE"
15460 "&& reload_completed"
15461 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15463 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract the high DI element of a V2DI: pextrq, movhps store, psrldq
;; shift-down, movhlps, or (for a memory source) plain scalar loads of
;; the upper half (alternatives 7-8).
15466 (define_insn "*vec_extractv2di_1"
15467 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
15469 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
15470 (parallel [(const_int 1)])))]
15471 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15473 %vpextrq\t{$1, %1, %0|%0, %1, 1}
15474 vpextrq\t{$1, %1, %0|%0, %1, 1}
15475 %vmovhps\t{%1, %0|%0, %1}
15476 psrldq\t{$8, %0|%0, 8}
15477 vpsrldq\t{$8, %1, %0|%0, %1, 8}
15478 vpsrldq\t{$8, %1, %0|%0, %1, 8}
15479 movhlps\t{%1, %0|%0, %1}
15483 (cond [(eq_attr "alternative" "0")
15484 (const_string "x64_sse4")
15485 (eq_attr "alternative" "1")
15486 (const_string "x64_avx512dq")
15487 (eq_attr "alternative" "3")
15488 (const_string "sse2_noavx")
15489 (eq_attr "alternative" "4")
15490 (const_string "avx")
15491 (eq_attr "alternative" "5")
15492 (const_string "avx512bw")
15493 (eq_attr "alternative" "6")
15494 (const_string "noavx")
15495 (eq_attr "alternative" "8")
15496 (const_string "x64")
15498 (const_string "*")))
15500 (cond [(eq_attr "alternative" "2,6,7")
15501 (const_string "ssemov")
15502 (eq_attr "alternative" "3,4,5")
15503 (const_string "sseishft1")
15504 (eq_attr "alternative" "8")
15505 (const_string "imov")
15507 (const_string "sselog1")))
15508 (set (attr "length_immediate")
15509 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
15511 (const_string "*")))
15512 (set (attr "prefix_rex")
15513 (if_then_else (eq_attr "alternative" "0,1")
15515 (const_string "*")))
15516 (set (attr "prefix_extra")
15517 (if_then_else (eq_attr "alternative" "0,1")
15519 (const_string "*")))
15520 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
15521 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
;; Split (header line lost in extraction — define_split body): after
;; reload, an element extraction from an in-memory vector becomes a
;; scalar load at offset index * element-size.
15524 [(set (match_operand:<ssescalarmode> 0 "register_operand")
15525 (vec_select:<ssescalarmode>
15526 (match_operand:VI_128 1 "memory_operand")
15528 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
15529 "TARGET_SSE && reload_completed"
15530 [(set (match_dup 0) (match_dup 1))]
15532 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
15534 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Extract one 128-bit half of a V2TI: vextract{i}128, or the EVEX
;; vextracti32x4 form operating on the zmm superset register (%g1).
15537 (define_insn "*vec_extractv2ti"
15538 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
15540 (match_operand:V2TI 1 "register_operand" "x,v")
15542 [(match_operand:SI 2 "const_0_to_1_operand")])))]
15545 vextract%~128\t{%2, %1, %0|%0, %1, %2}
15546 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
15547 [(set_attr "type" "sselog")
15548 (set_attr "prefix_extra" "1")
15549 (set_attr "length_immediate" "1")
15550 (set_attr "prefix" "vex,evex")
15551 (set_attr "mode" "OI")])
;; Extract one 128-bit quarter of a V4TI (AVX512F).
15553 (define_insn "*vec_extractv4ti"
15554 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
15556 (match_operand:V4TI 1 "register_operand" "v")
15558 [(match_operand:SI 2 "const_0_to_3_operand")])))]
15560 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
15561 [(set_attr "type" "sselog")
15562 (set_attr "prefix_extra" "1")
15563 (set_attr "length_immediate" "1")
15564 (set_attr "prefix" "evex")
15565 (set_attr "mode" "XI")])
15567 (define_mode_iterator VEXTRACTI128_MODE
15568 [(V4TI "TARGET_AVX512F") V2TI])
;; Split (header line lost in extraction — define_split body): low-half
;; extraction is just a TImode move of the register's low part, unless the
;; source is an ext-SSE register without AVX512VL.
15571 [(set (match_operand:TI 0 "nonimmediate_operand")
15573 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
15574 (parallel [(const_int 0)])))]
15576 && reload_completed
15577 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
15578 [(set (match_dup 0) (match_dup 1))]
15579 "operands[1] = gen_lowpart (TImode, operands[1]);")
15581 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
15582 ;; vector modes into vec_extract*.
;; Split (header line lost in extraction — define_split body): a lowpart
;; subreg of a 16/32/64-byte vector register becomes an element-0
;; vec_select; for wider vectors the preparation code first narrows to a
;; 128-bit vector via vec_extract_lo_*.
15584 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15585 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
15586 "can_create_pseudo_p ()
15587 && REG_P (operands[1])
15588 && VECTOR_MODE_P (GET_MODE (operands[1]))
15589 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
15590 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
15591 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
15592 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
15593 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
15594 (parallel [(const_int 0)])))]
15598 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
15601 if (<MODE>mode == SImode)
15603 tmp = gen_reg_rtx (V8SImode);
15604 emit_insn (gen_vec_extract_lo_v16si (tmp,
15605 gen_lowpart (V16SImode,
15610 tmp = gen_reg_rtx (V4DImode);
15611 emit_insn (gen_vec_extract_lo_v8di (tmp,
15612 gen_lowpart (V8DImode,
15618 tmp = gen_reg_rtx (<ssevecmode>mode);
15619 if (<MODE>mode == SImode)
15620 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
15623 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
15628 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI from two SI values when SSE4.1 is available.  Alternatives
;; cover pinsrd/vpinsrd (insert into low half), punpckldq/vpunpckldq
;; (interleave two regs), movd with a zero upper half (C constraint on
;; operand 2), and MMX punpckldq/movd forms.
15633 (define_insn "*vec_concatv2si_sse4_1"
15634 [(set (match_operand:V2SI 0 "register_operand"
15635 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
15637 (match_operand:SI 1 "nonimmediate_operand"
15638 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
15639 (match_operand:SI 2 "nonimm_or_0_operand"
15640 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
15641 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15643 pinsrd\t{$1, %2, %0|%0, %2, 1}
15644 pinsrd\t{$1, %2, %0|%0, %2, 1}
15645 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15646 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15647 punpckldq\t{%2, %0|%0, %2}
15648 punpckldq\t{%2, %0|%0, %2}
15649 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
15650 %vmovd\t{%1, %0|%0, %1}
15651 punpckldq\t{%2, %0|%0, %2}
15652 movd\t{%1, %0|%0, %1}"
15653 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
;; Alternatives 8 and 9 use MMX registers (*y) natively.
15654 (set (attr "mmx_isa")
15655 (if_then_else (eq_attr "alternative" "8,9")
15656 (const_string "native")
15657 (const_string "*")))
15659 (cond [(eq_attr "alternative" "7")
15660 (const_string "ssemov")
15661 (eq_attr "alternative" "8")
15662 (const_string "mmxcvt")
15663 (eq_attr "alternative" "9")
15664 (const_string "mmxmov")
15666 (const_string "sselog")))
;; Only the pinsrd alternatives carry an extra prefix byte and an
;; immediate operand.
15667 (set (attr "prefix_extra")
15668 (if_then_else (eq_attr "alternative" "0,1,2,3")
15670 (const_string "*")))
15671 (set (attr "length_immediate")
15672 (if_then_else (eq_attr "alternative" "0,1,2,3")
15674 (const_string "*")))
15675 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
15676 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15678 ;; ??? In theory we can match memory for the MMX alternative, but allowing
15679 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
15680 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SI concatenation: punpckldq/unpcklps to interleave, or
;; movd/movss when the high element is zero (C constraint).
15681 (define_insn "*vec_concatv2si"
15682 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
15684 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
15685 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
15686 "TARGET_SSE && !TARGET_SSE4_1"
15688 punpckldq\t{%2, %0|%0, %2}
15689 movd\t{%1, %0|%0, %1}
15690 unpcklps\t{%2, %0|%0, %2}
15691 movss\t{%1, %0|%0, %1}
15692 punpckldq\t{%2, %0|%0, %2}
15693 movd\t{%1, %0|%0, %1}"
15694 [(set_attr "isa" "sse2,sse2,*,*,*,*")
15695 (set_attr "mmx_isa" "*,*,*,*,native,native")
15696 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
15697 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
;; Concatenate two V2SI halves into a V4SI: punpcklqdq/vpunpcklqdq for
;; register sources, movlhps, or movhps/vmovhps when the high half comes
;; from memory.
15699 (define_insn "*vec_concatv4si"
15700 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
15702 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
15703 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
15706 punpcklqdq\t{%2, %0|%0, %2}
15707 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15708 movlhps\t{%2, %0|%0, %2}
15709 movhps\t{%2, %0|%0, %q2}
15710 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
15711 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
15712 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
15713 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
15714 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; V4SI = { low V2SI, zero }: a zero-extending movq, or movq2dq when the
;; source lives in an MMX register.
15716 (define_insn "*vec_concatv4si_0"
15717 [(set (match_operand:V4SI 0 "register_operand" "=v,x")
15719 (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
15720 (match_operand:V2SI 2 "const0_operand" " C,C")))]
15723 %vmovq\t{%1, %0|%0, %1}
15724 movq2dq\t{%1, %0|%0, %1}"
15725 [(set_attr "mmx_isa" "*,native")
15726 (set_attr "type" "ssemov")
15727 (set_attr "prefix" "maybe_vex,orig")
15728 (set_attr "mode" "TI")])
;; Build a V2DI from two DI values.  Alternatives: pinsrq/vpinsrq (insert
;; into slot 1, 64-bit only), punpcklqdq/vpunpcklqdq, movlhps, and
;; movhps/vmovhps for a memory high half.
15730 (define_insn "vec_concatv2di"
15731 [(set (match_operand:V2DI 0 "register_operand"
15732 "=Yr,*x,x ,v ,x,v ,x,x,v")
15734 (match_operand:DI 1 "register_operand"
15735 " 0, 0,x ,Yv,0,Yv,0,0,v")
15736 (match_operand:DI 2 "nonimmediate_operand"
15737 " rm,rm,rm,rm,x,Yv,x,m,m")))]
15740 pinsrq\t{$1, %2, %0|%0, %2, 1}
15741 pinsrq\t{$1, %2, %0|%0, %2, 1}
15742 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15743 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15744 punpcklqdq\t{%2, %0|%0, %2}
15745 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15746 movlhps\t{%2, %0|%0, %2}
15747 movhps\t{%2, %0|%0, %2}
15748 vmovhps\t{%2, %1, %0|%0, %1, %2}"
;; pinsrq needs 64-bit mode (x64_*); remaining alternatives split by
;; SSE2/AVX/AVX512DQ capability.
15750 (cond [(eq_attr "alternative" "0,1")
15751 (const_string "x64_sse4_noavx")
15752 (eq_attr "alternative" "2")
15753 (const_string "x64_avx")
15754 (eq_attr "alternative" "3")
15755 (const_string "x64_avx512dq")
15756 (eq_attr "alternative" "4")
15757 (const_string "sse2_noavx")
15758 (eq_attr "alternative" "5,8")
15759 (const_string "avx")
15761 (const_string "noavx")))
15764 (eq_attr "alternative" "0,1,2,3,4,5")
15765 (const_string "sselog")
15766 (const_string "ssemov")))
;; pinsrq alternatives carry REX.W, an extra prefix byte, and an
;; immediate operand.
15767 (set (attr "prefix_rex")
15768 (if_then_else (eq_attr "alternative" "0,1,2,3")
15770 (const_string "*")))
15771 (set (attr "prefix_extra")
15772 (if_then_else (eq_attr "alternative" "0,1,2,3")
15774 (const_string "*")))
15775 (set (attr "length_immediate")
15776 (if_then_else (eq_attr "alternative" "0,1,2,3")
15778 (const_string "*")))
15779 (set (attr "prefix")
15780 (cond [(eq_attr "alternative" "2")
15781 (const_string "vex")
15782 (eq_attr "alternative" "3")
15783 (const_string "evex")
15784 (eq_attr "alternative" "5,8")
15785 (const_string "maybe_evex")
15787 (const_string "orig")))
15788 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; V2DI = { DI, 0 }: zero-extending movq (movd fallback when the
;; assembler lacks inter-unit movq support), or movq2dq from MMX.
15790 (define_insn "*vec_concatv2di_0"
15791 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
15793 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
15794 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
15797 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
15798 %vmovq\t{%1, %0|%0, %1}
15799 movq2dq\t{%1, %0|%0, %1}"
15800 [(set_attr "isa" "x64,*,*")
15801 (set_attr "mmx_isa" "*,*,native")
15802 (set_attr "type" "ssemov")
15803 (set_attr "prefix_rex" "1,*,*")
15804 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
15805 (set_attr "mode" "TI")
;; GPR->vector movq (alternative 0) is only preferred when inter-unit
;; moves are cheap on the target CPU.
15806 (set (attr "preferred_for_speed")
15807 (cond [(eq_attr "alternative" "0")
15808 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15810 (symbol_ref "true")))])
15812 ;; vmovq clears also the higher bits.
;; Set element 0 of a zeroed 64-bit-element vector: vec_merge of a
;; broadcast scalar with an all-zero vector, emitted as vmovq on the
;; 128-bit view (%x0).
15813 (define_insn "vec_set<mode>_0"
15814 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
15815 (vec_merge:VI8_AVX_AVX512F
15816 (vec_duplicate:VI8_AVX_AVX512F
15817 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
15818 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
15821 "vmovq\t{%2, %x0|%x0, %2}"
15822 [(set_attr "isa" "x64,*")
15823 (set_attr "type" "ssemov")
15824 (set_attr "prefix_rex" "1,*")
15825 (set_attr "prefix" "maybe_evex")
15826 (set_attr "mode" "TI")
;; GPR source (alternative 0) only when inter-unit moves are fast.
15827 (set (attr "preferred_for_speed")
15828 (cond [(eq_attr "alternative" "0")
15829 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15831 (symbol_ref "true")))])
;; Standard-name vector widening (unpack) expanders.  The sign/zero and
;; lo/hi variants all funnel into ix86_expand_sse_unpack with
;; (unsigned_p, high_p) flags.
15833 (define_expand "vec_unpacks_lo_<mode>"
15834 [(match_operand:<sseunpackmode> 0 "register_operand")
15835 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15837 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
15839 (define_expand "vec_unpacks_hi_<mode>"
15840 [(match_operand:<sseunpackmode> 0 "register_operand")
15841 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15843 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
15845 (define_expand "vec_unpacku_lo_<mode>"
15846 [(match_operand:<sseunpackmode> 0 "register_operand")
15847 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15849 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Mask-register (k-reg) unpack of a QImode bool vector; operand 2 is the
;; element count.  Low half is just a copy of the source mask.
15851 (define_expand "vec_unpacks_sbool_lo_qi"
15852 [(match_operand:QI 0 "register_operand")
15853 (match_operand:QI 1 "register_operand")
15854 (match_operand:QI 2 "const_int_operand")]
15857 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
15859 emit_move_insn (operands[0], operands[1]);
;; Mask-mode lowpart unpacks: the low half of a wider mask is its
;; lowpart subreg.
15863 (define_expand "vec_unpacks_lo_hi"
15864 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15865 (match_operand:HI 1 "register_operand"))]
15868 (define_expand "vec_unpacks_lo_si"
15869 [(set (match_operand:HI 0 "register_operand")
15870 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
15873 (define_expand "vec_unpacks_lo_di"
15874 [(set (match_operand:SI 0 "register_operand")
15875 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
15878 (define_expand "vec_unpacku_hi_<mode>"
15879 [(match_operand:<sseunpackmode> 0 "register_operand")
15880 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15882 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
;; High half of a QImode bool mask: shift right by nunits/2.  Uses kshiftr
;; on QI directly with AVX512DQ, else goes through an HImode k-reg.
15884 (define_expand "vec_unpacks_sbool_hi_qi"
15885 [(match_operand:QI 0 "register_operand")
15886 (match_operand:QI 1 "register_operand")
15887 (match_operand:QI 2 "const_int_operand")]
15890 HOST_WIDE_INT nunits = INTVAL (operands[2]);
15891 if (nunits != 8 && nunits != 4)
15893 if (TARGET_AVX512DQ)
15894 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
15895 GEN_INT (nunits / 2)));
15898 rtx tem = gen_reg_rtx (HImode);
15899 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
15901 GEN_INT (nunits / 2)));
15902 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
;; High-half mask unpacks: logical right shift of the mask register
;; (UNSPEC_MASKOP marks it as a k-reg operation).
15907 (define_expand "vec_unpacks_hi_hi"
15909 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15910 (lshiftrt:HI (match_operand:HI 1 "register_operand")
15912 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15915 (define_expand "vec_unpacks_hi_<mode>"
15917 [(set (subreg:SWI48x
15918 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
15919 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
15921 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15923 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
15925 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15929 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unsigned average with rounding (pavgb/pavgw), modeled as
;; truncate ((a + b + 1) >> 1) in a double-width mode.  The expander
;; supplies the all-ones rounding constant and canonicalizes operands.
15931 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
15932 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
15933 (truncate:VI12_AVX2_AVX512BW
15934 (lshiftrt:<ssedoublemode>
15935 (plus:<ssedoublemode>
15936 (plus:<ssedoublemode>
15937 (zero_extend:<ssedoublemode>
15938 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
15939 (zero_extend:<ssedoublemode>
15940 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
15941 (match_dup <mask_expand_op3>))
15943 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
;; The +1 rounding term is a vector of ones in the double-wide mode.
15945 operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
15946 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn for the pattern above: pavgb/pavgw (SSE) or
;; vpavgb/vpavgw (AVX/AVX512, optionally masked).
15949 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
15950 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
15951 (truncate:VI12_AVX2_AVX512BW
15952 (lshiftrt:<ssedoublemode>
15953 (plus:<ssedoublemode>
15954 (plus:<ssedoublemode>
15955 (zero_extend:<ssedoublemode>
15956 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
15957 (zero_extend:<ssedoublemode>
15958 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
15959 (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
15961 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
15962 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15964 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
15965 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15966 [(set_attr "isa" "noavx,avx")
15967 (set_attr "type" "sseiadd")
15968 (set_attr "prefix_data16" "1,*")
15969 (set_attr "prefix" "orig,<mask_prefix>")
15970 (set_attr "mode" "<sseinsnmode>")])
15972 ;; The correct representation for this is absolutely enormous, and
15973 ;; surely not generally useful.
;; Sum of absolute differences of packed bytes, kept as an unspec since
;; the exact RTL expansion would be impractically large.
15974 (define_insn "<sse2_avx2>_psadbw"
15975 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
15976 (unspec:VI8_AVX2_AVX512BW
15977 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
15978 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
15982 psadbw\t{%2, %0|%0, %2}
15983 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
15984 [(set_attr "isa" "noavx,avx")
15985 (set_attr "type" "sseiadd")
15986 (set_attr "atom_unit" "simul")
15987 (set_attr "prefix_data16" "1,*")
15988 (set_attr "prefix" "orig,maybe_evex")
15989 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd: collect the sign bits of an FP vector into a GPR.
15991 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
15992 [(set (match_operand:SI 0 "register_operand" "=r")
15994 [(match_operand:VF_128_256 1 "register_operand" "x")]
15997 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
15998 [(set_attr "type" "ssemov")
15999 (set_attr "prefix" "maybe_vex")
16000 (set_attr "mode" "<MODE>")])
;; Same, with the SI result sign- or zero-extended to DI (the insn writes
;; the 32-bit register %k0, which zero-extends on x86-64).
16002 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
16003 [(set (match_operand:DI 0 "register_operand" "=r")
16006 [(match_operand:VF_128_256 1 "register_operand" "x")]
16008 "TARGET_64BIT && TARGET_SSE"
16009 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
16010 [(set_attr "type" "ssemov")
16011 (set_attr "prefix" "maybe_vex")
16012 (set_attr "mode" "<MODE>")])
;; (x < 0) comparison against zero only inspects sign bits, so it splits
;; to a plain movmsk of the integer-viewed operand after reload.
16014 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
16015 [(set (match_operand:SI 0 "register_operand" "=r")
16018 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16019 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
16023 "&& reload_completed"
16024 [(set (match_dup 0)
16025 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16026 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16027 [(set_attr "type" "ssemov")
16028 (set_attr "prefix" "maybe_vex")
16029 (set_attr "mode" "<MODE>")])
;; As above, combined with a DI extension of the mask result.
16031 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
16032 [(set (match_operand:DI 0 "register_operand" "=r")
16036 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16037 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
16039 "TARGET_64BIT && TARGET_SSE"
16041 "&& reload_completed"
16042 [(set (match_dup 0)
16043 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16044 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16045 [(set_attr "type" "ssemov")
16046 (set_attr "prefix" "maybe_vex")
16047 (set_attr "mode" "<MODE>")])
;; An arithmetic right shift leaves sign bits unchanged, so
;; movmsk(x >> n) splits to movmsk(x) after reload.
16049 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
16050 [(set (match_operand:SI 0 "register_operand" "=r")
16052 [(subreg:VF_128_256
16053 (ashiftrt:<sseintvecmode>
16054 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16055 (match_operand:QI 2 "const_int_operand" "n")) 0)]
16059 "&& reload_completed"
16060 [(set (match_dup 0)
16061 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16062 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16063 [(set_attr "type" "ssemov")
16064 (set_attr "prefix" "maybe_vex")
16065 (set_attr "mode" "<MODE>")])
;; Shift variant with a DI extension of the result.
16067 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
16068 [(set (match_operand:DI 0 "register_operand" "=r")
16071 [(subreg:VF_128_256
16072 (ashiftrt:<sseintvecmode>
16073 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16074 (match_operand:QI 2 "const_int_operand" "n")) 0)]
16076 "TARGET_64BIT && TARGET_SSE"
16078 "&& reload_completed"
16079 [(set (match_dup 0)
16080 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16081 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16082 [(set_attr "type" "ssemov")
16083 (set_attr "prefix" "maybe_vex")
16084 (set_attr "mode" "<MODE>")])
;; pmovmskb: collect byte sign bits of an integer vector into a GPR.
16086 (define_insn "<sse2_avx2>_pmovmskb"
16087 [(set (match_operand:SI 0 "register_operand" "=r")
16089 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
16092 "%vpmovmskb\t{%1, %0|%0, %1}"
16093 [(set_attr "type" "ssemov")
;; Legacy SSE2 encoding needs the 0x66 data16 prefix; VEX does not.
16094 (set (attr "prefix_data16")
16096 (match_test "TARGET_AVX")
16098 (const_string "1")))
16099 (set_attr "prefix" "maybe_vex")
16100 (set_attr "mode" "SI")])
;; DI-destination zero-extend form: writing %k0 (32-bit) zero-extends.
16102 (define_insn "*<sse2_avx2>_pmovmskb_zext"
16103 [(set (match_operand:DI 0 "register_operand" "=r")
16106 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
16108 "TARGET_64BIT && TARGET_SSE2"
16109 "%vpmovmskb\t{%1, %k0|%k0, %1}"
16110 [(set_attr "type" "ssemov")
16111 (set (attr "prefix_data16")
16113 (match_test "TARGET_AVX")
16115 (const_string "1")))
16116 (set_attr "prefix" "maybe_vex")
16117 (set_attr "mode" "SI")])
;; V16QI sign-extend form (16-bit mask fits in SI, so SE == ZE here).
16119 (define_insn "*sse2_pmovmskb_ext"
16120 [(set (match_operand:DI 0 "register_operand" "=r")
16123 [(match_operand:V16QI 1 "register_operand" "x")]
16125 "TARGET_64BIT && TARGET_SSE2"
16126 "%vpmovmskb\t{%1, %k0|%k0, %1}"
16127 [(set_attr "type" "ssemov")
16128 (set (attr "prefix_data16")
16130 (match_test "TARGET_AVX")
16132 (const_string "1")))
16133 (set_attr "prefix" "maybe_vex")
16134 (set_attr "mode" "SI")])
;; A V16QI pmovmskb result fits in 16 bits, so an HI->SI zero-extend of
;; the lowpart is redundant: split to the bare pmovmskb.
16136 (define_insn_and_split "*sse2_pmovskb_zexthisi"
16137 [(set (match_operand:SI 0 "register_operand")
16141 [(match_operand:V16QI 1 "register_operand")]
16142 UNSPEC_MOVMSK) 0)))]
16143 "TARGET_SSE2 && ix86_pre_reload_split ()"
16146 [(set (match_dup 0)
16147 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))])
;; Complemented mask (visible here as a split into pmovmskb followed by
;; XOR with 0xffff on a fresh pseudo).
16150 [(set (match_operand:SI 0 "register_operand")
16155 [(match_operand:V16QI 1 "register_operand")]
16156 UNSPEC_MOVMSK) 0))))]
16158 [(set (match_dup 2)
16159 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16161 (xor:SI (match_dup 2) (const_int 65535)))]
16162 "operands[2] = gen_reg_rtx (SImode);")
;; movmsk of a bitwise-NOT vector == complement of movmsk: split to
;; pmovmskb then NOT (32-lane mask fills all of SI) or XOR with the
;; (1 << nunits) - 1 lane mask otherwise.
16165 [(set (match_operand:SI 0 "register_operand")
16167 [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
16170 [(set (match_dup 2)
16171 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16172 (set (match_dup 0) (match_dup 3))]
16174 operands[2] = gen_reg_rtx (SImode);
16175 if (GET_MODE_NUNITS (<MODE>mode) == 32)
16176 operands[3] = gen_rtx_NOT (SImode, operands[2]);
16180 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
16182 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
;; Same complement trick when the NOT is wrapped in a same-size subreg
;; of another integer vector mode.
16187 [(set (match_operand:SI 0 "register_operand")
16189 [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
16192 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
16193 && GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
16194 [(set (match_dup 2)
16195 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
16196 (set (match_dup 0) (match_dup 3))]
16198 operands[2] = gen_reg_rtx (SImode);
16199 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
16200 if (GET_MODE_NUNITS (<MODE>mode) == 32)
16201 operands[3] = gen_rtx_NOT (SImode, operands[2]);
16205 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
16207 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
;; pmovmskb of (x < 0): the signed compare against zero only reads sign
;; bits, so it splits to a plain pmovmskb after reload.
16211 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
16212 [(set (match_operand:SI 0 "register_operand" "=r")
16214 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
16215 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
16220 [(set (match_dup 0)
16221 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16223 [(set_attr "type" "ssemov")
16224 (set (attr "prefix_data16")
16226 (match_test "TARGET_AVX")
16228 (const_string "1")))
16229 (set_attr "prefix" "maybe_vex")
16230 (set_attr "mode" "SI")])
;; Zero-extended DI variant of the (x < 0) form.
16232 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
16233 [(set (match_operand:DI 0 "register_operand" "=r")
16236 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
16237 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
16239 "TARGET_64BIT && TARGET_SSE2"
16242 [(set (match_dup 0)
16243 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16245 [(set_attr "type" "ssemov")
16246 (set (attr "prefix_data16")
16248 (match_test "TARGET_AVX")
16250 (const_string "1")))
16251 (set_attr "prefix" "maybe_vex")
16252 (set_attr "mode" "SI")])
;; Sign-extended DI variant (V16QI only).
16254 (define_insn_and_split "*sse2_pmovmskb_ext_lt"
16255 [(set (match_operand:DI 0 "register_operand" "=r")
16258 [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
16259 (match_operand:V16QI 2 "const0_operand" "C"))]
16261 "TARGET_64BIT && TARGET_SSE2"
16264 [(set (match_dup 0)
16265 (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16267 [(set_attr "type" "ssemov")
16268 (set (attr "prefix_data16")
16270 (match_test "TARGET_AVX")
16272 (const_string "1")))
16273 (set_attr "prefix" "maybe_vex")
16274 (set_attr "mode" "SI")])
;; maskmovdqu: byte-masked store of operand 1 under mask operand 2, with
;; the destination address implicitly in %rdi/%edi (hence the "D"
;; constraint in the insn below).
16276 (define_expand "sse2_maskmovdqu"
16277 [(set (match_operand:V16QI 0 "memory_operand")
16278 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
16279 (match_operand:V16QI 2 "register_operand")
16284 (define_insn "*sse2_maskmovdqu"
16285 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
16286 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
16287 (match_operand:V16QI 2 "register_operand" "x")
16288 (mem:V16QI (match_dup 0))]
16292 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
16293 that requires %v to be at the beginning of the opcode name. */
16294 if (Pmode != word_mode)
16295 fputs ("\taddr32", asm_out_file);
16296 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
16298 [(set_attr "type" "ssemov")
16299 (set_attr "prefix_data16" "1")
;; x32 (Pmode != word_mode) needs the addr32 prefix emitted above.
16300 (set (attr "length_address")
16301 (symbol_ref ("Pmode != word_mode")))
16302 ;; The implicit %rdi operand confuses default length_vex computation.
16303 (set (attr "length_vex")
16304 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
16305 (set_attr "prefix" "maybe_vex")
16306 (set_attr "znver1_decode" "vector")
16307 (set_attr "mode" "TI")])
;; ldmxcsr: load the SSE control/status register from memory.
16309 (define_insn "sse_ldmxcsr"
16310 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
16314 [(set_attr "type" "sse")
16315 (set_attr "atom_sse_attr" "mxcsr")
16316 (set_attr "prefix" "maybe_vex")
16317 (set_attr "memory" "load")])
;; stmxcsr: store the SSE control/status register to memory.
16319 (define_insn "sse_stmxcsr"
16320 [(set (match_operand:SI 0 "memory_operand" "=m")
16321 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
16324 [(set_attr "type" "sse")
16325 (set_attr "atom_sse_attr" "mxcsr")
16326 (set_attr "prefix" "maybe_vex")
16327 (set_attr "memory" "store")])
;; clflush: flush the cache line containing the given address.
16329 (define_insn "sse2_clflush"
16330 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
16334 [(set_attr "type" "sse")
16335 (set_attr "atom_sse_attr" "fence")
16336 (set_attr "memory" "unknown")])
16338 ;; As per AMD and Intel ISA manuals, the first operand is extensions
16339 ;; and it goes to %ecx. The second operand received is hints and it goes
;; mwait: fixed implicit register operands (%ecx = extensions,
;; %eax = hints), hence the "c"/"a" constraints.
16341 (define_insn "sse3_mwait"
16342 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
16343 (match_operand:SI 1 "register_operand" "a")]
16346 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
16347 ;; Since 32bit register operands are implicitly zero extended to 64bit,
16348 ;; we only need to set up 32bit registers.
16350 [(set_attr "length" "3")])
;; monitor: implicit %rax/%eax = address, %ecx = extensions,
;; %edx = hints.  Pmode-parameterized for x32 vs 64-bit addresses.
16352 (define_insn "@sse3_monitor_<mode>"
16353 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
16354 (match_operand:SI 1 "register_operand" "c")
16355 (match_operand:SI 2 "register_operand" "d")]
16358 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
16359 ;; RCX and RDX are used. Since 32bit register operands are implicitly
16360 ;; zero extended to 64bit, we only need to set up 32bit registers.
;; One extra byte (addr32 prefix) when Pmode is narrower than word_mode.
16362 [(set (attr "length")
16363 (symbol_ref ("(Pmode != word_mode) + 3")))])
16365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16367 ;; SSSE3 instructions
16369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Horizontal add/sub variants share one pattern via this iterator.
16371 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/sub of words.  The lane-crossing selection
;; indices (0,2,4,6,16,... vs 8,10,...,24,...) model vphaddw/vphsubw's
;; within-128-bit-lane pairing.
16373 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
16374 [(set (match_operand:V16HI 0 "register_operand" "=x")
16375 (ssse3_plusminus:V16HI
16378 (match_operand:V16HI 1 "register_operand" "x")
16379 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
16381 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
16382 (const_int 16) (const_int 18) (const_int 20) (const_int 22)
16383 (const_int 8) (const_int 10) (const_int 12) (const_int 14)
16384 (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
16386 (vec_concat:V32HI (match_dup 1) (match_dup 2))
16388 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
16389 (const_int 17) (const_int 19) (const_int 21) (const_int 23)
16390 (const_int 9) (const_int 11) (const_int 13) (const_int 15)
16391 (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
16393 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16394 [(set_attr "type" "sseiadd")
16395 (set_attr "prefix_extra" "1")
16396 (set_attr "prefix" "vex")
16397 (set_attr "mode" "OI")])
;; 128-bit horizontal add/sub of words: op on adjacent element pairs of
;; the concatenated sources (even indices vs odd indices).
16399 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
16400 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16401 (ssse3_plusminus:V8HI
16404 (match_operand:V8HI 1 "register_operand" "0,x")
16405 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
16407 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
16408 (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))
16410 (vec_concat:V16HI (match_dup 1) (match_dup 2))
16412 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
16413 (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))]
16416 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16417 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16418 [(set_attr "isa" "noavx,avx")
16419 (set_attr "type" "sseiadd")
16420 (set_attr "atom_unit" "complex")
16421 (set_attr "prefix_data16" "1,*")
16422 (set_attr "prefix_extra" "1")
16423 (set_attr "prefix" "orig,vex")
16424 (set_attr "mode" "TI")])
;; 64-bit (MMX-width) horizontal add/sub of words.  Runs natively on MMX
;; regs; when register allocation picked an SSE register instead, split
;; to the V8HI pattern and move the result's high half back down.
16426 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
16427 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16428 (ssse3_plusminus:V4HI
16431 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
16432 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
16434 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
16436 (vec_concat:V8HI (match_dup 1) (match_dup 2))
16438 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
16439 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16441 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16444 "TARGET_SSSE3 && reload_completed
16445 && SSE_REGNO_P (REGNO (operands[0]))"
16448 /* Generate SSE version of the operation. */
16449 rtx op0 = lowpart_subreg (V8HImode, operands[0],
16450 GET_MODE (operands[0]));
16451 rtx op1 = lowpart_subreg (V8HImode, operands[1],
16452 GET_MODE (operands[1]));
16453 rtx op2 = lowpart_subreg (V8HImode, operands[2],
16454 GET_MODE (operands[2]));
16455 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
16456 ix86_move_vector_high_sse_to_mmx (op0);
16459 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16460 (set_attr "type" "sseiadd")
16461 (set_attr "atom_unit" "complex")
16462 (set_attr "prefix_extra" "1")
16463 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16464 (set_attr "mode" "DI,TI,TI")])
;; 256-bit horizontal add/sub of dwords; index pattern encodes the
;; within-128-bit-lane pairing of vphaddd/vphsubd.
16466 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
16467 [(set (match_operand:V8SI 0 "register_operand" "=x")
16471 (match_operand:V8SI 1 "register_operand" "x")
16472 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
16474 [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
16475 (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
16477 (vec_concat:V16SI (match_dup 1) (match_dup 2))
16479 [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
16480 (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
16482 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16483 [(set_attr "type" "sseiadd")
16484 (set_attr "prefix_extra" "1")
16485 (set_attr "prefix" "vex")
16486 (set_attr "mode" "OI")])
;; 128-bit horizontal add/sub of dwords.
16488 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
16489 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16493 (match_operand:V4SI 1 "register_operand" "0,x")
16494 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
16496 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
16498 (vec_concat:V8SI (match_dup 1) (match_dup 2))
16500 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
16503 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16504 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16505 [(set_attr "isa" "noavx,avx")
16506 (set_attr "type" "sseiadd")
16507 (set_attr "atom_unit" "complex")
16508 (set_attr "prefix_data16" "1,*")
16509 (set_attr "prefix_extra" "1")
16510 (set_attr "prefix" "orig,vex")
16511 (set_attr "mode" "TI")])
;; 64-bit (MMX-width) horizontal add/sub of dwords; same SSE-register
;; split strategy as the V4HI word variant above: widen to V4SI, run the
;; 128-bit pattern, then fix up the high half.
16513 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
16514 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
16518 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
16519 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
16520 (parallel [(const_int 0) (const_int 2)]))
16522 (vec_concat:V4SI (match_dup 1) (match_dup 2))
16523 (parallel [(const_int 1) (const_int 3)]))))]
16524 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16526 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16529 "TARGET_SSSE3 && reload_completed
16530 && SSE_REGNO_P (REGNO (operands[0]))"
16533 /* Generate SSE version of the operation. */
16534 rtx op0 = lowpart_subreg (V4SImode, operands[0],
16535 GET_MODE (operands[0]));
16536 rtx op1 = lowpart_subreg (V4SImode, operands[1],
16537 GET_MODE (operands[1]));
16538 rtx op2 = lowpart_subreg (V4SImode, operands[2],
16539 GET_MODE (operands[2]));
16540 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
16541 ix86_move_vector_high_sse_to_mmx (op0);
16544 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16545 (set_attr "type" "sseiadd")
16546 (set_attr "atom_unit" "complex")
16547 (set_attr "prefix_extra" "1")
16548 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16549 (set_attr "mode" "DI,TI,TI")])
;; vpmaddubsw (256-bit): multiply unsigned bytes of operand 1 by signed
;; bytes of operand 2 and saturating-add adjacent products into words.
;; Even byte lanes (0,2,...,30) and odd byte lanes (1,3,...,31) are
;; selected from each source and combined pairwise.
16551 (define_insn "avx2_pmaddubsw256"
16552 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
16557 (match_operand:V32QI 1 "register_operand" "x,v")
16558 (parallel [(const_int 0) (const_int 2)
16559 (const_int 4) (const_int 6)
16560 (const_int 8) (const_int 10)
16561 (const_int 12) (const_int 14)
16562 (const_int 16) (const_int 18)
16563 (const_int 20) (const_int 22)
16564 (const_int 24) (const_int 26)
16565 (const_int 28) (const_int 30)])))
16568 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
16569 (parallel [(const_int 0) (const_int 2)
16570 (const_int 4) (const_int 6)
16571 (const_int 8) (const_int 10)
16572 (const_int 12) (const_int 14)
16573 (const_int 16) (const_int 18)
16574 (const_int 20) (const_int 22)
16575 (const_int 24) (const_int 26)
16576 (const_int 28) (const_int 30)]))))
;; Second product stream: the odd byte lanes of both sources.
16579 (vec_select:V16QI (match_dup 1)
16580 (parallel [(const_int 1) (const_int 3)
16581 (const_int 5) (const_int 7)
16582 (const_int 9) (const_int 11)
16583 (const_int 13) (const_int 15)
16584 (const_int 17) (const_int 19)
16585 (const_int 21) (const_int 23)
16586 (const_int 25) (const_int 27)
16587 (const_int 29) (const_int 31)])))
16589 (vec_select:V16QI (match_dup 2)
16590 (parallel [(const_int 1) (const_int 3)
16591 (const_int 5) (const_int 7)
16592 (const_int 9) (const_int 11)
16593 (const_int 13) (const_int 15)
16594 (const_int 17) (const_int 19)
16595 (const_int 21) (const_int 23)
16596 (const_int 25) (const_int 27)
16597 (const_int 29) (const_int 31)]))))))]
16599 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16600 [(set_attr "isa" "*,avx512bw")
16601 (set_attr "type" "sseiadd")
16602 (set_attr "prefix_extra" "1")
16603 (set_attr "prefix" "vex,evex")
16604 (set_attr "mode" "OI")])
16606 ;; The correct representation for this is absolutely enormous, and
16607 ;; surely not generally useful.
16608 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
16609 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16610 (unspec:VI2_AVX512VL
16611 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
16612 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
16613 UNSPEC_PMADDUBSW512))]
16615 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
16616 [(set_attr "type" "sseiadd")
16617 (set_attr "prefix" "evex")
16618 (set_attr "mode" "XI")])
16620 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
16621 [(set (match_operand:V32HI 0 "register_operand" "=v")
16628 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
16630 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
16632 (const_vector:V32HI [(const_int 1) (const_int 1)
16633 (const_int 1) (const_int 1)
16634 (const_int 1) (const_int 1)
16635 (const_int 1) (const_int 1)
16636 (const_int 1) (const_int 1)
16637 (const_int 1) (const_int 1)
16638 (const_int 1) (const_int 1)
16639 (const_int 1) (const_int 1)
16640 (const_int 1) (const_int 1)
16641 (const_int 1) (const_int 1)
16642 (const_int 1) (const_int 1)
16643 (const_int 1) (const_int 1)
16644 (const_int 1) (const_int 1)
16645 (const_int 1) (const_int 1)
16646 (const_int 1) (const_int 1)
16647 (const_int 1) (const_int 1)]))
16650 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16651 [(set_attr "type" "sseimul")
16652 (set_attr "prefix" "evex")
16653 (set_attr "mode" "XI")])
16655 (define_insn "ssse3_pmaddubsw128"
16656 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
16661 (match_operand:V16QI 1 "register_operand" "0,x,v")
16662 (parallel [(const_int 0) (const_int 2)
16663 (const_int 4) (const_int 6)
16664 (const_int 8) (const_int 10)
16665 (const_int 12) (const_int 14)])))
16668 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
16669 (parallel [(const_int 0) (const_int 2)
16670 (const_int 4) (const_int 6)
16671 (const_int 8) (const_int 10)
16672 (const_int 12) (const_int 14)]))))
16675 (vec_select:V8QI (match_dup 1)
16676 (parallel [(const_int 1) (const_int 3)
16677 (const_int 5) (const_int 7)
16678 (const_int 9) (const_int 11)
16679 (const_int 13) (const_int 15)])))
16681 (vec_select:V8QI (match_dup 2)
16682 (parallel [(const_int 1) (const_int 3)
16683 (const_int 5) (const_int 7)
16684 (const_int 9) (const_int 11)
16685 (const_int 13) (const_int 15)]))))))]
16688 pmaddubsw\t{%2, %0|%0, %2}
16689 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
16690 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16691 [(set_attr "isa" "noavx,avx,avx512bw")
16692 (set_attr "type" "sseiadd")
16693 (set_attr "atom_unit" "simul")
16694 (set_attr "prefix_data16" "1,*,*")
16695 (set_attr "prefix_extra" "1")
16696 (set_attr "prefix" "orig,vex,evex")
16697 (set_attr "mode" "TI")])
16699 (define_insn "ssse3_pmaddubsw"
16700 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16705 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
16706 (parallel [(const_int 0) (const_int 2)
16707 (const_int 4) (const_int 6)])))
16710 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
16711 (parallel [(const_int 0) (const_int 2)
16712 (const_int 4) (const_int 6)]))))
16715 (vec_select:V4QI (match_dup 1)
16716 (parallel [(const_int 1) (const_int 3)
16717 (const_int 5) (const_int 7)])))
16719 (vec_select:V4QI (match_dup 2)
16720 (parallel [(const_int 1) (const_int 3)
16721 (const_int 5) (const_int 7)]))))))]
16722 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16724 pmaddubsw\t{%2, %0|%0, %2}
16725 pmaddubsw\t{%2, %0|%0, %2}
16726 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16727 [(set_attr "isa" "*,noavx,avx")
16728 (set_attr "mmx_isa" "native,*,*")
16729 (set_attr "type" "sseiadd")
16730 (set_attr "atom_unit" "simul")
16731 (set_attr "prefix_extra" "1")
16732 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16733 (set_attr "mode" "DI,TI,TI")])
;; Vector modes covered by the pmulhrsw (packed multiply high with
;; round and scale) patterns below: 128-bit V8HI is always available
;; (SSSE3 condition is applied by the using patterns), while 256-bit
;; V16HI additionally requires AVX2.
16735 (define_mode_iterator PMULHRSW
16736 [V8HI (V16HI "TARGET_AVX2")])
16738 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
16739 [(set (match_operand:PMULHRSW 0 "register_operand")
16740 (vec_merge:PMULHRSW
16742 (lshiftrt:<ssedoublemode>
16743 (plus:<ssedoublemode>
16744 (lshiftrt:<ssedoublemode>
16745 (mult:<ssedoublemode>
16746 (sign_extend:<ssedoublemode>
16747 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16748 (sign_extend:<ssedoublemode>
16749 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16753 (match_operand:PMULHRSW 3 "register_operand")
16754 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
16755 "TARGET_AVX512BW && TARGET_AVX512VL"
16757 operands[5] = CONST1_RTX(<MODE>mode);
16758 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16761 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
16762 [(set (match_operand:PMULHRSW 0 "register_operand")
16764 (lshiftrt:<ssedoublemode>
16765 (plus:<ssedoublemode>
16766 (lshiftrt:<ssedoublemode>
16767 (mult:<ssedoublemode>
16768 (sign_extend:<ssedoublemode>
16769 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16770 (sign_extend:<ssedoublemode>
16771 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16777 operands[3] = CONST1_RTX(<MODE>mode);
16778 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16781 (define_expand "smulhrs<mode>3"
16782 [(set (match_operand:VI2_AVX2 0 "register_operand")
16784 (lshiftrt:<ssedoublemode>
16785 (plus:<ssedoublemode>
16786 (lshiftrt:<ssedoublemode>
16787 (mult:<ssedoublemode>
16788 (sign_extend:<ssedoublemode>
16789 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
16790 (sign_extend:<ssedoublemode>
16791 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
16797 operands[3] = CONST1_RTX(<MODE>mode);
16798 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16801 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
16802 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
16804 (lshiftrt:<ssedoublemode>
16805 (plus:<ssedoublemode>
16806 (lshiftrt:<ssedoublemode>
16807 (mult:<ssedoublemode>
16808 (sign_extend:<ssedoublemode>
16809 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
16810 (sign_extend:<ssedoublemode>
16811 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
16813 (match_operand:VI2_AVX2 3 "const1_operand"))
16815 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16816 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16818 pmulhrsw\t{%2, %0|%0, %2}
16819 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
16820 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
16821 [(set_attr "isa" "noavx,avx,avx512bw")
16822 (set_attr "type" "sseimul")
16823 (set_attr "prefix_data16" "1,*,*")
16824 (set_attr "prefix_extra" "1")
16825 (set_attr "prefix" "orig,maybe_evex,evex")
16826 (set_attr "mode" "<sseinsnmode>")])
16828 (define_expand "smulhrsv4hi3"
16829 [(set (match_operand:V4HI 0 "register_operand")
16836 (match_operand:V4HI 1 "register_operand"))
16838 (match_operand:V4HI 2 "register_operand")))
16842 "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
16844 operands[3] = CONST1_RTX(V4HImode);
16845 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
16848 (define_expand "ssse3_pmulhrswv4hi3"
16849 [(set (match_operand:V4HI 0 "register_operand")
16856 (match_operand:V4HI 1 "register_mmxmem_operand"))
16858 (match_operand:V4HI 2 "register_mmxmem_operand")))
16862 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16864 operands[3] = CONST1_RTX(V4HImode);
16865 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
16868 (define_insn "*ssse3_pmulhrswv4hi3"
16869 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16876 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
16878 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
16880 (match_operand:V4HI 3 "const1_operand"))
16882 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
16884 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16886 pmulhrsw\t{%2, %0|%0, %2}
16887 pmulhrsw\t{%2, %0|%0, %2}
16888 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
16889 [(set_attr "isa" "*,noavx,avx")
16890 (set_attr "mmx_isa" "native,*,*")
16891 (set_attr "type" "sseimul")
16892 (set_attr "prefix_extra" "1")
16893 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16894 (set_attr "mode" "DI,TI,TI")])
16896 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
16897 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
16899 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
16900 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
16902 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16904 pshufb\t{%2, %0|%0, %2}
16905 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16906 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16907 [(set_attr "isa" "noavx,avx,avx512bw")
16908 (set_attr "type" "sselog1")
16909 (set_attr "prefix_data16" "1,*,*")
16910 (set_attr "prefix_extra" "1")
16911 (set_attr "prefix" "orig,maybe_evex,evex")
16912 (set_attr "btver2_decode" "vector")
16913 (set_attr "mode" "<sseinsnmode>")])
16915 (define_insn_and_split "ssse3_pshufbv8qi3"
16916 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
16917 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
16918 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
16920 (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
16921 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16923 pshufb\t{%2, %0|%0, %2}
16926 "TARGET_SSSE3 && reload_completed
16927 && SSE_REGNO_P (REGNO (operands[0]))"
16928 [(set (match_dup 3) (match_dup 5))
16930 (and:V4SI (match_dup 3) (match_dup 2)))
16932 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
16934 /* Emulate MMX version of pshufb with SSE version by masking out the
16935 bit 3 of the shuffle control byte. */
16936 operands[0] = lowpart_subreg (V16QImode, operands[0],
16937 GET_MODE (operands[0]));
16938 operands[1] = lowpart_subreg (V16QImode, operands[1],
16939 GET_MODE (operands[1]));
16940 operands[2] = lowpart_subreg (V4SImode, operands[2],
16941 GET_MODE (operands[2]));
16942 operands[4] = lowpart_subreg (V16QImode, operands[3],
16943 GET_MODE (operands[3]));
16944 rtx vec_const = ix86_build_const_vector (V4SImode, true,
16945 gen_int_mode (0xf7f7f7f7, SImode));
16946 operands[5] = force_const_mem (V4SImode, vec_const);
16948 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16949 (set_attr "prefix_extra" "1")
16950 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16951 (set_attr "mode" "DI,TI,TI")])
16953 (define_insn "<ssse3_avx2>_psign<mode>3"
16954 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
16956 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
16957 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
16961 psign<ssemodesuffix>\t{%2, %0|%0, %2}
16962 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16963 [(set_attr "isa" "noavx,avx")
16964 (set_attr "type" "sselog1")
16965 (set_attr "prefix_data16" "1,*")
16966 (set_attr "prefix_extra" "1")
16967 (set_attr "prefix" "orig,vex")
16968 (set_attr "mode" "<sseinsnmode>")])
16970 (define_insn "ssse3_psign<mode>3"
16971 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
16973 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
16974 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
16976 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16978 psign<mmxvecsize>\t{%2, %0|%0, %2}
16979 psign<mmxvecsize>\t{%2, %0|%0, %2}
16980 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
16981 [(set_attr "isa" "*,noavx,avx")
16982 (set_attr "mmx_isa" "native,*,*")
16983 (set_attr "type" "sselog1")
16984 (set_attr "prefix_extra" "1")
16985 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16986 (set_attr "mode" "DI,TI,TI")])
16988 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
16989 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
16990 (vec_merge:VI1_AVX512
16992 [(match_operand:VI1_AVX512 1 "register_operand" "v")
16993 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
16994 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
16996 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
16997 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
16998 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
17000 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17001 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
17003 [(set_attr "type" "sseishft")
17004 (set_attr "atom_unit" "sishuf")
17005 (set_attr "prefix_extra" "1")
17006 (set_attr "length_immediate" "1")
17007 (set_attr "prefix" "evex")
17008 (set_attr "mode" "<sseinsnmode>")])
17010 (define_insn "<ssse3_avx2>_palignr<mode>"
17011 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
17012 (unspec:SSESCALARMODE
17013 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
17014 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
17015 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
17019 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17021 switch (which_alternative)
17024 return "palignr\t{%3, %2, %0|%0, %2, %3}";
17027 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17029 gcc_unreachable ();
17032 [(set_attr "isa" "noavx,avx,avx512bw")
17033 (set_attr "type" "sseishft")
17034 (set_attr "atom_unit" "sishuf")
17035 (set_attr "prefix_data16" "1,*,*")
17036 (set_attr "prefix_extra" "1")
17037 (set_attr "length_immediate" "1")
17038 (set_attr "prefix" "orig,vex,evex")
17039 (set_attr "mode" "<sseinsnmode>")])
17041 (define_insn_and_split "ssse3_palignrdi"
17042 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
17043 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
17044 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
17045 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
17047 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17049 switch (which_alternative)
17052 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17053 return "palignr\t{%3, %2, %0|%0, %2, %3}";
17058 gcc_unreachable ();
17061 "TARGET_SSSE3 && reload_completed
17062 && SSE_REGNO_P (REGNO (operands[0]))"
17063 [(set (match_dup 0)
17064 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
17066 /* Emulate MMX palignrdi with SSE psrldq. */
17067 rtx op0 = lowpart_subreg (V2DImode, operands[0],
17068 GET_MODE (operands[0]));
17070 emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
17073 /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
17074 emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
17075 /* Swap bits 0:63 with bits 64:127. */
17076 rtx mask = gen_rtx_PARALLEL (VOIDmode,
17077 gen_rtvec (4, GEN_INT (2),
17081 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
17082 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
17083 emit_insn (gen_rtx_SET (op1, op2));
17085 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
17087 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17088 (set_attr "type" "sseishft")
17089 (set_attr "atom_unit" "sishuf")
17090 (set_attr "prefix_extra" "1")
17091 (set_attr "length_immediate" "1")
17092 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17093 (set_attr "mode" "DI,TI,TI")])
17095 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
17096 ;; modes for abs instruction on pre AVX-512 targets.
;; Each mode is gated on the minimum ISA that provides a pabs-style
;; instruction for that element size and vector width:
;;   - QI/HI element modes at 512 bits need AVX512BW, 256 bits need AVX2,
;;     128 bits are unconditional (condition applied by using patterns);
;;   - SI element modes follow AVX512F / AVX2 / baseline;
;;   - DI element modes only exist with AVX512F (512-bit) or AVX512VL
;;     (256- and 128-bit), since pre-AVX512 has no vpabsq.
17097 (define_mode_iterator VI1248_AVX512VL_AVX512BW
17098 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
17099 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
17100 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
17101 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
17103 (define_insn "*abs<mode>2"
17104 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
17105 (abs:VI1248_AVX512VL_AVX512BW
17106 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
17108 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
17109 [(set_attr "type" "sselog1")
17110 (set_attr "prefix_data16" "1")
17111 (set_attr "prefix_extra" "1")
17112 (set_attr "prefix" "maybe_vex")
17113 (set_attr "mode" "<sseinsnmode>")])
17115 (define_insn "abs<mode>2_mask"
17116 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17117 (vec_merge:VI48_AVX512VL
17119 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
17120 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
17121 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
17123 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17124 [(set_attr "type" "sselog1")
17125 (set_attr "prefix" "evex")
17126 (set_attr "mode" "<sseinsnmode>")])
17128 (define_insn "abs<mode>2_mask"
17129 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17130 (vec_merge:VI12_AVX512VL
17132 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
17133 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
17134 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
17136 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17137 [(set_attr "type" "sselog1")
17138 (set_attr "prefix" "evex")
17139 (set_attr "mode" "<sseinsnmode>")])
17141 (define_expand "abs<mode>2"
17142 [(set (match_operand:VI_AVX2 0 "register_operand")
17144 (match_operand:VI_AVX2 1 "vector_operand")))]
17148 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
17149 && !TARGET_AVX512VL))
17151 ix86_expand_sse2_abs (operands[0], operands[1]);
17156 (define_insn "ssse3_abs<mode>2"
17157 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
17159 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
17160 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17162 pabs<mmxvecsize>\t{%1, %0|%0, %1}
17163 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
17164 [(set_attr "mmx_isa" "native,*")
17165 (set_attr "type" "sselog1")
17166 (set_attr "prefix_rep" "0")
17167 (set_attr "prefix_extra" "1")
17168 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17169 (set_attr "mode" "DI,TI")])
17171 (define_insn "abs<mode>2"
17172 [(set (match_operand:MMXMODEI 0 "register_operand")
17174 (match_operand:MMXMODEI 1 "register_operand")))]
17175 "TARGET_MMX_WITH_SSE && TARGET_SSSE3")
17177 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17179 ;; AMD SSE4A instructions
17181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17183 (define_insn "sse4a_movnt<mode>"
17184 [(set (match_operand:MODEF 0 "memory_operand" "=m")
17186 [(match_operand:MODEF 1 "register_operand" "x")]
17189 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
17190 [(set_attr "type" "ssemov")
17191 (set_attr "mode" "<MODE>")])
17193 (define_insn "sse4a_vmmovnt<mode>"
17194 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
17195 (unspec:<ssescalarmode>
17196 [(vec_select:<ssescalarmode>
17197 (match_operand:VF_128 1 "register_operand" "x")
17198 (parallel [(const_int 0)]))]
17201 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
17202 [(set_attr "type" "ssemov")
17203 (set_attr "mode" "<ssescalarmode>")])
17205 (define_insn "sse4a_extrqi"
17206 [(set (match_operand:V2DI 0 "register_operand" "=x")
17207 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17208 (match_operand 2 "const_0_to_255_operand")
17209 (match_operand 3 "const_0_to_255_operand")]
17212 "extrq\t{%3, %2, %0|%0, %2, %3}"
17213 [(set_attr "type" "sse")
17214 (set_attr "prefix_data16" "1")
17215 (set_attr "length_immediate" "2")
17216 (set_attr "mode" "TI")])
17218 (define_insn "sse4a_extrq"
17219 [(set (match_operand:V2DI 0 "register_operand" "=x")
17220 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17221 (match_operand:V16QI 2 "register_operand" "x")]
17224 "extrq\t{%2, %0|%0, %2}"
17225 [(set_attr "type" "sse")
17226 (set_attr "prefix_data16" "1")
17227 (set_attr "mode" "TI")])
17229 (define_insn "sse4a_insertqi"
17230 [(set (match_operand:V2DI 0 "register_operand" "=x")
17231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17232 (match_operand:V2DI 2 "register_operand" "x")
17233 (match_operand 3 "const_0_to_255_operand")
17234 (match_operand 4 "const_0_to_255_operand")]
17237 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
17238 [(set_attr "type" "sseins")
17239 (set_attr "prefix_data16" "0")
17240 (set_attr "prefix_rep" "1")
17241 (set_attr "length_immediate" "2")
17242 (set_attr "mode" "TI")])
17244 (define_insn "sse4a_insertq"
17245 [(set (match_operand:V2DI 0 "register_operand" "=x")
17246 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17247 (match_operand:V2DI 2 "register_operand" "x")]
17250 "insertq\t{%2, %0|%0, %2}"
17251 [(set_attr "type" "sseins")
17252 (set_attr "prefix_data16" "0")
17253 (set_attr "prefix_rep" "1")
17254 (set_attr "mode" "TI")])
17256 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17258 ;; Intel SSE4.1 instructions
17260 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17262 ;; Mapping of immediate bits for blend instructions
;; The blend immediate has one selector bit per vector element, so the
;; maximum legal value is 2^nelts - 1: 255 for 8 elements (V8SF),
;; 15 for 4 elements (V4SF/V4DF), 3 for 2 elements (V2DF).  Used to
;; build the const_0_to_<blendbits>_operand predicate below.
17263 (define_mode_attr blendbits
17264 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
17266 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
17267 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17268 (vec_merge:VF_128_256
17269 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17270 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
17271 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
17274 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17275 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17276 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17277 [(set_attr "isa" "noavx,noavx,avx")
17278 (set_attr "type" "ssemov")
17279 (set_attr "length_immediate" "1")
17280 (set_attr "prefix_data16" "1,1,*")
17281 (set_attr "prefix_extra" "1")
17282 (set_attr "prefix" "orig,orig,vex")
17283 (set_attr "mode" "<MODE>")])
17285 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
17286 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17288 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17289 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17290 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
17294 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17295 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17296 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17297 [(set_attr "isa" "noavx,noavx,avx")
17298 (set_attr "type" "ssemov")
17299 (set_attr "length_immediate" "1")
17300 (set_attr "prefix_data16" "1,1,*")
17301 (set_attr "prefix_extra" "1")
17302 (set_attr "prefix" "orig,orig,vex")
17303 (set_attr "btver2_decode" "vector,vector,vector")
17304 (set_attr "mode" "<MODE>")])
17306 ;; Also define scalar versions. These are used for conditional move.
17307 ;; Using subregs into vector modes causes register allocation lossage.
17308 ;; These patterns do not allow memory operands because the native
17309 ;; instructions read the full 128-bits.
17311 (define_insn "sse4_1_blendv<ssemodesuffix>"
17312 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
17314 [(match_operand:MODEF 1 "register_operand" "0,0,x")
17315 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
17316 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
17320 if (get_attr_mode (insn) == MODE_V4SF)
17321 return (which_alternative == 2
17322 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17323 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
17325 return (which_alternative == 2
17326 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17327 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
17329 [(set_attr "isa" "noavx,noavx,avx")
17330 (set_attr "type" "ssemov")
17331 (set_attr "length_immediate" "1")
17332 (set_attr "prefix_data16" "1,1,*")
17333 (set_attr "prefix_extra" "1")
17334 (set_attr "prefix" "orig,orig,vex")
17335 (set_attr "btver2_decode" "vector,vector,vector")
17337 (cond [(match_test "TARGET_AVX")
17338 (const_string "<ssevecmode>")
17339 (match_test "optimize_function_for_size_p (cfun)")
17340 (const_string "V4SF")
17341 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
17342 (const_string "V4SF")
17344 (const_string "<ssevecmode>")))])
17346 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
17347 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17349 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17350 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17352 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
17353 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
17357 "&& reload_completed"
17358 [(set (match_dup 0)
17360 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17361 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
17362 [(set_attr "isa" "noavx,noavx,avx")
17363 (set_attr "type" "ssemov")
17364 (set_attr "length_immediate" "1")
17365 (set_attr "prefix_data16" "1,1,*")
17366 (set_attr "prefix_extra" "1")
17367 (set_attr "prefix" "orig,orig,vex")
17368 (set_attr "btver2_decode" "vector,vector,vector")
17369 (set_attr "mode" "<MODE>")])
;; Floating-point mnemonic suffix ("pd"/"ps") matching the element width
;; of an integer vector mode; lets the *_ltint blendv pattern below emit
;; the FP-domain blendvpd/blendvps form for DI/SI element modes.
17371 (define_mode_attr ssefltmodesuffix
17372 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
;; Floating-point vector mode of the same width and element size as an
;; integer vector mode (V2DI -> V2DF etc.); used by the *_ltint blendv
;; split to recast integer operands as FP vectors via gen_lowpart.
17374 (define_mode_attr ssefltvecmode
17375 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
17377 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
17378 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
17379 (unspec:<ssebytemode>
17380 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
17381 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
17382 (subreg:<ssebytemode>
17384 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
17385 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
17389 "&& reload_completed"
17390 [(set (match_dup 0)
17391 (unspec:<ssefltvecmode>
17392 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17394 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
17395 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
17396 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
17397 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
17399 [(set_attr "isa" "noavx,noavx,avx")
17400 (set_attr "type" "ssemov")
17401 (set_attr "length_immediate" "1")
17402 (set_attr "prefix_data16" "1,1,*")
17403 (set_attr "prefix_extra" "1")
17404 (set_attr "prefix" "orig,orig,vex")
17405 (set_attr "btver2_decode" "vector,vector,vector")
17406 (set_attr "mode" "<ssefltvecmode>")])
17408 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
17409 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17411 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
17412 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17413 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17417 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17418 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17419 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17420 [(set_attr "isa" "noavx,noavx,avx")
17421 (set_attr "type" "ssemul")
17422 (set_attr "length_immediate" "1")
17423 (set_attr "prefix_data16" "1,1,*")
17424 (set_attr "prefix_extra" "1")
17425 (set_attr "prefix" "orig,orig,vex")
17426 (set_attr "btver2_decode" "vector,vector,vector")
17427 (set_attr "znver1_decode" "vector,vector,vector")
17428 (set_attr "mode" "<MODE>")])
17430 ;; Mode attribute used by `vmovntdqa' pattern
;; Selects the pattern-name prefix by vector width: 128-bit movntdqa is
;; an SSE4.1 instruction, the 256-bit form needs AVX2, and the 512-bit
;; form needs AVX512F.
17431 (define_mode_attr vi8_sse4_1_avx2_avx512
17432 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
17434 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
17435 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
17436 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
17439 "%vmovntdqa\t{%1, %0|%0, %1}"
17440 [(set_attr "isa" "noavx,noavx,avx")
17441 (set_attr "type" "ssemov")
17442 (set_attr "prefix_extra" "1,1,*")
17443 (set_attr "prefix" "orig,orig,maybe_evex")
17444 (set_attr "mode" "<sseinsnmode>")])
17446 (define_insn "<sse4_1_avx2>_mpsadbw"
17447 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17449 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17450 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17451 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17455 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17456 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17457 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17458 [(set_attr "isa" "noavx,noavx,avx")
17459 (set_attr "type" "sselog1")
17460 (set_attr "length_immediate" "1")
17461 (set_attr "prefix_extra" "1")
17462 (set_attr "prefix" "orig,orig,vex")
17463 (set_attr "btver2_decode" "vector,vector,vector")
17464 (set_attr "znver1_decode" "vector,vector,vector")
17465 (set_attr "mode" "<sseinsnmode>")])
17467 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
17468 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
17469 (vec_concat:VI2_AVX2
17470 (us_truncate:<ssehalfvecmode>
17471 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
17472 (us_truncate:<ssehalfvecmode>
17473 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
17474 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17476 packusdw\t{%2, %0|%0, %2}
17477 packusdw\t{%2, %0|%0, %2}
17478 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
17479 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17480 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
17481 (set_attr "type" "sselog")
17482 (set_attr "prefix_extra" "1")
17483 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
17484 (set_attr "mode" "<sseinsnmode>")])
17486 (define_insn "<sse4_1_avx2>_pblendvb"
17487 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17489 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17490 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17491 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
17495 pblendvb\t{%3, %2, %0|%0, %2, %3}
17496 pblendvb\t{%3, %2, %0|%0, %2, %3}
17497 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17498 [(set_attr "isa" "noavx,noavx,avx")
17499 (set_attr "type" "ssemov")
17500 (set_attr "prefix_extra" "1")
17501 (set_attr "length_immediate" "*,*,1")
17502 (set_attr "prefix" "orig,orig,vex")
17503 (set_attr "btver2_decode" "vector,vector,vector")
17504 (set_attr "mode" "<sseinsnmode>")])
17506 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
17507 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17509 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17510 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17511 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
17512 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
17517 [(set (match_dup 0)
17519 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17521 [(set_attr "isa" "noavx,noavx,avx")
17522 (set_attr "type" "ssemov")
17523 (set_attr "prefix_extra" "1")
17524 (set_attr "length_immediate" "*,*,1")
17525 (set_attr "prefix" "orig,orig,vex")
17526 (set_attr "btver2_decode" "vector,vector,vector")
17527 (set_attr "mode" "<sseinsnmode>")])
;; Immediate word blend (pblendw): bit i of the 8-bit immediate selects
;; word i from operand 2 (bit set) or operand 1 (bit clear).
17529 (define_insn "sse4_1_pblendw"
17530 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17532 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
17533 (match_operand:V8HI 1 "register_operand" "0,0,x")
17534 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
17537 pblendw\t{%3, %2, %0|%0, %2, %3}
17538 pblendw\t{%3, %2, %0|%0, %2, %3}
17539 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17540 [(set_attr "isa" "noavx,noavx,avx")
17541 (set_attr "type" "ssemov")
17542 (set_attr "prefix_extra" "1")
17543 (set_attr "length_immediate" "1")
17544 (set_attr "prefix" "orig,orig,vex")
17545 (set_attr "mode" "TI")])
17547 ;; The builtin uses an 8-bit immediate. Expand that.
;; The 256-bit vpblendw hardware immediate only has 8 bits, each
;; controlling the same word position in both 128-bit lanes, so the
;; expander duplicates the builtin's 8-bit mask into a 16-bit value
;; (val << 8 | val) matched by avx2_pblendw_operand below.
17548 (define_expand "avx2_pblendw"
17549 [(set (match_operand:V16HI 0 "register_operand")
17551 (match_operand:V16HI 2 "nonimmediate_operand")
17552 (match_operand:V16HI 1 "register_operand")
17553 (match_operand:SI 3 "const_0_to_255_operand")))]
17556 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
17557 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn: at output time the duplicated 16-bit selector is
;; masked back down to the low 8 bits actually encoded in vpblendw.
17560 (define_insn "*avx2_pblendw"
17561 [(set (match_operand:V16HI 0 "register_operand" "=x")
17563 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
17564 (match_operand:V16HI 1 "register_operand" "x")
17565 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
17568 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
17569 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17571 [(set_attr "type" "ssemov")
17572 (set_attr "prefix_extra" "1")
17573 (set_attr "length_immediate" "1")
17574 (set_attr "prefix" "vex")
17575 (set_attr "mode" "OI")])
;; Immediate dword blend (vpblendd) for the VI4_AVX2 modes: bit i of the
;; 8-bit immediate selects dword i from operand 2, otherwise operand 1.
17577 (define_insn "avx2_pblendd<mode>"
17578 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
17579 (vec_merge:VI4_AVX2
17580 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
17581 (match_operand:VI4_AVX2 1 "register_operand" "x")
17582 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
17584 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17585 [(set_attr "type" "ssemov")
17586 (set_attr "prefix_extra" "1")
17587 (set_attr "length_immediate" "1")
17588 (set_attr "prefix" "vex")
17589 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw: horizontal minimum of the eight unsigned words plus its
;; position, modeled as an opaque UNSPEC since there is no natural RTL.
17591 (define_insn "sse4_1_phminposuw"
17592 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17593 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
17594 UNSPEC_PHMINPOSUW))]
17596 "%vphminposuw\t{%1, %0|%0, %1}"
17597 [(set_attr "isa" "noavx,noavx,avx")
17598 (set_attr "type" "sselog1")
17599 (set_attr "prefix_extra" "1")
17600 (set_attr "prefix" "orig,orig,vex")
17601 (set_attr "mode" "TI")])
;; Byte -> word extension, AVX2 and AVX512BW widths.  <code> iterates
;; over sign/zero extension; <extsuffix> picks the matching sx/zx
;; mnemonic suffix, and <mask_name>/<mask_operand2> add the optional
;; AVX-512 masking variants.
17603 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
17604 [(set (match_operand:V16HI 0 "register_operand" "=v")
17606 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17607 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17608 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17609 [(set_attr "type" "ssemov")
17610 (set_attr "prefix_extra" "1")
17611 (set_attr "prefix" "maybe_evex")
17612 (set_attr "mode" "OI")])
;; Recognize a byte interleave with a zero vector (matched via
;; pmovzx_parallel) as a zero extension; after reload, rewrite it as a
;; plain zero_extend on lowpart subregs so vpmovzxbw is emitted.
17614 (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1"
17615 [(set (match_operand:V32QI 0 "register_operand" "=v")
17618 (match_operand:V32QI 1 "nonimmediate_operand" "vm")
17619 (match_operand:V32QI 2 "const0_operand" "C"))
17620 (match_parallel 3 "pmovzx_parallel"
17621 [(match_operand 4 "const_int_operand" "n")])))]
17624 "&& reload_completed"
17625 [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
17627 operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
17628 operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode);
;; Standard-name expander for the unmasked V16QI -> V16HI extension.
17631 (define_expand "<insn>v16qiv16hi2"
17632 [(set (match_operand:V16HI 0 "register_operand")
17634 (match_operand:V16QI 1 "nonimmediate_operand")))]
;; 512-bit byte -> word extension (AVX512BW), EVEX-encoded.
17637 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
17638 [(set (match_operand:V32HI 0 "register_operand" "=v")
17640 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
17642 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17643 [(set_attr "type" "ssemov")
17644 (set_attr "prefix_extra" "1")
17645 (set_attr "prefix" "evex")
17646 (set_attr "mode" "XI")])
;; 512-bit analogue of the interleave-with-zero -> zero_extend split.
17648 (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_1"
17649 [(set (match_operand:V64QI 0 "register_operand" "=v")
17652 (match_operand:V64QI 1 "nonimmediate_operand" "vm")
17653 (match_operand:V64QI 2 "const0_operand" "C"))
17654 (match_parallel 3 "pmovzx_parallel"
17655 [(match_operand 4 "const_int_operand" "n")])))]
17658 "&& reload_completed"
17659 [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
17661 operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
17662 operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode);
;; Standard-name expander for the V32QI -> V32HI extension.
17665 (define_expand "<insn>v32qiv32hi2"
17666 [(set (match_operand:V32HI 0 "register_operand")
17668 (match_operand:V32QI 1 "nonimmediate_operand")))]
;; SSE4.1 byte -> word extension of the low 8 bytes of an XMM register.
;; The source is a V16QI register; a vec_select picks elements 0..7.
17671 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
17672 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17675 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17676 (parallel [(const_int 0) (const_int 1)
17677 (const_int 2) (const_int 3)
17678 (const_int 4) (const_int 5)
17679 (const_int 6) (const_int 7)]))))]
17680 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17681 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17682 [(set_attr "isa" "noavx,noavx,avx")
17683 (set_attr "type" "ssemov")
17684 (set_attr "prefix_extra" "1")
17685 (set_attr "prefix" "orig,orig,maybe_evex")
17686 (set_attr "mode" "TI")])
;; Memory-source form: pmovsxbw/pmovzxbw can load the 8 bytes directly.
17688 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
17689 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17691 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
17692 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17693 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17694 [(set_attr "isa" "noavx,noavx,avx")
17695 (set_attr "type" "ssemov")
17696 (set_attr "prefix_extra" "1")
17697 (set_attr "prefix" "orig,orig,maybe_evex")
17698 (set_attr "mode" "TI")])
;; Pre-reload split: a DImode memory load wrapped in vector selection is
;; rewritten as a direct extension from a V8QI memory reference.
17700 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
17701 [(set (match_operand:V8HI 0 "register_operand")
17706 (match_operand:DI 1 "memory_operand")
17708 (parallel [(const_int 0) (const_int 1)
17709 (const_int 2) (const_int 3)
17710 (const_int 4) (const_int 5)
17711 (const_int 6) (const_int 7)]))))]
17712 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
17713 && ix86_pre_reload_split ()"
17716 [(set (match_dup 0)
17717 (any_extend:V8HI (match_dup 1)))]
17718 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; After reload, turn an interleave-with-zero (pmovzx_parallel) into a
;; low-half zero extension; the memory case goes through a fresh
;; zero_extend SET built in the preparation statements.
17720 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
17721 [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,v")
17724 (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,vm")
17725 (match_operand:V16QI 2 "const0_operand" "C,C,C"))
17726 (match_parallel 3 "pmovzx_parallel"
17727 [(match_operand 4 "const_int_operand" "n,n,n")])))]
17730 "&& reload_completed"
17731 [(set (match_dup 0)
17735 (parallel [(const_int 0) (const_int 1)
17736 (const_int 2) (const_int 3)
17737 (const_int 4) (const_int 5)
17738 (const_int 6) (const_int 7)]))))]
17740 operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
17741 if (MEM_P (operands[1]))
17743 operands[1] = lowpart_subreg (V8QImode, operands[1], V16QImode);
17744 operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
17745 emit_insn (gen_rtx_SET (operands[0], operands[1]));
17749 [(set_attr "isa" "noavx,noavx,avx")])
;; Standard-name expander: register sources are widened to V16QI via a
;; paradoxical subreg so the vec_select pattern above can match.
17751 (define_expand "<insn>v8qiv8hi2"
17752 [(set (match_operand:V8HI 0 "register_operand")
17754 (match_operand:V8QI 1 "nonimmediate_operand")))]
17757 if (!MEM_P (operands[1]))
17759 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
17760 emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], operands[1]));
;; Byte -> dword extensions (vpmov[sz]xbd) at 512/256/128-bit widths,
;; each with a memory-source variant, a pre-reload split from a narrow
;; memory load, and a standard-name expander.
17765 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
17766 [(set (match_operand:V16SI 0 "register_operand" "=v")
17768 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17770 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17771 [(set_attr "type" "ssemov")
17772 (set_attr "prefix" "evex")
17773 (set_attr "mode" "XI")])
;; Standard-name expander for V16QI -> V16SI.
17775 (define_expand "<insn>v16qiv16si2"
17776 [(set (match_operand:V16SI 0 "register_operand")
17778 (match_operand:V16QI 1 "nonimmediate_operand")))]
;; 256-bit form: extend the low 8 bytes selected from a V16QI register.
17781 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
17782 [(set (match_operand:V8SI 0 "register_operand" "=v")
17785 (match_operand:V16QI 1 "register_operand" "v")
17786 (parallel [(const_int 0) (const_int 1)
17787 (const_int 2) (const_int 3)
17788 (const_int 4) (const_int 5)
17789 (const_int 6) (const_int 7)]))))]
17790 "TARGET_AVX2 && <mask_avx512vl_condition>"
17791 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17792 [(set_attr "type" "ssemov")
17793 (set_attr "prefix_extra" "1")
17794 (set_attr "prefix" "maybe_evex")
17795 (set_attr "mode" "OI")])
;; Memory-source variant of the 256-bit byte -> dword extension.
17797 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
17798 [(set (match_operand:V8SI 0 "register_operand" "=v")
17800 (match_operand:V8QI 1 "memory_operand" "m")))]
17801 "TARGET_AVX2 && <mask_avx512vl_condition>"
17802 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17803 [(set_attr "type" "ssemov")
17804 (set_attr "prefix_extra" "1")
17805 (set_attr "prefix" "maybe_evex")
17806 (set_attr "mode" "OI")])
;; Pre-reload split: DImode memory load + selection becomes a direct
;; extension from a V8QI memory reference.
17808 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
17809 [(set (match_operand:V8SI 0 "register_operand")
17814 (match_operand:DI 1 "memory_operand")
17816 (parallel [(const_int 0) (const_int 1)
17817 (const_int 2) (const_int 3)
17818 (const_int 4) (const_int 5)
17819 (const_int 6) (const_int 7)]))))]
17820 "TARGET_AVX2 && <mask_avx512vl_condition>
17821 && ix86_pre_reload_split ()"
17824 [(set (match_dup 0)
17825 (any_extend:V8SI (match_dup 1)))]
17826 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; Standard-name expander: widen register sources to V16QI via subreg.
17828 (define_expand "<insn>v8qiv8si2"
17829 [(set (match_operand:V8SI 0 "register_operand")
17831 (match_operand:V8QI 1 "nonimmediate_operand")))]
17834 if (!MEM_P (operands[1]))
17836 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
17837 emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], operands[1]));
;; 128-bit form: extend the low 4 bytes of a V16QI register.
17842 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
17843 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17846 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17847 (parallel [(const_int 0) (const_int 1)
17848 (const_int 2) (const_int 3)]))))]
17849 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17850 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17851 [(set_attr "isa" "noavx,noavx,avx")
17852 (set_attr "type" "ssemov")
17853 (set_attr "prefix_extra" "1")
17854 (set_attr "prefix" "orig,orig,maybe_evex")
17855 (set_attr "mode" "TI")])
;; Memory-source variant of the 128-bit byte -> dword extension.
17857 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
17858 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17860 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
17861 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17862 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17863 [(set_attr "isa" "noavx,noavx,avx")
17864 (set_attr "type" "ssemov")
17865 (set_attr "prefix_extra" "1")
17866 (set_attr "prefix" "orig,orig,maybe_evex")
17867 (set_attr "mode" "TI")])
;; Pre-reload split: broadcast-style SImode memory load + selection is
;; rewritten as a direct extension from a V4QI memory reference.
17869 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
17870 [(set (match_operand:V4SI 0 "register_operand")
17875 (vec_duplicate:V4SI
17876 (match_operand:SI 1 "memory_operand"))
17878 [(const_int 0) (const_int 0)
17879 (const_int 0) (const_int 0)])
17881 (parallel [(const_int 0) (const_int 1)
17882 (const_int 2) (const_int 3)]))))]
17883 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17884 && ix86_pre_reload_split ()"
17887 [(set (match_dup 0)
17888 (any_extend:V4SI (match_dup 1)))]
17889 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
;; Standard-name expander: widen register sources to V16QI via subreg.
17891 (define_expand "<insn>v4qiv4si2"
17892 [(set (match_operand:V4SI 0 "register_operand")
17894 (match_operand:V4QI 1 "nonimmediate_operand")))]
17897 if (!MEM_P (operands[1]))
17899 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
17900 emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], operands[1]));
;; Word -> dword extensions (vpmov[sz]xwd), 512-bit and 256-bit widths,
;; plus after-reload splits recognizing an interleave with zero
;; (pmovzx_parallel) as a zero extension of the low half.
17905 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
17906 [(set (match_operand:V16SI 0 "register_operand" "=v")
17908 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
17910 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17911 [(set_attr "type" "ssemov")
17912 (set_attr "prefix" "evex")
17913 (set_attr "mode" "XI")])
;; Standard-name expander for V16HI -> V16SI.
17915 (define_expand "<insn>v16hiv16si2"
17916 [(set (match_operand:V16SI 0 "register_operand")
17918 (match_operand:V16HI 1 "nonimmediate_operand")))]
;; 512-bit interleave-with-zero -> zero_extend split (after reload,
;; operands rewritten to lowpart subregs).
17921 (define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
17922 [(set (match_operand:V32HI 0 "register_operand" "=v")
17925 (match_operand:V32HI 1 "nonimmediate_operand" "vm")
17926 (match_operand:V32HI 2 "const0_operand" "C"))
17927 (match_parallel 3 "pmovzx_parallel"
17928 [(match_operand 4 "const_int_operand" "n")])))]
17931 "&& reload_completed"
17932 [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
17934 operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
17935 operands[1] = lowpart_subreg (V16HImode, operands[1], V32HImode);
;; 256-bit word -> dword extension.
17938 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
17939 [(set (match_operand:V8SI 0 "register_operand" "=v")
17941 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17942 "TARGET_AVX2 && <mask_avx512vl_condition>"
17943 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17944 [(set_attr "type" "ssemov")
17945 (set_attr "prefix_extra" "1")
17946 (set_attr "prefix" "maybe_evex")
17947 (set_attr "mode" "OI")])
;; Standard-name expander for V8HI -> V8SI.
17949 (define_expand "<insn>v8hiv8si2"
17950 [(set (match_operand:V8SI 0 "register_operand")
17952 (match_operand:V8HI 1 "nonimmediate_operand")))]
;; 256-bit interleave-with-zero -> zero_extend split.
17955 (define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
17956 [(set (match_operand:V16HI 0 "register_operand" "=v")
17959 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
17960 (match_operand:V16HI 2 "const0_operand" "C"))
17961 (match_parallel 3 "pmovzx_parallel"
17962 [(match_operand 4 "const_int_operand" "n")])))]
17965 "&& reload_completed"
17966 [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
17968 operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
17969 operands[1] = lowpart_subreg (V8HImode, operands[1], V16HImode);
;; SSE4.1 word -> dword extension of the low 4 words of an XMM register,
;; with memory-source variant, pre-reload split from a DImode load, a
;; standard-name expander, and an after-reload pmovzx recognition split.
17972 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
17973 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17976 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17977 (parallel [(const_int 0) (const_int 1)
17978 (const_int 2) (const_int 3)]))))]
17979 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17980 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17981 [(set_attr "isa" "noavx,noavx,avx")
17982 (set_attr "type" "ssemov")
17983 (set_attr "prefix_extra" "1")
17984 (set_attr "prefix" "orig,orig,maybe_evex")
17985 (set_attr "mode" "TI")])
;; Memory-source variant (loads 4 words directly).
17987 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
17988 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17990 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
17991 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17992 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17993 [(set_attr "isa" "noavx,noavx,avx")
17994 (set_attr "type" "ssemov")
17995 (set_attr "prefix_extra" "1")
17996 (set_attr "prefix" "orig,orig,maybe_evex")
17997 (set_attr "mode" "TI")])
;; Pre-reload split: DImode memory load + selection -> direct extension
;; from a V4HI memory reference.
17999 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
18000 [(set (match_operand:V4SI 0 "register_operand")
18005 (match_operand:DI 1 "memory_operand")
18007 (parallel [(const_int 0) (const_int 1)
18008 (const_int 2) (const_int 3)]))))]
18009 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18010 && ix86_pre_reload_split ()"
18013 [(set (match_dup 0)
18014 (any_extend:V4SI (match_dup 1)))]
18015 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
;; Standard-name expander: widen register sources to V8HI via subreg.
18017 (define_expand "<insn>v4hiv4si2"
18018 [(set (match_operand:V4SI 0 "register_operand")
18020 (match_operand:V4HI 1 "nonimmediate_operand")))]
18023 if (!MEM_P (operands[1]))
18025 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
18026 emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], operands[1]));
;; After reload, a word interleave with zero (pmovzx_parallel) becomes a
;; zero extension of the low 4 words; memory sources get a rebuilt
;; zero_extend SET in the preparation statements.
18031 (define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_3"
18032 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
18035 (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
18036 (match_operand:V8HI 2 "const0_operand" "C,C,C"))
18037 (match_parallel 3 "pmovzx_parallel"
18038 [(match_operand 4 "const_int_operand" "n,n,n")])))]
18041 "&& reload_completed"
18042 [(set (match_dup 0)
18046 (parallel [(const_int 0) (const_int 1)
18047 (const_int 2) (const_int 3)]))))]
18049 operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
18050 if (MEM_P (operands[1]))
18052 operands[1] = lowpart_subreg (V4HImode, operands[1], V8HImode);
18053 operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
18054 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18058 [(set_attr "isa" "noavx,noavx,avx")])
;; Byte -> qword extensions (vpmov[sz]xbq) at 512/256/128-bit widths.
;; Register sources select the needed low bytes out of a full V16QI;
;; memory sources load just the narrow vector directly.
18060 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
18061 [(set (match_operand:V8DI 0 "register_operand" "=v")
18064 (match_operand:V16QI 1 "register_operand" "v")
18065 (parallel [(const_int 0) (const_int 1)
18066 (const_int 2) (const_int 3)
18067 (const_int 4) (const_int 5)
18068 (const_int 6) (const_int 7)]))))]
18070 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18071 [(set_attr "type" "ssemov")
18072 (set_attr "prefix" "evex")
18073 (set_attr "mode" "XI")])
;; Memory-source variant of the 512-bit byte -> qword extension.
18075 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
18076 [(set (match_operand:V8DI 0 "register_operand" "=v")
18078 (match_operand:V8QI 1 "memory_operand" "m")))]
18080 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18081 [(set_attr "type" "ssemov")
18082 (set_attr "prefix" "evex")
18083 (set_attr "mode" "XI")])
;; Pre-reload split: DImode memory load + selection -> direct extension
;; from a V8QI memory reference.
18085 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
18086 [(set (match_operand:V8DI 0 "register_operand")
18091 (match_operand:DI 1 "memory_operand")
18093 (parallel [(const_int 0) (const_int 1)
18094 (const_int 2) (const_int 3)
18095 (const_int 4) (const_int 5)
18096 (const_int 6) (const_int 7)]))))]
18097 "TARGET_AVX512F && ix86_pre_reload_split ()"
18100 [(set (match_dup 0)
18101 (any_extend:V8DI (match_dup 1)))]
18102 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; Standard-name expander: widen register sources to V16QI via subreg.
18104 (define_expand "<insn>v8qiv8di2"
18105 [(set (match_operand:V8DI 0 "register_operand")
18107 (match_operand:V8QI 1 "nonimmediate_operand")))]
18110 if (!MEM_P (operands[1]))
18112 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18113 emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], operands[1]));
;; 256-bit form: extend the low 4 bytes of a V16QI register.
18118 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
18119 [(set (match_operand:V4DI 0 "register_operand" "=v")
18122 (match_operand:V16QI 1 "register_operand" "v")
18123 (parallel [(const_int 0) (const_int 1)
18124 (const_int 2) (const_int 3)]))))]
18125 "TARGET_AVX2 && <mask_avx512vl_condition>"
18126 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18127 [(set_attr "type" "ssemov")
18128 (set_attr "prefix_extra" "1")
18129 (set_attr "prefix" "maybe_evex")
18130 (set_attr "mode" "OI")])
;; Memory-source variant of the 256-bit byte -> qword extension.
18132 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
18133 [(set (match_operand:V4DI 0 "register_operand" "=v")
18135 (match_operand:V4QI 1 "memory_operand" "m")))]
18136 "TARGET_AVX2 && <mask_avx512vl_condition>"
18137 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18138 [(set_attr "type" "ssemov")
18139 (set_attr "prefix_extra" "1")
18140 (set_attr "prefix" "maybe_evex")
18141 (set_attr "mode" "OI")])
;; Pre-reload split: broadcast-style SImode load + selection -> direct
;; extension from a V4QI memory reference.
18143 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
18144 [(set (match_operand:V4DI 0 "register_operand")
18149 (vec_duplicate:V4SI
18150 (match_operand:SI 1 "memory_operand"))
18152 [(const_int 0) (const_int 0)
18153 (const_int 0) (const_int 0)])
18155 (parallel [(const_int 0) (const_int 1)
18156 (const_int 2) (const_int 3)]))))]
18157 "TARGET_AVX2 && <mask_avx512vl_condition>
18158 && ix86_pre_reload_split ()"
18161 [(set (match_dup 0)
18162 (any_extend:V4DI (match_dup 1)))]
18163 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
;; Standard-name expander: widen register sources to V16QI via subreg.
;; NOTE(review): the subreg here converts from V8QImode, unlike the
;; sibling expanders which use the operand's own mode (V4QImode) --
;; looks suspicious, but the missing interior lines may justify it;
;; verify against the full file before changing.
18165 (define_expand "<insn>v4qiv4di2"
18166 [(set (match_operand:V4DI 0 "register_operand")
18168 (match_operand:V4QI 1 "nonimmediate_operand")))]
18171 if (!MEM_P (operands[1]))
18173 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18174 emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], operands[1]));
;; 128-bit form: extend the low 2 bytes of a V16QI register.
18179 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
18180 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18183 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
18184 (parallel [(const_int 0) (const_int 1)]))))]
18185 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18186 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18187 [(set_attr "isa" "noavx,noavx,avx")
18188 (set_attr "type" "ssemov")
18189 (set_attr "prefix_extra" "1")
18190 (set_attr "prefix" "orig,orig,maybe_evex")
18191 (set_attr "mode" "TI")])
;; Standard-name expander for V2QI -> V2DI (register source only).
18193 (define_expand "<insn>v2qiv2di2"
18194 [(set (match_operand:V2DI 0 "register_operand")
18196 (match_operand:V2QI 1 "register_operand")))]
18199 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V2QImode, 0);
18200 emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], operands[1]));
;; Word -> qword extensions (vpmov[sz]xwq) at 512/256/128-bit widths,
;; following the same structure as the byte -> qword family above.
18204 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
18205 [(set (match_operand:V8DI 0 "register_operand" "=v")
18207 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
18209 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18210 [(set_attr "type" "ssemov")
18211 (set_attr "prefix" "evex")
18212 (set_attr "mode" "XI")])
;; Standard-name expander for V8HI -> V8DI.
18214 (define_expand "<insn>v8hiv8di2"
18215 [(set (match_operand:V8DI 0 "register_operand")
18217 (match_operand:V8HI 1 "nonimmediate_operand")))]
;; 256-bit form: extend the low 4 words of a V8HI register.
18220 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
18221 [(set (match_operand:V4DI 0 "register_operand" "=v")
18224 (match_operand:V8HI 1 "register_operand" "v")
18225 (parallel [(const_int 0) (const_int 1)
18226 (const_int 2) (const_int 3)]))))]
18227 "TARGET_AVX2 && <mask_avx512vl_condition>"
18228 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18229 [(set_attr "type" "ssemov")
18230 (set_attr "prefix_extra" "1")
18231 (set_attr "prefix" "maybe_evex")
18232 (set_attr "mode" "OI")])
;; Memory-source variant of the 256-bit word -> qword extension.
18234 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
18235 [(set (match_operand:V4DI 0 "register_operand" "=v")
18237 (match_operand:V4HI 1 "memory_operand" "m")))]
18238 "TARGET_AVX2 && <mask_avx512vl_condition>"
18239 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18240 [(set_attr "type" "ssemov")
18241 (set_attr "prefix_extra" "1")
18242 (set_attr "prefix" "maybe_evex")
18243 (set_attr "mode" "OI")])
;; Pre-reload split: DImode memory load + selection -> direct extension
;; from a V4HI memory reference.
18245 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
18246 [(set (match_operand:V4DI 0 "register_operand")
18251 (match_operand:DI 1 "memory_operand")
18253 (parallel [(const_int 0) (const_int 1)
18254 (const_int 2) (const_int 3)]))))]
18255 "TARGET_AVX2 && <mask_avx512vl_condition>
18256 && ix86_pre_reload_split ()"
18259 [(set (match_dup 0)
18260 (any_extend:V4DI (match_dup 1)))]
18261 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
;; Standard-name expander: widen register sources to V8HI via subreg.
18263 (define_expand "<insn>v4hiv4di2"
18264 [(set (match_operand:V4DI 0 "register_operand")
18266 (match_operand:V4HI 1 "nonimmediate_operand")))]
18269 if (!MEM_P (operands[1]))
18271 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
18272 emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], operands[1]));
;; 128-bit form: extend the low 2 words of a V8HI register.
18277 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
18278 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18281 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
18282 (parallel [(const_int 0) (const_int 1)]))))]
18283 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18284 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18285 [(set_attr "isa" "noavx,noavx,avx")
18286 (set_attr "type" "ssemov")
18287 (set_attr "prefix_extra" "1")
18288 (set_attr "prefix" "orig,orig,maybe_evex")
18289 (set_attr "mode" "TI")])
;; Memory-source variant of the 128-bit word -> qword extension.
18291 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
18292 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18294 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
18295 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18296 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18297 [(set_attr "isa" "noavx,noavx,avx")
18298 (set_attr "type" "ssemov")
18299 (set_attr "prefix_extra" "1")
18300 (set_attr "prefix" "orig,orig,maybe_evex")
18301 (set_attr "mode" "TI")])
;; Pre-reload split: broadcast-style SImode load + selection -> direct
;; extension from a V2HI memory reference.
18303 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
18304 [(set (match_operand:V2DI 0 "register_operand")
18309 (vec_duplicate:V4SI
18310 (match_operand:SI 1 "memory_operand"))
18312 [(const_int 0) (const_int 0)
18313 (const_int 0) (const_int 0)])
18315 (parallel [(const_int 0) (const_int 1)]))))]
18316 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18317 && ix86_pre_reload_split ()"
18320 [(set (match_dup 0)
18321 (any_extend:V2DI (match_dup 1)))]
18322 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
;; Standard-name expander: widen register sources to V8HI via subreg.
18324 (define_expand "<insn>v2hiv2di2"
18325 [(set (match_operand:V2DI 0 "register_operand")
18327 (match_operand:V2HI 1 "nonimmediate_operand")))]
18330 if (!MEM_P (operands[1]))
18332 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V2HImode, 0);
18333 emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], operands[1]));
;; Dword -> qword extensions (vpmov[sz]xdq) at 512/256/128-bit widths,
;; with interleave-with-zero (pmovzx_parallel) recognition splits and
;; standard-name expanders, mirroring the narrower families above.
18338 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
18339 [(set (match_operand:V8DI 0 "register_operand" "=v")
18341 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
18343 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18344 [(set_attr "type" "ssemov")
18345 (set_attr "prefix" "evex")
18346 (set_attr "mode" "XI")])
;; 512-bit interleave-with-zero -> zero_extend split (after reload,
;; operands rewritten to lowpart subregs).
18348 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_1"
18349 [(set (match_operand:V16SI 0 "register_operand" "=v")
18352 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
18353 (match_operand:V16SI 2 "const0_operand" "C"))
18354 (match_parallel 3 "pmovzx_parallel"
18355 [(match_operand 4 "const_int_operand" "n")])))]
18358 "&& reload_completed"
18359 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
18361 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
18362 operands[1] = lowpart_subreg (V8SImode, operands[1], V16SImode);
;; Standard-name expander for V8SI -> V8DI.
18365 (define_expand "<insn>v8siv8di2"
18366 [(set (match_operand:V8DI 0 "register_operand" "=v")
18368 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
;; 256-bit dword -> qword extension.
18371 (define_insn "avx2_<code>v4siv4di2<mask_name>"
18372 [(set (match_operand:V4DI 0 "register_operand" "=v")
18374 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
18375 "TARGET_AVX2 && <mask_avx512vl_condition>"
18376 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18377 [(set_attr "type" "ssemov")
18378 (set_attr "prefix" "maybe_evex")
18379 (set_attr "prefix_extra" "1")
18380 (set_attr "mode" "OI")])
;; 256-bit interleave-with-zero -> zero_extend split.
18382 (define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
18383 [(set (match_operand:V8SI 0 "register_operand" "=v")
18386 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
18387 (match_operand:V8SI 2 "const0_operand" "C"))
18388 (match_parallel 3 "pmovzx_parallel"
18389 [(match_operand 4 "const_int_operand" "n")])))]
18392 "&& reload_completed"
18393 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
18395 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
18396 operands[1] = lowpart_subreg (V4SImode, operands[1], V8SImode);
;; Standard-name expander for V4SI -> V4DI.
18399 (define_expand "<insn>v4siv4di2"
18400 [(set (match_operand:V4DI 0 "register_operand" "=v")
18402 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
;; 128-bit form: extend the low 2 dwords of a V4SI register.
18405 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
18406 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18409 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
18410 (parallel [(const_int 0) (const_int 1)]))))]
18411 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18412 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18413 [(set_attr "isa" "noavx,noavx,avx")
18414 (set_attr "type" "ssemov")
18415 (set_attr "prefix_extra" "1")
18416 (set_attr "prefix" "orig,orig,maybe_evex")
18417 (set_attr "mode" "TI")])
;; Memory-source variant of the 128-bit dword -> qword extension.
18419 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
18420 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
18422 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
18423 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18424 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18425 [(set_attr "isa" "noavx,noavx,avx")
18426 (set_attr "type" "ssemov")
18427 (set_attr "prefix_extra" "1")
18428 (set_attr "prefix" "orig,orig,maybe_evex")
18429 (set_attr "mode" "TI")])
;; Pre-reload split: DImode memory load + selection -> direct extension
;; from a V2SI memory reference.
18431 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
18432 [(set (match_operand:V2DI 0 "register_operand")
18437 (match_operand:DI 1 "memory_operand")
18439 (parallel [(const_int 0) (const_int 1)]))))]
18440 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18441 && ix86_pre_reload_split ()"
18444 [(set (match_dup 0)
18445 (any_extend:V2DI (match_dup 1)))]
18446 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
;; After reload, a dword interleave with zero (pmovzx_parallel) becomes
;; a zero extension of the low 2 dwords; memory sources get a rebuilt
;; zero_extend SET in the preparation statements.
18448 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_3"
18449 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18452 (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
18453 (match_operand:V4SI 2 "const0_operand" "C,C,C"))
18454 (match_parallel 3 "pmovzx_parallel"
18455 [(match_operand 4 "const_int_operand" "n,n,n")])))]
18458 "&& reload_completed"
18459 [(set (match_dup 0)
18461 (vec_select:V2SI (match_dup 1)
18462 (parallel [(const_int 0) (const_int 1)]))))]
18464 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
18465 if (MEM_P (operands[1]))
18467 operands[1] = lowpart_subreg (V2SImode, operands[1], V4SImode);
18468 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
18469 emit_insn (gen_rtx_SET (operands[0], operands[1]))
18473 [(set_attr "isa" "noavx,noavx,avx")])
;; Standard-name expander: widen register sources to V4SI via subreg.
18475 (define_expand "<insn>v2siv2di2"
18476 [(set (match_operand:V2DI 0 "register_operand")
18478 (match_operand:V2SI 1 "nonimmediate_operand")))]
18481 if (!MEM_P (operands[1]))
18483 operands[1] = simplify_gen_subreg (V4SImode, operands[1], V2SImode, 0);
18484 emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], operands[1]));
18489 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
18490 ;; setting FLAGS_REG. But it is not a really compare instruction.
18491 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
18492 [(set (reg:CC FLAGS_REG)
18493 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
18494 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
18497 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
18498 [(set_attr "type" "ssecomi")
18499 (set_attr "prefix_extra" "1")
18500 (set_attr "prefix" "vex")
18501 (set_attr "mode" "<MODE>")])
18503 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
18504 ;; But it is not a really compare instruction.
18505 (define_insn "<sse4_1>_ptest<mode>"
18506 [(set (reg:CC FLAGS_REG)
18507 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
18508 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
18511 "%vptest\t{%1, %0|%0, %1}"
18512 [(set_attr "isa" "noavx,noavx,avx")
18513 (set_attr "type" "ssecomi")
18514 (set_attr "prefix_extra" "1")
18515 (set_attr "prefix" "orig,orig,vex")
18516 (set (attr "btver2_decode")
18518 (match_test "<sseinsnmode>mode==OImode")
18519 (const_string "vector")
18520 (const_string "*")))
18521 (set_attr "mode" "<sseinsnmode>")])
;; ptest applied to TFmode values (e.g. __int128 in vector registers).
18523 (define_insn "ptesttf2"
18524 [(set (reg:CC FLAGS_REG)
18525 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
18526 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
18529 "%vptest\t{%1, %0|%0, %1}"
18530 [(set_attr "isa" "noavx,noavx,avx")
18531 (set_attr "type" "ssecomi")
18532 (set_attr "prefix_extra" "1")
18533 (set_attr "prefix" "orig,orig,vex")
18534 (set_attr "mode" "TI")])
;; nearbyint: round per MXCSR mode without raising inexact
;; (ROUND_MXCSR | ROUND_NO_EXC immediate to roundps/pd).
18536 (define_expand "nearbyint<mode>2"
18537 [(set (match_operand:VF 0 "register_operand")
18539 [(match_operand:VF 1 "vector_operand")
18543 "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
;; rint: round per MXCSR mode; inexact may be raised (ROUND_MXCSR only).
18545 (define_expand "rint<mode>2"
18546 [(set (match_operand:VF 0 "register_operand")
18548 [(match_operand:VF 1 "vector_operand")
18552 "operands[2] = GEN_INT (ROUND_MXCSR);")
;; SSE4.1/AVX roundps/roundpd with an explicit 0..15 rounding-control
;; immediate in operand 2.
18554 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
18555 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18557 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
18558 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
18561 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18562 [(set_attr "isa" "noavx,noavx,avx")
18563 (set_attr "type" "ssecvt")
18564 (set_attr "prefix_data16" "1,1,*")
18565 (set_attr "prefix_extra" "1")
18566 (set_attr "length_immediate" "1")
18567 (set_attr "prefix" "orig,orig,vex")
18568 (set_attr "mode" "<MODE>")])
;; round-to-int helpers: FP round with immediate control, then a
;; truncating FP->signed-int conversion into operand 0.
18570 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
18571 [(match_operand:<sseintvecmode> 0 "register_operand")
18572 (match_operand:VF1_128_256 1 "vector_operand")
18573 (match_operand:SI 2 "const_0_to_15_operand")]
18576 rtx tmp = gen_reg_rtx (<MODE>mode);
18579 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
18582 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit round is implemented with vrndscale (same 0..15 immediate).
18586 (define_expand "avx512f_round<castmode>512"
18587 [(match_operand:VF_512 0 "register_operand")
18588 (match_operand:VF_512 1 "nonimmediate_operand")
18589 (match_operand:SI 2 "const_0_to_15_operand")]
18592 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
;; V16SF round + truncating convert to V16SI.
18596 (define_expand "avx512f_roundps512_sfix"
18597 [(match_operand:V16SI 0 "register_operand")
18598 (match_operand:V16SF 1 "nonimmediate_operand")
18599 (match_operand:SI 2 "const_0_to_15_operand")]
18602 rtx tmp = gen_reg_rtx (V16SFmode);
18603 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
18604 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
;; Round two DF vectors and pack the results into one SI vector.  For
;; V2DF on AVX (unless 128-bit ops are preferred) the two inputs are
;; concatenated into V4DF so a single 256-bit roundpd handles both.
18608 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
18609 [(match_operand:<ssepackfltmode> 0 "register_operand")
18610 (match_operand:VF2 1 "vector_operand")
18611 (match_operand:VF2 2 "vector_operand")
18612 (match_operand:SI 3 "const_0_to_15_operand")]
18617 if (<MODE>mode == V2DFmode
18618 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
18620 rtx tmp2 = gen_reg_rtx (V4DFmode);
18622 tmp0 = gen_reg_rtx (V4DFmode);
18623 tmp1 = force_reg (V2DFmode, operands[1]);
18625 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
18626 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
18627 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
18631 tmp0 = gen_reg_rtx (<MODE>mode);
18632 tmp1 = gen_reg_rtx (<MODE>mode);
18635 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
18638 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
18641 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1))
;; Scalar rounds{s,d}: round the low element of operand 2, merge the
;; upper elements from operand 1.  Four alternatives: two non-AVX,
;; AVX vrounds*, and AVX512F vrndscale* (EVEX).
18646 (define_insn "sse4_1_round<ssescalarmodesuffix>"
18647 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18650 [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18651 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18653 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18657 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18658 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18659 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
18660 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
18661 [(set_attr "isa" "noavx,noavx,avx,avx512f")
18662 (set_attr "type" "ssecvt")
18663 (set_attr "length_immediate" "1")
18664 (set_attr "prefix_data16" "1,1,*,*")
18665 (set_attr "prefix_extra" "1")
18666 (set_attr "prefix" "orig,orig,vex,evex")
18667 (set_attr "mode" "<MODE>")])
;; Anonymous combiner variant: scalar source broadcast via
;; vec_duplicate instead of a vector operand.
18669 (define_insn "*sse4_1_round<ssescalarmodesuffix>"
18670 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18672 (vec_duplicate:VF_128
18673 (unspec:<ssescalarmode>
18674 [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18675 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18677 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18681 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18682 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18683 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
18684 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18685 [(set_attr "isa" "noavx,noavx,avx,avx512f")
18686 (set_attr "type" "ssecvt")
18687 (set_attr "length_immediate" "1")
18688 (set_attr "prefix_data16" "1,1,*,*")
18689 (set_attr "prefix_extra" "1")
18690 (set_attr "prefix" "orig,orig,vex,evex")
18691 (set_attr "mode" "<MODE>")])
;; round<mode>2 (round-half-away-from-zero): add copysign(pred_half, x)
;; where pred_half = nextafter(0.5, 0.0), then truncate (ROUND_TRUNC).
;; Using the predecessor of 0.5 avoids rounding values exactly halfway
;; below an odd integer the wrong way.  Needs !flag_trapping_math since
;; the truncation may raise inexact.
18693 (define_expand "round<mode>2"
18694 [(set (match_dup 3)
18696 (match_operand:VF 1 "register_operand")
18698 (set (match_operand:VF 0 "register_operand")
18700 [(match_dup 3) (match_dup 4)]
18702 "TARGET_SSE4_1 && !flag_trapping_math"
18704 machine_mode scalar_mode;
18705 const struct real_format *fmt;
18706 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
18707 rtx half, vec_half;
18709 scalar_mode = GET_MODE_INNER (<MODE>mode);
18711 /* load nextafter (0.5, 0.0) */
18712 fmt = REAL_MODE_FORMAT (scalar_mode);
18713 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
18714 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
18715 half = const_double_from_real_value (pred_half, scalar_mode);
18717 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
18718 vec_half = force_reg (<MODE>mode, vec_half);
18720 operands[2] = gen_reg_rtx (<MODE>mode);
18721 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
18723 operands[3] = gen_reg_rtx (<MODE>mode);
18724 operands[4] = GEN_INT (ROUND_TRUNC);
;; round then truncating convert to the matching signed-int vector.
18727 (define_expand "round<mode>2_sfix"
18728 [(match_operand:<sseintvecmode> 0 "register_operand")
18729 (match_operand:VF1 1 "register_operand")]
18730 "TARGET_SSE4_1 && !flag_trapping_math"
18732 rtx tmp = gen_reg_rtx (<MODE>mode);
18734 emit_insn (gen_round<mode>2 (tmp, operands[1]));
18737 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two DF vectors and pack into one SI vector; same V2DF->V4DF
;; concat fast path as the vec_pack_sfix expander for roundpd.
18741 (define_expand "round<mode>2_vec_pack_sfix"
18742 [(match_operand:<ssepackfltmode> 0 "register_operand")
18743 (match_operand:VF2 1 "register_operand")
18744 (match_operand:VF2 2 "register_operand")]
18745 "TARGET_SSE4_1 && !flag_trapping_math"
18749 if (<MODE>mode == V2DFmode
18750 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
18752 rtx tmp2 = gen_reg_rtx (V4DFmode);
18754 tmp0 = gen_reg_rtx (V4DFmode);
18755 tmp1 = force_reg (V2DFmode, operands[1]);
18757 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
18758 emit_insn (gen_roundv4df2 (tmp2, tmp0));
18759 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
18763 tmp0 = gen_reg_rtx (<MODE>mode);
18764 tmp1 = gen_reg_rtx (<MODE>mode);
18766 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
18767 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
18770 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
18775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18777 ;; Intel SSE4.2 string/text processing instructions
18779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing ECX index, XMM0 mask and flags.
;; Split before reload into only the variants whose results are live,
;; as determined from REG_UNUSED notes on the combined insn.
18781 (define_insn_and_split "sse4_2_pcmpestr"
18782 [(set (match_operand:SI 0 "register_operand" "=c,c")
18784 [(match_operand:V16QI 2 "register_operand" "x,x")
18785 (match_operand:SI 3 "register_operand" "a,a")
18786 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
18787 (match_operand:SI 5 "register_operand" "d,d")
18788 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
18790 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
18798 (set (reg:CC FLAGS_REG)
18807 && ix86_pre_reload_split ()"
18812 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
18813 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
18814 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
18817 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
18818 operands[3], operands[4],
18819 operands[5], operands[6]));
18821 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
18822 operands[3], operands[4],
18823 operands[5], operands[6]));
18824 if (flags && !(ecx || xmm0))
18825 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
18826 operands[2], operands[3],
18827 operands[4], operands[5],
18829 if (!(flags || ecx || xmm0))
18830 emit_note (NOTE_INSN_DELETED);
18834 [(set_attr "type" "sselog")
18835 (set_attr "prefix_data16" "1")
18836 (set_attr "prefix_extra" "1")
18837 (set_attr "length_immediate" "1")
18838 (set_attr "memory" "none,load")
18839 (set_attr "mode" "TI")])
;; pcmpestri: explicit-length string compare, index result in ECX
;; (operand 0 constrained to "c"); lengths in EAX/EDX ("a"/"d").
18841 (define_insn "sse4_2_pcmpestri"
18842 [(set (match_operand:SI 0 "register_operand" "=c,c")
18844 [(match_operand:V16QI 1 "register_operand" "x,x")
18845 (match_operand:SI 2 "register_operand" "a,a")
18846 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18847 (match_operand:SI 4 "register_operand" "d,d")
18848 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
18850 (set (reg:CC FLAGS_REG)
18859 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
18860 [(set_attr "type" "sselog")
18861 (set_attr "prefix_data16" "1")
18862 (set_attr "prefix_extra" "1")
18863 (set_attr "prefix" "maybe_vex")
18864 (set_attr "length_immediate" "1")
18865 (set_attr "btver2_decode" "vector")
18866 (set_attr "memory" "none,load")
18867 (set_attr "mode" "TI")])
;; pcmpestrm: mask result in XMM0 (operand 0 constrained to "Yz").
18869 (define_insn "sse4_2_pcmpestrm"
18870 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
18872 [(match_operand:V16QI 1 "register_operand" "x,x")
18873 (match_operand:SI 2 "register_operand" "a,a")
18874 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18875 (match_operand:SI 4 "register_operand" "d,d")
18876 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
18878 (set (reg:CC FLAGS_REG)
18887 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
18888 [(set_attr "type" "sselog")
18889 (set_attr "prefix_data16" "1")
18890 (set_attr "prefix_extra" "1")
18891 (set_attr "length_immediate" "1")
18892 (set_attr "prefix" "maybe_vex")
18893 (set_attr "btver2_decode" "vector")
18894 (set_attr "memory" "none,load")
18895 (set_attr "mode" "TI")])
;; Flags-only variant: ECX/XMM0 results are dead, so both appear as
;; match_scratch clobbers and either encoding may be chosen.
18897 (define_insn "sse4_2_pcmpestr_cconly"
18898 [(set (reg:CC FLAGS_REG)
18900 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
18901 (match_operand:SI 3 "register_operand" "a,a,a,a")
18902 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
18903 (match_operand:SI 5 "register_operand" "d,d,d,d")
18904 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
18906 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
18907 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
18910 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
18911 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
18912 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
18913 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
18914 [(set_attr "type" "sselog")
18915 (set_attr "prefix_data16" "1")
18916 (set_attr "prefix_extra" "1")
18917 (set_attr "length_immediate" "1")
18918 (set_attr "memory" "none,load,none,load")
18919 (set_attr "btver2_decode" "vector,vector,vector,vector")
18920 (set_attr "prefix" "maybe_vex")
18921 (set_attr "mode" "TI")])
;; Implicit-length (NUL-terminated) string compares.  Same structure as
;; the pcmpestr patterns but without the EAX/EDX length operands.
;; Combined pattern; split before reload into only the live-result
;; variants based on REG_UNUSED notes.
18923 (define_insn_and_split "sse4_2_pcmpistr"
18924 [(set (match_operand:SI 0 "register_operand" "=c,c")
18926 [(match_operand:V16QI 2 "register_operand" "x,x")
18927 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18928 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
18930 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
18936 (set (reg:CC FLAGS_REG)
18943 && ix86_pre_reload_split ()"
18948 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
18949 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
18950 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
18953 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
18954 operands[3], operands[4]));
18956 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
18957 operands[3], operands[4]));
18958 if (flags && !(ecx || xmm0))
18959 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
18960 operands[2], operands[3],
18962 if (!(flags || ecx || xmm0))
18963 emit_note (NOTE_INSN_DELETED);
18967 [(set_attr "type" "sselog")
18968 (set_attr "prefix_data16" "1")
18969 (set_attr "prefix_extra" "1")
18970 (set_attr "length_immediate" "1")
18971 (set_attr "memory" "none,load")
18972 (set_attr "mode" "TI")])
;; Index result in ECX.
18974 (define_insn "sse4_2_pcmpistri"
18975 [(set (match_operand:SI 0 "register_operand" "=c,c")
18977 [(match_operand:V16QI 1 "register_operand" "x,x")
18978 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
18979 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
18981 (set (reg:CC FLAGS_REG)
18988 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
18989 [(set_attr "type" "sselog")
18990 (set_attr "prefix_data16" "1")
18991 (set_attr "prefix_extra" "1")
18992 (set_attr "length_immediate" "1")
18993 (set_attr "prefix" "maybe_vex")
18994 (set_attr "memory" "none,load")
18995 (set_attr "btver2_decode" "vector")
18996 (set_attr "mode" "TI")])
;; Mask result in XMM0.
18998 (define_insn "sse4_2_pcmpistrm"
18999 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
19001 [(match_operand:V16QI 1 "register_operand" "x,x")
19002 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19003 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19005 (set (reg:CC FLAGS_REG)
19012 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
19013 [(set_attr "type" "sselog")
19014 (set_attr "prefix_data16" "1")
19015 (set_attr "prefix_extra" "1")
19016 (set_attr "length_immediate" "1")
19017 (set_attr "prefix" "maybe_vex")
19018 (set_attr "memory" "none,load")
19019 (set_attr "btver2_decode" "vector")
19020 (set_attr "mode" "TI")])
;; Flags-only variant; dead ECX/XMM0 results become scratch clobbers.
19022 (define_insn "sse4_2_pcmpistr_cconly"
19023 [(set (reg:CC FLAGS_REG)
19025 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
19026 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
19027 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
19029 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
19030 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
19033 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
19034 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
19035 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
19036 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
19037 [(set_attr "type" "sselog")
19038 (set_attr "prefix_data16" "1")
19039 (set_attr "prefix_extra" "1")
19040 (set_attr "length_immediate" "1")
19041 (set_attr "memory" "none,load,none,load")
19042 (set_attr "prefix" "maybe_vex")
19043 (set_attr "btver2_decode" "vector,vector,vector,vector")
19044 (set_attr "mode" "TI")])
19046 ;; Packed float variants
;; Memory mode for the SF gather/scatter prefetches, keyed on the
;; index-vector mode.
19047 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
19048 [(V8DI "V8SF") (V16SI "V16SF")])
;; vgatherpf{0,1}{d,q}ps expander: operand 4 selects the prefetch hint
;; (2 = T0, 3 = T1); the expander wraps the VSIB address components in
;; an UNSPEC_VSIBADDR so the insn pattern can match them.
19050 (define_expand "avx512pf_gatherpf<mode>sf"
19052 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19053 (mem:<GATHER_SCATTER_SF_MEM_MODE>
19055 [(match_operand 2 "vsib_address_operand")
19056 (match_operand:VI48_512 1 "register_operand")
19057 (match_operand:SI 3 "const1248_operand")]))
19058 (match_operand:SI 4 "const_2_to_3_operand")]
19059 UNSPEC_GATHER_PREFETCH)]
19063 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19064 operands[3]), UNSPEC_VSIBADDR);
;; Matching insn; selects the hint-0/hint-1 mnemonic from operand 4.
19067 (define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
19069 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19070 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
19072 [(match_operand:P 2 "vsib_address_operand" "Tv")
19073 (match_operand:VI48_512 1 "register_operand" "v")
19074 (match_operand:SI 3 "const1248_operand" "n")]
19076 (match_operand:SI 4 "const_2_to_3_operand" "n")]
19077 UNSPEC_GATHER_PREFETCH)]
19080 switch (INTVAL (operands[4]))
19083 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19084 gas changed what it requires incompatibly. */
19085 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19087 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19089 gcc_unreachable ();
19092 [(set_attr "type" "sse")
19093 (set_attr "prefix" "evex")
19094 (set_attr "mode" "XI")])
19096 ;; Packed double variants
19097 (define_expand "avx512pf_gatherpf<mode>df"
19099 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19102 [(match_operand 2 "vsib_address_operand")
19103 (match_operand:VI4_256_8_512 1 "register_operand")
19104 (match_operand:SI 3 "const1248_operand")]))
19105 (match_operand:SI 4 "const_2_to_3_operand")]
19106 UNSPEC_GATHER_PREFETCH)]
19110 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19111 operands[3]), UNSPEC_VSIBADDR);
19114 (define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
19116 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19117 (match_operator:V8DF 5 "vsib_mem_operator"
19119 [(match_operand:P 2 "vsib_address_operand" "Tv")
19120 (match_operand:VI4_256_8_512 1 "register_operand" "v")
19121 (match_operand:SI 3 "const1248_operand" "n")]
19123 (match_operand:SI 4 "const_2_to_3_operand" "n")]
19124 UNSPEC_GATHER_PREFETCH)]
19127 switch (INTVAL (operands[4]))
19130 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19131 gas changed what it requires incompatibly. */
19132 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19134 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19136 gcc_unreachable ();
19139 [(set_attr "type" "sse")
19140 (set_attr "prefix" "evex")
19141 (set_attr "mode" "XI")])
19143 ;; Packed float variants
;; vscatterpf{0,1}{d,q}ps expander; operand 4 is a const2367_operand
;; selecting among the scatter prefetch hint encodings.
19144 (define_expand "avx512pf_scatterpf<mode>sf"
19146 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19147 (mem:<GATHER_SCATTER_SF_MEM_MODE>
19149 [(match_operand 2 "vsib_address_operand")
19150 (match_operand:VI48_512 1 "register_operand")
19151 (match_operand:SI 3 "const1248_operand")]))
19152 (match_operand:SI 4 "const2367_operand")]
19153 UNSPEC_SCATTER_PREFETCH)]
19157 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19158 operands[3]), UNSPEC_VSIBADDR);
;; Matching insn; picks vscatterpf0/vscatterpf1 from operand 4.
19161 (define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
19163 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19164 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
19166 [(match_operand:P 2 "vsib_address_operand" "Tv")
19167 (match_operand:VI48_512 1 "register_operand" "v")
19168 (match_operand:SI 3 "const1248_operand" "n")]
19170 (match_operand:SI 4 "const2367_operand" "n")]
19171 UNSPEC_SCATTER_PREFETCH)]
19174 switch (INTVAL (operands[4]))
19178 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19179 gas changed what it requires incompatibly. */
19180 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19183 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
19185 gcc_unreachable ();
19188 [(set_attr "type" "sse")
19189 (set_attr "prefix" "evex")
19190 (set_attr "mode" "XI")])
19192 ;; Packed double variants
19193 (define_expand "avx512pf_scatterpf<mode>df"
19195 [(match_operand:<avx512fmaskmode> 0 "register_operand")
19198 [(match_operand 2 "vsib_address_operand")
19199 (match_operand:VI4_256_8_512 1 "register_operand")
19200 (match_operand:SI 3 "const1248_operand")]))
19201 (match_operand:SI 4 "const2367_operand")]
19202 UNSPEC_SCATTER_PREFETCH)]
19206 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
19207 operands[3]), UNSPEC_VSIBADDR);
19210 (define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
19212 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
19213 (match_operator:V8DF 5 "vsib_mem_operator"
19215 [(match_operand:P 2 "vsib_address_operand" "Tv")
19216 (match_operand:VI4_256_8_512 1 "register_operand" "v")
19217 (match_operand:SI 3 "const1248_operand" "n")]
19219 (match_operand:SI 4 "const2367_operand" "n")]
19220 UNSPEC_SCATTER_PREFETCH)]
19223 switch (INTVAL (operands[4]))
19227 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19228 gas changed what it requires incompatibly. */
19229 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19232 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
19234 gcc_unreachable ();
19237 [(set_attr "type" "sse")
19238 (set_attr "prefix" "evex")
19239 (set_attr "mode" "XI")])
;; AVX512ER vexp2ps/pd: 2^x approximation on 512-bit vectors, with
;; optional masking and SAE (suppress-all-exceptions) via substs.
19241 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
19242 [(set (match_operand:VF_512 0 "register_operand" "=v")
19244 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19247 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19248 [(set_attr "prefix" "evex")
19249 (set_attr "type" "sse")
19250 (set_attr "mode" "<MODE>")])
;; AVX512ER vrcp28ps/pd: reciprocal approximation, 2^-28 relative error.
19252 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
19253 [(set (match_operand:VF_512 0 "register_operand" "=v")
19255 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19258 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19259 [(set_attr "prefix" "evex")
19260 (set_attr "type" "sse")
19261 (set_attr "mode" "<MODE>")])
;; AVX512ER vrcp28ss/sd: reciprocal approximation of the low element of
;; operand 1, upper elements taken from operand 2.
;; Fix: the Intel-syntax (right-hand) side of the template used
;; "<mask_opernad3>" — a misspelling of the "<mask_operand3>" subst
;; attribute, which would be emitted verbatim instead of the mask
;; operand.  Spelled correctly now, matching avx512er_vmrsqrt28.
19263 (define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
19264 [(set (match_operand:VF_128 0 "register_operand" "=v")
19267 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
19269 (match_operand:VF_128 2 "register_operand" "v")
19272 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
19273 [(set_attr "length_immediate" "1")
19274 (set_attr "prefix" "evex")
19275 (set_attr "type" "sse")
19276 (set_attr "mode" "<MODE>")])
;; AVX512ER vrsqrt28ps/pd: reciprocal square-root approximation,
;; 2^-28 relative error, 512-bit vectors with masking/SAE substs.
19278 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
19279 [(set (match_operand:VF_512 0 "register_operand" "=v")
19281 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19284 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19285 [(set_attr "prefix" "evex")
19286 (set_attr "type" "sse")
19287 (set_attr "mode" "<MODE>")])
;; Scalar form: low element of operand 1, upper elements from operand 2.
19289 (define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
19290 [(set (match_operand:VF_128 0 "register_operand" "=v")
19293 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
19295 (match_operand:VF_128 2 "register_operand" "v")
19298 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
19299 [(set_attr "length_immediate" "1")
19300 (set_attr "type" "sse")
19301 (set_attr "prefix" "evex")
19302 (set_attr "mode" "<MODE>")])
19304 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19306 ;; XOP instructions
19308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; plus = wrapping accumulate (pmacs*), ss_plus = saturating (pmacss*).
19310 (define_code_iterator xop_plus [plus ss_plus])
19312 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
19313 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
19315 ;; XOP parallel integer multiply/add instructions.
;; vpmacs[s]{ww,dd}: elementwise multiply of operands 1 and 2,
;; accumulated into operand 3.
19317 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
19318 [(set (match_operand:VI24_128 0 "register_operand" "=x")
19321 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
19322 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
19323 (match_operand:VI24_128 3 "register_operand" "x")))]
19325 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19326 [(set_attr "type" "ssemuladd")
19327 (set_attr "mode" "TI")])
;; vpmacs[s]dql: multiply the even (low) V4SI elements widened to DI,
;; accumulate into the V2DI operand 3.
19329 (define_insn "xop_p<macs>dql"
19330 [(set (match_operand:V2DI 0 "register_operand" "=x")
19335 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
19336 (parallel [(const_int 0) (const_int 2)])))
19339 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19340 (parallel [(const_int 0) (const_int 2)]))))
19341 (match_operand:V2DI 3 "register_operand" "x")))]
19343 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19344 [(set_attr "type" "ssemuladd")
19345 (set_attr "mode" "TI")])
;; vpmacs[s]dqh: same, for the odd (high) elements 1 and 3.
19347 (define_insn "xop_p<macs>dqh"
19348 [(set (match_operand:V2DI 0 "register_operand" "=x")
19353 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
19354 (parallel [(const_int 1) (const_int 3)])))
19357 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19358 (parallel [(const_int 1) (const_int 3)]))))
19359 (match_operand:V2DI 3 "register_operand" "x")))]
19361 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19362 [(set_attr "type" "ssemuladd")
19363 (set_attr "mode" "TI")])
19365 ;; XOP parallel integer multiply/add instructions for the intrinisics
;; vpmacs[s]wd: multiply the odd HI elements widened to SI, accumulate.
19366 (define_insn "xop_p<macs>wd"
19367 [(set (match_operand:V4SI 0 "register_operand" "=x")
19372 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
19373 (parallel [(const_int 1) (const_int 3)
19374 (const_int 5) (const_int 7)])))
19377 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
19378 (parallel [(const_int 1) (const_int 3)
19379 (const_int 5) (const_int 7)]))))
19380 (match_operand:V4SI 3 "register_operand" "x")))]
19382 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19383 [(set_attr "type" "ssemuladd")
19384 (set_attr "mode" "TI")])
;; vpmadcs[s]wd: sum of even-lane and odd-lane widened products,
;; accumulated into operand 3 (multiply-add-accumulate).
19386 (define_insn "xop_p<madcs>wd"
19387 [(set (match_operand:V4SI 0 "register_operand" "=x")
19393 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
19394 (parallel [(const_int 0) (const_int 2)
19395 (const_int 4) (const_int 6)])))
19398 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
19399 (parallel [(const_int 0) (const_int 2)
19400 (const_int 4) (const_int 6)]))))
19405 (parallel [(const_int 1) (const_int 3)
19406 (const_int 5) (const_int 7)])))
19410 (parallel [(const_int 1) (const_int 3)
19411 (const_int 5) (const_int 7)])))))
19412 (match_operand:V4SI 3 "register_operand" "x")))]
19414 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19415 [(set_attr "type" "ssemuladd")
19416 (set_attr "mode" "TI")])
19418 ;; XOP parallel XMM conditional moves
;; vpcmov: bitwise select — operand 3 chooses between operands 1 and 2.
19419 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
19420 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
19421 (if_then_else:V_128_256
19422 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
19423 (match_operand:V_128_256 1 "register_operand" "x,x")
19424 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
19426 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19427 [(set_attr "type" "sse4arg")])
19429 ;; XOP horizontal add/subtract instructions
;; vphadd[u]bw: add adjacent byte pairs (even + odd lanes), widen to HI.
19430 (define_insn "xop_phadd<u>bw"
19431 [(set (match_operand:V8HI 0 "register_operand" "=x")
19435 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19436 (parallel [(const_int 0) (const_int 2)
19437 (const_int 4) (const_int 6)
19438 (const_int 8) (const_int 10)
19439 (const_int 12) (const_int 14)])))
19443 (parallel [(const_int 1) (const_int 3)
19444 (const_int 5) (const_int 7)
19445 (const_int 9) (const_int 11)
19446 (const_int 13) (const_int 15)])))))]
19448 "vphadd<u>bw\t{%1, %0|%0, %1}"
19449 [(set_attr "type" "sseiadd1")])
;; vphadd[u]bd: sum groups of four bytes, widen to SI.
19451 (define_insn "xop_phadd<u>bd"
19452 [(set (match_operand:V4SI 0 "register_operand" "=x")
19457 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19458 (parallel [(const_int 0) (const_int 4)
19459 (const_int 8) (const_int 12)])))
19463 (parallel [(const_int 1) (const_int 5)
19464 (const_int 9) (const_int 13)]))))
19469 (parallel [(const_int 2) (const_int 6)
19470 (const_int 10) (const_int 14)])))
19474 (parallel [(const_int 3) (const_int 7)
19475 (const_int 11) (const_int 15)]))))))]
19477 "vphadd<u>bd\t{%1, %0|%0, %1}"
19478 [(set_attr "type" "sseiadd1")])
;; vphadd[u]bq: sum groups of eight bytes, widen to DI.
19480 (define_insn "xop_phadd<u>bq"
19481 [(set (match_operand:V2DI 0 "register_operand" "=x")
19487 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19488 (parallel [(const_int 0) (const_int 8)])))
19492 (parallel [(const_int 1) (const_int 9)]))))
19497 (parallel [(const_int 2) (const_int 10)])))
19501 (parallel [(const_int 3) (const_int 11)])))))
19507 (parallel [(const_int 4) (const_int 12)])))
19511 (parallel [(const_int 5) (const_int 13)]))))
19516 (parallel [(const_int 6) (const_int 14)])))
19520 (parallel [(const_int 7) (const_int 15)])))))))]
19522 "vphadd<u>bq\t{%1, %0|%0, %1}"
19523 [(set_attr "type" "sseiadd1")])
;; vphadd[u]wd: add adjacent HI pairs, widen to SI.
19525 (define_insn "xop_phadd<u>wd"
19526 [(set (match_operand:V4SI 0 "register_operand" "=x")
19530 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19531 (parallel [(const_int 0) (const_int 2)
19532 (const_int 4) (const_int 6)])))
19536 (parallel [(const_int 1) (const_int 3)
19537 (const_int 5) (const_int 7)])))))]
19539 "vphadd<u>wd\t{%1, %0|%0, %1}"
19540 [(set_attr "type" "sseiadd1")])
;; vphadd[u]wq: sum groups of four HI elements, widen to DI.
19542 (define_insn "xop_phadd<u>wq"
19543 [(set (match_operand:V2DI 0 "register_operand" "=x")
19548 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19549 (parallel [(const_int 0) (const_int 4)])))
19553 (parallel [(const_int 1) (const_int 5)]))))
19558 (parallel [(const_int 2) (const_int 6)])))
19562 (parallel [(const_int 3) (const_int 7)]))))))]
19564 "vphadd<u>wq\t{%1, %0|%0, %1}"
19565 [(set_attr "type" "sseiadd1")])
;; vphadd[u]dq: add adjacent SI pairs, widen to DI.
19567 (define_insn "xop_phadd<u>dq"
19568 [(set (match_operand:V2DI 0 "register_operand" "=x")
19572 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19573 (parallel [(const_int 0) (const_int 2)])))
19577 (parallel [(const_int 1) (const_int 3)])))))]
19579 "vphadd<u>dq\t{%1, %0|%0, %1}"
19580 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract — even byte lane minus odd byte lane,
;; widened to HI.
19582 (define_insn "xop_phsubbw"
19583 [(set (match_operand:V8HI 0 "register_operand" "=x")
19587 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19588 (parallel [(const_int 0) (const_int 2)
19589 (const_int 4) (const_int 6)
19590 (const_int 8) (const_int 10)
19591 (const_int 12) (const_int 14)])))
19595 (parallel [(const_int 1) (const_int 3)
19596 (const_int 5) (const_int 7)
19597 (const_int 9) (const_int 11)
19598 (const_int 13) (const_int 15)])))))]
19600 "vphsubbw\t{%1, %0|%0, %1}"
19601 [(set_attr "type" "sseiadd1")])
;; vphsubwd: even HI lane minus odd HI lane, widened to SI.
19603 (define_insn "xop_phsubwd"
19604 [(set (match_operand:V4SI 0 "register_operand" "=x")
19608 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19609 (parallel [(const_int 0) (const_int 2)
19610 (const_int 4) (const_int 6)])))
19614 (parallel [(const_int 1) (const_int 3)
19615 (const_int 5) (const_int 7)])))))]
19617 "vphsubwd\t{%1, %0|%0, %1}"
19618 [(set_attr "type" "sseiadd1")])
;; vphsubdq: even SI lane minus odd SI lane, widened to DI.
19620 (define_insn "xop_phsubdq"
19621 [(set (match_operand:V2DI 0 "register_operand" "=x")
19625 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19626 (parallel [(const_int 0) (const_int 2)])))
19630 (parallel [(const_int 1) (const_int 3)])))))]
19632 "vphsubdq\t{%1, %0|%0, %1}"
19633 [(set_attr "type" "sseiadd1")])
19635 ;; XOP permute instructions
;; vpperm: byte permute/select over the 32 bytes of operands 1 and 2,
;; controlled per-byte by operand 3.  At most one memory operand.
19636 (define_insn "xop_pperm"
19637 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19639 [(match_operand:V16QI 1 "register_operand" "x,x")
19640 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19641 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
19642 UNSPEC_XOP_PERMUTE))]
19643 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19644 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19645 [(set_attr "type" "sse4arg")
19646 (set_attr "mode" "TI")])
19648 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used as a V2DI->V4SI truncating pack; operand 3 holds the
;; byte-selector constant (declared as "use").
19649 (define_insn "xop_pperm_pack_v2di_v4si"
19650 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
19653 (match_operand:V2DI 1 "register_operand" "x,x"))
19655 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
19656 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19657 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19658 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19659 [(set_attr "type" "sse4arg")
19660 (set_attr "mode" "TI")])
;; V4SI->V8HI truncating pack via vpperm.
19662 (define_insn "xop_pperm_pack_v4si_v8hi"
19663 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
19666 (match_operand:V4SI 1 "register_operand" "x,x"))
19668 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
19669 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19670 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19671 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19672 [(set_attr "type" "sse4arg")
19673 (set_attr "mode" "TI")])
;; V8HI->V16QI truncating pack via vpperm.
19675 (define_insn "xop_pperm_pack_v8hi_v16qi"
19676 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19679 (match_operand:V8HI 1 "register_operand" "x,x"))
19681 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
19682 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19683 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19684 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19685 [(set_attr "type" "sse4arg")
19686 (set_attr "mode" "TI")])
19688 ;; XOP packed rotate instructions
;; Expand vector rotate-left.  A constant scalar count within range is
;; handled directly by xop_rotl<mode>3; otherwise the scalar count is
;; broadcast into a vector register and xop_vrotl<mode>3 is used.
;; NOTE(review): original-line numbers jump here (e.g. 19693 -> 19696),
;; so the preparation-statement braces/decls are partly missing from
;; this extract; verify against upstream sse.md.
19689 (define_expand "rotl<mode>3"
19690 [(set (match_operand:VI_128 0 "register_operand")
19692 (match_operand:VI_128 1 "nonimmediate_operand")
19693 (match_operand:SI 2 "general_operand")))]
19696 /* If we were given a scalar, convert it to parallel */
19697 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19699 rtvec vs = rtvec_alloc (<ssescalarnum>);
19700 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19701 rtx reg = gen_reg_rtx (<MODE>mode);
19702 rtx op2 = operands[2];
;; Widen/narrow the scalar count to the vector element mode if needed.
19705 if (GET_MODE (op2) != <ssescalarmode>mode)
19707 op2 = gen_reg_rtx (<ssescalarmode>mode);
19708 convert_move (op2, operands[2], false);
;; Replicate the count into every element, then build the vector.
19711 for (i = 0; i < <ssescalarnum>; i++)
19712 RTVEC_ELT (vs, i) = op2;
19714 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par))
19715 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expand vector rotate-right.  XOP only has a variable rotate-left, so a
;; non-constant count is broadcast, negated, and fed to xop_vrotl<mode>3
;; (rotate right by N == rotate left by -N).
;; NOTE(review): interior lines are missing in this extract (line numbers
;; jump); verify against upstream sse.md before editing.
19720 (define_expand "rotr<mode>3"
19721 [(set (match_operand:VI_128 0 "register_operand")
19723 (match_operand:VI_128 1 "nonimmediate_operand")
19724 (match_operand:SI 2 "general_operand")))]
19727 /* If we were given a scalar, convert it to parallel */
19728 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19730 rtvec vs = rtvec_alloc (<ssescalarnum>);
19731 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19732 rtx neg = gen_reg_rtx (<MODE>mode);
19733 rtx reg = gen_reg_rtx (<MODE>mode);
19734 rtx op2 = operands[2];
;; Convert the scalar count to the element mode if it differs.
19737 if (GET_MODE (op2) != <ssescalarmode>mode)
19739 op2 = gen_reg_rtx (<ssescalarmode>mode);
19740 convert_move (op2, operands[2], false);
;; Broadcast the count, negate it, and rotate left by the negation.
19743 for (i = 0; i < <ssescalarnum>; i++)
19744 RTVEC_ELT (vs, i) = op2;
19746 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
19747 emit_insn (gen_neg<mode>2 (neg, reg));
19748 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; XOP rotate-left by immediate: vprotb/w/d/q with a constant count in
;; range [0, <sserotatemax>].
;; NOTE(review): line numbers jump (19757 -> 19759); the insn condition
;; line appears to be missing from this extract.
19753 (define_insn "xop_rotl<mode>3"
19754 [(set (match_operand:VI_128 0 "register_operand" "=x")
19756 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
19757 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
19759 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19760 [(set_attr "type" "sseishft")
19761 (set_attr "length_immediate" "1")
19762 (set_attr "mode" "TI")])
;; XOP rotate-right by immediate.  Implemented with vprot (rotate left)
;; by (element-bits - count); operand 3 holds the converted immediate.
;; NOTE(review): interior lines missing in this extract (numbers jump
;; 19768 -> 19772); verify upstream before editing.
19764 (define_insn "xop_rotr<mode>3"
19765 [(set (match_operand:VI_128 0 "register_operand" "=x")
19767 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
19768 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
;; Convert right-rotate count to the equivalent left-rotate count.
19772 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
19773 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
19775 [(set_attr "type" "sseishft")
19776 (set_attr "length_immediate" "1")
19777 (set_attr "mode" "TI")])
;; Variable (per-element) rotate-right: negate the count vector and use
;; the XOP variable rotate-left instruction.
19779 (define_expand "vrotr<mode>3"
19780 [(match_operand:VI_128 0 "register_operand")
19781 (match_operand:VI_128 1 "register_operand")
19782 (match_operand:VI_128 2 "register_operand")]
19785 rtx reg = gen_reg_rtx (<MODE>mode);
19786 emit_insn (gen_neg<mode>2 (reg, operands[2]));
19787 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Variable rotate-left maps directly onto the XOP insn.
19791 (define_expand "vrotl<mode>3"
19792 [(match_operand:VI_128 0 "register_operand")
19793 (match_operand:VI_128 1 "register_operand")
19794 (match_operand:VI_128 2 "register_operand")]
19797 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; XOP variable rotate: vprot with a per-element count vector.  The RTL
;; models the hardware semantics (negative counts rotate the other way)
;; with an if_then_else on the sign of each count element.
;; NOTE(review): interior lines missing (numbers jump 19805 -> 19808);
;; verify upstream before editing.
19801 (define_insn "xop_vrotl<mode>3"
19802 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19803 (if_then_else:VI_128
19805 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
19808 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
19812 (neg:VI_128 (match_dup 2)))))]
19813 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19814 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19815 [(set_attr "type" "sseishft")
19816 (set_attr "prefix_data16" "0")
19817 (set_attr "prefix_extra" "2")
19818 (set_attr "mode" "TI")])
19820 ;; XOP packed shift instructions.
;; Variable logical-shift-right expanders for several vector widths.
;; For 8/16-bit element modes XOP has no logical right shift insn, so
;; the count is negated and the XOP variable shift-left (vpshl) is used.
;; NOTE(review): several interior lines are missing in this extract
;; (original-line numbers jump); verify against upstream sse.md.
19821 (define_expand "vlshr<mode>3"
19822 [(set (match_operand:VI12_128 0 "register_operand")
19824 (match_operand:VI12_128 1 "register_operand")
19825 (match_operand:VI12_128 2 "nonimmediate_operand")))]
19828 rtx neg = gen_reg_rtx (<MODE>mode);
19829 emit_insn (gen_neg<mode>2 (neg, operands[2]));
19830 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; 32/64-bit elements: AVX2 has vpsrlv; the XOP path negates and shifts
;; left as above.
19834 (define_expand "vlshr<mode>3"
19835 [(set (match_operand:VI48_128 0 "register_operand")
19837 (match_operand:VI48_128 1 "register_operand")
19838 (match_operand:VI48_128 2 "nonimmediate_operand")))]
19839 "TARGET_AVX2 || TARGET_XOP"
19843 rtx neg = gen_reg_rtx (<MODE>mode);
19844 emit_insn (gen_neg<mode>2 (neg, operands[2]));
19845 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; 512-bit and 256-bit variants; conditions/bodies are elided in this
;; extract (lines missing).
19850 (define_expand "vlshr<mode>3"
19851 [(set (match_operand:VI48_512 0 "register_operand")
19853 (match_operand:VI48_512 1 "register_operand")
19854 (match_operand:VI48_512 2 "nonimmediate_operand")))]
19857 (define_expand "vlshr<mode>3"
19858 [(set (match_operand:VI48_256 0 "register_operand")
19860 (match_operand:VI48_256 1 "register_operand")
19861 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Variable arithmetic-shift-right expanders.  Where no native insn
;; exists, the XOP path negates the count and uses vpsha (variable
;; arithmetic shift, negative count = right shift).
;; NOTE(review): interior lines missing throughout this extract; verify
;; against upstream sse.md.
19864 (define_expand "vashrv8hi3<mask_name>"
19865 [(set (match_operand:V8HI 0 "register_operand")
19867 (match_operand:V8HI 1 "register_operand")
19868 (match_operand:V8HI 2 "nonimmediate_operand")))]
19869 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
19873 rtx neg = gen_reg_rtx (V8HImode);
19874 emit_insn (gen_negv8hi2 (neg, operands[2]));
19875 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
;; V16QI: XOP-only path, same negate-then-vpsha technique.
19880 (define_expand "vashrv16qi3"
19881 [(set (match_operand:V16QI 0 "register_operand")
19883 (match_operand:V16QI 1 "register_operand")
19884 (match_operand:V16QI 2 "nonimmediate_operand")))]
19887 rtx neg = gen_reg_rtx (V16QImode);
19888 emit_insn (gen_negv16qi2 (neg, operands[2]));
19889 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
;; V2DI: AVX512VL has vpsravq; XOP falls back to negate + vpshaq.
19893 (define_expand "vashrv2di3<mask_name>"
19894 [(set (match_operand:V2DI 0 "register_operand")
19896 (match_operand:V2DI 1 "register_operand")
19897 (match_operand:V2DI 2 "nonimmediate_operand")))]
19898 "TARGET_XOP || TARGET_AVX512VL"
19902 rtx neg = gen_reg_rtx (V2DImode);
19903 emit_insn (gen_negv2di2 (neg, operands[2]));
19904 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
;; V4SI: AVX2 vpsravd, or the XOP negate + vpshad fallback.
19909 (define_expand "vashrv4si3"
19910 [(set (match_operand:V4SI 0 "register_operand")
19911 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
19912 (match_operand:V4SI 2 "nonimmediate_operand")))]
19913 "TARGET_AVX2 || TARGET_XOP"
19917 rtx neg = gen_reg_rtx (V4SImode);
19918 emit_insn (gen_negv4si2 (neg, operands[2]));
19919 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Wider variants; conditions elided in this extract (lines missing).
19924 (define_expand "vashrv16si3"
19925 [(set (match_operand:V16SI 0 "register_operand")
19926 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
19927 (match_operand:V16SI 2 "nonimmediate_operand")))]
19930 (define_expand "vashrv8si3"
19931 [(set (match_operand:V8SI 0 "register_operand")
19932 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
19933 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Variable shift-left expanders.  8/16-bit elements go straight to the
;; XOP vpsha pattern; 32/64-bit elements use AVX2 vpsllv or the XOP path.
;; NOTE(review): interior lines missing in this extract; verify upstream.
19936 (define_expand "vashl<mode>3"
19937 [(set (match_operand:VI12_128 0 "register_operand")
19939 (match_operand:VI12_128 1 "register_operand")
19940 (match_operand:VI12_128 2 "nonimmediate_operand")))]
19943 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
19947 (define_expand "vashl<mode>3"
19948 [(set (match_operand:VI48_128 0 "register_operand")
19950 (match_operand:VI48_128 1 "register_operand")
19951 (match_operand:VI48_128 2 "nonimmediate_operand")))]
19952 "TARGET_AVX2 || TARGET_XOP"
;; XOP insn requires the count in a register.
19956 operands[2] = force_reg (<MODE>mode, operands[2]);
19957 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; 512-bit and 256-bit variants; bodies elided in this extract.
19962 (define_expand "vashl<mode>3"
19963 [(set (match_operand:VI48_512 0 "register_operand")
19965 (match_operand:VI48_512 1 "register_operand")
19966 (match_operand:VI48_512 2 "nonimmediate_operand")))]
19969 (define_expand "vashl<mode>3"
19970 [(set (match_operand:VI48_256 0 "register_operand")
19972 (match_operand:VI48_256 1 "register_operand")
19973 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; XOP variable shifts.  vpsha* = arithmetic, vpshl* = logical; a
;; negative count element shifts right, modelled with if_then_else on
;; the count's sign.
;; NOTE(review): interior RTL lines missing (numbers jump); verify
;; against upstream sse.md before editing.
19976 (define_insn "xop_sha<mode>3"
19977 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19978 (if_then_else:VI_128
19980 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
19983 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
19987 (neg:VI_128 (match_dup 2)))))]
19988 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19989 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19990 [(set_attr "type" "sseishft")
19991 (set_attr "prefix_data16" "0")
19992 (set_attr "prefix_extra" "2")
19993 (set_attr "mode" "TI")])
;; Logical variant (vpshl); same operand constraints and structure.
19995 (define_insn "xop_shl<mode>3"
19996 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19997 (if_then_else:VI_128
19999 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
20002 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
20006 (neg:VI_128 (match_dup 2)))))]
20007 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20008 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20009 [(set_attr "type" "sseishft")
20010 (set_attr "prefix_data16" "0")
20011 (set_attr "prefix_extra" "2")
20012 (set_attr "mode" "TI")])
20014 (define_expand "<insn><mode>3"
20015 [(set (match_operand:VI1_AVX512 0 "register_operand")
20016 (any_shift:VI1_AVX512
20017 (match_operand:VI1_AVX512 1 "register_operand")
20018 (match_operand:SI 2 "nonmemory_operand")))]
20021 if (TARGET_XOP && <MODE>mode == V16QImode)
20023 bool negate = false;
20024 rtx (*gen) (rtx, rtx, rtx);
20028 if (<CODE> != ASHIFT)
20030 if (CONST_INT_P (operands[2]))
20031 operands[2] = GEN_INT (-INTVAL (operands[2]));
20035 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
20036 for (i = 0; i < 16; i++)
20037 XVECEXP (par, 0, i) = operands[2];
20039 tmp = gen_reg_rtx (V16QImode);
20040 emit_insn (gen_vec_initv16qiqi (tmp, par));
20043 emit_insn (gen_negv16qi2 (tmp, tmp));
20045 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
20046 emit_insn (gen (operands[0], operands[1], tmp));
20048 else if (!ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0],
20049 operands[1], operands[2]))
20050 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Arithmetic shift right of V2DI by a scalar count.  AVX512VL handles
;; it natively; the XOP path broadcasts the (negated) count and uses
;; vpshaq.
;; NOTE(review): interior lines missing in this extract; verify upstream.
20054 (define_expand "ashrv2di3"
20055 [(set (match_operand:V2DI 0 "register_operand")
20057 (match_operand:V2DI 1 "register_operand")
20058 (match_operand:DI 2 "nonmemory_operand")))]
20059 "TARGET_XOP || TARGET_AVX512VL"
20061 if (!TARGET_AVX512VL)
20063 rtx reg = gen_reg_rtx (V2DImode);
20065 bool negate = false;
;; Constant counts are negated up front (right shift = left by -N).
20068 if (CONST_INT_P (operands[2]))
20069 operands[2] = GEN_INT (-INTVAL (operands[2]));
;; Broadcast the count into both lanes.
20073 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
20074 for (i = 0; i < 2; i++)
20075 XVECEXP (par, 0, i) = operands[2];
20077 emit_insn (gen_vec_initv2didi (reg, par));
20080 emit_insn (gen_negv2di2 (reg, reg));
20082 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
20087 ;; XOP FRCZ support
;; XOP FRCZ (extract fraction): vfrczps/pd and the scalar vm variants.
;; NOTE(review): interior lines missing in this extract (numbers jump);
;; verify against upstream sse.md.
20088 (define_insn "xop_frcz<mode>2"
20089 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
20091 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
20094 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
20095 [(set_attr "type" "ssecvt1")
20096 (set_attr "mode" "<MODE>")])
;; Scalar variant: operand 2 is the zero vector merged into the upper
;; elements (set up by the expander's preparation statement).
20098 (define_expand "xop_vmfrcz<mode>2"
20099 [(set (match_operand:VF_128 0 "register_operand")
20102 [(match_operand:VF_128 1 "nonimmediate_operand")]
20107 "operands[2] = CONST0_RTX (<MODE>mode);")
20109 (define_insn "*xop_vmfrcz<mode>2"
20110 [(set (match_operand:VF_128 0 "register_operand" "=x")
20113 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
20115 (match_operand:VF_128 2 "const0_operand")
20118 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
20119 [(set_attr "type" "ssecvt1")
20120 (set_attr "mode" "<MODE>")])
;; XOP vector compare patterns (vpcom*).  %Y1 prints the comparison
;; condition name from the match_operator.
;; NOTE(review): interior lines (insn conditions etc.) are missing in
;; this extract; verify against upstream sse.md.
20122 (define_insn "xop_maskcmp<mode>3"
20123 [(set (match_operand:VI_128 0 "register_operand" "=x")
20124 (match_operator:VI_128 1 "ix86_comparison_int_operator"
20125 [(match_operand:VI_128 2 "register_operand" "x")
20126 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
20128 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20129 [(set_attr "type" "sse4arg")
20130 (set_attr "prefix_data16" "0")
20131 (set_attr "prefix_rep" "0")
20132 (set_attr "prefix_extra" "2")
20133 (set_attr "length_immediate" "1")
20134 (set_attr "mode" "TI")])
;; Unsigned comparison variant (vpcom*u*).
20136 (define_insn "xop_maskcmp_uns<mode>3"
20137 [(set (match_operand:VI_128 0 "register_operand" "=x")
20138 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
20139 [(match_operand:VI_128 2 "register_operand" "x")
20140 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
20142 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20143 [(set_attr "type" "ssecmp")
20144 (set_attr "prefix_data16" "0")
20145 (set_attr "prefix_rep" "0")
20146 (set_attr "prefix_extra" "2")
20147 (set_attr "length_immediate" "1")
20148 (set_attr "mode" "TI")])
20150 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
20151 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
20152 ;; the exact instruction generated for the intrinsic.
20153 (define_insn "xop_maskcmp_uns2<mode>3"
20154 [(set (match_operand:VI_128 0 "register_operand" "=x")
20156 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
20157 [(match_operand:VI_128 2 "register_operand" "x")
20158 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
20159 UNSPEC_XOP_UNSIGNED_CMP))]
20161 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
20162 [(set_attr "type" "ssecmp")
20163 (set_attr "prefix_data16" "0")
20164 (set_attr "prefix_extra" "2")
20165 (set_attr "length_immediate" "1")
20166 (set_attr "mode" "TI")])
20168 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
20169 ;; being added here to be complete.
;; Operand 3 selects pcomtrue (nonzero) vs pcomfalse (zero).
20170 (define_insn "xop_pcom_tf<mode>3"
20171 [(set (match_operand:VI_128 0 "register_operand" "=x")
20173 [(match_operand:VI_128 1 "register_operand" "x")
20174 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
20175 (match_operand:SI 3 "const_int_operand" "n")]
20176 UNSPEC_XOP_TRUEFALSE))]
20179 return ((INTVAL (operands[3]) != 0)
20180 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20181 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
20183 [(set_attr "type" "ssecmp")
20184 (set_attr "prefix_data16" "0")
20185 (set_attr "prefix_extra" "2")
20186 (set_attr "length_immediate" "1")
20187 (set_attr "mode" "TI")])
;; XOP two-source permute: vpermil2ps/pd selects elements from operands
;; 1 and 2 under control of operand 3; operand 4 is the 2-bit immediate
;; zero/match control.
;; NOTE(review): interior lines missing (numbers jump 20195 -> 20198);
;; verify upstream before editing.
20189 (define_insn "xop_vpermil2<mode>3"
20190 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
20192 [(match_operand:VF_128_256 1 "register_operand" "x,x")
20193 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
20194 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
20195 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
20198 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
20199 [(set_attr "type" "sse4arg")
20200 (set_attr "length_immediate" "1")
20201 (set_attr "mode" "<MODE>")])
20203 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES-NI and PCLMUL instructions.  Each has a legacy (noavx, two-operand,
;; dest = operand 1) alternative and a VEX (avx, three-operand)
;; alternative.
;; NOTE(review): some interior lines (unspec names / insn conditions)
;; are missing in this extract; verify against upstream sse.md.
20205 (define_insn "aesenc"
20206 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20207 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20208 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20212 aesenc\t{%2, %0|%0, %2}
20213 vaesenc\t{%2, %1, %0|%0, %1, %2}"
20214 [(set_attr "isa" "noavx,avx")
20215 (set_attr "type" "sselog1")
20216 (set_attr "prefix_extra" "1")
20217 (set_attr "prefix" "orig,vex")
20218 (set_attr "btver2_decode" "double,double")
20219 (set_attr "mode" "TI")])
;; Final AES encryption round.
20221 (define_insn "aesenclast"
20222 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20223 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20224 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20225 UNSPEC_AESENCLAST))]
20228 aesenclast\t{%2, %0|%0, %2}
20229 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
20230 [(set_attr "isa" "noavx,avx")
20231 (set_attr "type" "sselog1")
20232 (set_attr "prefix_extra" "1")
20233 (set_attr "prefix" "orig,vex")
20234 (set_attr "btver2_decode" "double,double")
20235 (set_attr "mode" "TI")])
;; AES decryption round.
20237 (define_insn "aesdec"
20238 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20239 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20240 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20244 aesdec\t{%2, %0|%0, %2}
20245 vaesdec\t{%2, %1, %0|%0, %1, %2}"
20246 [(set_attr "isa" "noavx,avx")
20247 (set_attr "type" "sselog1")
20248 (set_attr "prefix_extra" "1")
20249 (set_attr "prefix" "orig,vex")
20250 (set_attr "btver2_decode" "double,double")
20251 (set_attr "mode" "TI")])
;; Final AES decryption round.
20253 (define_insn "aesdeclast"
20254 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20255 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20256 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
20257 UNSPEC_AESDECLAST))]
20260 aesdeclast\t{%2, %0|%0, %2}
20261 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
20262 [(set_attr "isa" "noavx,avx")
20263 (set_attr "type" "sselog1")
20264 (set_attr "prefix_extra" "1")
20265 (set_attr "prefix" "orig,vex")
20266 (set_attr "btver2_decode" "double,double")
20267 (set_attr "mode" "TI")])
;; Inverse MixColumns; %v emits the "v" prefix when AVX is enabled.
20269 (define_insn "aesimc"
20270 [(set (match_operand:V2DI 0 "register_operand" "=x")
20271 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
20274 "%vaesimc\t{%1, %0|%0, %1}"
20275 [(set_attr "type" "sselog1")
20276 (set_attr "prefix_extra" "1")
20277 (set_attr "prefix" "maybe_vex")
20278 (set_attr "mode" "TI")])
;; Round-key generation helper; operand 2 is the RCON immediate.
20280 (define_insn "aeskeygenassist"
20281 [(set (match_operand:V2DI 0 "register_operand" "=x")
20282 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
20283 (match_operand:SI 2 "const_0_to_255_operand" "n")]
20284 UNSPEC_AESKEYGENASSIST))]
20286 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
20287 [(set_attr "type" "sselog1")
20288 (set_attr "prefix_extra" "1")
20289 (set_attr "length_immediate" "1")
20290 (set_attr "prefix" "maybe_vex")
20291 (set_attr "mode" "TI")])
;; Carry-less multiply; operand 3 selects the qword halves to multiply.
20293 (define_insn "pclmulqdq"
20294 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
20295 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
20296 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
20297 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
20301 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
20302 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20303 [(set_attr "isa" "noavx,avx")
20304 (set_attr "type" "sselog1")
20305 (set_attr "prefix_extra" "1")
20306 (set_attr "length_immediate" "1")
20307 (set_attr "prefix" "orig,vex")
20308 (set_attr "mode" "TI")])
;; vzeroall: build a PARALLEL of the volatile unspec plus one SET per
;; SSE register (16 on 64-bit, 8 on 32-bit) clearing it to zero, so the
;; register allocator knows every vector register is clobbered.
;; NOTE(review): interior lines missing in this extract; verify upstream.
20310 (define_expand "avx_vzeroall"
20311 [(match_par_dup 0 [(const_int 0)])]
20314 int nregs = TARGET_64BIT ? 16 : 8;
20317 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
;; Element 0 is the volatile marker; elements 1..nregs are the SETs.
20319 XVECEXP (operands[0], 0, 0)
20320 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
20323 for (regno = 0; regno < nregs; regno++)
20324 XVECEXP (operands[0], 0, regno + 1)
20325 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
20326 CONST0_RTX (V8SImode))
;; Matching insn: recognized via the vzeroall_operation predicate.
20329 (define_insn "*avx_vzeroall"
20330 [(match_parallel 0 "vzeroall_operation"
20331 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
20334 [(set_attr "type" "sse")
20335 (set_attr "modrm" "0")
20336 (set_attr "memory" "none")
20337 (set_attr "prefix" "vex")
20338 (set_attr "btver2_decode" "vector")
20339 (set_attr "mode" "OI")])
20341 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
20342 ;; if the upper 128bits are unused. Initially we expand the instructions
20343 ;; as though they had no effect on the SSE registers, but later add SETs and
20344 ;; CLOBBERs to the PARALLEL to model the real effect.
;; vzeroupper patterns.  The expander emits a bare volatile unspec; the
;; full-clobber form (*avx_vzeroupper) matches once SETs/CLOBBERs for
;; all SSE regs have been added, and *avx_vzeroupper_1 normalizes a
;; partial parallel into the full form after the epilogue is laid out.
;; NOTE(review): interior lines missing in this extract; verify upstream.
20345 (define_expand "avx_vzeroupper"
20346 [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
20349 (define_insn "*avx_vzeroupper"
20350 [(match_parallel 0 "vzeroupper_pattern"
20351 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
;; Matches only the fully-populated parallel (one entry per SSE reg + 1).
20352 "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
20354 [(set_attr "type" "sse")
20355 (set_attr "modrm" "0")
20356 (set_attr "memory" "none")
20357 (set_attr "prefix" "vex")
20358 (set_attr "btver2_decode" "vector")
20359 (set_attr "mode" "OI")])
20361 (define_insn_and_split "*avx_vzeroupper_1"
20362 [(match_parallel 0 "vzeroupper_pattern"
20363 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
20364 "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
20366 "&& epilogue_completed"
20369 /* For IPA-RA purposes, make it clear the instruction clobbers
20370 even XMM registers not mentioned explicitly in the pattern. */
20371 unsigned int nregs = TARGET_64BIT ? 16 : 8;
20372 unsigned int npats = XVECLEN (operands[0], 0);
20373 rtvec vec = rtvec_alloc (nregs + 1);
20374 RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
;; Copy existing per-register entries; synthesize CLOBBERs for the rest.
20375 for (unsigned int i = 0, j = 1; i < nregs; ++i)
20377 unsigned int regno = GET_SSE_REGNO (i);
20379 && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
20381 RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
20386 rtx reg = gen_rtx_REG (V2DImode, regno);
20387 RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
20390 operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
20392 [(set_attr "type" "sse")
20393 (set_attr "modrm" "0")
20394 (set_attr "memory" "none")
20395 (set_attr "prefix" "vex")
20396 (set_attr "btver2_decode" "vector")
20397 (set_attr "mode" "OI")])
;; ISA attribute mapping each integer vector mode to the AVX512 feature
;; that provides its EVEX-encoded vpbroadcast.
20399 (define_mode_attr pbroadcast_evex_isa
20400 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
20401 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
20402 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
20403 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
;; vpbroadcast of element 0 of an XMM-sized source into a full vector.
;; NOTE(review): interior lines missing in this extract; verify upstream.
20405 (define_insn "avx2_pbroadcast<mode>"
20406 [(set (match_operand:VI 0 "register_operand" "=x,v")
20408 (vec_select:<ssescalarmode>
20409 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
20410 (parallel [(const_int 0)]))))]
20412 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
20413 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
20414 (set_attr "type" "ssemov")
20415 (set_attr "prefix_extra" "1")
20416 (set_attr "prefix" "vex,evex")
20417 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast from a 256-bit source; register sources use the low XMM
;; half (%x1 prints the xmm name of the register).
20419 (define_insn "avx2_pbroadcast<mode>_1"
20420 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
20421 (vec_duplicate:VI_256
20422 (vec_select:<ssescalarmode>
20423 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
20424 (parallel [(const_int 0)]))))]
20427 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
20428 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
20429 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
20430 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
20431 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
20432 (set_attr "type" "ssemov")
20433 (set_attr "prefix_extra" "1")
20434 (set_attr "prefix" "vex")
20435 (set_attr "mode" "<sseinsnmode>")])
;; Full-vector variable permutes (vperm{d,q,ps,pd,b,w}): operand 2 is the
;; index vector.  Note the asm template swaps operands 1 and 2 relative
;; to the RTL unspec order.
;; NOTE(review): unspec names are missing in this extract (lines elided);
;; verify against upstream sse.md.
20437 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
20438 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
20439 (unspec:VI48F_256_512
20440 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
20441 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20443 "TARGET_AVX2 && <mask_mode512bit_condition>"
20444 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20445 [(set_attr "type" "sselog")
20446 (set_attr "prefix" "<mask_prefix2>")
20447 (set_attr "mode" "<sseinsnmode>")])
;; Byte-element variant: requires AVX512VBMI (vpermb).
20449 (define_insn "<avx512>_permvar<mode><mask_name>"
20450 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
20451 (unspec:VI1_AVX512VL
20452 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
20453 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20455 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
20456 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20457 [(set_attr "type" "sselog")
20458 (set_attr "prefix" "<mask_prefix2>")
20459 (set_attr "mode" "<sseinsnmode>")])
;; Word-element variant: requires AVX512BW (vpermw).
20461 (define_insn "<avx512>_permvar<mode><mask_name>"
20462 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20463 (unspec:VI2_AVX512VL
20464 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
20465 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
20467 "TARGET_AVX512BW && <mask_mode512bit_condition>"
20468 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
20469 [(set_attr "type" "sselog")
20470 (set_attr "prefix" "<mask_prefix2>")
20471 (set_attr "mode" "<sseinsnmode>")])
20473 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
20474 ;; If it so happens that the input is in memory, use vbroadcast.
20475 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; Broadcast written as a vec_select with a uniform index parallel.
;; Alt 0: memory source -> vbroadcastss of the selected element.
;; Alt 2: register source -> vpermilps with a replicated-lane immediate.
;; NOTE(review): interior lines missing in this extract (case labels of
;; the switch etc.); verify against upstream sse.md.
20476 (define_insn "*avx_vperm_broadcast_v4sf"
20477 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
20479 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
20480 (match_parallel 2 "avx_vbroadcast_operand"
20481 [(match_operand 3 "const_int_operand" "C,n,n")])))]
20484 int elt = INTVAL (operands[3]);
20485 switch (which_alternative)
;; Memory: address the selected 4-byte element directly.
20489 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
20490 return "vbroadcastss\t{%1, %0|%0, %k1}";
;; Register: 0x55 replicates a 2-bit lane index into all four fields.
20492 operands[2] = GEN_INT (elt * 0x55);
20493 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
20495 gcc_unreachable ();
20498 [(set_attr "type" "ssemov,ssemov,sselog1")
20499 (set_attr "prefix_extra" "1")
20500 (set_attr "length_immediate" "0,0,1")
20501 (set_attr "prefix" "maybe_evex")
20502 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast-by-select; split after reload into either a
;; vec_duplicate (memory / AVX2) or vpermilp + lane shuffle (register).
20504 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
20505 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
20507 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
20508 (match_parallel 2 "avx_vbroadcast_operand"
20509 [(match_operand 3 "const_int_operand" "C,n,n")])))]
20511 && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
20513 "&& reload_completed"
20514 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
20516 rtx op0 = operands[0], op1 = operands[1];
20517 int elt = INTVAL (operands[3]);
;; AVX2 can broadcast element 0 from a register directly.
20523 if (TARGET_AVX2 && elt == 0)
20525 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
20530 /* Shuffle element we care about into all elements of the 128-bit lane.
20531 The other lane gets shuffled too, but we don't care. */
20532 if (<MODE>mode == V4DFmode)
20533 mask = (elt & 1 ? 15 : 0);
20535 mask = (elt & 3) * 0x55;
20536 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
20538 /* Shuffle the lane we care about into both lanes of the dest. */
20539 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
20540 if (EXT_REX_SSE_REG_P (op0))
20542 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
20544 gcc_assert (<MODE>mode == V8SFmode);
20545 if ((mask & 1) == 0)
20546 emit_insn (gen_avx2_vec_dupv8sf (op0,
20547 gen_lowpart (V4SFmode, op0)));
;; Otherwise duplicate the high lane via a 32x4 shuffle.
20549 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
20550 GEN_INT (4), GEN_INT (5),
20551 GEN_INT (6), GEN_INT (7),
20552 GEN_INT (12), GEN_INT (13),
20553 GEN_INT (14), GEN_INT (15)));
20557 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
;; Memory source: point operand 1 at the selected scalar element.
20561 operands[1] = adjust_address (op1, <ssescalarmode>mode,
20562 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; vpermilpd expander: decode the 1-bit-per-element immediate into an
;; explicit vec_select parallel (each pair of elements stays in its
;; 128-bit lane).
;; NOTE(review): interior lines missing in this extract; verify upstream.
20565 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20566 [(set (match_operand:VF2 0 "register_operand")
20568 (match_operand:VF2 1 "nonimmediate_operand")
20569 (match_operand:SI 2 "const_0_to_255_operand")))]
20570 "TARGET_AVX && <mask_mode512bit_condition>"
20572 int mask = INTVAL (operands[2]);
20573 rtx perm[<ssescalarnum>];
;; One selector bit per element, offsets kept within the 2-wide lane.
20576 for (i = 0; i < <ssescalarnum>; i = i + 2)
20578 perm[i] = GEN_INT (((mask >> i) & 1) + i);
20579 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
20583 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermilps expander: 2 bits per element, repeated for each 4-wide lane.
20586 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20587 [(set (match_operand:VF1 0 "register_operand")
20589 (match_operand:VF1 1 "nonimmediate_operand")
20590 (match_operand:SI 2 "const_0_to_255_operand")))]
20591 "TARGET_AVX && <mask_mode512bit_condition>"
20593 int mask = INTVAL (operands[2]);
20594 rtx perm[<ssescalarnum>];
20597 for (i = 0; i < <ssescalarnum>; i = i + 4)
20599 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
20600 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
20601 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
20602 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
20606 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
20609 ;; This pattern needs to come before the avx2_perm*/avx512f_perm*
20610 ;; patterns, as they have the same RTL representation (vpermilp*
20611 ;; being a subset of what vpermp* can do), but vpermilp* has shorter
20612 ;; latency as it never crosses lanes.
20613 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
20614 [(set (match_operand:VF 0 "register_operand" "=v")
20616 (match_operand:VF 1 "nonimmediate_operand" "vm")
20617 (match_parallel 2 ""
20618 [(match_operand 3 "const_int_operand")])))]
20619 "TARGET_AVX && <mask_mode512bit_condition>
20620 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
;; Predicate returns encoded-mask + 1; recover the immediate here.
20622 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
20623 operands[2] = GEN_INT (mask);
20624 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
20626 [(set_attr "type" "sselog")
20627 (set_attr "prefix_extra" "1")
20628 (set_attr "length_immediate" "1")
20629 (set_attr "prefix" "<mask_prefix>")
20630 (set_attr "mode" "<sseinsnmode>")])
;; avx2_perm expander: split the 8-bit immediate into four 2-bit element
;; selectors for the explicit-parallel form (avx2_perm<mode>_1).
;; NOTE(review): interior lines missing in this extract; verify upstream.
20632 (define_expand "avx2_perm<mode>"
20633 [(match_operand:VI8F_256 0 "register_operand")
20634 (match_operand:VI8F_256 1 "nonimmediate_operand")
20635 (match_operand:SI 2 "const_0_to_255_operand")]
20638 int mask = INTVAL (operands[2]);
20639 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
20640 GEN_INT ((mask >> 0) & 3),
20641 GEN_INT ((mask >> 2) & 3),
20642 GEN_INT ((mask >> 4) & 3),
20643 GEN_INT ((mask >> 6) & 3)));
;; Masked variant: forwards merge source (op 3) and mask (op 4) too.
20647 (define_expand "avx512vl_perm<mode>_mask"
20648 [(match_operand:VI8F_256 0 "register_operand")
20649 (match_operand:VI8F_256 1 "nonimmediate_operand")
20650 (match_operand:SI 2 "const_0_to_255_operand")
20651 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
20652 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20655 int mask = INTVAL (operands[2]);
20656 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
20657 GEN_INT ((mask >> 0) & 3),
20658 GEN_INT ((mask >> 2) & 3),
20659 GEN_INT ((mask >> 4) & 3),
20660 GEN_INT ((mask >> 6) & 3),
20661 operands[3], operands[4]));
;; The matching insn: re-encodes the four selectors back into the
;; immediate printed in the asm template.
20665 (define_insn "avx2_perm<mode>_1<mask_name>"
20666 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20667 (vec_select:VI8F_256
20668 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
20669 (parallel [(match_operand 2 "const_0_to_3_operand")
20670 (match_operand 3 "const_0_to_3_operand")
20671 (match_operand 4 "const_0_to_3_operand")
20672 (match_operand 5 "const_0_to_3_operand")])))]
20673 "TARGET_AVX2 && <mask_mode512bit_condition>"
20676 mask |= INTVAL (operands[2]) << 0;
20677 mask |= INTVAL (operands[3]) << 2;
20678 mask |= INTVAL (operands[4]) << 4;
20679 mask |= INTVAL (operands[5]) << 6;
20680 operands[2] = GEN_INT (mask);
20681 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
20683 [(set_attr "type" "sselog")
20684 (set_attr "prefix" "<mask_prefix2>")
20685 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_perm expander for 512-bit 8-element modes: the same 2-bit
;; selector pattern is applied to both 256-bit halves (indices 0-3 and
;; 4-7), matching vpermq/vpermpd imm8 semantics.
;; NOTE(review): interior lines missing in this extract; verify upstream.
20687 (define_expand "avx512f_perm<mode>"
20688 [(match_operand:V8FI 0 "register_operand")
20689 (match_operand:V8FI 1 "nonimmediate_operand")
20690 (match_operand:SI 2 "const_0_to_255_operand")]
20693 int mask = INTVAL (operands[2]);
20694 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
20695 GEN_INT ((mask >> 0) & 3),
20696 GEN_INT ((mask >> 2) & 3),
20697 GEN_INT ((mask >> 4) & 3),
20698 GEN_INT ((mask >> 6) & 3),
20699 GEN_INT (((mask >> 0) & 3) + 4),
20700 GEN_INT (((mask >> 2) & 3) + 4),
20701 GEN_INT (((mask >> 4) & 3) + 4),
20702 GEN_INT (((mask >> 6) & 3) + 4)));
;; Masked variant: forwards merge source (op 3) and mask (op 4) too.
20706 (define_expand "avx512f_perm<mode>_mask"
20707 [(match_operand:V8FI 0 "register_operand")
20708 (match_operand:V8FI 1 "nonimmediate_operand")
20709 (match_operand:SI 2 "const_0_to_255_operand")
20710 (match_operand:V8FI 3 "nonimm_or_0_operand")
20711 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20714 int mask = INTVAL (operands[2]);
20715 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
20716 GEN_INT ((mask >> 0) & 3),
20717 GEN_INT ((mask >> 2) & 3),
20718 GEN_INT ((mask >> 4) & 3),
20719 GEN_INT ((mask >> 6) & 3),
20720 GEN_INT (((mask >> 0) & 3) + 4),
20721 GEN_INT (((mask >> 2) & 3) + 4),
20722 GEN_INT (((mask >> 4) & 3) + 4),
20723 GEN_INT (((mask >> 6) & 3) + 4),
20724 operands[3], operands[4]));
20728 (define_insn "avx512f_perm<mode>_1<mask_name>"
20729 [(set (match_operand:V8FI 0 "register_operand" "=v")
20731 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
20732 (parallel [(match_operand 2 "const_0_to_3_operand")
20733 (match_operand 3 "const_0_to_3_operand")
20734 (match_operand 4 "const_0_to_3_operand")
20735 (match_operand 5 "const_0_to_3_operand")
20736 (match_operand 6 "const_4_to_7_operand")
20737 (match_operand 7 "const_4_to_7_operand")
20738 (match_operand 8 "const_4_to_7_operand")
20739 (match_operand 9 "const_4_to_7_operand")])))]
20740 "TARGET_AVX512F && <mask_mode512bit_condition>
20741 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
20742 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
20743 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
20744 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
20747 mask |= INTVAL (operands[2]) << 0;
20748 mask |= INTVAL (operands[3]) << 2;
20749 mask |= INTVAL (operands[4]) << 4;
20750 mask |= INTVAL (operands[5]) << 6;
20751 operands[2] = GEN_INT (mask);
20752 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
20754 [(set_attr "type" "sselog")
20755 (set_attr "prefix" "<mask_prefix2>")
20756 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128: select/combine 128-bit lanes of two V4DI sources under an
;; 8-bit immediate control (kept as an unspec; builtin-level pattern).
20758 (define_insn "avx2_permv2ti"
20759 [(set (match_operand:V4DI 0 "register_operand" "=x")
20761 [(match_operand:V4DI 1 "register_operand" "x")
20762 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
20763 (match_operand:SI 3 "const_0_to_255_operand" "n")]
20766 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20767 [(set_attr "type" "sselog")
20768 (set_attr "prefix" "vex")
20769 (set_attr "mode" "OI")])
;; Broadcast element 0 of a V2DF register to all four V4DF elements
;; (vbroadcastsd, register-source form requires AVX2).
20771 (define_insn "avx2_vec_dupv4df"
20772 [(set (match_operand:V4DF 0 "register_operand" "=v")
20773 (vec_duplicate:V4DF
20775 (match_operand:V2DF 1 "register_operand" "v")
20776 (parallel [(const_int 0)]))))]
20778 "vbroadcastsd\t{%1, %0|%0, %1}"
20779 [(set_attr "type" "sselog1")
20780 (set_attr "prefix" "maybe_evex")
20781 (set_attr "mode" "V4DF")])
;; Broadcast element 0 of a full-width source to all elements
;; (vpbroadcastb/w/d/q).  Alt 0 reads the low xmm of a register (%x1);
;; alt 1 loads a scalar of the element's size from memory (%<iptr>1).
20783 (define_insn "<avx512>_vec_dup<mode>_1"
20784 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
20785 (vec_duplicate:VI_AVX512BW
20786 (vec_select:<ssescalarmode>
20787 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
20788 (parallel [(const_int 0)]))))]
20791 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
20792 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
20793 [(set_attr "type" "ssemov")
20794 (set_attr "prefix" "evex")
20795 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast from an xmm-sized source, 32/64-bit elements.
20797 (define_insn "<avx512>_vec_dup<mode><mask_name>"
20798 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
20799 (vec_duplicate:V48_AVX512VL
20800 (vec_select:<ssescalarmode>
20801 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20802 (parallel [(const_int 0)]))))]
20805 /* There is no DF broadcast (in AVX-512*) to 128b register.
20806 Mimic it with integer variant. */
20807 if (<MODE>mode == V2DFmode)
20808 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
20810 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
20812 [(set_attr "type" "ssemov")
20813 (set_attr "prefix" "evex")
20814 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast from an xmm-sized source, 8/16-bit elements
;; (requires AVX512BW-class vpbroadcastb/w via the VI12 iterator).
20816 (define_insn "<avx512>_vec_dup<mode><mask_name>"
20817 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
20818 (vec_duplicate:VI12_AVX512VL
20819 (vec_select:<ssescalarmode>
20820 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20821 (parallel [(const_int 0)]))))]
20823 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
20824 [(set_attr "type" "ssemov")
20825 (set_attr "prefix" "evex")
20826 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 128-bit chunk to a 512-bit destination (V16FI = 16
;; 32-bit elements).  Register source has no direct broadcast form, so
;; it is emulated with vshufi/f32x4 across the zmm view (%g1); memory
;; source uses vbroadcasti/f32x4.
20828 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
20829 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
20830 (vec_duplicate:V16FI
20831 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
20834 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
20835 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20836 [(set_attr "type" "ssemov")
20837 (set_attr "prefix" "evex")
20838 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 256-bit chunk to a 512-bit destination (V8FI = 8 x
;; 64-bit elements): vshufi/f64x2 for register source, vbroadcasti/f64x4
;; for memory source.
20840 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
20841 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
20842 (vec_duplicate:V8FI
20843 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
20846 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
20847 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20848 [(set_attr "type" "ssemov")
20849 (set_attr "prefix" "evex")
20850 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar to a whole vector, 8/16-bit elements.  Alt 1
;; broadcasts directly from a GPR (%k1 = 32-bit view of the register).
20852 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
20853 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
20854 (vec_duplicate:VI12_AVX512VL
20855 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
20858 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
20859 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
20860 [(set_attr "type" "ssemov")
20861 (set_attr "prefix" "evex")
20862 (set_attr "mode" "<sseinsnmode>")])
;; Same for 32/64-bit elements.  The GPR alternative is only enabled for
;; integer element modes, and for DImode only on 64-bit targets (no
;; 64-bit GPR->vector broadcast in 32-bit mode).
20864 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
20865 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
20866 (vec_duplicate:V48_AVX512VL
20867 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
20869 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20870 [(set_attr "type" "ssemov")
20871 (set_attr "prefix" "evex")
20872 (set_attr "mode" "<sseinsnmode>")
20873 (set (attr "enabled")
20874 (if_then_else (eq_attr "alternative" "1")
20875 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
20876 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; Broadcast an SF scalar to V4SF: vshufps $0 (AVX reg source),
;; vbroadcastss (memory source), or legacy SSE shufps on the match_dup
;; "0" alternative.
20879 (define_insn "vec_dupv4sf"
20880 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
20881 (vec_duplicate:V4SF
20882 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
20885 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
20886 vbroadcastss\t{%1, %0|%0, %1}
20887 shufps\t{$0, %0, %0|%0, %0, 0}"
20888 [(set_attr "isa" "avx,avx,noavx")
20889 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
20890 (set_attr "length_immediate" "1,0,1")
20891 (set_attr "prefix_extra" "0,1,*")
20892 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
20893 (set_attr "mode" "V4SF")])
;; SI -> V4SI broadcast; note vbroadcastss (an FP-domain insn) is used
;; for the memory alternative even for integers.
20895 (define_insn "*vec_dupv4si"
20896 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
20897 (vec_duplicate:V4SI
20898 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
20901 %vpshufd\t{$0, %1, %0|%0, %1, 0}
20902 vbroadcastss\t{%1, %0|%0, %1}
20903 shufps\t{$0, %0, %0|%0, %0, 0}"
20904 [(set_attr "isa" "sse2,avx,noavx")
20905 (set_attr "type" "sselog1,ssemov,sselog1")
20906 (set_attr "length_immediate" "1,0,1")
20907 (set_attr "prefix_extra" "0,1,*")
20908 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
20909 (set_attr "mode" "TI,V4SF,V4SF")])
;; DI -> V2DI broadcast: punpcklqdq (SSE2), vpunpcklqdq, movddup
;; (SSE3, mode DF), or movlhps-style fallback.
20911 (define_insn "*vec_dupv2di"
20912 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
20913 (vec_duplicate:V2DI
20914 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
20918 vpunpcklqdq\t{%d1, %0|%0, %d1}
20919 %vmovddup\t{%1, %0|%0, %1}
20921 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
20922 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
20923 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
20924 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Broadcast a 128-bit memory chunk to a 256-bit integer vector.
;; Alternatives select the AVX2, AVX512DQ (64-bit element) and AVX512VL
;; (32-bit element) encodings.
20926 (define_insn "avx2_vbroadcasti128_<mode>"
20927 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
20929 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
20933 vbroadcasti128\t{%1, %0|%0, %1}
20934 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
20935 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
20936 [(set_attr "isa" "*,avx512dq,avx512vl")
20937 (set_attr "type" "ssemov")
20938 (set_attr "prefix_extra" "1")
20939 (set_attr "prefix" "vex,evex,evex")
20940 (set_attr "mode" "OI")])
20942 ;; Modes handled by AVX vec_dup patterns.
20943 (define_mode_iterator AVX_VEC_DUP_MODE
20944 [V8SI V8SF V4DI V4DF])
;; Scalar-broadcast suffix per mode.  Integer modes map to the FP
;; suffixes ss/sd — presumably because AVX1 only provides the FP
;; vbroadcastss/sd forms; confirm against the vec_dup<mode> insn users.
20945 (define_mode_attr vecdupssescalarmodesuffix
20946 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
20947 ;; Modes handled by AVX2 vec_dup patterns.
20948 (define_mode_iterator AVX2_VEC_DUP_MODE
20949 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; Scalar -> vector broadcast for the AVX2 integer dup modes.  Alt 2
;; ($r) broadcasts from a GPR and is disabled under AVX512VL (a
;; dedicated GPR-broadcast pattern exists there); it is also gated on
;; TARGET_INTER_UNIT_MOVES_TO_VEC via preferred_for_speed.
20951 (define_insn "*vec_dup<mode>"
20952 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
20953 (vec_duplicate:AVX2_VEC_DUP_MODE
20954 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
20957 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
20958 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
20960 [(set_attr "isa" "*,*,noavx512vl")
20961 (set_attr "type" "ssemov")
20962 (set_attr "prefix_extra" "1")
20963 (set_attr "prefix" "maybe_evex")
20964 (set_attr "mode" "<sseinsnmode>")
20965 (set (attr "preferred_for_speed")
20966 (cond [(eq_attr "alternative" "2")
20967 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
20969 (symbol_ref "true")))])
;; Scalar -> 256-bit vector broadcast for the AVX dup modes; covers
;; AVX2, AVX1-only (FP-suffix form), AVX512F (via zmm view %g0) and
;; non-AVX2 fallback alternatives.
20971 (define_insn "vec_dup<mode>"
20972 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
20973 (vec_duplicate:AVX_VEC_DUP_MODE
20974 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
20977 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
20978 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
20979 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
20980 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
20982 [(set_attr "type" "ssemov")
20983 (set_attr "prefix_extra" "1")
20984 (set_attr "prefix" "maybe_evex")
20985 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
20986 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
;; Splitter (header line not visible in this listing): after reload,
;; a GPR-source broadcast for AVX2 dup modes is rewritten as a scalar
;; insert into the low xmm element followed by vpbroadcast.  Skipped
;; when AVX512VL can broadcast from the GPR directly.
20989 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
20990 (vec_duplicate:AVX2_VEC_DUP_MODE
20991 (match_operand:<ssescalarmode> 1 "register_operand")))]
20993 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
20994 available, because then we can broadcast from GPRs directly.
20995 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
20996 for V*SI mode it requires just -mavx512vl. */
20997 && !(TARGET_AVX512VL
20998 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
20999 && reload_completed && GENERAL_REG_P (operands[1])"
21002 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
21003 CONST0_RTX (V4SImode),
21004 gen_lowpart (SImode, operands[1])));
21005 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
21006 gen_lowpart (<ssexmmmode>mode,
;; Splitter (header line not visible): on AVX1 without AVX2, a 256-bit
;; broadcast becomes a 128-bit dup followed by a vec_concat of the low
;; half with itself.
21012 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
21013 (vec_duplicate:AVX_VEC_DUP_MODE
21014 (match_operand:<ssescalarmode> 1 "register_operand")))]
21015 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
21016 [(set (match_dup 2)
21017 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
21019 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
21020 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
;; Broadcast a 128-bit half to a 256-bit vector.  Memory sources use
;; vbroadcastf/i128 (or the DQ/VL x-suffixed forms); register sources
;; use vinsert*$1 (duplicate low half) or vperm2f/i128 $0 (%t1 = xmm
;; view of the source).
21022 (define_insn "avx_vbroadcastf128_<mode>"
21023 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
21025 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
21029 vbroadcast<i128>\t{%1, %0|%0, %1}
21030 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
21031 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
21032 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
21033 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
21034 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
21035 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
21036 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
21037 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
21038 (set_attr "prefix_extra" "1")
21039 (set_attr "length_immediate" "0,1,1,0,1,0,1")
21040 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
21041 (set_attr "mode" "<sseinsnmode>")])
21043 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
21044 (define_mode_iterator VI4F_BRCST32x2
21045 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
21046 V16SF (V8SF "TARGET_AVX512VL")])
;; Mode of the 128-bit (64x2) source chunk for each destination mode.
21048 (define_mode_attr 64x2mode
21049 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
;; Mode of the 64-bit (32x2) source pair for each destination mode.
21051 (define_mode_attr 32x2mode
21052 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
21053 (V8SF "V2SF") (V4SI "V2SI")])
;; vbroadcasti/f32x2: duplicate a 64-bit pair of 32-bit elements across
;; the destination (%q1 = 64-bit view of the memory operand).
21055 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
21056 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
21057 (vec_duplicate:VI4F_BRCST32x2
21058 (vec_select:<32x2mode>
21059 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
21060 (parallel [(const_int 0) (const_int 1)]))))]
21062 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
21063 [(set_attr "type" "ssemov")
21064 (set_attr "prefix_extra" "1")
21065 (set_attr "prefix" "evex")
21066 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit chunk -> 256-bit broadcast; vshufi/f32x4 across the ymm view
;; (%t1) for register sources, vbroadcasti/f32x4 for memory.
21068 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
21069 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
21070 (vec_duplicate:VI4F_256
21071 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
21074 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
21075 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21076 [(set_attr "type" "ssemov")
21077 (set_attr "prefix_extra" "1")
21078 (set_attr "prefix" "evex")
21079 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit chunk -> 512-bit broadcast (V16FI), 32-bit elements:
;; vshufi/f32x4 $0x44 over the zmm view, or vbroadcasti/f32x8.
21081 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
21082 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
21083 (vec_duplicate:V16FI
21084 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
21087 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
21088 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21089 [(set_attr "type" "ssemov")
21090 (set_attr "prefix_extra" "1")
21091 (set_attr "prefix" "evex")
21092 (set_attr "mode" "<sseinsnmode>")])
21094 ;; For broadcast[i|f]64x2
21095 (define_mode_iterator VI8F_BRCST64x2
21096 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; 128-bit chunk of 64-bit elements broadcast to 256/512 bits:
;; vshufi/f64x2 $0 for register sources, vbroadcasti/f64x2 for memory.
21098 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
21099 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
21100 (vec_duplicate:VI8F_BRCST64x2
21101 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
21104 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
21105 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21106 [(set_attr "type" "ssemov")
21107 (set_attr "prefix_extra" "1")
21108 (set_attr "prefix" "evex")
21109 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcastmb2q: broadcast a k-mask register (QImode view) into every
;; 64-bit element of the destination (AVX512CD).
21111 (define_insn "avx512cd_maskb_vec_dup<mode>"
21112 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21113 (vec_duplicate:VI8_AVX512VL
21115 (match_operand:QI 1 "register_operand" "k"))))]
21117 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
21118 [(set_attr "type" "mskmov")
21119 (set_attr "prefix" "evex")
21120 (set_attr "mode" "XI")])
;; vpbroadcastmw2d: same, broadcasting an HImode mask view into every
;; 32-bit element.
21122 (define_insn "avx512cd_maskw_vec_dup<mode>"
21123 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
21124 (vec_duplicate:VI4_AVX512VL
21126 (match_operand:HI 1 "register_operand" "k"))))]
21128 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
21129 [(set_attr "type" "mskmov")
21130 (set_attr "prefix" "evex")
21131 (set_attr "mode" "XI")])
;; vpermilps/vpermilpd with a variable (vector) control in operand 2:
;; per-element in-lane permute of FP vectors.
21133 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
21134 [(set (match_operand:VF 0 "register_operand" "=v")
21136 [(match_operand:VF 1 "register_operand" "v")
21137 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
21139 "TARGET_AVX && <mask_mode512bit_condition>"
21140 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21141 [(set_attr "type" "sselog")
21142 (set_attr "prefix_extra" "1")
21143 (set_attr "btver2_decode" "vector")
21144 (set_attr "prefix" "<mask_prefix>")
21145 (set_attr "mode" "<sseinsnmode>")])
;; Modes supported by the vpermi2/vpermt2 two-source permute patterns.
;; 128/256-bit forms need AVX512VL; 16-bit elements need AVX512BW;
;; 8-bit elements need AVX512VBMI.
21147 (define_mode_iterator VPERMI2
21148 [V16SI V16SF V8DI V8DF
21149 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
21150 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
21151 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
21152 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
21153 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
21154 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
21155 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
21156 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
;; Integer-only subset of the above, used where the FP modes have a
;; separate pattern (see the *_vpermi2var mask insns below).
21158 (define_mode_iterator VPERMI2I
21160 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
21161 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
21162 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
21163 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
21164 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
21165 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
;; Masked vpermi2: the index vector (operand 2) doubles as the
;; merge-masking fallthrough, so the expander forces it into a register
;; and creates a same-mode lowpart view of it (operand 5).
21167 (define_expand "<avx512>_vpermi2var<mode>3_mask"
21168 [(set (match_operand:VPERMI2 0 "register_operand")
21171 [(match_operand:<sseintvecmode> 2 "register_operand")
21172 (match_operand:VPERMI2 1 "register_operand")
21173 (match_operand:VPERMI2 3 "nonimmediate_operand")]
21176 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
21179 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
21180 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
;; Integer-mode masked vpermi2 insn: destination is tied to the index
;; operand ("0"), masked-off elements keep the index value.
21183 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
21184 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
21185 (vec_merge:VPERMI2I
21187 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
21188 (match_operand:VPERMI2I 1 "register_operand" "v")
21189 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
21192 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21194 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21195 [(set_attr "type" "sselog")
21196 (set_attr "prefix" "evex")
21197 (set_attr "mode" "<sseinsnmode>")])
;; FP-mode masked vpermi2 insn: the integer index is viewed through a
;; subreg so the merge fallthrough has the FP vector mode.
21199 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
21200 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21201 (vec_merge:VF_AVX512VL
21202 (unspec:VF_AVX512VL
21203 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
21204 (match_operand:VF_AVX512VL 1 "register_operand" "v")
21205 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
21207 (subreg:VF_AVX512VL (match_dup 2) 0)
21208 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21210 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21211 [(set_attr "type" "sselog")
21212 (set_attr "prefix" "evex")
21213 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked vpermt2: expands to the _maskz_1 pattern with a zero
;; vector as the merge fallthrough.
21215 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
21216 [(match_operand:VPERMI2 0 "register_operand")
21217 (match_operand:<sseintvecmode> 1 "register_operand")
21218 (match_operand:VPERMI2 2 "register_operand")
21219 (match_operand:VPERMI2 3 "nonimmediate_operand")
21220 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21223 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
21224 operands[0], operands[1], operands[2], operands[3],
21225 CONST0_RTX (<MODE>mode), operands[4]));
;; Unmasked/zero-masked two-source permute.  The same operation is
;; emitted as vpermt2 (dest tied to data operand 2) or vpermi2 (dest
;; tied to index operand 1), whichever alternative reload picked.
21229 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
21230 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
21232 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
21233 (match_operand:VPERMI2 2 "register_operand" "0,v")
21234 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
21238 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
21239 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
21240 [(set_attr "type" "sselog")
21241 (set_attr "prefix" "evex")
21242 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2: masked-off elements keep the first data
;; source (operand 2, tied to the destination).
21244 (define_insn "<avx512>_vpermt2var<mode>3_mask"
21245 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
21248 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
21249 (match_operand:VPERMI2 2 "register_operand" "0")
21250 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
21253 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21255 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
21256 [(set_attr "type" "sselog")
21257 (set_attr "prefix" "evex")
21258 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expander.  When no zeroing bits (7/3) are set in the
;; immediate, the operation is rewritten as a vec_select over the
;; concatenation of the two sources, which the _nozero insn below can
;; match and possibly simplify further.
21260 (define_expand "avx_vperm2f128<mode>3"
21261 [(set (match_operand:AVX256MODE2P 0 "register_operand")
21262 (unspec:AVX256MODE2P
21263 [(match_operand:AVX256MODE2P 1 "register_operand")
21264 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
21265 (match_operand:SI 3 "const_0_to_255_operand")]
21266 UNSPEC_VPERMIL2F128))]
21269 int mask = INTVAL (operands[3]);
21270 if ((mask & 0x88) == 0)
21272 rtx perm[<ssescalarnum>], t1, t2;
21273 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
;; Low result lane: selected by imm bits 0-1 of the 4-lane concat.
21275 base = (mask & 3) * nelt2;
21276 for (i = 0; i < nelt2; ++i)
21277 perm[i] = GEN_INT (base + i);
;; High result lane: selected by imm bits 4-5.
21279 base = ((mask >> 4) & 3) * nelt2;
21280 for (i = 0; i < nelt2; ++i)
21281 perm[i + nelt2] = GEN_INT (base + i);
21283 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
21284 operands[1], operands[2]);
21285 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
21286 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
21287 t2 = gen_rtx_SET (operands[0], t2);
21293 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
21294 ;; means that in order to represent this properly in rtl we'd have to
21295 ;; nest *another* vec_concat with a zero operand and do the select from
21296 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec fallback covering the full imm8 range (including zeroing).
21297 (define_insn "*avx_vperm2f128<mode>_full"
21298 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
21299 (unspec:AVX256MODE2P
21300 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
21301 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
21302 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21303 UNSPEC_VPERMIL2F128))]
21305 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21306 [(set_attr "type" "sselog")
21307 (set_attr "prefix_extra" "1")
21308 (set_attr "length_immediate" "1")
21309 (set_attr "prefix" "vex")
21310 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form (no zeroing).  avx_vperm2f128_parallel recovers the
;; immediate (+1) from the parallel; two special masks degrade to a
;; cheaper vinsertf/i128 of the low xmm of operand 2.
21312 (define_insn "*avx_vperm2f128<mode>_nozero"
21313 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
21314 (vec_select:AVX256MODE2P
21315 (vec_concat:<ssedoublevecmode>
21316 (match_operand:AVX256MODE2P 1 "register_operand" "x")
21317 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
21318 (match_parallel 3 ""
21319 [(match_operand 4 "const_int_operand")])))]
21321 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
21323 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
21325 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
21327 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
21328 operands[3] = GEN_INT (mask);
21329 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
21331 [(set_attr "type" "sselog")
21332 (set_attr "prefix_extra" "1")
21333 (set_attr "length_immediate" "1")
21334 (set_attr "prefix" "vex")
21335 (set_attr "mode" "<sseinsnmode>")])
;; Match a vec_select that is really a byte-rotate and emit palignr /
;; vpalignr.  The element-index shift in operand 3 is converted to a
;; byte count by scaling with the element size.
21337 (define_insn "*ssse3_palignr<mode>_perm"
21338 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
21340 (match_operand:V_128 1 "register_operand" "0,x,v")
21341 (match_parallel 2 "palignr_operand"
21342 [(match_operand 3 "const_int_operand" "n,n,n")])))]
21345 operands[2] = (GEN_INT (INTVAL (operands[3])
21346 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
21348 switch (which_alternative)
21351 return "palignr\t{%2, %1, %0|%0, %1, %2}";
21354 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
21356 gcc_unreachable ();
21359 [(set_attr "isa" "noavx,avx,avx512bw")
21360 (set_attr "type" "sseishft")
21361 (set_attr "atom_unit" "sishuf")
21362 (set_attr "prefix_data16" "1,*,*")
21363 (set_attr "prefix_extra" "1")
21364 (set_attr "length_immediate" "1")
21365 (set_attr "prefix" "orig,vex,evex")])
;; Masked 128-bit insert into a 256-bit vector: dispatch on the lane
;; index (operand 3, 0 or 1) to the lo/hi masked vec_set patterns,
;; forwarding merge source (op 4) and mask (op 5).
21367 (define_expand "avx512vl_vinsert<mode>"
21368 [(match_operand:VI48F_256 0 "register_operand")
21369 (match_operand:VI48F_256 1 "register_operand")
21370 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
21371 (match_operand:SI 3 "const_0_to_1_operand")
21372 (match_operand:VI48F_256 4 "register_operand")
21373 (match_operand:<avx512fmaskmode> 5 "register_operand")]
21376 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21378 switch (INTVAL (operands[3]))
21381 insn = gen_vec_set_lo_<mode>_mask;
21384 insn = gen_vec_set_hi_<mode>_mask;
21387 gcc_unreachable ();
21390 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Unmasked AVX form: same lane-index dispatch, no merge/mask operands.
21395 (define_expand "avx_vinsertf128<mode>"
21396 [(match_operand:V_256 0 "register_operand")
21397 (match_operand:V_256 1 "register_operand")
21398 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
21399 (match_operand:SI 3 "const_0_to_1_operand")]
21402 rtx (*insn)(rtx, rtx, rtx);
21404 switch (INTVAL (operands[3]))
21407 insn = gen_vec_set_lo_<mode>;
21410 insn = gen_vec_set_hi_<mode>;
21413 gcc_unreachable ();
21416 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low 128 bits of a 256-bit vector of 64-bit elements:
;; vec_concat of the new half with the old high half (elements 2-3).
;; Picks the 64x2 / 32x4 / legacy 128 insert encoding by ISA.
21420 (define_insn "vec_set_lo_<mode><mask_name>"
21421 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
21422 (vec_concat:VI8F_256
21423 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
21424 (vec_select:<ssehalfvecmode>
21425 (match_operand:VI8F_256 1 "register_operand" "v")
21426 (parallel [(const_int 2) (const_int 3)]))))]
21427 "TARGET_AVX && <mask_avx512dq_condition>"
21429 if (TARGET_AVX512DQ)
21430 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21431 else if (TARGET_AVX512VL)
21432 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21434 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21436 [(set_attr "type" "sselog")
21437 (set_attr "prefix_extra" "1")
21438 (set_attr "length_immediate" "1")
21439 (set_attr "prefix" "vex")
21440 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high 128 bits: old low half (elements 0-1) concatenated
;; with the new half; insert with immediate 1.
21442 (define_insn "vec_set_hi_<mode><mask_name>"
21443 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
21444 (vec_concat:VI8F_256
21445 (vec_select:<ssehalfvecmode>
21446 (match_operand:VI8F_256 1 "register_operand" "v")
21447 (parallel [(const_int 0) (const_int 1)]))
21448 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21449 "TARGET_AVX && <mask_avx512dq_condition>"
21451 if (TARGET_AVX512DQ)
21452 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21453 else if (TARGET_AVX512VL)
21454 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21456 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21458 [(set_attr "type" "sselog")
21459 (set_attr "prefix_extra" "1")
21460 (set_attr "length_immediate" "1")
21461 (set_attr "prefix" "vex")
21462 (set_attr "mode" "<sseinsnmode>")])
;; Same pair for 32-bit-element 256-bit vectors (high half = elements
;; 4-7); only 32x4 or legacy 128 encodings apply.
21464 (define_insn "vec_set_lo_<mode><mask_name>"
21465 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21466 (vec_concat:VI4F_256
21467 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
21468 (vec_select:<ssehalfvecmode>
21469 (match_operand:VI4F_256 1 "register_operand" "v")
21470 (parallel [(const_int 4) (const_int 5)
21471 (const_int 6) (const_int 7)]))))]
21474 if (TARGET_AVX512VL)
21475 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21477 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21479 [(set_attr "type" "sselog")
21480 (set_attr "prefix_extra" "1")
21481 (set_attr "length_immediate" "1")
21482 (set_attr "prefix" "vex")
21483 (set_attr "mode" "<sseinsnmode>")])
21485 (define_insn "vec_set_hi_<mode><mask_name>"
21486 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21487 (vec_concat:VI4F_256
21488 (vec_select:<ssehalfvecmode>
21489 (match_operand:VI4F_256 1 "register_operand" "v")
21490 (parallel [(const_int 0) (const_int 1)
21491 (const_int 2) (const_int 3)]))
21492 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21495 if (TARGET_AVX512VL)
21496 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21498 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21500 [(set_attr "type" "sselog")
21501 (set_attr "prefix_extra" "1")
21502 (set_attr "length_immediate" "1")
21503 (set_attr "prefix" "vex")
21504 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit half insert for V16HI, spelled with explicit element
;; parallels.  Alt 0: vinserti128/vinsertf128 (%~ picks i/f form);
;; alt 1: EVEX vinserti32x4.
21506 (define_insn "vec_set_lo_v16hi"
21507 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21509 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
21511 (match_operand:V16HI 1 "register_operand" "x,v")
21512 (parallel [(const_int 8) (const_int 9)
21513 (const_int 10) (const_int 11)
21514 (const_int 12) (const_int 13)
21515 (const_int 14) (const_int 15)]))))]
21518 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21519 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21520 [(set_attr "type" "sselog")
21521 (set_attr "prefix_extra" "1")
21522 (set_attr "length_immediate" "1")
21523 (set_attr "prefix" "vex,evex")
21524 (set_attr "mode" "OI")])
;; High-half insert for V16HI (keep elements 0-7, insert at lane 1).
21526 (define_insn "vec_set_hi_v16hi"
21527 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21530 (match_operand:V16HI 1 "register_operand" "x,v")
21531 (parallel [(const_int 0) (const_int 1)
21532 (const_int 2) (const_int 3)
21533 (const_int 4) (const_int 5)
21534 (const_int 6) (const_int 7)]))
21535 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
21538 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21539 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21540 [(set_attr "type" "sselog")
21541 (set_attr "prefix_extra" "1")
21542 (set_attr "length_immediate" "1")
21543 (set_attr "prefix" "vex,evex")
21544 (set_attr "mode" "OI")])
;; Low-half insert for V32QI (keep elements 16-31).
21546 (define_insn "vec_set_lo_v32qi"
21547 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21549 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
21551 (match_operand:V32QI 1 "register_operand" "x,v")
21552 (parallel [(const_int 16) (const_int 17)
21553 (const_int 18) (const_int 19)
21554 (const_int 20) (const_int 21)
21555 (const_int 22) (const_int 23)
21556 (const_int 24) (const_int 25)
21557 (const_int 26) (const_int 27)
21558 (const_int 28) (const_int 29)
21559 (const_int 30) (const_int 31)]))))]
21562 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21563 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21564 [(set_attr "type" "sselog")
21565 (set_attr "prefix_extra" "1")
21566 (set_attr "length_immediate" "1")
21567 (set_attr "prefix" "vex,evex")
21568 (set_attr "mode" "OI")])
;; High-half insert for V32QI (keep elements 0-15, insert at lane 1).
21570 (define_insn "vec_set_hi_v32qi"
21571 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21574 (match_operand:V32QI 1 "register_operand" "x,v")
21575 (parallel [(const_int 0) (const_int 1)
21576 (const_int 2) (const_int 3)
21577 (const_int 4) (const_int 5)
21578 (const_int 6) (const_int 7)
21579 (const_int 8) (const_int 9)
21580 (const_int 10) (const_int 11)
21581 (const_int 12) (const_int 13)
21582 (const_int 14) (const_int 15)]))
21583 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
21586 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21587 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21588 [(set_attr "type" "sselog")
21589 (set_attr "prefix_extra" "1")
21590 (set_attr "length_immediate" "1")
21591 (set_attr "prefix" "vex,evex")
21592 (set_attr "mode" "OI")])
;; AVX/AVX2 conditional vector load: v<sseintprefix>maskmov<ssemodesuffix>
;; (vmaskmovps/pd, vpmaskmovd/q).  Operand 2 is the per-element mask
;; vector, operand 1 the memory source.
;; NOTE(review): masking is presumably by the element sign bit as defined
;; by the vmaskmov instructions — confirm against the SDM.
21594 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
21595 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
21597 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
21598 (match_operand:V48_AVX2 1 "memory_operand" "m")]
21601 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
21602 [(set_attr "type" "sselog1")
21603 (set_attr "prefix_extra" "1")
21604 (set_attr "prefix" "vex")
21605 (set_attr "btver2_decode" "vector")
21606 (set_attr "mode" "<sseinsnmode>")])

;; AVX/AVX2 conditional vector store: operand 1 is the per-element mask,
;; operand 2 the register source, operand 0 the memory destination.
21608 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
21609 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
21611 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
21612 (match_operand:V48_AVX2 2 "register_operand" "x")
21616 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21617 [(set_attr "type" "sselog1")
21618 (set_attr "prefix_extra" "1")
21619 (set_attr "prefix" "vex")
21620 (set_attr "btver2_decode" "vector")
21621 (set_attr "mode" "<sseinsnmode>")])
;; Standard maskload/maskstore expanders used by the vectorizer.
;; The V48_AVX2 forms take a same-width integer vector mask (AVX vmaskmov
;; style); the AVX512VL/VI12 forms take an <avx512fmaskmode> mask register
;; and expand to a vec_merge of the memory operand.

;; maskload, AVX/AVX2: vector-mask form.
21623 (define_expand "maskload<mode><sseintvecmodelower>"
21624 [(set (match_operand:V48_AVX2 0 "register_operand")
21626 [(match_operand:<sseintvecmode> 2 "register_operand")
21627 (match_operand:V48_AVX2 1 "memory_operand")]

;; maskload, AVX512VL: mask-register form for 4/8-byte element modes.
21631 (define_expand "maskload<mode><avx512fmaskmodelower>"
21632 [(set (match_operand:V48_AVX512VL 0 "register_operand")
21633 (vec_merge:V48_AVX512VL
21634 (match_operand:V48_AVX512VL 1 "memory_operand")
21636 (match_operand:<avx512fmaskmode> 2 "register_operand")))]

;; maskload, AVX512VL: mask-register form for 1/2-byte element modes.
21639 (define_expand "maskload<mode><avx512fmaskmodelower>"
21640 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
21641 (vec_merge:VI12_AVX512VL
21642 (match_operand:VI12_AVX512VL 1 "memory_operand")
21644 (match_operand:<avx512fmaskmode> 2 "register_operand")))]

;; maskstore, AVX/AVX2: vector-mask form.
21647 (define_expand "maskstore<mode><sseintvecmodelower>"
21648 [(set (match_operand:V48_AVX2 0 "memory_operand")
21650 [(match_operand:<sseintvecmode> 2 "register_operand")
21651 (match_operand:V48_AVX2 1 "register_operand")

;; maskstore, AVX512VL: mask-register form for 4/8-byte element modes.
21656 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21657 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
21658 (vec_merge:V48_AVX512VL
21659 (match_operand:V48_AVX512VL 1 "register_operand")
21661 (match_operand:<avx512fmaskmode> 2 "register_operand")))]

;; maskstore, AVX512VL: mask-register form for 1/2-byte element modes.
21664 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21665 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
21666 (vec_merge:VI12_AVX512VL
21667 (match_operand:VI12_AVX512VL 1 "register_operand")
21669 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Vector conditional branch: compare operands 1 and 2, set the flags
;; register, and branch to label operand 3.  Operand 0 is restricted to
;; the bt_comparison_operator predicate; the real work is delegated to
;; ix86_expand_branch.
21672 (define_expand "cbranch<mode>4"
21673 [(set (reg:CC FLAGS_REG)
21674 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
21675 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
21676 (set (pc) (if_then_else
21677 (match_operator 0 "bt_comparison_operator"
21678 [(reg:CC FLAGS_REG) (const_int 0)])
21679 (label_ref (match_operand 3))
21683 ix86_expand_branch (GET_CODE (operands[0]),
21684 operands[1], operands[2], operands[3]);
;; Cast a 128-bit vector to 256 bits with undefined upper half
;; (UNSPEC_CAST supplies the "don't care" high part).  After reload this
;; degenerates into a plain move: the destination is narrowed to the
;; half-width mode when it is a register, or the source is widened via a
;; lowpart subreg otherwise.
21689 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
21690 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
21691 (vec_concat:AVX256MODE2P
21692 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
21693 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
21694 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21696 "&& reload_completed"
21697 [(set (match_dup 0) (match_dup 1))]
21699 if (REG_P (operands[0]))
21700 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
21702 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21703 <ssehalfvecmode>mode);
21706 ;; Modes handled by vec_init expanders.
;; Each 256/512-bit entry is gated on the ISA that makes the mode
;; available (AVX for 256-bit, AVX512F for 512-bit).
21707 (define_mode_iterator VEC_INIT_MODE
21708 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21709 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21710 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21711 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
21712 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21713 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
21714 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])

21716 ;; Likewise, but for initialization from half sized vectors.
21717 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
21718 (define_mode_iterator VEC_INIT_HALF_MODE
21719 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21720 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21721 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21722 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
21723 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21724 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
21725 (V4TI "TARGET_AVX512F")])
;; Standard vec_init expanders: build a vector from scalar elements
;; (first form) or from two half-width vectors (second form); both
;; delegate to ix86_expand_vector_init.
21727 (define_expand "vec_init<mode><ssescalarmodelower>"
21728 [(match_operand:VEC_INIT_MODE 0 "register_operand")
21732 ix86_expand_vector_init (false, operands[0], operands[1]);

21736 (define_expand "vec_init<mode><ssehalfvecmodelower>"
21737 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
21741 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Variable per-element shifts: each lane of operand 1 is shifted by the
;; corresponding lane of operand 2.

;; Arithmetic right shift, 4/8-byte elements (vpsravd/vpsravq).
21745 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21746 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
21747 (ashiftrt:VI48_AVX512F_AVX512VL
21748 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
21749 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
21750 "TARGET_AVX2 && <mask_mode512bit_condition>"
21751 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21752 [(set_attr "type" "sseishft")
21753 (set_attr "prefix" "maybe_evex")
21754 (set_attr "mode" "<sseinsnmode>")])

;; Arithmetic right shift, word elements (vpsravw).
21756 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21757 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21758 (ashiftrt:VI2_AVX512VL
21759 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
21760 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
21762 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21763 [(set_attr "type" "sseishft")
21764 (set_attr "prefix" "maybe_evex")
21765 (set_attr "mode" "<sseinsnmode>")])

;; Left / logical right shift via the any_lshift code iterator,
;; 4/8-byte elements (vpsllv*, vpsrlv*).
21767 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
21768 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
21769 (any_lshift:VI48_AVX512F
21770 (match_operand:VI48_AVX512F 1 "register_operand" "v")
21771 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
21772 "TARGET_AVX2 && <mask_mode512bit_condition>"
21773 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21774 [(set_attr "type" "sseishft")
21775 (set_attr "prefix" "maybe_evex")
21776 (set_attr "mode" "<sseinsnmode>")])

;; Left / logical right shift, word elements.
21778 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
21779 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21780 (any_lshift:VI2_AVX512VL
21781 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
21782 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
21784 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21785 [(set_attr "type" "sseishft")
21786 (set_attr "prefix" "maybe_evex")
21787 (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two half-width vectors into one 256/512-bit vector.
;; Alternatives 0-1: operand 2 is a register/memory half inserted into the
;; high lane with the appropriate vinsert* form (32x8/64x4 for 512-bit,
;; 64x2/32x4 for 256-bit, chosen by element size and AVX512DQ).
;; Alternatives 2-3: operand 2 must be const0; only the low half is moved,
;; choosing aligned vs. unaligned and 32- vs. 64-bit-element mov variants
;; from the operand alignment, element size and which_alternative.
;; NOTE(review): the zero alternatives rely on the VEX/EVEX move zeroing
;; the destination's upper bits — confirm against the SDM.
21789 (define_insn "avx_vec_concat<mode>"
21790 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
21791 (vec_concat:V_256_512
21792 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
21793 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
21795 && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
21796 || !MEM_P (operands[1]))"
21798 switch (which_alternative)
21801 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21803 if (<MODE_SIZE> == 64)
21805 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
21806 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21808 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21812 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21813 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21815 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21819 switch (get_attr_mode (insn))
21822 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21823 return "vmovups\t{%1, %t0|%t0, %1}";
21825 return "vmovaps\t{%1, %t0|%t0, %1}";
21827 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21828 return "vmovupd\t{%1, %t0|%t0, %1}";
21830 return "vmovapd\t{%1, %t0|%t0, %1}";
21832 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21833 return "vmovups\t{%1, %x0|%x0, %1}";
21835 return "vmovaps\t{%1, %x0|%x0, %1}";
21837 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21838 return "vmovupd\t{%1, %x0|%x0, %1}";
21840 return "vmovapd\t{%1, %x0|%x0, %1}";
21842 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21844 if (which_alternative == 2)
21845 return "vmovdqu\t{%1, %t0|%t0, %1}";
21846 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21847 return "vmovdqu64\t{%1, %t0|%t0, %1}";
21849 return "vmovdqu32\t{%1, %t0|%t0, %1}";
21853 if (which_alternative == 2)
21854 return "vmovdqa\t{%1, %t0|%t0, %1}";
21855 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21856 return "vmovdqa64\t{%1, %t0|%t0, %1}";
21858 return "vmovdqa32\t{%1, %t0|%t0, %1}";
21861 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21863 if (which_alternative == 2)
21864 return "vmovdqu\t{%1, %x0|%x0, %1}";
21865 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21866 return "vmovdqu64\t{%1, %x0|%x0, %1}";
21868 return "vmovdqu32\t{%1, %x0|%x0, %1}";
21872 if (which_alternative == 2)
21873 return "vmovdqa\t{%1, %x0|%x0, %1}";
21874 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21875 return "vmovdqa64\t{%1, %x0|%x0, %1}";
21877 return "vmovdqa32\t{%1, %x0|%x0, %1}";
21880 gcc_unreachable ();
21883 gcc_unreachable ();
21886 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
21887 (set_attr "prefix_extra" "1,1,*,*")
21888 (set_attr "length_immediate" "1,1,*,*")
21889 (set_attr "prefix" "maybe_evex")
21890 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision -> single-precision conversions (F16C / AVX512VL).

;; Convert the low four half floats of a V8HI register to V4SF;
;; the vec_select keeps elements 0-3 of the V8SF unspec result.
21892 (define_insn "vcvtph2ps<mask_name>"
21893 [(set (match_operand:V4SF 0 "register_operand" "=v")
21895 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
21897 (parallel [(const_int 0) (const_int 1)
21898 (const_int 2) (const_int 3)])))]
21899 "TARGET_F16C || TARGET_AVX512VL"
21900 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21901 [(set_attr "type" "ssecvt")
21902 (set_attr "prefix" "maybe_evex")
21903 (set_attr "mode" "V4SF")])

;; Memory-source form: four half floats loaded from memory -> V4SF.
21905 (define_insn "*vcvtph2ps_load<mask_name>"
21906 [(set (match_operand:V4SF 0 "register_operand" "=v")
21907 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
21908 UNSPEC_VCVTPH2PS))]
21909 "TARGET_F16C || TARGET_AVX512VL"
21910 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21911 [(set_attr "type" "ssecvt")
21912 (set_attr "prefix" "vex")
21913 (set_attr "mode" "V8SF")])

;; 256-bit form: eight half floats -> V8SF.
21915 (define_insn "vcvtph2ps256<mask_name>"
21916 [(set (match_operand:V8SF 0 "register_operand" "=v")
21917 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
21918 UNSPEC_VCVTPH2PS))]
21919 "TARGET_F16C || TARGET_AVX512VL"
21920 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21921 [(set_attr "type" "ssecvt")
21922 (set_attr "prefix" "vex")
21923 (set_attr "btver2_decode" "double")
21924 (set_attr "mode" "V8SF")])

;; 512-bit AVX512F form, with optional masking and SAE (suppress all
;; exceptions) via the <round_saeonly_*> substitutions.
21926 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
21927 [(set (match_operand:V16SF 0 "register_operand" "=v")
21929 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
21930 UNSPEC_VCVTPH2PS))]
21932 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
21933 [(set_attr "type" "ssecvt")
21934 (set_attr "prefix" "evex")
21935 (set_attr "mode" "V16SF")])
;; vcvtps2ph expanders: convert V4SF (operand 1) to four half floats with
;; rounding-control immediate operand 2, widening the result to V8HI by
;; concatenating a zero V4HI upper half.  The _mask variant additionally
;; merges with operand 3 under mask operand 4.
21937 (define_expand "vcvtps2ph_mask"
21938 [(set (match_operand:V8HI 0 "register_operand")
21941 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
21942 (match_operand:SI 2 "const_0_to_255_operand")]
21945 (match_operand:V8HI 3 "nonimm_or_0_operand")
21946 (match_operand:QI 4 "register_operand")))]
21948 "operands[5] = CONST0_RTX (V4HImode);")

21950 (define_expand "vcvtps2ph"
21951 [(set (match_operand:V8HI 0 "register_operand")
21953 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
21954 (match_operand:SI 2 "const_0_to_255_operand")]
21958 "operands[3] = CONST0_RTX (V4HImode);")
;; Single-precision -> half-precision conversions.  Operand 2 is always
;; the rounding-control immediate (0-255); the *_store / merge-masked
;; variants write the packed halves directly to memory.

;; Register form: V4SF -> four halves in the low half of a V8HI register
;; (upper half must be const0).
21960 (define_insn "*vcvtps2ph<mask_name>"
21961 [(set (match_operand:V8HI 0 "register_operand" "=v")
21963 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
21964 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21966 (match_operand:V4HI 3 "const0_operand")))]
21967 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
21968 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
21969 [(set_attr "type" "ssecvt")
21970 (set_attr "prefix" "maybe_evex")
21971 (set_attr "mode" "V4SF")])

;; Store form: V4SF -> four halves written to memory.
21973 (define_insn "*vcvtps2ph_store<merge_mask_name>"
21974 [(set (match_operand:V4HI 0 "memory_operand" "=m")
21975 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
21976 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21977 UNSPEC_VCVTPS2PH))]
21978 "TARGET_F16C || TARGET_AVX512VL"
21979 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
21980 [(set_attr "type" "ssecvt")
21981 (set_attr "prefix" "maybe_evex")
21982 (set_attr "mode" "V4SF")])

;; 256-bit register form: V8SF -> V8HI.
21984 (define_insn "vcvtps2ph256<mask_name>"
21985 [(set (match_operand:V8HI 0 "register_operand" "=v")
21986 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
21987 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21988 UNSPEC_VCVTPS2PH))]
21989 "TARGET_F16C || TARGET_AVX512VL"
21990 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21991 [(set_attr "type" "ssecvt")
21992 (set_attr "prefix" "maybe_evex")
21993 (set_attr "btver2_decode" "vector")
21994 (set_attr "mode" "V8SF")])

;; 256-bit store form (merge-masked).
21996 (define_insn "*vcvtps2ph256<merge_mask_name>"
21997 [(set (match_operand:V8HI 0 "memory_operand" "=m")
21998 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
21999 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22000 UNSPEC_VCVTPS2PH))]
22001 "TARGET_F16C || TARGET_AVX512VL"
22002 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22003 [(set_attr "type" "ssecvt")
22004 (set_attr "prefix" "maybe_evex")
22005 (set_attr "btver2_decode" "vector")
22006 (set_attr "mode" "V8SF")])

;; 512-bit AVX512F register form: V16SF -> V16HI.
22008 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
22009 [(set (match_operand:V16HI 0 "register_operand" "=v")
22011 [(match_operand:V16SF 1 "register_operand" "v")
22012 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22013 UNSPEC_VCVTPS2PH))]
22015 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22016 [(set_attr "type" "ssecvt")
22017 (set_attr "prefix" "evex")
22018 (set_attr "mode" "V16SF")])

;; 512-bit store form (merge-masked).
22020 (define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
22021 [(set (match_operand:V16HI 0 "memory_operand" "=m")
22023 [(match_operand:V16SF 1 "register_operand" "v")
22024 (match_operand:SI 2 "const_0_to_255_operand" "N")]
22025 UNSPEC_VCVTPS2PH))]
22027 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
22028 [(set_attr "type" "ssecvt")
22029 (set_attr "prefix" "evex")
22030 (set_attr "mode" "V16SF")])
22032 ;; For gather* insn patterns
22033 (define_mode_iterator VEC_GATHER_MODE
22034 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])

;; Map a gather data mode to the SImode index vector used by the
;; d-suffixed (dword-index) gather forms.
22035 (define_mode_attr VEC_GATHER_IDXSI
22036 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
22037 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
22038 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
22039 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])

;; Map a gather data mode to the DImode index vector used by the
;; q-suffixed (qword-index) gather forms.
22041 (define_mode_attr VEC_GATHER_IDXDI
22042 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
22043 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
22044 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
22045 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])

;; Source/destination mode of the qword-index forms: for 4-byte element
;; data modes only half as many elements can be gathered, so the mode is
;; halved (e.g. V8SI -> V4SI).
22047 (define_mode_attr VEC_GATHER_SRCDI
22048 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
22049 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
22050 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
22051 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; AVX2 gather with SImode indices (v*gatherd*).  The expander rewraps
;; the base/index/scale operands in an UNSPEC_VSIBADDR so the insn
;; patterns can match the VSIB addressing form; operand 4 is the mask
;; vector and is clobbered (scratch) by the instruction.
22053 (define_expand "avx2_gathersi<mode>"
22054 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
22055 (unspec:VEC_GATHER_MODE
22056 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
22057 (mem:<ssescalarmode>
22059 [(match_operand 2 "vsib_address_operand")
22060 (match_operand:<VEC_GATHER_IDXSI>
22061 3 "register_operand")
22062 (match_operand:SI 5 "const1248_operand ")]))
22063 (mem:BLK (scratch))
22064 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
22066 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
22070 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22071 operands[5]), UNSPEC_VSIBADDR);

;; Matched form; the mask (operand 5, tied to clobbered scratch 1) is
;; destroyed by the instruction.
22074 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
22075 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22076 (unspec:VEC_GATHER_MODE
22077 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
22078 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22080 [(match_operand:P 3 "vsib_address_operand" "Tv")
22081 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
22082 (match_operand:SI 6 "const1248_operand" "n")]
22084 (mem:BLK (scratch))
22085 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
22087 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22089 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
22090 [(set_attr "type" "ssemov")
22091 (set_attr "prefix" "vex")
22092 (set_attr "mode" "<sseinsnmode>")])

;; Variant without a separate source operand (destination starts
;; undefined rather than merged from a previous value).
22094 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
22095 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22096 (unspec:VEC_GATHER_MODE
22098 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22100 [(match_operand:P 2 "vsib_address_operand" "Tv")
22101 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
22102 (match_operand:SI 5 "const1248_operand" "n")]
22104 (mem:BLK (scratch))
22105 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
22107 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22109 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
22110 [(set_attr "type" "ssemov")
22111 (set_attr "prefix" "vex")
22112 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 gather with DImode indices (v*gatherq*).  Source/destination and
;; mask use <VEC_GATHER_SRCDI> (half width for 4-byte element modes).
;; As with the SImode forms, the expander packages base/index/scale into
;; UNSPEC_VSIBADDR.
22114 (define_expand "avx2_gatherdi<mode>"
22115 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
22116 (unspec:VEC_GATHER_MODE
22117 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
22118 (mem:<ssescalarmode>
22120 [(match_operand 2 "vsib_address_operand")
22121 (match_operand:<VEC_GATHER_IDXDI>
22122 3 "register_operand")
22123 (match_operand:SI 5 "const1248_operand ")]))
22124 (mem:BLK (scratch))
22125 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
22127 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
22131 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22132 operands[5]), UNSPEC_VSIBADDR);

;; Matched form; result lands in the <VEC_GATHER_SRCDI> part (%2).
22135 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
22136 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22137 (unspec:VEC_GATHER_MODE
22138 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
22139 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22141 [(match_operand:P 3 "vsib_address_operand" "Tv")
22142 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
22143 (match_operand:SI 6 "const1248_operand" "n")]
22145 (mem:BLK (scratch))
22146 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
22148 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22150 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
22151 [(set_attr "type" "ssemov")
22152 (set_attr "prefix" "vex")
22153 (set_attr "mode" "<sseinsnmode>")])

;; Variant without a separate source operand; when the full mode is wider
;; than the SRCDI mode, the destination is printed with the %x (xmm)
;; modifier.
22155 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
22156 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
22157 (unspec:VEC_GATHER_MODE
22159 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22161 [(match_operand:P 2 "vsib_address_operand" "Tv")
22162 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
22163 (match_operand:SI 5 "const1248_operand" "n")]
22165 (mem:BLK (scratch))
22166 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
22168 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
22171 if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
22172 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
22173 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
22175 [(set_attr "type" "ssemov")
22176 (set_attr "prefix" "vex")
22177 (set_attr "mode" "<sseinsnmode>")])

;; 256-bit qword-index gather whose useful result is only the low half;
;; the vec_select extracts elements 0-3 into the SRCDI destination.
22179 (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
22180 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
22181 (vec_select:<VEC_GATHER_SRCDI>
22183 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
22184 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
22186 [(match_operand:P 3 "vsib_address_operand" "Tv")
22187 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
22188 (match_operand:SI 6 "const1248_operand" "n")]
22190 (mem:BLK (scratch))
22191 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
22193 (parallel [(const_int 0) (const_int 1)
22194 (const_int 2) (const_int 3)])))
22195 (clobber (match_scratch:VI4F_256 1 "=&x"))]
22197 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
22198 [(set_attr "type" "ssemov")
22199 (set_attr "prefix" "vex")
22200 (set_attr "mode" "<sseinsnmode>")])

;; As _3 but without a separate source operand.
22202 (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
22203 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
22204 (vec_select:<VEC_GATHER_SRCDI>
22207 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22209 [(match_operand:P 2 "vsib_address_operand" "Tv")
22210 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
22211 (match_operand:SI 5 "const1248_operand" "n")]
22213 (mem:BLK (scratch))
22214 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
22216 (parallel [(const_int 0) (const_int 1)
22217 (const_int 2) (const_int 3)])))
22218 (clobber (match_scratch:VI4F_256 1 "=&x"))]
22220 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
22221 [(set_attr "type" "ssemov")
22222 (set_attr "prefix" "vex")
22223 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 gather with SImode indices: like the AVX2 forms but the mask is
;; an <avx512fmaskmode> mask register (constraint Yk) which the
;; instruction clobbers.
22225 (define_expand "<avx512>_gathersi<mode>"
22226 [(parallel [(set (match_operand:VI48F 0 "register_operand")
22228 [(match_operand:VI48F 1 "register_operand")
22229 (match_operand:<avx512fmaskmode> 4 "register_operand")
22230 (mem:<ssescalarmode>
22232 [(match_operand 2 "vsib_address_operand")
22233 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
22234 (match_operand:SI 5 "const1248_operand")]))]
22236 (clobber (match_scratch:<avx512fmaskmode> 7))])]
22240 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22241 operands[5]), UNSPEC_VSIBADDR);

22244 (define_insn "*avx512f_gathersi<VI48F:mode>"
22245 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22247 [(match_operand:VI48F 1 "register_operand" "0")
22248 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
22249 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22251 [(match_operand:P 4 "vsib_address_operand" "Tv")
22252 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
22253 (match_operand:SI 5 "const1248_operand" "n")]
22254 UNSPEC_VSIBADDR)])]
22256 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
22258 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
22259 ;; gas changed what it requires incompatibly.
22260 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
22261 [(set_attr "type" "ssemov")
22262 (set_attr "prefix" "evex")
22263 (set_attr "mode" "<sseinsnmode>")])

;; Variant without a separate source operand.
22265 (define_insn "*avx512f_gathersi<VI48F:mode>_2"
22266 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22269 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
22270 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
22272 [(match_operand:P 3 "vsib_address_operand" "Tv")
22273 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
22274 (match_operand:SI 4 "const1248_operand" "n")]
22275 UNSPEC_VSIBADDR)])]
22277 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
22279 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22280 ;; gas changed what it requires incompatibly.
22281 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
22282 [(set_attr "type" "ssemov")
22283 (set_attr "prefix" "evex")
22284 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 gather with DImode indices.  Mask is a QImode mask register;
;; source/destination use <VEC_GATHER_SRCDI>.
22287 (define_expand "<avx512>_gatherdi<mode>"
22288 [(parallel [(set (match_operand:VI48F 0 "register_operand")
22290 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
22291 (match_operand:QI 4 "register_operand")
22292 (mem:<ssescalarmode>
22294 [(match_operand 2 "vsib_address_operand")
22295 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
22296 (match_operand:SI 5 "const1248_operand")]))]
22298 (clobber (match_scratch:QI 7))])]
22302 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
22303 operands[5]), UNSPEC_VSIBADDR);

22306 (define_insn "*avx512f_gatherdi<VI48F:mode>"
22307 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22309 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
22310 (match_operand:QI 7 "register_operand" "2")
22311 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
22313 [(match_operand:P 4 "vsib_address_operand" "Tv")
22314 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
22315 (match_operand:SI 5 "const1248_operand" "n")]
22316 UNSPEC_VSIBADDR)])]
22318 (clobber (match_scratch:QI 2 "=&Yk"))]
22320 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
22321 ;; gas changed what it requires incompatibly.
22322 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
22323 [(set_attr "type" "ssemov")
22324 (set_attr "prefix" "evex")
22325 (set_attr "mode" "<sseinsnmode>")])

;; Variant without a separate source operand; the destination is printed
;; with the %x/%t modifier when only the low 128/256 bits are produced
;; (SRCDI narrower than the full mode).
22327 (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
22328 [(set (match_operand:VI48F 0 "register_operand" "=&v")
22331 (match_operand:QI 6 "register_operand" "1")
22332 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
22334 [(match_operand:P 3 "vsib_address_operand" "Tv")
22335 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
22336 (match_operand:SI 4 "const1248_operand" "n")]
22337 UNSPEC_VSIBADDR)])]
22339 (clobber (match_scratch:QI 1 "=&Yk"))]
22342 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22343 gas changed what it requires incompatibly. */
22344 if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
22346 if (<VI48F:MODE_SIZE> != 64)
22347 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
22349 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
22351 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
22353 [(set_attr "type" "ssemov")
22354 (set_attr "prefix" "evex")
22355 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 scatter with SImode indices: store elements of operand 3 to the
;; VSIB-addressed memory locations, under mask operand 1 (clobbered by
;; the instruction).
22357 (define_expand "<avx512>_scattersi<mode>"
22358 [(parallel [(set (mem:VI48F
22360 [(match_operand 0 "vsib_address_operand")
22361 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
22362 (match_operand:SI 4 "const1248_operand")]))
22364 [(match_operand:<avx512fmaskmode> 1 "register_operand")
22365 (match_operand:VI48F 3 "register_operand")]
22367 (clobber (match_scratch:<avx512fmaskmode> 6))])]
22371 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
22372 operands[4]), UNSPEC_VSIBADDR);

22375 (define_insn "*avx512f_scattersi<VI48F:mode>"
22376 [(set (match_operator:VI48F 5 "vsib_mem_operator"
22378 [(match_operand:P 0 "vsib_address_operand" "Tv")
22379 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
22380 (match_operand:SI 4 "const1248_operand" "n")]
22383 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
22384 (match_operand:VI48F 3 "register_operand" "v")]
22386 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
22388 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22389 ;; gas changed what it requires incompatibly.
22390 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
22391 [(set_attr "type" "ssemov")
22392 (set_attr "prefix" "evex")
22393 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 scatter with DImode indices; mask is a QImode mask register and
;; the source uses <VEC_GATHER_SRCDI>.
22395 (define_expand "<avx512>_scatterdi<mode>"
22396 [(parallel [(set (mem:VI48F
22398 [(match_operand 0 "vsib_address_operand")
22399 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
22400 (match_operand:SI 4 "const1248_operand")]))
22402 [(match_operand:QI 1 "register_operand")
22403 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
22405 (clobber (match_scratch:QI 6))])]
22409 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
22410 operands[4]), UNSPEC_VSIBADDR);

22413 (define_insn "*avx512f_scatterdi<VI48F:mode>"
22414 [(set (match_operator:VI48F 5 "vsib_mem_operator"
22416 [(match_operand:P 0 "vsib_address_operand" "Tv")
22417 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
22418 (match_operand:SI 4 "const1248_operand" "n")]
22421 [(match_operand:QI 6 "register_operand" "1")
22422 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
22424 (clobber (match_scratch:QI 1 "=&Yk"))]
22426 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
22427 ;; gas changed what it requires incompatibly.
22428 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
22429 [(set_attr "type" "ssemov")
22430 (set_attr "prefix" "evex")
22431 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 compress: operand 1 is the source, operand 3 the mask register,
;; operand 2 the merge value (tied register or const0 for zero-masking;
;; %N2 selects the printed masking suffix accordingly).

;; 4/8-byte element modes (AVX512F/AVX512VL).
22433 (define_insn "<avx512>_compress<mode>_mask"
22434 [(set (match_operand:VI48F 0 "register_operand" "=v")
22436 [(match_operand:VI48F 1 "register_operand" "v")
22437 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
22438 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
22441 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22442 [(set_attr "type" "ssemov")
22443 (set_attr "prefix" "evex")
22444 (set_attr "mode" "<sseinsnmode>")])

;; 1/2-byte element modes (AVX512VBMI2).
22446 (define_insn "compress<mode>_mask"
22447 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
22448 (unspec:VI12_AVX512VLBW
22449 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
22450 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
22451 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
22453 "TARGET_AVX512VBMI2"
22454 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22455 [(set_attr "type" "ssemov")
22456 (set_attr "prefix" "evex")
22457 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 compress-to-memory (UNSPEC_COMPRESS_STORE): masked store of
;; operand 1 to memory operand 0 under mask operand 2.

;; 4/8-byte element modes.
22459 (define_insn "<avx512>_compressstore<mode>_mask"
22460 [(set (match_operand:VI48F 0 "memory_operand" "=m")
22462 [(match_operand:VI48F 1 "register_operand" "x")
22464 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
22465 UNSPEC_COMPRESS_STORE))]
22467 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
22468 [(set_attr "type" "ssemov")
22469 (set_attr "prefix" "evex")
22470 (set_attr "memory" "store")
22471 (set_attr "mode" "<sseinsnmode>")])

;; 1/2-byte element modes (AVX512VBMI2).
22473 (define_insn "compressstore<mode>_mask"
22474 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
22475 (unspec:VI12_AVX512VLBW
22476 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
22478 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
22479 UNSPEC_COMPRESS_STORE))]
22480 "TARGET_AVX512VBMI2"
22481 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
22482 [(set_attr "type" "ssemov")
22483 (set_attr "prefix" "evex")
22484 (set_attr "memory" "store")
22485 (set_attr "mode" "<sseinsnmode>")])
22487 (define_expand "<avx512>_expand<mode>_maskz"
22488 [(set (match_operand:VI48F 0 "register_operand")
22490 [(match_operand:VI48F 1 "nonimmediate_operand")
22491 (match_operand:VI48F 2 "nonimm_or_0_operand")
22492 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22495 "operands[2] = CONST0_RTX (<MODE>mode);")
22497 (define_insn "<avx512>_expand<mode>_mask"
22498 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
22500 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
22501 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
22502 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22505 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22506 [(set_attr "type" "ssemov")
22507 (set_attr "prefix" "evex")
22508 (set_attr "memory" "none,load")
22509 (set_attr "mode" "<sseinsnmode>")])
22511 (define_insn "expand<mode>_mask"
22512 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
22513 (unspec:VI12_AVX512VLBW
22514 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
22515 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
22516 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22518 "TARGET_AVX512VBMI2"
22519 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22520 [(set_attr "type" "ssemov")
22521 (set_attr "prefix" "evex")
22522 (set_attr "memory" "none,load")
22523 (set_attr "mode" "<sseinsnmode>")])
22525 (define_expand "expand<mode>_maskz"
22526 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
22527 (unspec:VI12_AVX512VLBW
22528 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
22529 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
22530 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22532 "TARGET_AVX512VBMI2"
22533 "operands[2] = CONST0_RTX (<MODE>mode);")
;; AVX512DQ VRANGEPS/PD and VRANGESS/SD: select min/max (or their
;; absolute-value variants) of operands 1 and 2 according to immediate
;; operand 3 (hence const_0_to_15).  The packed form supports masking
;; and SAE via the <mask_name>/<round_saeonly_name> substitutions; the
;; scalar form keeps the upper elements of operand 1.
;; NOTE(review): some original lines (unspec codes, scalar conditions)
;; are missing from this excerpt -- confirm against the full sse.md.
22535 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
22536 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22537 (unspec:VF_AVX512VL
22538 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
22539 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
22540 (match_operand:SI 3 "const_0_to_15_operand")]
22542 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
22543 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
22544 [(set_attr "type" "sse")
22545 (set_attr "prefix" "evex")
22546 (set_attr "mode" "<MODE>")])
22548 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
22549 [(set (match_operand:VF_128 0 "register_operand" "=v")
22552 [(match_operand:VF_128 1 "register_operand" "v")
22553 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22554 (match_operand:SI 3 "const_0_to_15_operand")]
22559 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
22560 [(set_attr "type" "sse")
22561 (set_attr "prefix" "evex")
22562 (set_attr "mode" "<MODE>")])
;; AVX512DQ VFPCLASSPS/PD and VFPCLASSSS/SD: test each element for the
;; FP categories selected by immediate operand 2 (QNaN, zero, denormal,
;; infinity, negative), producing a mask register result.  The scalar
;; form ANDs with (const_int 1) so only bit 0 of the mask is defined.
;; The stray ';' after each template string starts an md comment and is
;; harmless.
;; NOTE(review): the unspec codes and insn conditions are missing from
;; this excerpt -- confirm against the full sse.md.
22564 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
22565 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22566 (unspec:<avx512fmaskmode>
22567 [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
22568 (match_operand 2 "const_0_to_255_operand" "n")]
22571 "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
22572 [(set_attr "type" "sse")
22573 (set_attr "length_immediate" "1")
22574 (set_attr "prefix" "evex")
22575 (set_attr "mode" "<MODE>")])
22577 (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
22578 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22579 (and:<avx512fmaskmode>
22580 (unspec:<avx512fmaskmode>
22581 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
22582 (match_operand 2 "const_0_to_255_operand" "n")]
22586 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
22587 [(set_attr "type" "sse")
22588 (set_attr "length_immediate" "1")
22589 (set_attr "prefix" "evex")
22590 (set_attr "mode" "<MODE>")])
;; VGETMANTPS/PD and VGETMANTSS/SD: extract the normalized mantissa of
;; each element; the immediate (operand 2 packed / operand 3 scalar,
;; const_0_to_15) selects the normalization interval and sign control.
;; NOTE(review): the unspec codes and insn conditions are missing from
;; this excerpt -- confirm against the full sse.md.
22592 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
22593 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22594 (unspec:VF_AVX512VL
22595 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
22596 (match_operand:SI 2 "const_0_to_15_operand")]
22599 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
22600 [(set_attr "prefix" "evex")
22601 (set_attr "mode" "<MODE>")])
;; Scalar variant: upper elements are taken from operand 1.
22603 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
22604 [(set (match_operand:VF_128 0 "register_operand" "=v")
22607 [(match_operand:VF_128 1 "register_operand" "v")
22608 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22609 (match_operand:SI 3 "const_0_to_15_operand")]
22614 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
22615 [(set_attr "prefix" "evex")
22616 (set_attr "mode" "<ssescalarmode>")])
;; AVX512BW VDBPSADBW: double-block packed sum of absolute differences
;; on unsigned bytes; immediate operand 3 selects the dword shuffle of
;; operand 2 before the SAD.  Modelled as an unspec because, as the
;; original comment notes, exact RTL would be enormous.
;; NOTE(review): the unspec code and insn condition are missing from
;; this excerpt -- confirm against the full sse.md.
22618 ;; The correct representation for this is absolutely enormous, and
22619 ;; surely not generally useful.
22620 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
22621 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22622 (unspec:VI2_AVX512VL
22623 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
22624 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
22625 (match_operand:SI 3 "const_0_to_255_operand")]
22628 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
22629 [(set_attr "type" "sselog1")
22630 (set_attr "length_immediate" "1")
22631 (set_attr "prefix" "evex")
22632 (set_attr "mode" "<sseinsnmode>")])
;; AVX512CD patterns: VPLZCNTD/Q maps directly onto the generic clz
;; RTL; VPCONFLICTD/Q (per-element conflict detection against earlier
;; elements) has no generic RTL equivalent and uses an unspec.
;; NOTE(review): a few original lines (clz body start, unspec code,
;; conditions) are missing from this excerpt -- confirm against sse.md.
22634 (define_insn "clz<mode>2<mask_name>"
22635 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22637 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
22639 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22640 [(set_attr "type" "sse")
22641 (set_attr "prefix" "evex")
22642 (set_attr "mode" "<sseinsnmode>")])
22644 (define_insn "<mask_codefor>conflict<mode><mask_name>"
22645 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22646 (unspec:VI48_AVX512VL
22647 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
22650 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22651 [(set_attr "type" "sse")
22652 (set_attr "prefix" "evex")
22653 (set_attr "mode" "<sseinsnmode>")])
;; SHA-NI (SHA extension) patterns.  All operate on V4SI state words in
;; legacy-SSE (non-VEX) encoding, destructively on operand 0 ("0"
;; matching constraint).  sha256rnds2 implicitly uses XMM0, hence the
;; "Yz" constraint on operand 3.
;; NOTE(review): several unspec codes and the TARGET_SHA conditions are
;; missing from this excerpt -- confirm against the full sse.md.
22655 (define_insn "sha1msg1"
22656 [(set (match_operand:V4SI 0 "register_operand" "=x")
22658 [(match_operand:V4SI 1 "register_operand" "0")
22659 (match_operand:V4SI 2 "vector_operand" "xBm")]
22662 "sha1msg1\t{%2, %0|%0, %2}"
22663 [(set_attr "type" "sselog1")
22664 (set_attr "mode" "TI")])
22666 (define_insn "sha1msg2"
22667 [(set (match_operand:V4SI 0 "register_operand" "=x")
22669 [(match_operand:V4SI 1 "register_operand" "0")
22670 (match_operand:V4SI 2 "vector_operand" "xBm")]
22673 "sha1msg2\t{%2, %0|%0, %2}"
22674 [(set_attr "type" "sselog1")
22675 (set_attr "mode" "TI")])
22677 (define_insn "sha1nexte"
22678 [(set (match_operand:V4SI 0 "register_operand" "=x")
22680 [(match_operand:V4SI 1 "register_operand" "0")
22681 (match_operand:V4SI 2 "vector_operand" "xBm")]
22682 UNSPEC_SHA1NEXTE))]
22684 "sha1nexte\t{%2, %0|%0, %2}"
22685 [(set_attr "type" "sselog1")
22686 (set_attr "mode" "TI")])
;; Four SHA-1 rounds; immediate operand 3 selects the round function.
22688 (define_insn "sha1rnds4"
22689 [(set (match_operand:V4SI 0 "register_operand" "=x")
22691 [(match_operand:V4SI 1 "register_operand" "0")
22692 (match_operand:V4SI 2 "vector_operand" "xBm")
22693 (match_operand:SI 3 "const_0_to_3_operand" "n")]
22694 UNSPEC_SHA1RNDS4))]
22696 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
22697 [(set_attr "type" "sselog1")
22698 (set_attr "length_immediate" "1")
22699 (set_attr "mode" "TI")])
22701 (define_insn "sha256msg1"
22702 [(set (match_operand:V4SI 0 "register_operand" "=x")
22704 [(match_operand:V4SI 1 "register_operand" "0")
22705 (match_operand:V4SI 2 "vector_operand" "xBm")]
22706 UNSPEC_SHA256MSG1))]
22708 "sha256msg1\t{%2, %0|%0, %2}"
22709 [(set_attr "type" "sselog1")
22710 (set_attr "mode" "TI")])
22712 (define_insn "sha256msg2"
22713 [(set (match_operand:V4SI 0 "register_operand" "=x")
22715 [(match_operand:V4SI 1 "register_operand" "0")
22716 (match_operand:V4SI 2 "vector_operand" "xBm")]
22717 UNSPEC_SHA256MSG2))]
22719 "sha256msg2\t{%2, %0|%0, %2}"
22720 [(set_attr "type" "sselog1")
22721 (set_attr "mode" "TI")])
;; Two SHA-256 rounds; operand 3 is the implicit XMM0 round input.
22723 (define_insn "sha256rnds2"
22724 [(set (match_operand:V4SI 0 "register_operand" "=x")
22726 [(match_operand:V4SI 1 "register_operand" "0")
22727 (match_operand:V4SI 2 "vector_operand" "xBm")
22728 (match_operand:V4SI 3 "register_operand" "Yz")]
22729 UNSPEC_SHA256RNDS2))]
22731 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
22732 [(set_attr "type" "sselog1")
22733 (set_attr "length_immediate" "1")
22734 (set_attr "mode" "TI")])
;; 512-bit "cast" patterns: reinterpret a 128-bit (quarter) or 256-bit
;; (half) vector as a 512-bit vector whose upper lanes are undefined
;; (UNSPEC_CAST).  After reload they split to a plain move of the low
;; part, so no instruction is emitted for the widening itself.
;; NOTE(review): the output-template ("#") and split-body brace lines
;; are missing from this excerpt -- confirm against the full sse.md.
22736 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
22737 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22738 (vec_concat:AVX512MODE2P
22739 (vec_concat:<ssehalfvecmode>
22740 (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
22741 (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
22742 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
22743 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22745 "&& reload_completed"
22746 [(set (match_dup 0) (match_dup 1))]
22748 if (REG_P (operands[0]))
22749 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
22751 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
22752 <ssequartermode>mode);
;; Same idea for a 256-bit source widened to 512 bits.
22755 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
22756 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22757 (vec_concat:AVX512MODE2P
22758 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
22759 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
22760 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22762 "&& reload_completed"
22763 [(set (match_dup 0) (match_dup 1))]
22765 if (REG_P (operands[0]))
22766 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
22768 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
22769 <ssehalfvecmode>mode);
;; AVX512IFMA VPMADD52LUQ/VPMADD52HUQ: 52-bit multiply of operands 2
;; and 3, accumulating the low/high 52 product bits into operand 1/0.
;; The pattern names intentionally keep the historical "vpamdd52"
;; misspelling -- builtins reference these names, so do not rename.
;; NOTE(review): several original lines (unspec references, expander
;; braces/DONE) are missing from this excerpt -- confirm against sse.md.
22772 (define_int_iterator VPMADD52
22773 [UNSPEC_VPMADD52LUQ
22774 UNSPEC_VPMADD52HUQ])
22776 (define_int_attr vpmadd52type
22777 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
;; Zero-masked expanders: pass a zero vector as the merge operand.
22779 (define_expand "vpamdd52huq<mode>_maskz"
22780 [(match_operand:VI8_AVX512VL 0 "register_operand")
22781 (match_operand:VI8_AVX512VL 1 "register_operand")
22782 (match_operand:VI8_AVX512VL 2 "register_operand")
22783 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
22784 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22785 "TARGET_AVX512IFMA"
22787 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
22788 operands[0], operands[1], operands[2], operands[3],
22789 CONST0_RTX (<MODE>mode), operands[4]));
22793 (define_expand "vpamdd52luq<mode>_maskz"
22794 [(match_operand:VI8_AVX512VL 0 "register_operand")
22795 (match_operand:VI8_AVX512VL 1 "register_operand")
22796 (match_operand:VI8_AVX512VL 2 "register_operand")
22797 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
22798 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22799 "TARGET_AVX512IFMA"
22801 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
22802 operands[0], operands[1], operands[2], operands[3],
22803 CONST0_RTX (<MODE>mode), operands[4]));
;; Unmasked (or zero-masked via <sd_maskz_name>) accumulate form; the
;; accumulator is operand 1, tied to the destination ("0" constraint).
22807 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
22808 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22809 (unspec:VI8_AVX512VL
22810 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
22811 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
22812 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
22814 "TARGET_AVX512IFMA"
22815 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
22816 [(set_attr "type" "ssemuladd")
22817 (set_attr "prefix" "evex")
22818 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked form: lanes cleared in mask operand 4 keep operand 1.
22820 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
22821 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22822 (vec_merge:VI8_AVX512VL
22823 (unspec:VI8_AVX512VL
22824 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
22825 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
22826 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
22829 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22830 "TARGET_AVX512IFMA"
22831 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
22832 [(set_attr "type" "ssemuladd")
22833 (set_attr "prefix" "evex")
22834 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI VPMULTISHIFTQB: for each byte of the destination, select
;; an unaligned 8-bit field from the corresponding qword of operand 2,
;; at the bit offset given by the matching control byte in operand 1.
;; Supports masking via the <mask_name> substitution.
22836 (define_insn "vpmultishiftqb<mode><mask_name>"
22837 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
22838 (unspec:VI1_AVX512VL
22839 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
22840 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
22841 UNSPEC_VPMULTISHIFT))]
22842 "TARGET_AVX512VBMI"
22843 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22844 [(set_attr "type" "sselog")
22845 (set_attr "prefix" "evex")
22846 (set_attr "mode" "<sseinsnmode>")])
;; IMOD4: the pseudo 256-byte modes (V64SF/V64SI) used by the 4FMAPS /
;; 4VNNIW multi-register operand groups.  Moves of these modes split
;; after reload into four 64-byte (V16SF/V16SI) submoves.
;; NOTE(review): the mov expander's condition/braces and parts of the
;; split body are missing from this excerpt -- confirm against sse.md.
22848 (define_mode_iterator IMOD4
22849 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
;; Component mode of one quarter of an IMOD4 value.
22851 (define_mode_attr imod4_narrow
22852 [(V64SF "V16SF") (V64SI "V16SI")])
22854 (define_expand "mov<mode>"
22855 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
22856 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
22859 ix86_expand_vector_move (<MODE>mode, operands);
;; Split into four narrow moves, one per 64-byte quarter.
22863 (define_insn_and_split "*mov<mode>_internal"
22864 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
22865 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
22867 && (register_operand (operands[0], <MODE>mode)
22868 || register_operand (operands[1], <MODE>mode))"
22870 "&& reload_completed"
22876 for (i = 0; i < 4; i++)
22878 op0 = simplify_subreg
22879 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
22880 op1 = simplify_subreg
22881 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
22882 emit_move_insn (op0, op1);
;; AVX5124FMAPS V4FMADDPS/V4FMADDSS: four iterations of FMA using a
;; block of four consecutive zmm registers (the V64SF operand, printed
;; with %g as its first register) and a 128-bit memory operand.  The
;; _mask forms merge with operand 3/mask 4; the _maskz forms zero the
;; inactive lanes (const0 operand + %{z%} in the template).
;; NOTE(review): unspec/vec_merge header lines are missing from this
;; excerpt -- confirm against the full sse.md.
22887 (define_insn "avx5124fmaddps_4fmaddps"
22888 [(set (match_operand:V16SF 0 "register_operand" "=v")
22890 [(match_operand:V16SF 1 "register_operand" "0")
22891 (match_operand:V64SF 2 "register_operand" "v")
22892 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
22893 "TARGET_AVX5124FMAPS"
22894 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
22895 [(set_attr ("type") ("ssemuladd"))
22896 (set_attr ("prefix") ("evex"))
22897 (set_attr ("mode") ("V16SF"))])
22899 (define_insn "avx5124fmaddps_4fmaddps_mask"
22900 [(set (match_operand:V16SF 0 "register_operand" "=v")
22903 [(match_operand:V64SF 1 "register_operand" "v")
22904 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
22905 (match_operand:V16SF 3 "register_operand" "0")
22906 (match_operand:HI 4 "register_operand" "Yk")))]
22907 "TARGET_AVX5124FMAPS"
22908 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
22909 [(set_attr ("type") ("ssemuladd"))
22910 (set_attr ("prefix") ("evex"))
22911 (set_attr ("mode") ("V16SF"))])
22913 (define_insn "avx5124fmaddps_4fmaddps_maskz"
22914 [(set (match_operand:V16SF 0 "register_operand" "=v")
22917 [(match_operand:V16SF 1 "register_operand" "0")
22918 (match_operand:V64SF 2 "register_operand" "v")
22919 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
22920 (match_operand:V16SF 4 "const0_operand" "C")
22921 (match_operand:HI 5 "register_operand" "Yk")))]
22922 "TARGET_AVX5124FMAPS"
22923 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
22924 [(set_attr ("type") ("ssemuladd"))
22925 (set_attr ("prefix") ("evex"))
22926 (set_attr ("mode") ("V16SF"))])
;; Scalar variant: only the low SF element is computed (%x prints the
;; xmm view of the register group); QI mask, "SF" mode attribute.
22928 (define_insn "avx5124fmaddps_4fmaddss"
22929 [(set (match_operand:V4SF 0 "register_operand" "=v")
22931 [(match_operand:V4SF 1 "register_operand" "0")
22932 (match_operand:V64SF 2 "register_operand" "v")
22933 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
22934 "TARGET_AVX5124FMAPS"
22935 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
22936 [(set_attr ("type") ("ssemuladd"))
22937 (set_attr ("prefix") ("evex"))
22938 (set_attr ("mode") ("SF"))])
22940 (define_insn "avx5124fmaddps_4fmaddss_mask"
22941 [(set (match_operand:V4SF 0 "register_operand" "=v")
22944 [(match_operand:V64SF 1 "register_operand" "v")
22945 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
22946 (match_operand:V4SF 3 "register_operand" "0")
22947 (match_operand:QI 4 "register_operand" "Yk")))]
22948 "TARGET_AVX5124FMAPS"
22949 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
22950 [(set_attr ("type") ("ssemuladd"))
22951 (set_attr ("prefix") ("evex"))
22952 (set_attr ("mode") ("SF"))])
22954 (define_insn "avx5124fmaddps_4fmaddss_maskz"
22955 [(set (match_operand:V4SF 0 "register_operand" "=v")
22958 [(match_operand:V4SF 1 "register_operand" "0")
22959 (match_operand:V64SF 2 "register_operand" "v")
22960 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
22961 (match_operand:V4SF 4 "const0_operand" "C")
22962 (match_operand:QI 5 "register_operand" "Yk")))]
22963 "TARGET_AVX5124FMAPS"
22964 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
22965 [(set_attr ("type") ("ssemuladd"))
22966 (set_attr ("prefix") ("evex"))
22967 (set_attr ("mode") ("SF"))])
;; AVX5124FMAPS negated variants, V4FNMADDPS/V4FNMADDSS: same structure
;; as the 4fmadd patterns above but with the product negated
;; (UNSPEC_VP4FNMADD).  Packed forms use an HI mask, scalar forms QI.
;; NOTE(review): unspec/vec_merge header lines are missing from this
;; excerpt -- confirm against the full sse.md.
22969 (define_insn "avx5124fmaddps_4fnmaddps"
22970 [(set (match_operand:V16SF 0 "register_operand" "=v")
22972 [(match_operand:V16SF 1 "register_operand" "0")
22973 (match_operand:V64SF 2 "register_operand" "v")
22974 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
22975 "TARGET_AVX5124FMAPS"
22976 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
22977 [(set_attr ("type") ("ssemuladd"))
22978 (set_attr ("prefix") ("evex"))
22979 (set_attr ("mode") ("V16SF"))])
22981 (define_insn "avx5124fmaddps_4fnmaddps_mask"
22982 [(set (match_operand:V16SF 0 "register_operand" "=v")
22985 [(match_operand:V64SF 1 "register_operand" "v")
22986 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
22987 (match_operand:V16SF 3 "register_operand" "0")
22988 (match_operand:HI 4 "register_operand" "Yk")))]
22989 "TARGET_AVX5124FMAPS"
22990 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
22991 [(set_attr ("type") ("ssemuladd"))
22992 (set_attr ("prefix") ("evex"))
22993 (set_attr ("mode") ("V16SF"))])
22995 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
22996 [(set (match_operand:V16SF 0 "register_operand" "=v")
22999 [(match_operand:V16SF 1 "register_operand" "0")
23000 (match_operand:V64SF 2 "register_operand" "v")
23001 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23002 (match_operand:V16SF 4 "const0_operand" "C")
23003 (match_operand:HI 5 "register_operand" "Yk")))]
23004 "TARGET_AVX5124FMAPS"
23005 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23006 [(set_attr ("type") ("ssemuladd"))
23007 (set_attr ("prefix") ("evex"))
23008 (set_attr ("mode") ("V16SF"))])
23010 (define_insn "avx5124fmaddps_4fnmaddss"
23011 [(set (match_operand:V4SF 0 "register_operand" "=v")
23013 [(match_operand:V4SF 1 "register_operand" "0")
23014 (match_operand:V64SF 2 "register_operand" "v")
23015 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
23016 "TARGET_AVX5124FMAPS"
23017 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
23018 [(set_attr ("type") ("ssemuladd"))
23019 (set_attr ("prefix") ("evex"))
23020 (set_attr ("mode") ("SF"))])
23022 (define_insn "avx5124fmaddps_4fnmaddss_mask"
23023 [(set (match_operand:V4SF 0 "register_operand" "=v")
23026 [(match_operand:V64SF 1 "register_operand" "v")
23027 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23028 (match_operand:V4SF 3 "register_operand" "0")
23029 (match_operand:QI 4 "register_operand" "Yk")))]
23030 "TARGET_AVX5124FMAPS"
23031 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
23032 [(set_attr ("type") ("ssemuladd"))
23033 (set_attr ("prefix") ("evex"))
23034 (set_attr ("mode") ("SF"))])
23036 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
23037 [(set (match_operand:V4SF 0 "register_operand" "=v")
23040 [(match_operand:V4SF 1 "register_operand" "0")
23041 (match_operand:V64SF 2 "register_operand" "v")
23042 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
23043 (match_operand:V4SF 4 "const0_operand" "C")
23044 (match_operand:QI 5 "register_operand" "Yk")))]
23045 "TARGET_AVX5124FMAPS"
23046 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
23047 [(set_attr ("type") ("ssemuladd"))
23048 (set_attr ("prefix") ("evex"))
23049 (set_attr ("mode") ("SF"))])
;; AVX5124VNNIW VP4DPWSSD(S): four iterations of signed word dot
;; product accumulated into dwords, using a four-register zmm group
;; (V64SI operand, printed via %g) and a 128-bit memory operand; the
;; "S" variants saturate.  _mask merges, _maskz zeroes inactive lanes.
;; NOTE(review): unspec/vec_merge header lines are missing from this
;; excerpt -- confirm against the full sse.md.
23051 (define_insn "avx5124vnniw_vp4dpwssd"
23052 [(set (match_operand:V16SI 0 "register_operand" "=v")
23054 [(match_operand:V16SI 1 "register_operand" "0")
23055 (match_operand:V64SI 2 "register_operand" "v")
23056 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
23057 "TARGET_AVX5124VNNIW"
23058 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
23059 [(set_attr ("type") ("ssemuladd"))
23060 (set_attr ("prefix") ("evex"))
23061 (set_attr ("mode") ("TI"))])
23063 (define_insn "avx5124vnniw_vp4dpwssd_mask"
23064 [(set (match_operand:V16SI 0 "register_operand" "=v")
23067 [(match_operand:V64SI 1 "register_operand" "v")
23068 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
23069 (match_operand:V16SI 3 "register_operand" "0")
23070 (match_operand:HI 4 "register_operand" "Yk")))]
23071 "TARGET_AVX5124VNNIW"
23072 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23073 [(set_attr ("type") ("ssemuladd"))
23074 (set_attr ("prefix") ("evex"))
23075 (set_attr ("mode") ("TI"))])
23077 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
23078 [(set (match_operand:V16SI 0 "register_operand" "=v")
23081 [(match_operand:V16SI 1 "register_operand" "0")
23082 (match_operand:V64SI 2 "register_operand" "v")
23083 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
23084 (match_operand:V16SI 4 "const0_operand" "C")
23085 (match_operand:HI 5 "register_operand" "Yk")))]
23086 "TARGET_AVX5124VNNIW"
23087 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23088 [(set_attr ("type") ("ssemuladd"))
23089 (set_attr ("prefix") ("evex"))
23090 (set_attr ("mode") ("TI"))])
;; Saturating variants.
23092 (define_insn "avx5124vnniw_vp4dpwssds"
23093 [(set (match_operand:V16SI 0 "register_operand" "=v")
23095 [(match_operand:V16SI 1 "register_operand" "0")
23096 (match_operand:V64SI 2 "register_operand" "v")
23097 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
23098 "TARGET_AVX5124VNNIW"
23099 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
23100 [(set_attr ("type") ("ssemuladd"))
23101 (set_attr ("prefix") ("evex"))
23102 (set_attr ("mode") ("TI"))])
23104 (define_insn "avx5124vnniw_vp4dpwssds_mask"
23105 [(set (match_operand:V16SI 0 "register_operand" "=v")
23108 [(match_operand:V64SI 1 "register_operand" "v")
23109 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
23110 (match_operand:V16SI 3 "register_operand" "0")
23111 (match_operand:HI 4 "register_operand" "Yk")))]
23112 "TARGET_AVX5124VNNIW"
23113 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
23114 [(set_attr ("type") ("ssemuladd"))
23115 (set_attr ("prefix") ("evex"))
23116 (set_attr ("mode") ("TI"))])
23118 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
23119 [(set (match_operand:V16SI 0 "register_operand" "=v")
23122 [(match_operand:V16SI 1 "register_operand" "0")
23123 (match_operand:V64SI 2 "register_operand" "v")
23124 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
23125 (match_operand:V16SI 4 "const0_operand" "C")
23126 (match_operand:HI 5 "register_operand" "Yk")))]
23127 "TARGET_AVX5124VNNIW"
23128 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
23129 [(set_attr ("type") ("ssemuladd"))
23130 (set_attr ("prefix") ("evex"))
23131 (set_attr ("mode") ("TI"))])
;; AVX512VPOPCNTDQ: per-element population count on dword/qword
;; vectors.  The expander exposes the generic popcount<mode>2 name; the
;; insn (VPOPCNTD/VPOPCNTQ) also supports masking via <mask_name>.
23133 (define_expand "popcount<mode>2"
23134 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
23135 (popcount:VI48_AVX512VL
23136 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
23137 "TARGET_AVX512VPOPCNTDQ")
23139 (define_insn "vpopcount<mode><mask_name>"
23140 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
23141 (popcount:VI48_AVX512VL
23142 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
23143 "TARGET_AVX512VPOPCNTDQ"
23144 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; Out-of-line vector register save/restore stubs (used by the ms/sysv
;; ABI transition code): each pattern wraps a match_parallel whose
;; first element names the stub symbol (operand 1).
;; NOTE(review): the output templates and some parallel elements are
;; missing from this excerpt -- confirm against the full sse.md.
23146 ;; Save multiple registers out-of-line.
23147 (define_insn "*save_multiple<mode>"
23148 [(match_parallel 0 "save_multiple"
23149 [(use (match_operand:P 1 "symbol_operand"))])]
23150 "TARGET_SSE && TARGET_64BIT"
23153 ;; Restore multiple registers out-of-line.
23154 (define_insn "*restore_multiple<mode>"
23155 [(match_parallel 0 "restore_multiple"
23156 [(use (match_operand:P 1 "symbol_operand"))])]
23157 "TARGET_SSE && TARGET_64BIT"
23160 ;; Restore multiple registers out-of-line and return.
23161 (define_insn "*restore_multiple_and_return<mode>"
23162 [(match_parallel 0 "restore_multiple"
23164 (use (match_operand:P 1 "symbol_operand"))
23165 (set (reg:DI SP_REG) (reg:DI R10_REG))
23167 "TARGET_SSE && TARGET_64BIT"
23170 ;; Restore multiple registers out-of-line when hard frame pointer is used,
23171 ;; perform the leave operation prior to returning (from the function).
23172 (define_insn "*restore_multiple_leave_return<mode>"
23173 [(match_parallel 0 "restore_multiple"
23175 (use (match_operand:P 1 "symbol_operand"))
23176 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
23177 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
23178 (clobber (mem:BLK (scratch)))
23180 "TARGET_SSE && TARGET_64BIT"
;; AVX512BITALG: per-element population count on byte/word vectors
;; (VPOPCNTB/VPOPCNTW), mirroring the dword/qword patterns above.
23183 (define_expand "popcount<mode>2"
23184 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
23185 (popcount:VI12_AVX512VL
23186 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
23187 "TARGET_AVX512BITALG")
23189 (define_insn "vpopcount<mode><mask_name>"
23190 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
23191 (popcount:VI12_AVX512VL
23192 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
23193 "TARGET_AVX512BITALG"
23194 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; GFNI patterns: affine transforms and multiply in GF(2^8).  Each has
;; two alternatives: legacy-SSE destructive form (noavx, "0"/xBm) and
;; the VEX/EVEX three-operand form (avx, "v"/vm); the latter supports
;; masking via <mask_name>.  Immediate operand 3 of the affine forms is
;; the constant term added after the matrix multiply.
;; NOTE(review): the insn conditions and "@"-alternative markers are
;; missing from this excerpt -- confirm against the full sse.md.
23196 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
23197 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23198 (unspec:VI1_AVX512F
23199 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
23200 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
23201 (match_operand 3 "const_0_to_255_operand" "n,n")]
23202 UNSPEC_GF2P8AFFINEINV))]
23205 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
23206 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
23207 [(set_attr "isa" "noavx,avx")
23208 (set_attr "prefix_data16" "1,*")
23209 (set_attr "prefix_extra" "1")
23210 (set_attr "prefix" "orig,maybe_evex")
23211 (set_attr "mode" "<sseinsnmode>")])
;; Affine transform without the inversion step.
23213 (define_insn "vgf2p8affineqb_<mode><mask_name>"
23214 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23215 (unspec:VI1_AVX512F
23216 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
23217 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
23218 (match_operand 3 "const_0_to_255_operand" "n,n")]
23219 UNSPEC_GF2P8AFFINE))]
23222 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
23223 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
23224 [(set_attr "isa" "noavx,avx")
23225 (set_attr "prefix_data16" "1,*")
23226 (set_attr "prefix_extra" "1")
23227 (set_attr "prefix" "orig,maybe_evex")
23228 (set_attr "mode" "<sseinsnmode>")])
;; GF(2^8) multiply; commutative, hence the "%0,v" on operand 1.
23230 (define_insn "vgf2p8mulb_<mode><mask_name>"
23231 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
23232 (unspec:VI1_AVX512F
23233 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
23234 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
23238 gf2p8mulb\t{%2, %0| %0, %2}
23239 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
23240 [(set_attr "isa" "noavx,avx")
23241 (set_attr "prefix_data16" "1,*")
23242 (set_attr "prefix_extra" "1")
23243 (set_attr "prefix" "orig,maybe_evex")
23244 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 concatenate-and-shift by immediate: VPSHRDW/D/Q shifts
;; the double-width concatenation of operands 1 and 2 right by
;; immediate operand 3; VPSHLDW/D/Q shifts it left.  Masking via
;; <mask_name>.
;; NOTE(review): the unspec codes are missing from this excerpt --
;; confirm against the full sse.md.
23246 (define_insn "vpshrd_<mode><mask_name>"
23247 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23248 (unspec:VI248_AVX512VL
23249 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
23250 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
23251 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23253 "TARGET_AVX512VBMI2"
23254 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
23255 [(set_attr ("prefix") ("evex"))])
23257 (define_insn "vpshld_<mode><mask_name>"
23258 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23259 (unspec:VI248_AVX512VL
23260 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
23261 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
23262 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23264 "TARGET_AVX512VBMI2"
23265 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
23266 [(set_attr ("prefix") ("evex"))])
;; AVX512VBMI2 VPSHRDVW/D/Q: variable concatenate-shift-right, with the
;; per-element shift counts in operand 3 and the destination tied to
;; operand 1.  Plain, merge-masked (_mask), and zero-masked (_maskz via
;; the expander below, implemented by _maskz_1) forms.
;; NOTE(review): unspec codes and the expander's braces/DONE are
;; missing from this excerpt -- confirm against the full sse.md.
23268 (define_insn "vpshrdv_<mode>"
23269 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23270 (unspec:VI248_AVX512VL
23271 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23272 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23273 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23275 "TARGET_AVX512VBMI2"
23276 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
23277 [(set_attr ("prefix") ("evex"))
23278 (set_attr "mode" "<sseinsnmode>")])
23280 (define_insn "vpshrdv_<mode>_mask"
23281 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23282 (vec_merge:VI248_AVX512VL
23283 (unspec:VI248_AVX512VL
23284 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23285 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23286 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23289 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23290 "TARGET_AVX512VBMI2"
23291 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
23292 [(set_attr ("prefix") ("evex"))
23293 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked expander: delegates to _maskz_1 with a zero merge vector.
23295 (define_expand "vpshrdv_<mode>_maskz"
23296 [(match_operand:VI248_AVX512VL 0 "register_operand")
23297 (match_operand:VI248_AVX512VL 1 "register_operand")
23298 (match_operand:VI248_AVX512VL 2 "register_operand")
23299 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
23300 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23301 "TARGET_AVX512VBMI2"
23303 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
23304 operands[2], operands[3],
23305 CONST0_RTX (<MODE>mode),
23310 (define_insn "vpshrdv_<mode>_maskz_1"
23311 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
23312 (vec_merge:VI248_AVX512VL
23313 (unspec:VI248_AVX512VL
23314 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
23315 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
23316 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
23318 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
23319 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
23320 "TARGET_AVX512VBMI2"
23321 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
23322 [(set_attr ("prefix") ("evex"))
23323 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 VPSHLDV: concatenate each element of operands 1 and 2 and
;; shift left by the per-element count in operand 3.
(define_insn "vpshldv_<mode>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(unspec:VI248_AVX512VL
	  [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VSHLDV))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Merge-masked VPSHLDV: elements with a clear mask bit keep operand 1.
(define_insn "vpshldv_<mode>_mask"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI248_AVX512VL
	  (unspec:VI248_AVX512VL
	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VSHLDV)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Zero-masked expander: forwards to the _maskz_1 insn with a zero
;; vector as the vec_merge fall-through source.
(define_expand "vpshldv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
					 operands[2], operands[3],
					 CONST0_RTX (<MODE>mode),
					 operands[4]));
  DONE;
})

;; Zero-masked VPSHLDV: elements with a clear mask bit are zeroed.
(define_insn "vpshldv_<mode>_maskz_1"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI248_AVX512VL
	  (unspec:VI248_AVX512VL
	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VSHLDV)
	  (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
;; VNNI VPDPBUSD: multiply groups of 4 unsigned bytes of operand 2 with the
;; corresponding signed bytes of operand 3, accumulate into the dwords of
;; operand 1.  512-bit form (AVX512VNNI only).
(define_insn "vpdpbusd_v16si"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V16SI 2 "register_operand" "v")
	   (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

;; 128/256-bit VPDPBUSD; alternative 0 forces the VEX (AVX-VNNI) encoding,
;; alternative 1 uses the EVEX (AVX512VNNI+VL) encoding.
(define_insn "vpdpbusd_<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
	(unspec:VI4_AVX2
	  [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
	   (match_operand:VI4_AVX2 2 "register_operand" "x,v")
	   (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
  "@
   %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
   vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "vex,evex")
   (set_attr "isa" "avxvnni,avx512vnnivl")])

;; Merge-masked VPDPBUSD: elements with a clear mask bit keep operand 1.
(define_insn "vpdpbusd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Zero-masked expander: forwards to the _maskz_1 insn with a zero
;; vector as the vec_merge fall-through source.
(define_expand "vpdpbusd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  CONST0_RTX (<MODE>mode),
					  operands[4]));
  DONE;
})

;; Zero-masked VPDPBUSD: elements with a clear mask bit are zeroed.
(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; VNNI VPDPBUSDS: as VPDPBUSD but with signed saturation of the
;; accumulated dword result.  512-bit form.
(define_insn "vpdpbusds_v16si"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V16SI 2 "register_operand" "v")
	   (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

;; 128/256-bit VPDPBUSDS; alternative 0 forces the VEX (AVX-VNNI)
;; encoding, alternative 1 the EVEX (AVX512VNNI+VL) encoding.
(define_insn "vpdpbusds_<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
	(unspec:VI4_AVX2
	  [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
	   (match_operand:VI4_AVX2 2 "register_operand" "x,v")
	   (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
  "@
   %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
   vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "vex,evex")
   (set_attr "isa" "avxvnni,avx512vnnivl")])

;; Merge-masked VPDPBUSDS: elements with a clear mask bit keep operand 1.
(define_insn "vpdpbusds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Zero-masked expander: forwards to the _maskz_1 insn with a zero
;; vector as the vec_merge fall-through source.
(define_expand "vpdpbusds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   CONST0_RTX (<MODE>mode),
					   operands[4]));
  DONE;
})

;; Zero-masked VPDPBUSDS: elements with a clear mask bit are zeroed.
(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; VNNI VPDPWSSD: multiply pairs of signed words of operands 2 and 3,
;; accumulate into the dwords of operand 1.  512-bit form.
(define_insn "vpdpwssd_v16si"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V16SI 2 "register_operand" "v")
	   (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

;; 128/256-bit VPDPWSSD; alternative 0 forces the VEX (AVX-VNNI)
;; encoding, alternative 1 the EVEX (AVX512VNNI+VL) encoding.
(define_insn "vpdpwssd_<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
	(unspec:VI4_AVX2
	  [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
	   (match_operand:VI4_AVX2 2 "register_operand" "x,v")
	   (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_VPMADDWDACCD))]
  "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
  "@
   %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
   vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "vex,evex")
   (set_attr "isa" "avxvnni,avx512vnnivl")])

;; Merge-masked VPDPWSSD: elements with a clear mask bit keep operand 1.
(define_insn "vpdpwssd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Zero-masked expander: forwards to the _maskz_1 insn with a zero
;; vector as the vec_merge fall-through source.
(define_expand "vpdpwssd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  CONST0_RTX (<MODE>mode),
					  operands[4]));
  DONE;
})

;; Zero-masked VPDPWSSD: elements with a clear mask bit are zeroed.
(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; VNNI VPDPWSSDS: as VPDPWSSD but with signed saturation of the
;; accumulated dword result.  512-bit form.
(define_insn "vpdpwssds_v16si"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V16SI 2 "register_operand" "v")
	   (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

;; 128/256-bit VPDPWSSDS; alternative 0 forces the VEX (AVX-VNNI)
;; encoding, alternative 1 the EVEX (AVX512VNNI+VL) encoding.
(define_insn "vpdpwssds_<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
	(unspec:VI4_AVX2
	  [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
	   (match_operand:VI4_AVX2 2 "register_operand" "x,v")
	   (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
  "@
   %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
   vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "vex,evex")
   (set_attr "isa" "avxvnni,avx512vnnivl")])

;; Merge-masked VPDPWSSDS: elements with a clear mask bit keep operand 1.
(define_insn "vpdpwssds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

;; Zero-masked expander: forwards to the _maskz_1 insn with a zero
;; vector as the vec_merge fall-through source.
(define_expand "vpdpwssds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   CONST0_RTX (<MODE>mode),
					   operands[4]));
  DONE;
})

;; Zero-masked VPDPWSSDS: elements with a clear mask bit are zeroed.
(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; VAES: vector AES round instructions on 128/256/512-bit operands.
;; Operand 1 holds the states, operand 2 the round keys.
(define_insn "vaesdec_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDEC))]
  "TARGET_VAES"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}")

(define_insn "vaesdeclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDECLAST))]
  "TARGET_VAES"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}")

(define_insn "vaesenc_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESENC))]
  "TARGET_VAES"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}")

(define_insn "vaesenclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESENCLAST))]
  "TARGET_VAES"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}")
;; VPCLMULQDQ: carry-less multiplication of quadword pairs selected by
;; immediate operand 3.
(define_insn "vpclmulqdq_<mode>"
  [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
	(unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
			 (match_operand:VI8_FVL 2 "vector_operand" "vm")
			 (match_operand:SI 3 "const_0_to_255_operand" "n")]
			UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])
;; AVX512BITALG VPSHUFBITQMB: gather bits of operand 1 selected by the
;; byte indices in operand 2 into a mask register.
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
;; Source-vector modes handled by VP2INTERSECT (dword/qword elements;
;; the sub-512-bit modes require AVX512VL).
(define_mode_iterator VI48_AVX512VP2VL
  [V8DI
   (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
   (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])

;; Paired mask-register modes produced by VP2INTERSECT (k-register pair).
(define_mode_iterator MASK_DWI [P2QI P2HI])
;; Moves of paired-mask values; legitimize mem->mem by forcing the
;; source into a register.
(define_expand "mov<mode>"
  [(set (match_operand:MASK_DWI 0 "nonimmediate_operand")
	(match_operand:MASK_DWI 1 "nonimmediate_operand"))]
  "TARGET_AVX512VP2INTERSECT"
{
  if (MEM_P (operands[0]) && MEM_P (operands[1]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; Paired-mask move: always emitted as "#" and split after reload into
;; two single-mask moves via split_double_mode.
(define_insn_and_split "*mov<mode>_internal"
  [(set (match_operand:MASK_DWI 0 "nonimmediate_operand" "=k,o")
	(match_operand:MASK_DWI 1 "nonimmediate_operand" "ko,k"))]
  "TARGET_AVX512VP2INTERSECT
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
  "split_double_mode (<MODE>mode, &operands[0], 2, &operands[0], &operands[2]);")
;; VP2INTERSECT: compute intersection indicators of operands 1 and 2
;; into a pair of mask registers.
(define_insn "avx512vp2intersect_2intersect<mode>"
  [(set (match_operand:P2QI 0 "register_operand" "=k")
	(unspec:P2QI
	  [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
	   (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
	  UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])

;; 512-bit dword variant (mask pair is P2HI: 2 x 16 bits).
(define_insn "avx512vp2intersect_2intersectv16si"
  [(set (match_operand:P2HI 0 "register_operand" "=k")
	(unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
		      (match_operand:V16SI 2 "vector_operand" "vm")]
		     UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])
;; Packed BF16 vector modes (sub-512-bit forms require AVX512VL).
(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf
  [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
;; Converting from SF to BF (V4SF also maps to V8HI: the 128-bit
;; vcvtneps2bf16 result occupies the low half of an XMM register)
(define_mode_attr sf_cvt_bf16
  [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
;; Mapping from BF to SF
(define_mode_attr sf_bf16
  [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
;; Zero-masked VCVTNE2PS2BF16 expander: reuse the masked insn with a
;; zero vector as the merge source.
(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
  [(match_operand:BF16 0 "register_operand")
   (match_operand:<bf16_cvt_2sf> 1 "register_operand")
   (match_operand:<bf16_cvt_2sf> 2 "register_operand")
   (match_operand:<avx512fmaskmode> 3 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask (operands[0], operands[1],
						    operands[2],
						    CONST0_RTX (<MODE>mode),
						    operands[3]));
  DONE;
})

;; VCVTNE2PS2BF16: convert two SF vectors to one packed BF16 vector.
(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
  [(set (match_operand:BF16 0 "register_operand" "=v")
	(unspec:BF16
	  [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
	   (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
	  UNSPEC_VCVTNE2PS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
;; Zero-masked VCVTNEPS2BF16 expander: reuse the masked insn with a
;; zero vector as the merge source.
(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
  [(match_operand:<sf_cvt_bf16> 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<avx512fmaskmode> 2 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask (operands[0], operands[1],
						   CONST0_RTX (<sf_cvt_bf16>mode),
						   operands[2]));
  DONE;
})

;; VCVTNEPS2BF16: convert one SF vector to packed BF16.
(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
	(unspec:<sf_cvt_bf16>
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
	  UNSPEC_VCVTNEPS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; Zero-masked VDPBF16PS expander: reuse the maskz_1 insn with a zero
;; vector as the merge source.
(define_expand "avx512f_dpbf16ps_<mode>_maskz"
  [(match_operand:VF1_AVX512VL 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<sf_bf16> 2 "register_operand")
   (match_operand:<sf_bf16> 3 "register_operand")
   (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1 (operands[0], operands[1],
						  operands[2], operands[3],
						  CONST0_RTX (<MODE>mode),
						  operands[4]));
  DONE;
})

;; VDPBF16PS: dot product of BF16 pairs in operands 2 and 3, accumulated
;; into the SF elements of operand 1.
(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(unspec:VF1_AVX512VL
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	   (match_operand:<sf_bf16> 2 "register_operand" "v")
	   (match_operand:<sf_bf16> 3 "register_operand" "v")]
	  UNSPEC_VDPBF16PS))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")

;; Merge-masked VDPBF16PS: elements with a clear mask bit keep operand 1.
(define_insn "avx512f_dpbf16ps_<mode>_mask"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF1_AVX512VL
	  (unspec:VF1_AVX512VL
	    [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	     (match_operand:<sf_bf16> 2 "register_operand" "v")
	     (match_operand:<sf_bf16> 3 "register_operand" "v")]
	    UNSPEC_VDPBF16PS)
	  (match_dup 1)
	  (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
;; Key Locker LOADIWKEY: load the internal wrapping key from xmm regs
;; (operand 3 in EAX selects key source/restriction); no value result,
;; but EFLAGS is clobbered.
(define_insn "loadiwkey"
  [(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "v")
			  (match_operand:V2DI 1 "register_operand" "v")
			  (match_operand:V2DI 2 "register_operand" "Yz")
			  (match_operand:SI 3 "register_operand" "a")]
			 UNSPECV_LOADIWKEY)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_KL"
  "loadiwkey\t{%0, %1|%1, %0}"
  [(set_attr "type" "other")])
;; Key Locker ENCODEKEY128: the preparation statement builds the full
;; PARALLEL by hand because the insn touches a fixed set of xmm hard
;; registers: element 0 sets the result, 1-3 set xmm0-xmm2 (key handle
;; outputs), 4-6 zero xmm4-xmm6, and 7 clobbers EFLAGS.
(define_expand "encodekey128u32"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand")
	      (reg:V2DI XMM0_REG)]
	     UNSPECV_ENCODEKEY128U32))])]
  "TARGET_KL"
{
  rtx xmm_regs[7];
  rtx tmp_unspec;
  unsigned i;

  /* parallel rtx for encodekey128 predicate */
  operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));

  for (i = 0; i < 7; i++)
    xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (SImode,
			       gen_rtvec (2, operands[1], xmm_regs[0]),
			       UNSPECV_ENCODEKEY128U32);

  XVECEXP (operands[2], 0, 0)
    = gen_rtx_SET (operands[0], tmp_unspec);

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (V2DImode,
			       gen_rtvec (1, const0_rtx),
			       UNSPECV_ENCODEKEY128U32);

  for (i = 0; i < 3; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], tmp_unspec);

  for (i = 4; i < 7; i++)
    XVECEXP (operands[2], 0, i)
      = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));

  XVECEXP (operands[2], 0, 7)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (operands[2]);
  DONE;
})

;; Matched via encodekey128_operation, which validates the whole
;; hand-built PARALLEL above.
(define_insn "*encodekey128u32"
  [(match_parallel 2 "encodekey128_operation"
    [(set (match_operand:SI 0 "register_operand" "=r")
	  (unspec_volatile:SI
	    [(match_operand:SI 1 "register_operand" "r")
	     (reg:V2DI XMM0_REG)]
	    UNSPECV_ENCODEKEY128U32))])]
  "TARGET_KL"
  "encodekey128\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])
;; Key Locker ENCODEKEY256: as encodekey128 but the input key spans
;; xmm0-xmm1, so element 0 sets the result, 1-4 set xmm0-xmm3, 5-7 zero
;; xmm4-xmm6, and 8 clobbers EFLAGS (9 elements total).
(define_expand "encodekey256u32"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
	   (unspec_volatile:SI
	     [(match_operand:SI 1 "register_operand")
	      (reg:V2DI XMM0_REG)
	      (reg:V2DI XMM1_REG)]
	     UNSPECV_ENCODEKEY256U32))])]
  "TARGET_KL"
{
  rtx xmm_regs[7];
  rtx tmp_unspec;
  unsigned i;

  /* parallel rtx for encodekey256 predicate */
  operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));

  for (i = 0; i < 7; i++)
    xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (SImode,
			       gen_rtvec (3, operands[1],
					  xmm_regs[0], xmm_regs[1]),
			       UNSPECV_ENCODEKEY256U32);

  XVECEXP (operands[2], 0, 0)
    = gen_rtx_SET (operands[0], tmp_unspec);

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (V2DImode,
			       gen_rtvec (1, const0_rtx),
			       UNSPECV_ENCODEKEY256U32);

  for (i = 0; i < 4; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], tmp_unspec);

  for (i = 4; i < 7; i++)
    XVECEXP (operands[2], 0, i + 1)
      = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));

  XVECEXP (operands[2], 0, 8)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (operands[2]);
  DONE;
})

;; Matched via encodekey256_operation, which validates the whole
;; hand-built PARALLEL above.
(define_insn "*encodekey256u32"
  [(match_parallel 2 "encodekey256_operation"
    [(set (match_operand:SI 0 "register_operand" "=r")
	  (unspec_volatile:SI
	    [(match_operand:SI 1 "register_operand" "r")
	     (reg:V2DI XMM0_REG)
	     (reg:V2DI XMM1_REG)]
	    UNSPECV_ENCODEKEY256U32))])]
  "TARGET_KL"
  "encodekey256\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])
;; Key Locker single-block AES encode/decode with a wrapped key handle.
(define_int_iterator AESDECENCKL
  [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
   UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])

(define_int_attr aesklvariant
  [(UNSPECV_AESDEC128KLU8 "dec128kl")
   (UNSPECV_AESDEC256KLU8 "dec256kl")
   (UNSPECV_AESENC128KLU8 "enc128kl")
   (UNSPECV_AESENC256KLU8 "enc256kl")])

;; Operand 1 is the data block, operand 2 the in-memory key handle;
;; ZF reports whether the handle was accepted.
(define_insn "aes<aesklvariant>u8"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
			       (match_operand:BLK 2 "memory_operand" "m")]
			      AESDECENCKL))
   (set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
  "TARGET_KL"
  "aes<aesklvariant>\t{%2, %0|%0, %2}"
  [(set_attr "type" "other")])
;; Key Locker wide AES: process eight blocks (xmm0-xmm7) with one
;; wrapped key handle.
(define_int_iterator AESDECENCWIDEKL
  [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
   UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])

(define_int_attr aeswideklvariant
  [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
   (UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
   (UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
   (UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])

(define_int_attr AESWIDEKLVARIANT
  [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
   (UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
   (UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
   (UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])

;; The preparation statement builds the PARALLEL by hand: element 0 sets
;; ZF from the key handle check, elements 1-8 update xmm0-xmm7 in place.
(define_expand "aes<aeswideklvariant>u8"
  [(parallel
     [(set (reg:CCZ FLAGS_REG)
	   (unspec_volatile:CCZ
	     [(match_operand:BLK 0 "memory_operand")]
	     AESDECENCWIDEKL))])]
  "TARGET_WIDEKL"
{
  rtx tmp_unspec;
  unsigned i;

  /* parallel rtx for widekl predicate */
  operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));

  tmp_unspec
    = gen_rtx_UNSPEC_VOLATILE (CCZmode,
			       gen_rtvec (1, operands[0]),
			       UNSPECV_<AESWIDEKLVARIANT>);

  XVECEXP (operands[1], 0, 0)
    = gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
		   tmp_unspec);

  for (i = 0; i < 8; i++)
    {
      rtx xmm_reg = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));

      tmp_unspec
	= gen_rtx_UNSPEC_VOLATILE (V2DImode,
				   gen_rtvec (1, xmm_reg),
				   UNSPECV_<AESWIDEKLVARIANT>);
      XVECEXP (operands[1], 0, i + 1)
	= gen_rtx_SET (xmm_reg, tmp_unspec);
    }

  emit_insn (operands[1]);
  DONE;
})

;; Matched via aeswidekl_operation, which validates the whole
;; hand-built PARALLEL above.
(define_insn "*aes<aeswideklvariant>u8"
  [(match_parallel 1 "aeswidekl_operation"
    [(set (reg:CCZ FLAGS_REG)
	  (unspec_volatile:CCZ
	    [(match_operand:BLK 0 "memory_operand" "m")]
	    AESDECENCWIDEKL))])]
  "TARGET_WIDEKL"
  "aes<aeswideklvariant>\t{%0}"
  [(set_attr "type" "other")])