Support variable index vec_set.
author liuhongt <hongtao.liu@intel.com>
Mon, 19 Oct 2020 08:04:39 +0000 (16:04 +0800)
committer liuhongt <hongtao.liu@intel.com>
Tue, 17 Nov 2020 03:29:46 +0000 (11:29 +0800)
gcc/ChangeLog:

PR target/97194
* config/i386/i386-expand.c (ix86_expand_vector_set_var): New function.
* config/i386/i386-protos.h (ix86_expand_vector_set_var): New Decl.
* config/i386/predicates.md (vec_setm_operand): New predicate,
true for const_int_operand or register_operand under TARGET_AVX2.
* config/i386/sse.md (vec_set<mode>): Support both constant
and variable index vec_set.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx2-vec-set-1.c: New test.
* gcc.target/i386/avx2-vec-set-2.c: New test.
* gcc.target/i386/avx512bw-vec-set-1.c: New test.
* gcc.target/i386/avx512bw-vec-set-2.c: New test.
* gcc.target/i386/avx512f-vec-set-2.c: New test.
* gcc.target/i386/avx512vl-vec-set-2.c: New test.

gcc/config/i386/i386-expand.c
gcc/config/i386/i386-protos.h
gcc/config/i386/predicates.md
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx2-vec-set-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx2-vec-set-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-vec-set-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-vec-set-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vec-set-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-vec-set-2.c [new file with mode: 0644]

index 044faf3423fa05c71b5bb892583c3e6c1b2fb8cb..73e3358b290b4de960515f804c3676950ad5e0f7 100644 (file)
@@ -14564,6 +14564,112 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
 }
 
+/* Set element IDX (a variable index) of vector TARGET to VAL.  Implemented as
+   V setg (V v, int idx, T val)
+   {
+     V idxv = (V){idx, idx, idx, idx, idx, idx, idx, idx};
+     V valv = (V){val, val, val, val, val, val, val, val};
+     V mask = ((V){0, 1, 2, 3, 4, 5, 6, 7} == idxv);
+     v = (v & ~mask) | (valv & mask);
+     return v;
+   }.  */
+void
+ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
+{
+  rtx vec[64];
+  machine_mode mode = GET_MODE (target);
+  machine_mode cmp_mode = mode;
+  int n_elts = GET_MODE_NUNITS (mode);
+  rtx valv,idxv,constv,idx_tmp;
+  bool ok = false;
+
+  /* 512-bit vector byte/word broadcast and comparison are only available
+     under TARGET_AVX512BW; without it, split the 512-bit vector into two
+     256-bit halves, set each half recursively and concatenate them.  */
+  if ((mode == V32HImode || mode == V64QImode) && !TARGET_AVX512BW)
+    {
+      gcc_assert (TARGET_AVX512F);
+      rtx vhi, vlo, idx_hi;
+      machine_mode half_mode;
+      rtx (*extract_hi)(rtx, rtx);
+      rtx (*extract_lo)(rtx, rtx);
+
+      if (mode == V32HImode)
+       {
+         half_mode = V16HImode;
+         extract_hi = gen_vec_extract_hi_v32hi;
+         extract_lo = gen_vec_extract_lo_v32hi;
+       }
+      else
+       {
+         half_mode = V32QImode;
+         extract_hi = gen_vec_extract_hi_v64qi;
+         extract_lo = gen_vec_extract_lo_v64qi;
+       }
+
+      vhi = gen_reg_rtx (half_mode);
+      vlo = gen_reg_rtx (half_mode);
+      idx_hi = gen_reg_rtx (GET_MODE (idx));
+      emit_insn (extract_hi (vhi, target));
+      emit_insn (extract_lo (vlo, target));
+      vec[0] = idx_hi;  /* Receives idx - n_elts/2, the high-half index.  */
+      vec[1] = idx;
+      vec[2] = GEN_INT (n_elts/2);
+      ix86_expand_binary_operator (MINUS, GET_MODE (idx), vec);
+      ix86_expand_vector_set_var (vhi, val, idx_hi);
+      ix86_expand_vector_set_var (vlo, val, idx);
+      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, vlo, vhi)));
+      return;
+    }
+
+  if (FLOAT_MODE_P (GET_MODE_INNER (mode)))
+    {
+      switch (mode)
+       {
+       case E_V2DFmode:
+         cmp_mode = V2DImode;
+         break;
+       case E_V4DFmode:
+         cmp_mode = V4DImode;
+         break;
+       case E_V8DFmode:
+         cmp_mode = V8DImode;
+         break;
+       case E_V4SFmode:
+         cmp_mode = V4SImode;
+         break;
+       case E_V8SFmode:
+         cmp_mode = V8SImode;
+         break;
+       case E_V16SFmode:
+         cmp_mode = V16SImode;
+         break;
+       default:
+         gcc_unreachable ();
+       }
+    }
+
+  for (int i = 0; i != n_elts; i++)
+    vec[i] = GEN_INT (i);
+  constv = gen_rtx_CONST_VECTOR (cmp_mode, gen_rtvec_v (n_elts, vec));
+  valv = gen_reg_rtx (mode);
+  idxv = gen_reg_rtx (cmp_mode);
+  idx_tmp = convert_to_mode (GET_MODE_INNER (cmp_mode), idx, 1);
+
+  ok = ix86_expand_vector_init_duplicate (false, mode, valv, val);
+  gcc_assert (ok);
+  ok = ix86_expand_vector_init_duplicate (false, cmp_mode, idxv, idx_tmp);
+  gcc_assert (ok);
+  vec[0] = target;  /* Destination.  */
+  vec[1] = valv;  /* Taken in lanes where idxv == constv.  */
+  vec[2] = target;  /* Taken in all other lanes (left unchanged).  */
+  vec[3] = gen_rtx_EQ (mode, idxv, constv);
+  vec[4] = idxv;
+  vec[5] = constv;
+  ok = ix86_expand_int_vcond (vec);
+  gcc_assert (ok);
+}
+
 void
 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
 {
index a3d9f9eaf148b417fb894396f9e51fe182612f8a..65347a59b79ff3312675a5018227886011d7710c 100644 (file)
@@ -245,6 +245,7 @@ extern rtx ix86_rewrite_tls_address (rtx);
 
 extern void ix86_expand_vector_init (bool, rtx, rtx);
 extern void ix86_expand_vector_set (bool, rtx, rtx, int);
+extern void ix86_expand_vector_set_var (rtx, rtx, rtx);
 extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
 extern void ix86_expand_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
 
index 36f9dfcc586985d57c28fd8ac16c59433800b8c5..be5aaa4d76fe12e38867120e7e541c2a8e529f94 100644 (file)
   return op == const1_rtx || op == constm1_rtx;
 })
 
+;; True for const_int, or for registers if TARGET_AVX2; used by vec_set<mode>.
+(define_predicate "vec_setm_operand"
+  (ior (and (match_operand 0 "register_operand")
+           (match_test "TARGET_AVX2"))
+       (match_code "const_int")))
+
 ;; True for registers, or 1 or -1.  Used to optimize double-word shifts.
 (define_predicate "reg_or_pm1_operand"
   (ior (match_operand 0 "register_operand")
index 8437ad27087a8ba5bdd50c75dcc1298a0180d270..1193680956195208d52c6a638e3de06fc7ff3336 100644 (file)
 (define_expand "vec_set<mode>"
   [(match_operand:V 0 "register_operand")
    (match_operand:<ssescalarmode> 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_operand")]
   "TARGET_SSE"
 {
-  ix86_expand_vector_set (false, operands[0], operands[1],
-                         INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (false, operands[0], operands[1],
+                           INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vec-set-1.c b/gcc/testsuite/gcc.target/i386/avx2-vec-set-1.c
new file mode 100644 (file)
index 0000000..4c16ec5
--- /dev/null
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -mno-avx512f" } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpeq[bwdq]} 12 } } */
+/* { dg-final { scan-assembler-times {(?n)vp?blendv} 12 } } */
+
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef int v4si __attribute__ ((vector_size (16)));
+
+typedef long long v4di __attribute__ ((vector_size (32)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+
+typedef float v8sf __attribute__ ((vector_size (32)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+
+typedef double v4df __attribute__ ((vector_size (32)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+#define FOO(VTYPE, TYPE)                       \
+  VTYPE                                                \
+  __attribute__ ((noipa))                      \
+  foo_##VTYPE (VTYPE a, TYPE b, unsigned int c)        \
+  {                                            \
+    a[c] = b;                                  \
+    return a;                                  \
+  }                                            \
+
+FOO (v16qi, char);
+FOO (v32qi, char);
+
+FOO (v8hi, short);
+FOO (v16hi, short);
+
+FOO (v4si, int);
+FOO (v8si, int);
+
+FOO (v2di, long long);
+FOO (v4di, long long);
+
+FOO (v4sf, float);
+FOO (v8sf, float);
+
+FOO (v2df, double);
+FOO (v4df, double);
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vec-set-2.c b/gcc/testsuite/gcc.target/i386/avx2-vec-set-2.c
new file mode 100644 (file)
index 0000000..9086ef4
--- /dev/null
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O2 -mavx2" } */
+
+
+#ifndef CHECK
+#define CHECK "avx2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx2_test
+#endif
+
+#include CHECK
+
+#include "avx2-vec-set-1.c"
+
+#define CALC_TEST(vtype, type, N, idx)                         \
+do                                                             \
+  {                                                            \
+    int i,val = idx * idx - idx * 3 + 16;                      \
+    type res[N],exp[N];                                                \
+    vtype resv;                                                        \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       res[i] = i * i - i * 3 + 15;                            \
+       exp[i] = res[i];                                        \
+      }                                                                \
+    exp[idx] = val;                                            \
+    resv = foo_##vtype (*(vtype *)&res[0], val, idx);          \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       if (resv[i] != exp[i])                                  \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+static void
+TEST (void)
+{
+  CALC_TEST (v32qi, char, 32, 17);
+  CALC_TEST (v16qi, char, 16, 5);
+  CALC_TEST (v16hi, short, 16, 9);
+  CALC_TEST (v8hi, short, 8, 6);
+  CALC_TEST (v8si, int, 8, 3);
+  CALC_TEST (v4si, int, 4, 2);
+  CALC_TEST (v4di, long long, 4, 1);
+  CALC_TEST (v2di, long long, 2, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-1.c
new file mode 100644 (file)
index 0000000..5cfbc85
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times {(?n)(?:vp?broadcast|vmovddup)} 36 } } */
+/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]+\$0} 18 } } */
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__ ((vector_size (64)));
+typedef float v16sf __attribute__ ((vector_size (64)));
+typedef double v8df __attribute__ ((vector_size (64)));
+
+#include "avx2-vec-set-1.c"
+
+FOO (v64qi, char);
+FOO (v32hi, short);
+FOO (v16si, int);
+FOO (v8di, long long);
+FOO (v16sf, float);
+FOO (v8df, double);
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-2.c
new file mode 100644 (file)
index 0000000..22e6418
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-options "-O2 -mavx512bw" } */
+
+
+#ifndef CHECK
+#define CHECK "avx512f-check.h"
+#endif
+
+#define AVX512BW
+
+#include CHECK
+
+#include "avx512bw-vec-set-1.c"
+
+#define CALC_TEST(vtype, type, N, idx)                         \
+do                                                             \
+  {                                                            \
+    int i,val = idx * idx - idx * 3 + 16;                      \
+    type res[N],exp[N];                                                \
+    vtype resv;                                                        \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       res[i] = i * i - i * 3 + 15;                            \
+       exp[i] = res[i];                                        \
+      }                                                                \
+    exp[idx] = val;                                            \
+    resv = foo_##vtype (*(vtype *)&res[0], val, idx);          \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       if (resv[i] != exp[i])                                  \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+static void
+test_512 (void)
+{
+  CALC_TEST (v64qi, char, 64, 50);
+  CALC_TEST (v32hi, short, 32, 30);
+  CALC_TEST (v16si, int, 16, 15);
+  CALC_TEST (v8di, long long, 8, 7);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vec-set-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vec-set-2.c
new file mode 100644 (file)
index 0000000..8f2aa03
--- /dev/null
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f -mno-avx512bw" } */
+
+
+#ifndef CHECK
+#define CHECK "avx512f-check.h"
+#endif
+
+#define AVX512F
+
+#include CHECK
+
+#include "avx512bw-vec-set-1.c"
+
+#define CALC_TEST(vtype, type, N, idx)                         \
+do                                                             \
+  {                                                            \
+    int i,val = idx * idx - idx * 3 + 16;                      \
+    type res[N],exp[N];                                                \
+    vtype resv;                                                        \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       res[i] = i * i - i * 3 + 15;                            \
+       exp[i] = res[i];                                        \
+      }                                                                \
+    exp[idx] = val;                                            \
+    resv = foo_##vtype (*(vtype *)&res[0], val, idx);          \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       if (resv[i] != exp[i])                                  \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+static void
+test_512 (void)
+{
+  CALC_TEST (v64qi, char, 64, 50);
+  CALC_TEST (v32hi, short, 32, 30);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vec-set-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vec-set-2.c
new file mode 100644 (file)
index 0000000..4f32742
--- /dev/null
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+
+#ifndef CHECK
+#define CHECK "avx512f-check.h"
+#endif
+
+#define AVX512VL
+#define AVX512BW
+
+#include CHECK
+
+#include "avx512bw-vec-set-1.c"
+
+#define CALC_TEST(vtype, type, N, idx)                         \
+do                                                             \
+  {                                                            \
+    int i,val = idx * idx - idx * 3 + 16;                      \
+    type res[N],exp[N];                                                \
+    vtype resv;                                                        \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       res[i] = i * i - i * 3 + 15;                            \
+       exp[i] = res[i];                                        \
+      }                                                                \
+    exp[idx] = val;                                            \
+    resv = foo_##vtype (*(vtype *)&res[0], val, idx);          \
+    for (i = 0; i < N; i++)                                    \
+      {                                                                \
+       if (resv[i] != exp[i])                                  \
+         abort ();                                             \
+      }                                                                \
+  }                                                            \
+while (0)
+
+static void
+test_256 (void)
+{
+  CALC_TEST (v32qi, char, 32, 17);
+  CALC_TEST (v16hi, short, 16, 9);
+  CALC_TEST (v8si, int, 8, 3);
+  CALC_TEST (v4di, long long, 4, 1);
+}
+
+static void
+test_128 (void)
+{
+  CALC_TEST (v16qi, char, 16, 5);
+  CALC_TEST (v8hi, short, 8, 6);
+  CALC_TEST (v4si, int, 4, 2);
+  CALC_TEST (v2di, long long, 2, 0);
+}