aarch64: Support SVE comparisons for unpacked integers
author Richard Sandiford <richard.sandiford@arm.com>
Wed, 11 Nov 2020 11:42:46 +0000 (11:42 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Wed, 11 Nov 2020 11:42:46 +0000 (11:42 +0000)
This patch adds support for comparing unpacked SVE integer vectors,
such as byte elements stored in the bottom bytes of halfword
containers.  It also adds support for selects between unpacked
SVE vectors (both integer and floating-point), since selects and
compares are closely tied via the vcond optab interface.

gcc/
* config/aarch64/aarch64-sve.md (@vcond_mask_<mode><vpred>): Extend
from SVE_FULL to SVE_ALL.
(*vcond_mask_<mode><vpred>): Likewise.
(@aarch64_sel_dup<mode>): Likewise.
(vcond<SVE_FULL:mode><v_int_equiv>): Extend to...
(vcond<SVE_ALL:mode><SVE_I:mode>): ...this, but requiring the
sizes of the container modes to match.
(vcondu<SVE_FULL:mode><v_int_equiv>): Extend to...
(vcondu<SVE_ALL:mode><SVE_I:mode>): ...this.
(vec_cmp<SVE_FULL_I:mode><vpred>): Extend to...
(vec_cmp<SVE_I:mode><vpred>): ...this.
(vec_cmpu<SVE_FULL_I:mode><vpred>): Extend to...
(vec_cmpu<SVE_I:mode><vpred>): ...this.
(@aarch64_pred_cmp<cmp_op><SVE_FULL_I:mode>): Extend to...
(@aarch64_pred_cmp<cmp_op><SVE_I:mode>): ...this.
(*cmp<cmp_op><SVE_FULL_I:mode>_cc): Extend to...
(*cmp<cmp_op><SVE_I:mode>_cc): ...this.
(*cmp<cmp_op><SVE_FULL_I:mode>_ptest): Extend to...
(*cmp<cmp_op><SVE_I:mode>_ptest): ...this.
(*cmp<cmp_op><SVE_FULL_I:mode>_and): Extend to...
(*cmp<cmp_op><SVE_I:mode>_and): ...this.

gcc/testsuite/
* gcc.target/aarch64/sve/cmp_1.c: New test.
* gcc.target/aarch64/sve/cmp_2.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_1.c: Add --param
aarch64-sve-compare-costs=0.
* gcc.target/aarch64/sve/cond_arith_1_run.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_3.c: Likewise.
* gcc.target/aarch64/sve/cond_arith_3_run.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_7.c: Likewise.
* gcc.target/aarch64/sve/mask_load_slp_1.c: Likewise.
* gcc.target/aarch64/sve/vcond_11.c: Likewise.
* gcc.target/aarch64/sve/vcond_11_run.c: Likewise.

gcc/config/aarch64/aarch64-sve.md
gcc/testsuite/gcc.target/aarch64/sve/cmp_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cmp_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1.c
gcc/testsuite/gcc.target/aarch64/sve/cond_arith_1_run.c
gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3.c
gcc/testsuite/gcc.target/aarch64/sve/cond_arith_3_run.c
gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c
gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c
gcc/testsuite/gcc.target/aarch64/sve/vcond_11.c
gcc/testsuite/gcc.target/aarch64/sve/vcond_11_run.c

index 4b0a1ebe9e1dd8bcbf683c5c136d9458b61dd943..455b025521f21f1cc1b0731ca612acfdb087de88 100644 (file)
 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
 ;; SEL operand order:        mask, true, false
 (define_expand "@vcond_mask_<mode><vpred>"
-  [(set (match_operand:SVE_FULL 0 "register_operand")
-       (unspec:SVE_FULL
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+       (unspec:SVE_ALL
          [(match_operand:<VPRED> 3 "register_operand")
-          (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
-          (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
+          (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE"
   {
 ;; - two registers
 ;; - a duplicated immediate and a register
 ;; - a duplicated immediate and zero
+;;
+;; For unpacked vectors, it doesn't really matter whether SEL uses the
+;; container size or the element size.  If SEL used the container size,
+;; it would ignore undefined bits of the predicate but would copy the
+;; upper (undefined) bits of each container along with the defined bits.
+;; If SEL used the element size, it would use undefined bits of the predicate
+;; to select between undefined elements in each input vector.  Thus the only
+;; difference is whether the undefined bits in a container always come from
+;; the same input as the defined bits, or whether the choice can vary
+;; independently of the defined bits.
+;;
+;; For the other instructions, using the element size is more natural,
+;; so we do that for SEL as well.
 (define_insn "*vcond_mask_<mode><vpred>"
-  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
-       (unspec:SVE_FULL
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
+       (unspec:SVE_ALL
          [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
-          (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
-          (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
+          (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
+          (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && (!register_operand (operands[1], <MODE>mode)
 ;; of GPRs as being more expensive than duplicates of FPRs, since they
 ;; involve a cross-file move.
 (define_insn "@aarch64_sel_dup<mode>"
-  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
-       (unspec:SVE_FULL
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
+       (unspec:SVE_ALL
          [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
-          (vec_duplicate:SVE_FULL
+          (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
-          (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
+          (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
          UNSPEC_SEL))]
   "TARGET_SVE"
   "@
 
 ;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcond<mode><v_int_equiv>"
-  [(set (match_operand:SVE_FULL 0 "register_operand")
-       (if_then_else:SVE_FULL
+(define_expand "vcond<SVE_ALL:mode><SVE_I:mode>"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+       (if_then_else:SVE_ALL
          (match_operator 3 "comparison_operator"
-           [(match_operand:<V_INT_EQUIV> 4 "register_operand")
-            (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
-         (match_operand:SVE_FULL 1 "nonmemory_operand")
-         (match_operand:SVE_FULL 2 "nonmemory_operand")))]
-  "TARGET_SVE"
+           [(match_operand:SVE_I 4 "register_operand")
+            (match_operand:SVE_I 5 "nonmemory_operand")])
+         (match_operand:SVE_ALL 1 "nonmemory_operand")
+         (match_operand:SVE_ALL 2 "nonmemory_operand")))]
+  "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
   {
-    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+    aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
     DONE;
   }
 )
 
 ;; Integer vcondu.  Don't enforce an immediate range here, since it
 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcondu<mode><v_int_equiv>"
-  [(set (match_operand:SVE_FULL 0 "register_operand")
-       (if_then_else:SVE_FULL
+(define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+       (if_then_else:SVE_ALL
          (match_operator 3 "comparison_operator"
-           [(match_operand:<V_INT_EQUIV> 4 "register_operand")
-            (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
-         (match_operand:SVE_FULL 1 "nonmemory_operand")
-         (match_operand:SVE_FULL 2 "nonmemory_operand")))]
-  "TARGET_SVE"
+           [(match_operand:SVE_I 4 "register_operand")
+            (match_operand:SVE_I 5 "nonmemory_operand")])
+         (match_operand:SVE_ALL 1 "nonmemory_operand")
+         (match_operand:SVE_ALL 2 "nonmemory_operand")))]
+  "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
   {
-    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+    aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
     DONE;
   }
 )
   [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
          (match_operator:<VPRED> 1 "comparison_operator"
-           [(match_operand:SVE_FULL_I 2 "register_operand")
-            (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
+           [(match_operand:SVE_I 2 "register_operand")
+            (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC_NZC CC_REGNUM))])]
   "TARGET_SVE"
   {
   [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
          (match_operator:<VPRED> 1 "comparison_operator"
-           [(match_operand:SVE_FULL_I 2 "register_operand")
-            (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
+           [(match_operand:SVE_I 2 "register_operand")
+            (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC_NZC CC_REGNUM))])]
   "TARGET_SVE"
   {
 )
 
 ;; Predicated integer comparisons.
+;;
+;; For unpacked vectors, only the lowpart element in each input container
+;; has a defined value, and only the predicate bits associated with
+;; those elements are defined.  For example, when comparing two VNx2SIs:
+;;
+;; - The VNx2SIs can be seen as VNx2DIs in which the low halves of each
+;;   DI container store an SI element.  The upper bits of each DI container
+;;   are undefined.
+;;
+;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
+;;   even elements are defined and the odd elements are undefined.
+;;
+;; - The associated predicate mode is VNx2BI.  This means that only the
+;;   low bit in each predicate byte is defined (on input and on output).
+;;
+;; - We use a .s comparison to compare VNx2SIs, under the control of a
+;;   VNx2BI governing predicate, to produce a VNx2BI result.  If we view
+;;   the .s operation as operating on VNx4SIs then for odd lanes:
+;;
+;;   - the input governing predicate bit is undefined
+;;   - the SI elements being compared are undefined
+;;   - the predicate result bit is therefore undefined, but
+;;   - the predicate result bit is in the undefined part of a VNx2BI,
+;;     so its value doesn't matter anyway.
 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (SVE_INT_CMP:<VPRED>
-            (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
-            (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+            (match_operand:SVE_I 3 "register_operand" "w, w")
+            (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
          UNSPEC_PRED_Z))
    (clobber (reg:CC_NZC CC_REGNUM))]
   "TARGET_SVE"
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (SVE_INT_CMP:<VPRED>
-               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
-               (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+               (match_operand:SVE_I 2 "register_operand" "w, w")
+               (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
    (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (SVE_INT_CMP:<VPRED>
-               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
-               (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+               (match_operand:SVE_I 2 "register_operand" "w, w")
+               (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
    (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
            [(match_operand 4)
             (const_int SVE_KNOWN_PTRUE)
             (SVE_INT_CMP:<VPRED>
-              (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
-              (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+              (match_operand:SVE_I 2 "register_operand" "w, w")
+              (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
            UNSPEC_PRED_Z)
          (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
    (clobber (reg:CC_NZC CC_REGNUM))]
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cmp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cmp_1.c
new file mode 100644 (file)
index 0000000..7cf66c5
--- /dev/null
@@ -0,0 +1,57 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_PAIR(TYPE1, TYPE2)                                \
+  void                                                 \
+  f_##TYPE1##_##TYPE2 (TYPE1 *restrict x,              \
+                      TYPE2 *restrict g, int n)        \
+  {                                                    \
+    for (int i = 0; i < n; ++i)                                \
+      if (g[i] < 4)                                    \
+       x[i] += 1;                                      \
+  }
+
+#define TEST_SINGLE(TYPE)                      \
+  TEST_PAIR (TYPE, int8_t)                     \
+  TEST_PAIR (TYPE, uint8_t)                    \
+  TEST_PAIR (TYPE, int16_t)                    \
+  TEST_PAIR (TYPE, uint16_t)                   \
+  TEST_PAIR (TYPE, int32_t)                    \
+  TEST_PAIR (TYPE, uint32_t)                   \
+  TEST_PAIR (TYPE, int64_t)                    \
+  TEST_PAIR (TYPE, uint64_t)
+
+TEST_SINGLE (int8_t)
+TEST_SINGLE (uint8_t)
+TEST_SINGLE (int16_t)
+TEST_SINGLE (uint16_t)
+TEST_SINGLE (int32_t)
+TEST_SINGLE (uint32_t)
+TEST_SINGLE (int64_t)
+TEST_SINGLE (uint64_t)
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 16 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 24 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 32 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpl[et]\tp[0-9]+\.b,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[so]\tp[0-9]+\.b,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[et]\tp[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[so]\tp[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[et]\tp[0-9]+\.s,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[so]\tp[0-9]+\.s,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[et]\tp[0-9]+\.d,} 8 } } */
+/* { dg-final { scan-assembler-times {\tcmpl[so]\tp[0-9]+\.d,} 8 } } */
+
+/* { dg-final { scan-assembler-not {\tpunpk} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cmp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cmp_2.c
new file mode 100644 (file)
index 0000000..b221206
--- /dev/null
@@ -0,0 +1,72 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_PAIR(TYPE1, TYPE2)                                        \
+  void                                                         \
+  f_##TYPE1##_##TYPE2 (TYPE1 *restrict x, TYPE1 y, TYPE1 z,    \
+                      TYPE2 *restrict g, TYPE2 h, int n)       \
+  {                                                            \
+    for (int i = 0; i < n; ++i)                                        \
+      x[i] = g[i] < h ? y : z;                                 \
+  }
+
+#define TEST_SINGLE(TYPE)                      \
+  TEST_PAIR (TYPE, int8_t)                     \
+  TEST_PAIR (TYPE, uint8_t)                    \
+  TEST_PAIR (TYPE, int16_t)                    \
+  TEST_PAIR (TYPE, uint16_t)                   \
+  TEST_PAIR (TYPE, int32_t)                    \
+  TEST_PAIR (TYPE, uint32_t)                   \
+  TEST_PAIR (TYPE, int64_t)                    \
+  TEST_PAIR (TYPE, uint64_t)
+
+TEST_SINGLE (int8_t)
+TEST_SINGLE (uint8_t)
+TEST_SINGLE (int16_t)
+TEST_SINGLE (uint16_t)
+TEST_SINGLE (int32_t)
+TEST_SINGLE (uint32_t)
+TEST_SINGLE (float)
+TEST_SINGLE (int64_t)
+TEST_SINGLE (uint64_t)
+TEST_SINGLE (double)
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b,} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 14 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 20 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.d,} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 8 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d,} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 18 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 24 } } */
+
+/* { dg-final { scan-assembler-times {\tcmp(?:h[is]|l[os])\tp[0-9]+\.b,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp[lg][et]\tp[0-9]+\.b,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp(?:h[is]|l[os])\tp[0-9]+\.h,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp[lg][et]\tp[0-9]+\.h,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp(?:h[is]|l[os])\tp[0-9]+\.s,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp[lg][et]\tp[0-9]+\.s,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp(?:h[is]|l[os])\tp[0-9]+\.d,} 10 } } */
+/* { dg-final { scan-assembler-times {\tcmp[lg][et]\tp[0-9]+\.d,} 10 } } */
+
+/* { dg-final { scan-assembler-not {\tpunpk} } } */
index 52138d2b023bee8160737c1946a9025456fd3374..d831e9c114251df41364e75bfb58a8d135958700 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 876f98f6ec20ca5087f0164d67b893333f253106..5808e0a966392a92fb6cfb9de4e1846e3dc4c58d 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include "cond_arith_1.c"
 
index 94eb255c969b535f61d19a0abe9c77d2cca1b652..068e0b64793116c4b980dbdaebe0d67d07c19533 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 31457da523b459eb7f28e46c9ede183c3fa7e697..d2580046dab5b1b8a2351fddd31f3534df8cdae9 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include "cond_arith_3.c"
 
index cd2661ef3a5da863661f90a0eac62bea876d789c..687716e7176a3f2a2f82a6368d3df3d970dbf614 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 78c70b2be3202819cc2bfbd3bedbe0e3a3702d30..a38b92dc53e7ce76b38d89a6d02bb4130ceb4a93 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 3c9e340475a3f16035934f3a23f1d9837b1aa971..4efcf3ab32aeddaeb6e860f72e7f4408af472414 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
index 9a4edb81448296eaa9186ad8103d1d06a3564fe5..4cbe4a6f867074d2889f80f6eb55f538d48988bc 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --param aarch64-sve-compare-costs=0" } */
 
 #include "vcond_11.c"