arm: Add new vector mode macros
authorRichard Sandiford <richard.sandiford@arm.com>
Tue, 29 Sep 2020 10:41:26 +0000 (11:41 +0100)
committerRichard Sandiford <richard.sandiford@arm.com>
Tue, 29 Sep 2020 10:41:26 +0000 (11:41 +0100)
The AArch32 port now has three vector extensions: iwMMXt, Neon
and MVE.  We already have some named expanders that are shared
by all three, and soon we'll need more.

One way of handling this would be to use define_mode_iterators
that specify the condition for each mode.  For example,

  (V16QI "TARGET_NEON || TARGET_HAVE_MVE")
  (V8QI "TARGET_NEON || TARGET_REALLY_IWMXXT")
  ...
  (V2SF "TARGET_NEON && flag_unsafe_math_optimizations")

etc.  However, we'll need several mode iterators, and it would
be repetitive to specify the mode condition every time.

This patch therefore introduces per-mode macros that say whether
we can perform general arithmetic on the mode.  Initially there are
two sets of macros:

ARM_HAVE_NEON_<MODE>_ARITH
  true if Neon can handle general arithmetic on <MODE>

ARM_HAVE_<MODE>_ARITH
  true if any vector extension can handle general arithmetic on <MODE>

The macro definitions themselves are undeniably ugly, but hopefully
they're justified by the simplifications they allow.

The patch converts the addition patterns to use this scheme.

Previously there were three copies of the V8HF and V4HF addition
patterns for Neon:

(1) *add<VDQ:mode>3_neon, which provided plus:VnHF even without
    TARGET_NEON_FP16INST.  This was probably harmless since all the
    named patterns had an appropriate guard, but it is possible that
    something could have tried to generate the plus directly, such as
    by using a REG_EQUAL note to generate a new pattern.

(2) addv8hf3_neon and addv4hf3, which had the correct
    TARGET_NEON_FP16INST target condition, but unnecessarily required
    flag_unsafe_math_optimizations.  Unlike VnSF operations, VnHF
    operations do not force flush to zero.

(3) add<VH:mode>3_fp16, which provided plus:VnHF with the
    correct conditions (TARGET_NEON_FP16INST, with no
    flag_unsafe_math_optimizations test).

The patch in essence renames add<VH:mode>3_fp16 to *add<VH:mode>3_neon
(part of *add<VDQ:mode>3_neon) and removes the other two patterns.

gcc/
* config/arm/arm.h (ARM_HAVE_NEON_V8QI_ARITH, ARM_HAVE_NEON_V4HI_ARITH)
(ARM_HAVE_NEON_V2SI_ARITH, ARM_HAVE_NEON_V16QI_ARITH): New macros.
(ARM_HAVE_NEON_V8HI_ARITH, ARM_HAVE_NEON_V4SI_ARITH): Likewise.
(ARM_HAVE_NEON_V2DI_ARITH, ARM_HAVE_NEON_V4HF_ARITH): Likewise.
(ARM_HAVE_NEON_V8HF_ARITH, ARM_HAVE_NEON_V2SF_ARITH): Likewise.
(ARM_HAVE_NEON_V4SF_ARITH, ARM_HAVE_V8QI_ARITH, ARM_HAVE_V4HI_ARITH)
(ARM_HAVE_V2SI_ARITH, ARM_HAVE_V16QI_ARITH, ARM_HAVE_V8HI_ARITH)
(ARM_HAVE_V4SI_ARITH, ARM_HAVE_V2DI_ARITH, ARM_HAVE_V4HF_ARITH)
(ARM_HAVE_V2SF_ARITH, ARM_HAVE_V8HF_ARITH, ARM_HAVE_V4SF_ARITH):
Likewise.
* config/arm/iterators.md (VNIM, VNINOTM): Delete.
* config/arm/vec-common.md (add<VNIM:mode>3, addv8hf3)
(add<VNINOTM:mode>3): Replace with...
(add<VDQ:mode>3): ...this new expander.
* config/arm/neon.md (*add<VDQ:mode>3_neon): Use the new
ARM_HAVE_NEON_<MODE>_ARITH macros as the C condition.
(addv8hf3_neon, addv4hf3, add<VFH:mode>3_fp16): Delete in
favor of the above.
(neon_vadd<VH:mode>): Use gen_add<mode>3 instead of
gen_add<mode>3_fp16.

gcc/testsuite/
* gcc.target/arm/armv8_2-fp16-arith-2.c: Expect FP16 vectorization
even without -ffast-math.

gcc/config/arm/arm.h
gcc/config/arm/iterators.md
gcc/config/arm/neon.md
gcc/config/arm/vec-common.md
gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c

index f4d3676c5bce8faf5349fc4d6ac5827aa2723327..4a63d33c70dea64406f3a3ae9e6374e76551ecb4 100644 (file)
@@ -1110,6 +1110,47 @@ extern const int arm_arch_cde_coproc_bits[];
 #define VALID_MVE_STRUCT_MODE(MODE) \
   ((MODE) == TImode || (MODE) == OImode || (MODE) == XImode)
 
+/* The conditions under which vector modes are supported for general
+   arithmetic using Neon.  */
+
+#define ARM_HAVE_NEON_V8QI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V4HI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V2SI_ARITH TARGET_NEON
+
+#define ARM_HAVE_NEON_V16QI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V8HI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V4SI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V2DI_ARITH TARGET_NEON
+
+/* HF operations have their own flush-to-zero control (FPSCR.FZ16).  */
+#define ARM_HAVE_NEON_V4HF_ARITH TARGET_NEON_FP16INST
+#define ARM_HAVE_NEON_V8HF_ARITH TARGET_NEON_FP16INST
+
+/* SF operations always flush to zero, regardless of FPSCR.FZ, so we can
+   only use them for general arithmetic when -funsafe-math-optimizations
+   is in effect.  */
+#define ARM_HAVE_NEON_V2SF_ARITH \
+  (TARGET_NEON && flag_unsafe_math_optimizations)
+#define ARM_HAVE_NEON_V4SF_ARITH ARM_HAVE_NEON_V2SF_ARITH
+
+/* The conditions under which vector modes are supported for general
+   arithmetic by any vector extension.  */
+
+#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH || TARGET_REALLY_IWMMXT)
+
+#define ARM_HAVE_V16QI_ARITH (ARM_HAVE_NEON_V16QI_ARITH || TARGET_HAVE_MVE)
+#define ARM_HAVE_V8HI_ARITH (ARM_HAVE_NEON_V8HI_ARITH || TARGET_HAVE_MVE)
+#define ARM_HAVE_V4SI_ARITH (ARM_HAVE_NEON_V4SI_ARITH || TARGET_HAVE_MVE)
+#define ARM_HAVE_V2DI_ARITH ARM_HAVE_NEON_V2DI_ARITH
+
+#define ARM_HAVE_V4HF_ARITH ARM_HAVE_NEON_V4HF_ARITH
+#define ARM_HAVE_V2SF_ARITH ARM_HAVE_NEON_V2SF_ARITH
+
+#define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT)
+#define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT)
+
 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
 extern int arm_regs_in_sequence[];
 
index 0bc9eba0722689aff4c1a143e952f6eb91c0cd86..c70e3bc273126676d9eaa649acd7194cecdd65a1 100644 (file)
 ;; Integer and float modes supported by Neon and IWMMXT.
 (define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
 
-;; Integer and float modes supported by Neon, IWMMXT and MVE, used by
-;; arithmetic epxand patterns.
-(define_mode_iterator VNIM [V16QI V8HI V4SI V4SF])
-
-;; Integer and float modes supported by Neon and IWMMXT but not MVE, used by
-;; arithmetic epxand patterns.
-(define_mode_iterator VNINOTM [V2SI V4HI V8QI V2SF V2DI])
-
 ;; Integer and float modes supported by Neon, IWMMXT and MVE.
 (define_mode_iterator VNIM1 [V16QI V8HI V4SI V4SF V2DI])
 
index 3e7b51d8ab60007901392df0ca1cb09fead4d0e9..96bf277f5016c1c8b3c42ca396157c8fd07d7b20 100644 (file)
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
         (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set (attr "type")
       (if_then_else (match_test "<Is_float_mode>")
                     (const_string "neon_add<q>")))]
 )
 
-;; As with SFmode, full support for HFmode vector arithmetic is only available
-;; when flag-unsafe-math-optimizations is enabled.
-
-;; Add pattern with modes V8HF and V4HF is split into separate patterns to add
-;; support for standard pattern addv8hf3 in MVE.  Following pattern is called
-;; from "addv8hf3" standard pattern inside vec-common.md file.
-
-(define_insn "addv8hf3_neon"
-  [(set
-    (match_operand:V8HF 0 "s_register_operand" "=w")
-    (plus:V8HF
-     (match_operand:V8HF 1 "s_register_operand" "w")
-     (match_operand:V8HF 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vadd.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_fp_addsub_s_q")]
-)
-
-(define_insn "addv4hf3"
-  [(set
-    (match_operand:V4HF 0 "s_register_operand" "=w")
-    (plus:V4HF
-     (match_operand:V4HF 1 "s_register_operand" "w")
-     (match_operand:V4HF 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vadd.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_fp_addsub_s_q")]
-)
-
-(define_insn "add<mode>3_fp16"
-  [(set
-    (match_operand:VH 0 "s_register_operand" "=w")
-    (plus:VH
-     (match_operand:VH 1 "s_register_operand" "w")
-     (match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST"
- "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set (attr "type")
-   (if_then_else (match_test "<Is_float_mode>")
-    (const_string "neon_fp_addsub_s<q>")
-    (const_string "neon_add<q>")))]
-)
-
 (define_insn "*sub<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
         (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand")]
   "TARGET_NEON_FP16INST"
 {
-  emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
+  emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
   DONE;
 })
 
index b7e3619caf461063876654c54393d305147f7bf7..c3c86c46355e6ace6c90e189b4160dbe4cd9caf3 100644 (file)
 ;; patterns separately for Neon, IWMMXT and MVE.
 
 (define_expand "add<mode>3"
-  [(set (match_operand:VNIM 0 "s_register_operand")
-       (plus:VNIM (match_operand:VNIM 1 "s_register_operand")
-                  (match_operand:VNIM 2 "s_register_operand")))]
-  "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
-                   || flag_unsafe_math_optimizations))
-   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))
-   || (TARGET_HAVE_MVE && VALID_MVE_SI_MODE(<MODE>mode))
-   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(<MODE>mode))"
-{
-})
-
-;; Vector arithmetic.  Expanders are blank, then unnamed insns implement
-;; patterns separately for Neon and MVE.
-
-(define_expand "addv8hf3"
-  [(set (match_operand:V8HF 0 "s_register_operand")
-       (plus:V8HF (match_operand:V8HF 1 "s_register_operand")
-                  (match_operand:V8HF 2 "s_register_operand")))]
-  "(TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(V8HFmode))
-   || (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)"
-{
-  if (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)
-    emit_insn (gen_addv8hf3_neon (operands[0], operands[1], operands[2]));
-})
-
-;; Vector arithmetic.  Expanders are blank, then unnamed insns implement
-;; patterns separately for Neon and IWMMXT.
-
-(define_expand "add<mode>3"
-  [(set (match_operand:VNINOTM 0 "s_register_operand")
-       (plus:VNINOTM (match_operand:VNINOTM 1 "s_register_operand")
-                     (match_operand:VNINOTM 2 "s_register_operand")))]
-  "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
-                   || flag_unsafe_math_optimizations))
-   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
-})
+  [(set (match_operand:VDQ 0 "s_register_operand")
+       (plus:VDQ (match_operand:VDQ 1 "s_register_operand")
+                 (match_operand:VDQ 2 "s_register_operand")))]
+  "ARM_HAVE_<MODE>_ARITH"
+)
 
 ;; Vector arithmetic. Expanders are blank, then unnamed insns implement
 ;; patterns separately for IWMMXT and Neon.
index 24d0528540d58a43b896dd88c64c23b823553e96..81bad225a1f72a4ff722b5390dd9c970ea28b288 100644 (file)
@@ -89,17 +89,23 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t)
 /* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } }  */
 /* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } }  */
 /* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } }  */
+/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } }  */
+
+/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } }  */
+/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }  */
+/* { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } }  */
+
+/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } }  */
+/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }  */
+/* { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } }  */
+
+/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } }  */
+/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }  */
+/* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } }  */
 
-/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } }  */
-/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } }  */
-/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } }  */
 /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } }  */
 /* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } }  */
-
 /* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } }  */
-/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, #0} 2 } }  */
-
-/* { dg-final { scan-assembler-not {vabs\.f16} } }  */
 
 /* { dg-final { scan-assembler-not {vadd\.f32} } }  */
 /* { dg-final { scan-assembler-not {vsub\.f32} } }  */