vect, aarch64: Fix alignment units for IFN_MASK* [PR95401]
author Richard Sandiford <richard.sandiford@arm.com>
Mon, 4 Jan 2021 14:44:21 +0000 (14:44 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Mon, 4 Jan 2021 14:44:21 +0000 (14:44 +0000)
The IFN_MASK* functions take two leading arguments: a load or
store pointer and a “cookie”.  The type of the cookie is the
type of the access for TBAA purposes (like for MEM_REFs)
while the value of the cookie is the alignment of the access.
This PR was caused by a disagreement about whether the alignment
is measured in bits or bytes.

It looks like this goes back to PR68786, which made the
vectoriser create its own cookie argument rather than reusing
the one created by ifcvt.  The alignment value of the new cookie
was measured in bytes (as needed by set_ptr_info_alignment)
while the existing code expected it to be measured in bits.
The folds I added for IFN_MASK_LOAD and STORE then made
things worse.

gcc/
PR tree-optimization/95401
* config/aarch64/aarch64-sve-builtins.cc
(gimple_folder::load_store_cookie): Use bits rather than bytes
for the alignment argument to IFN_MASK_LOAD and IFN_MASK_STORE.
* gimple-fold.c (gimple_fold_mask_load_store_mem_ref): Likewise.
* tree-vect-stmts.c (vectorizable_store): Likewise.
(vectorizable_load): Likewise.

gcc/testsuite/
PR tree-optimization/95401
* g++.dg/vect/pr95401.cc: New test.
* g++.dg/vect/pr95401a.cc: Likewise.

gcc/config/aarch64/aarch64-sve-builtins.cc
gcc/gimple-fold.c
gcc/testsuite/g++.dg/vect/pr95401.cc [new file with mode: 0644]
gcc/testsuite/g++.dg/vect/pr95401a.cc [new file with mode: 0644]
gcc/tree-vect-stmts.c

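diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 681b79c990da6143b2291c6c0fcc042192c5ad70..6270b51fbf436c7e2a95b6ad617cb8b29845c0dd 100644 (file)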
@@ -2580,7 +2580,7 @@ gimple_folder::fold_contiguous_base (gimple_seq &stmts, tree vectype)
 tree
 gimple_folder::load_store_cookie (tree type)
 {
-  return build_int_cst (build_pointer_type (type), TYPE_ALIGN_UNIT (type));
+  return build_int_cst (build_pointer_type (type), TYPE_ALIGN (type));
 }
 
 /* Fold the call to a call to INSTANCE, with the same arguments.  */
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index ec8da781dd39274e96b610f1f8f0283308ec0ecb..fe46d987faab657b7d5c2c55a4c637b5bb996070 100644 (file)
@@ -5201,7 +5201,7 @@ gimple_fold_mask_load_store_mem_ref (gcall *call, tree vectype)
   if (!tree_fits_uhwi_p (alias_align) || !integer_all_onesp (mask))
     return NULL_TREE;
 
-  unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align) * BITS_PER_UNIT;
+  unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align);
   if (TYPE_ALIGN (vectype) != align)
     vectype = build_aligned_type (vectype, align);
   tree offset = build_zero_cst (TREE_TYPE (alias_align));
diff --git a/gcc/testsuite/g++.dg/vect/pr95401.cc b/gcc/testsuite/g++.dg/vect/pr95401.cc
new file mode 100644 (file)
index 0000000..6a56dab
--- /dev/null
@@ -0,0 +1,13 @@
+// { dg-additional-options "-mavx2 -O3" { target avx2_runtime } }
+// { dg-additional-sources pr95401a.cc }
+
+extern int var_9;
+extern unsigned var_14;
+extern int arr_16[];
+#include <algorithm>
+void test() {
+  for (short a = 0; a < (short)var_9; a += 12140)
+    for (short b = 0; b < 8; b++)
+      if (std::max(var_14, 1U))
+        arr_16[a + b] = 0;
+}
diff --git a/gcc/testsuite/g++.dg/vect/pr95401a.cc b/gcc/testsuite/g++.dg/vect/pr95401a.cc
new file mode 100644 (file)
index 0000000..71b054c
--- /dev/null
@@ -0,0 +1,13 @@
+// { dg-do compile }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+int var_9 = 1693986256, var_14;
+int arr_16[11];
+void test();
+int main()
+{
+  check_vect();
+  test();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 54fb68b216f54e697d16ff60ed4650ab4f4d874e..014f1aff4c1bbda55c431635f72e0382ff77401e 100644 (file)
@@ -7964,7 +7964,7 @@ vectorizable_store (vec_info *vinfo,
              /* Emit:
                   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
                                     VEC_ARRAY).  */
-             unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
+             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
              tree alias_ptr = build_int_cst (ref_type, align);
              call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
                                                 dataref_ptr, alias_ptr,
@@ -8079,7 +8079,7 @@ vectorizable_store (vec_info *vinfo,
              if (final_mask)
                {
                  align = least_bit_hwi (misalign | align);
-                 tree ptr = build_int_cst (ref_type, align);
+                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
                  gcall *call
                    = gimple_build_call_internal (IFN_MASK_STORE, 4,
                                                  dataref_ptr, ptr,
@@ -8094,7 +8094,7 @@ vectorizable_store (vec_info *vinfo,
                    = vect_get_loop_len (loop_vinfo, loop_lens,
                                         vec_num * ncopies, vec_num * j + i);
                  align = least_bit_hwi (misalign | align);
-                 tree ptr = build_int_cst (ref_type, align);
+                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
                  machine_mode vmode = TYPE_MODE (vectype);
                  opt_machine_mode new_ovmode
                    = get_len_load_store_mode (vmode, false);
@@ -9246,7 +9246,7 @@ vectorizable_load (vec_info *vinfo,
              /* Emit:
                   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
                                                VEC_MASK).  */
-             unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
+             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
              tree alias_ptr = build_int_cst (ref_type, align);
              call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
                                                 dataref_ptr, alias_ptr,
@@ -9347,7 +9347,8 @@ vectorizable_load (vec_info *vinfo,
                    if (final_mask)
                      {
                        align = least_bit_hwi (misalign | align);
-                       tree ptr = build_int_cst (ref_type, align);
+                       tree ptr = build_int_cst (ref_type,
+                                                 align * BITS_PER_UNIT);
                        gcall *call
                          = gimple_build_call_internal (IFN_MASK_LOAD, 3,
                                                        dataref_ptr, ptr,
@@ -9363,7 +9364,8 @@ vectorizable_load (vec_info *vinfo,
                                               vec_num * ncopies,
                                               vec_num * j + i);
                        align = least_bit_hwi (misalign | align);
-                       tree ptr = build_int_cst (ref_type, align);
+                       tree ptr = build_int_cst (ref_type,
+                                                 align * BITS_PER_UNIT);
                        gcall *call
                          = gimple_build_call_internal (IFN_LEN_LOAD, 3,
                                                        dataref_ptr, ptr,