tree-optimization/98137 - enhance split_constant_offset range handling
author Richard Biener <rguenther@suse.de>
Fri, 4 Dec 2020 10:13:48 +0000 (11:13 +0100)
committer Richard Biener <rguenther@suse.de>
Mon, 7 Dec 2020 07:15:15 +0000 (08:15 +0100)
When looking through conversions of possibly wrapping operations,
split_constant_offset currently gives up on range information whenever
the downstream analysis does not yield an SSA name.
That's overly conservative, and we have a nice helper
(determine_value_range) that can deal with arbitrary expressions.
Use that.  This helps data
reference group analysis so the testcase is fully SLP vectorized,
making use of the whole-function "BB" vectorization capabilities
we now have.

2020-12-04  Richard Biener  <rguenther@suse.de>

PR tree-optimization/98137
* tree-data-ref.c (split_constant_offset_1): Use
determine_value_range instead of get_range_info to handle
arbitrary expressions.

* gcc.dg/vect/bb-slp-pr98137.c: New testcase.

gcc/testsuite/gcc.dg/vect/bb-slp-pr98137.c [new file with mode: 0644]
gcc/tree-data-ref.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr98137.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr98137.c
new file mode 100644 (file)
index 0000000..ecf7df2
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target vect_double } */
+
+void
+gemm (const double* __restrict__ A, const double* __restrict__ B,
+      double* __restrict__ C)
+{
+  unsigned int l_m = 0;
+  unsigned int l_n = 0;
+  unsigned int l_k = 0;
+
+  for ( l_n = 0; l_n < 9; l_n++ ) {
+    /* Clear C[l_n*10+0..9]; -O3 so this loop is unrolled completely early.  */
+    for ( l_m = 0; l_m < 10; l_m++ ) { C[(l_n*10)+l_m] = 0.0; }
+    for ( l_k = 0; l_k < 17; l_k++ ) {
+      /* Update C[l_n*10+0..9]; -O3 so this loop is unrolled completely early.  */
+      for ( l_m = 0; l_m < 10; l_m++ ) {
+        C[(l_n*10)+l_m] += A[(l_k*20)+l_m] * B[(l_n*20)+l_k];
+      }
+    }
+  }
+}
+
+/* Exact scanning is difficult but we expect all loads and stores
+   and computations to be vectorized.  */
+/* { dg-final { scan-tree-dump "optimized: basic block" "slp1" } } */
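diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 3bf460cccfd9d59ab8b563bfd632bc2f61c8e328..e8308ce8250625ee467aac6aa864899b1adc196b 100644 (file)
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c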
@@ -763,18 +763,22 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                tree tmp_var, tmp_off;
                split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
 
-               /* See whether we have an SSA_NAME whose range is known
-                  to be [A, B].  */
-               if (TREE_CODE (tmp_var) != SSA_NAME)
-                 return false;
+               /* See whether we have an known range [A, B] for tmp_var.  */
                wide_int var_min, var_max;
-               value_range_kind vr_type = get_range_info (tmp_var, &var_min,
-                                                          &var_max);
-               wide_int var_nonzero = get_nonzero_bits (tmp_var);
                signop sgn = TYPE_SIGN (itype);
-               if (intersect_range_with_nonzero_bits (vr_type, &var_min,
-                                                      &var_max, var_nonzero,
-                                                      sgn) != VR_RANGE)
+               if (TREE_CODE (tmp_var) == SSA_NAME)
+                 {
+                   value_range_kind vr_type
+                     = get_range_info (tmp_var, &var_min, &var_max);
+                   wide_int var_nonzero = get_nonzero_bits (tmp_var);
+                   if (intersect_range_with_nonzero_bits (vr_type, &var_min,
+                                                          &var_max,
+                                                          var_nonzero,
+                                                          sgn) != VR_RANGE)
+                     return false;
+                 }
+               else if (determine_value_range (tmp_var, &var_min, &var_max)
+                        != VR_RANGE)
                  return false;
 
                /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)