tree-optimization/98235 - limit SLP discovery
author Richard Biener <rguenther@suse.de>
Fri, 11 Dec 2020 09:52:58 +0000 (10:52 +0100)
committer Richard Biener <rguenther@suse.de>
Fri, 11 Dec 2020 10:55:29 +0000 (11:55 +0100)
With SLP discovery following backedges and the SLP discovery cache not
being permute aware, we have to put some discovery limits in place again.
That's also the opportunity to ditch the separate limit on the
number of permutes we try, so the patch limits the overall work
done (as in vect_build_slp_tree cache misses) to what we compute
as max_tree_size, which is based on the number of scalar stmts in
the vectorized region.

Note the limit is global and there's no attempt to divide the
allowed work evenly amongst opportunities, so one degenerate
opportunity can eat it all up.  That's probably only relevant for BB
vectorization, where the limit can be based on up to the size of the
whole function.

2020-12-11  Richard Biener  <rguenther@suse.de>

PR tree-optimization/98235
* tree-vect-slp.c (vect_build_slp_tree): Exchange npermutes
for limit.  Decrement that for each cache miss and fail
discovery when it reaches zero.
(vect_build_slp_tree_2): Remove npermutes handling and
simply pass down limit.
(vect_build_slp_instance): Pass down limit.
(vect_analyze_slp_instance): Likewise.
(vect_analyze_slp): Base the SLP discovery limit on
max_tree_size and pass it down.

* gcc.dg/torture/pr98235.c: New testcase.

gcc/testsuite/gcc.dg/torture/pr98235.c [new file with mode: 0644]
gcc/tree-vect-slp.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr98235.c b/gcc/testsuite/gcc.dg/torture/pr98235.c
new file mode 100644 (file)
index 0000000..5f59013
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fallow-store-data-races" } */
+
+char tcube[3][9];
+int cur_move;
+void perm_cube(void) {
+  int i, j, k, tmp;
+  for (; i < cur_move; i++)
+    while (k-- >= 0)
+      switch (j) {
+      case 0:
+        tmp = tcube[0][6];
+        tcube[2][8] = tcube[0][8];
+        tcube[0][8] = tmp;
+        tmp = tcube[0][5];
+        tcube[0][5] = tcube[1][8];
+        tcube[1][8] = tcube[2][5];
+        tcube[2][5] = tcube[1][2];
+        tcube[1][2] = tcube[2][1];
+        tcube[2][1] = tcube[1][0];
+        tcube[0][6] = tmp;
+        tmp = tcube[0][3];
+        tcube[0][3] = tcube[1][0];
+        tcube[1][0] = tcube[2][3];
+        tcube[2][3] = tcube[1][6];
+        tcube[1][6] = tmp;
+        break;
+      case 5:
+        tmp = tcube[2][0];
+        tcube[2][0] = tcube[2][2];
+        tcube[2][2] = tcube[2][8];
+        tcube[2][3] = tmp;
+      }
+}
index e93e9c7a2d3d9810147299831d5634feaab8ed7e..2d55885a553bc729f2e180d88e913b9cafbff5de 100644 (file)
@@ -1375,14 +1375,14 @@ static slp_tree
 vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
-                      bool *matches, unsigned *npermutes, unsigned *tree_size,
+                      bool *matches, unsigned *limit, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map);
 
 static slp_tree
 vect_build_slp_tree (vec_info *vinfo,
                     vec<stmt_vec_info> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits,
-                    bool *matches, unsigned *npermutes, unsigned *tree_size,
+                    bool *matches, unsigned *limit, unsigned *tree_size,
                     scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   if (slp_tree *leader = bst_map->get (stmts))
@@ -1405,10 +1405,26 @@ vect_build_slp_tree (vec_info *vinfo,
   SLP_TREE_SCALAR_STMTS (res) = stmts;
   bst_map->put (stmts.copy (), res);
 
+  if (*limit == 0)
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "SLP discovery limit exceeded\n");
+      bool existed_p = bst_map->put (stmts, NULL);
+      gcc_assert (existed_p);
+      /* Mark the node invalid so we can detect those when still in use
+        as backedge destinations.  */
+      SLP_TREE_SCALAR_STMTS (res) = vNULL;
+      SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
+      vect_free_slp_tree (res);
+      return NULL;
+    }
+  --*limit;
+
   poly_uint64 this_max_nunits = 1;
   slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size,
                                        &this_max_nunits,
-                                       matches, npermutes, tree_size, bst_map);
+                                       matches, limit, tree_size, bst_map);
   if (!res_)
     {
       bool existed_p = bst_map->put (stmts, NULL);
@@ -1441,7 +1457,7 @@ static slp_tree
 vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
-                      bool *matches, unsigned *npermutes, unsigned *tree_size,
+                      bool *matches, unsigned *limit, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   unsigned nops, i, this_tree_size = 0;
@@ -1687,7 +1703,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 
       if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
-                                       matches, npermutes,
+                                       matches, limit,
                                        &this_tree_size, bst_map)) != NULL)
        {
          oprnd_info->def_stmts = vNULL;
@@ -1708,12 +1724,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          && is_gimple_assign (stmt_info->stmt)
          /* Swapping operands for reductions breaks assumptions later on.  */
          && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
-         && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
-         /* Do so only if the number of not successful permutes was nor more
-            than a cut-ff as re-trying the recursive match on
-            possibly each level of the tree would expose exponential
-            behavior.  */
-         && *npermutes < 4)
+         && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
        {
          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
@@ -1759,17 +1770,13 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
-                                           tem, npermutes,
+                                           tem, limit,
                                            &this_tree_size, bst_map)) != NULL)
            {
              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }
-         /* We do not undo the swapping here since it might still be
-            the better order for the second operand in case we build
-            the first one from scalars below.  */
-         ++*npermutes;
        }
 fail:
 
@@ -2213,7 +2220,7 @@ static bool
 vect_analyze_slp_instance (vec_info *vinfo,
                           scalar_stmts_to_slp_tree_map_t *bst_map,
                           stmt_vec_info stmt_info, slp_instance_kind kind,
-                          unsigned max_tree_size);
+                          unsigned max_tree_size, unsigned *limit);
 
 /* Analyze an SLP instance starting from SCALAR_STMTS which are a group
    of KIND.  Return true if successful.  */
@@ -2223,7 +2230,7 @@ vect_build_slp_instance (vec_info *vinfo,
                         slp_instance_kind kind,
                         vec<stmt_vec_info> &scalar_stmts,
                         stmt_vec_info root_stmt_info,
-                        unsigned max_tree_size,
+                        unsigned max_tree_size, unsigned *limit,
                         scalar_stmts_to_slp_tree_map_t *bst_map,
                         /* ???  We need stmt_info for group splitting.  */
                         stmt_vec_info stmt_info_)
@@ -2240,12 +2247,11 @@ vect_build_slp_instance (vec_info *vinfo,
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
-  unsigned npermutes = 0;
   poly_uint64 max_nunits = 1;
   unsigned tree_size = 0;
   unsigned i;
   slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
-                                      &max_nunits, matches, &npermutes,
+                                      &max_nunits, matches, limit,
                                       &tree_size, bst_map);
   if (node != NULL)
     {
@@ -2413,7 +2419,8 @@ vect_build_slp_instance (vec_info *vinfo,
              stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
                                                               group1_size);
              bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
-                                                   kind, max_tree_size);
+                                                   kind, max_tree_size,
+                                                   limit);
              /* Split the rest at the failure point and possibly
                 re-analyze the remaining matching part if it has
                 at least two lanes.  */
@@ -2425,13 +2432,15 @@ vect_build_slp_instance (vec_info *vinfo,
                  rest = vect_split_slp_store_group (rest, i - group1_size);
                  if (i - group1_size > 1)
                    res |= vect_analyze_slp_instance (vinfo, bst_map, rest2,
-                                                     kind, max_tree_size);
+                                                     kind, max_tree_size,
+                                                     limit);
                }
              /* Re-analyze the non-matching tail if it has at least
                 two lanes.  */
              if (i + 1 < group_size)
                res |= vect_analyze_slp_instance (vinfo, bst_map,
-                                                 rest, kind, max_tree_size);
+                                                 rest, kind, max_tree_size,
+                                                 limit);
              return res;
            }
        }
@@ -2456,10 +2465,10 @@ vect_build_slp_instance (vec_info *vinfo,
          DR_GROUP_GAP (stmt_info) = 0;
 
          bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
-                                               kind, max_tree_size);
+                                               kind, max_tree_size, limit);
          if (i + 1 < group_size)
            res |= vect_analyze_slp_instance (vinfo, bst_map,
-                                             rest, kind, max_tree_size);
+                                             rest, kind, max_tree_size, limit);
 
          return res;
        }
@@ -2484,7 +2493,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
                           scalar_stmts_to_slp_tree_map_t *bst_map,
                           stmt_vec_info stmt_info,
                           slp_instance_kind kind,
-                          unsigned max_tree_size)
+                          unsigned max_tree_size, unsigned *limit)
 {
   unsigned int i;
   vec<stmt_vec_info> scalar_stmts;
@@ -2556,7 +2565,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
   bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts,
                                      kind == slp_inst_kind_ctor
                                      ? stmt_info : NULL,
-                                     max_tree_size, bst_map,
+                                     max_tree_size, limit, bst_map,
                                      kind == slp_inst_kind_store
                                      ? stmt_info : NULL);
 
@@ -2577,6 +2586,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
 
   DUMP_VECT_SCOPE ("vect_analyze_slp");
 
+  unsigned limit = max_tree_size;
+
   scalar_stmts_to_slp_tree_map_t *bst_map
     = new scalar_stmts_to_slp_tree_map_t ();
 
@@ -2585,7 +2596,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
     vect_analyze_slp_instance (vinfo, bst_map, first_element,
                               STMT_VINFO_GROUPED_ACCESS (first_element)
                               ? slp_inst_kind_store : slp_inst_kind_ctor,
-                              max_tree_size);
+                              max_tree_size, &limit);
 
   if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
     {
@@ -2595,7 +2606,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
          if (vect_build_slp_instance (bb_vinfo, bb_vinfo->roots[i].kind,
                                       bb_vinfo->roots[i].stmts,
                                       bb_vinfo->roots[i].root,
-                                      max_tree_size, bst_map, NULL))
+                                      max_tree_size, &limit, bst_map, NULL))
            bb_vinfo->roots[i].stmts = vNULL;
        }
     }
@@ -2609,7 +2620,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
          ;
        else if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
                                              slp_inst_kind_reduc_chain,
-                                             max_tree_size))
+                                             max_tree_size, &limit))
          {
            /* Dissolve reduction chain group.  */
            stmt_vec_info vinfo = first_element;
@@ -2630,7 +2641,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
       /* Find SLP sequences starting from groups of reductions.  */
       if (loop_vinfo->reductions.length () > 1)
        vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
-                                  slp_inst_kind_reduc_group, max_tree_size);
+                                  slp_inst_kind_reduc_group, max_tree_size,
+                                  &limit);
     }
 
   /* The map keeps a reference on SLP nodes built, release that.  */