loop-niter: Recognize popcount idioms even with char, short and __int128 [PR95771]
authorJakub Jelinek <jakub@redhat.com>
Mon, 4 Jan 2021 13:36:06 +0000 (14:36 +0100)
committerJakub Jelinek <jakub@redhat.com>
Mon, 4 Jan 2021 13:36:06 +0000 (14:36 +0100)
As the testcase shows, we punt unnecessarily on popcount loop idioms if
the type is smaller than int or larger than long long.
Smaller type than int can be handled by zero-extending the argument to
unsigned int, and types twice as long as long long by doing
__builtin_popcountll on both halves of the __int128.

2020-01-04  Jakub Jelinek  <jakub@redhat.com>

PR tree-optimization/95771
* tree-ssa-loop-niter.c (number_of_iterations_popcount): Handle types
with precision smaller than int's precision and types with precision
twice as large as long long.  Formatting fixes.

* gcc.target/i386/pr95771.c: New test.

gcc/testsuite/gcc.target/i386/pr95771.c [new file with mode: 0644]
gcc/tree-ssa-loop-niter.c

diff --git a/gcc/testsuite/gcc.target/i386/pr95771.c b/gcc/testsuite/gcc.target/i386/pr95771.c
new file mode 100644 (file)
index 0000000..d7b6701
--- /dev/null
@@ -0,0 +1,67 @@
+/* PR tree-optimization/95771 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" { target int128 } } } */
+/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" { target { ! int128 } } } } */
+
+int
+foo (unsigned char x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+
+int
+bar (unsigned short x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+
+int
+baz (unsigned int x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+
+int
+qux (unsigned long long x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+
+#ifdef __SIZEOF_INT128__
+int
+corge (unsigned __int128 x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+#endif
index 055aec51deef62466bf6ce784fa7b4279b1edccc..98978bcaa85387a1f6f37e96c607d2f186a87929 100644 (file)
@@ -2666,27 +2666,45 @@ number_of_iterations_popcount (loop_p loop, edge exit,
 
   /* We found a match. Get the corresponding popcount builtin.  */
   tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
-  if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION (integer_type_node))
+  if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
     fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
-  else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
-          (long_integer_type_node))
+  else if (TYPE_PRECISION (TREE_TYPE (src))
+          == TYPE_PRECISION (long_integer_type_node))
     fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
-  else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
-          (long_long_integer_type_node))
+  else if (TYPE_PRECISION (TREE_TYPE (src))
+          == TYPE_PRECISION (long_long_integer_type_node)
+          || (TYPE_PRECISION (TREE_TYPE (src))
+              == 2 * TYPE_PRECISION (long_long_integer_type_node)))
     fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
 
-  /* ??? Support promoting char/short to int.  */
   if (!fn)
     return false;
 
   /* Update NITER params accordingly  */
   tree utype = unsigned_type_for (TREE_TYPE (src));
   src = fold_convert (utype, src);
-  tree call = fold_convert (utype, build_call_expr (fn, 1, src));
+  if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
+    src = fold_convert (unsigned_type_node, src);
+  tree call;
+  if (TYPE_PRECISION (TREE_TYPE (src))
+      == 2 * TYPE_PRECISION (long_long_integer_type_node))
+    {
+      int prec = TYPE_PRECISION (long_long_integer_type_node);
+      tree src1 = fold_convert (long_long_unsigned_type_node,
+                               fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
+                                            unshare_expr (src),
+                                            build_int_cst (integer_type_node,
+                                                           prec)));
+      tree src2 = fold_convert (long_long_unsigned_type_node, src);
+      call = build_call_expr (fn, 1, src1);
+      call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
+                         build_call_expr (fn, 1, src2));
+      call = fold_convert (utype, call);
+    }
+  else
+    call = fold_convert (utype, build_call_expr (fn, 1, src));
   if (adjust)
-    iter = fold_build2 (MINUS_EXPR, utype,
-                       call,
-                       build_int_cst (utype, 1));
+    iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
   else
     iter = call;
 
@@ -2703,10 +2721,9 @@ number_of_iterations_popcount (loop_p loop, edge exit,
   if (adjust)
     {
       tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
-                                     build_zero_cst
-                                     (TREE_TYPE (src)));
-      niter->may_be_zero =
-       simplify_using_initial_conditions (loop, may_be_zero);
+                                     build_zero_cst (TREE_TYPE (src)));
+      niter->may_be_zero
+       = simplify_using_initial_conditions (loop, may_be_zero);
     }
   else
     niter->may_be_zero = boolean_false_node;