&& gimple_assign_rhs1 (use_stmt) != def;
}
-/* Return whether USE_STMT is DEF * DEF. */
+/* Return TRUE if USE_STMT is a multiplication of DEF by A, i.e. a
+ GIMPLE assignment whose right-hand side code is MULT_EXPR and whose
+ two operands are DEF and A, in either order. Return FALSE for any
+ other statement. */
static inline bool
-is_square_of (gimple *use_stmt, tree def)
+is_mult_by (gimple *use_stmt, tree def, tree a)
{
if (gimple_code (use_stmt) == GIMPLE_ASSIGN
&& gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
{
tree op0 = gimple_assign_rhs1 (use_stmt);
tree op1 = gimple_assign_rhs2 (use_stmt);
- return op0 == op1 && op0 == def;
+ return (op0 == def && op1 == a)
+ || (op0 == a && op1 == def);
}
return 0;
}
+/* Return TRUE when USE_STMT multiplies DEF by itself (DEF * DEF).
+ This is is_mult_by with DEF passed as both operands. */
+static inline bool
+is_square_of (gimple *use_stmt, tree def)
+{
+ bool squares_def = is_mult_by (use_stmt, def, def);
+ return squares_def;
+}
+
/* Return whether USE_STMT is a floating-point division by
DEF * DEF. */
static inline bool
}
}
+/* Transform sequences like
+ t = sqrt (a)
+ x = 1.0 / t;
+ r1 = x * x;
+ r2 = a * x;
+ into:
+ t = sqrt (a)
+ r1 = 1.0 / a;
+ r2 = t;
+ x = r1 * r2;
+ depending on the uses of x, r1, r2. This removes one multiplication and
+ allows the sqrt and division operations to execute in parallel.
+ DEF_GSI is the gsi of the initial division by sqrt that defines
+ DEF (x in the example above).
+
+ Nothing is changed unless the division is 1.0 / t with t the result
+ of a sqrt call, and at least one non-debug use of x is x * x or a * x.
+ If x also has other uses it is recomputed as t * (1.0 / a); that is
+ only done when both kinds of multiplication are present and at least
+ one of them is in the same basic block as the division. */
+
+static void
+optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
+{
+ gimple *use_stmt;
+ imm_use_iterator use_iter;
+ gimple *stmt = gsi_stmt (*def_gsi);
+ tree x = def;
+ tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
+ tree div_rhs1 = gimple_assign_rhs1 (stmt);
+
+ /* Only handle 1.0 / <ssa name>: the dividend must be the real constant
+ 1.0 and the divisor an SSA name. */
+ if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
+ || TREE_CODE (div_rhs1) != REAL_CST
+ || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
+ return;
+
+ /* The divisor must be defined by a call that has a result. */
+ gcall *sqrt_stmt
+ = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
+
+ if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
+ return;
+
+ /* ... and that call must be one of the sqrt functions. */
+ switch (gimple_call_combined_fn (sqrt_stmt))
+ {
+ CASE_CFN_SQRT:
+ CASE_CFN_SQRT_FN:
+ break;
+
+ default:
+ return;
+ }
+ tree a = gimple_call_arg (sqrt_stmt, 0);
+
+ /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
+
+ /* Statements that use x in x * x. */
+ auto_vec<gimple *> sqr_stmts;
+ /* Statements that use x in a * x. */
+ auto_vec<gimple *> mult_stmts;
+ /* Set when x has a non-debug use that is neither x * x nor a * x. */
+ bool has_other_use = false;
+ /* Set when an x * x or a * x use is in the same basic block as the
+ division. */
+ bool mult_on_main_path = false;
+
+ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
+ {
+ /* Debug statements are not classified; they neither enable nor
+ block the transformation. */
+ if (is_gimple_debug (use_stmt))
+ continue;
+ if (is_square_of (use_stmt, x))
+ {
+ sqr_stmts.safe_push (use_stmt);
+ if (gimple_bb (use_stmt) == gimple_bb (stmt))
+ mult_on_main_path = true;
+ }
+ else if (is_mult_by (use_stmt, x, a))
+ {
+ mult_stmts.safe_push (use_stmt);
+ if (gimple_bb (use_stmt) == gimple_bb (stmt))
+ mult_on_main_path = true;
+ }
+ else
+ has_other_use = true;
+ }
+
+ /* In the x * x and a * x cases we just rewire stmt operands or
+ remove multiplications. In the has_other_use case we introduce
+ a multiplication so make sure we don't introduce a multiplication
+ on a path where there was none. */
+ if (has_other_use && !mult_on_main_path)
+ return;
+
+ /* Nothing to optimize if x is used in neither form. */
+ if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
+ return;
+
+ /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
+ to be able to compose it from the sqr and mult cases. */
+ if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
+ return;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
+ print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
+ print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
+ fprintf (dump_file, "\n");
+ }
+
+ /* The division can only be deleted when x has no other use and the
+ statement is not reused below as the new 1.0 / a division. */
+ bool delete_div = !has_other_use;
+ tree sqr_ssa_name = NULL_TREE;
+ if (!sqr_stmts.is_empty ())
+ {
+ /* r1 = x * x. Transform the original
+ x = 1.0 / t
+ into
+ tmp1 = 1.0 / a
+ r1 = tmp1. */
+
+ sqr_ssa_name
+ = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Replacing original division\n");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
+ fprintf (dump_file, "with new division\n");
+ }
+ /* Reuse the division statement in place: it now defines the new
+ temporary and divides by 'a' instead of by sqrt (a). */
+ gimple_assign_set_lhs (stmt, sqr_ssa_name);
+ gimple_assign_set_rhs2 (stmt, a);
+ fold_stmt_inplace (def_gsi);
+ update_stmt (stmt);
+
+ if (dump_file)
+ print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
+
+ delete_div = false;
+ gimple *sqr_stmt;
+ unsigned int i;
+ /* Turn every x * x into a plain copy of the new temporary. */
+ FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
+ {
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
+ gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
+ update_stmt (sqr_stmt);
+ }
+ }
+ if (!mult_stmts.is_empty ())
+ {
+ /* r2 = a * x. Transform this into:
+ r2 = t (The original sqrt (a)). */
+ unsigned int i;
+ gimple *mult_stmt = NULL;
+ FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
+ {
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Replacing squaring multiplication\n");
+ print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
+ fprintf (dump_file, "with assignment\n");
+ }
+ gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
+ fold_stmt_inplace (&gsi2);
+ update_stmt (mult_stmt);
+ if (dump_file)
+ print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
+ }
+ }
+
+ if (has_other_use)
+ {
+ /* Using the two temporaries tmp1, tmp2 from above
+ the original x is now:
+ x = tmp1 * tmp2. */
+ gcc_assert (orig_sqrt_ssa_name);
+ gcc_assert (sqr_ssa_name);
+
+ gimple *new_stmt
+ = gimple_build_assign (x, MULT_EXPR,
+ orig_sqrt_ssa_name, sqr_ssa_name);
+ gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
+ update_stmt (stmt);
+ }
+ else if (delete_div)
+ {
+ /* Remove the original division. */
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
+ gsi_remove (&gsi2, true);
+ release_defs (stmt);
+ }
+ else
+ /* The division statement was reused to define the new temporary and
+ every non-debug use of x has been rewritten, so x is no longer
+ defined by any statement. Release it rather than leaving it
+ behind; debug statements were skipped above and may still refer
+ to it. */
+ release_ssa_name (x);
+}
/* Look for floating-point divisions among DEF's uses, and try to
replace them by multiplications with the reciprocal. Add
&& (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
&& FLOAT_TYPE_P (TREE_TYPE (def))
&& TREE_CODE (def) == SSA_NAME)
- execute_cse_reciprocals_1 (&gsi, def);
+ {
+ if (flag_unsafe_math_optimizations
+ && is_gimple_assign (stmt)
+ && !stmt_can_throw_internal (stmt)
+ && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
+ optimize_recip_sqrt (&gsi, def);
+ else
+ execute_cse_reciprocals_1 (&gsi, def);
+ }
}
if (optimize_bb_for_size_p (bb))