gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Inlining decision heuristics
22
23    The implementation of the inliner is organized as follows:
24
25 inlining heuristics limits
26
27        can_inline_edge_p allows checking that a particular inlining is allowed
28        by the limits specified by the user (allowed function body growth,
29        stack growth and so on).
30
31        Functions are inlined when it is obvious that the result is profitable (such
32        as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
35
36 inlining heuristics
37
38 The inliner itself is split into two passes:
39
40 pass_early_inlining
41
42      A simple local inlining pass that inlines callees into the current
43      function.  This pass makes no use of whole-unit analysis and thus can
44      make only very simple decisions based on local properties.
45
46      The strength of the pass is that it is run in topological order
47      (reverse postorder) on the callgraph.  Functions are converted into SSA
48      form just before this pass and optimized subsequently.  As a result, the
49      callees of the function seen by the early inliner have already been
50      optimized, and the results of early inlining add a lot of optimization
51      opportunities for local optimization.
52
53      The pass handles the obvious inlining decisions within the compilation
54      unit - inlining auto-inline functions, inlining for size and
55      flattening.
56
57      The main strength of the pass is its ability to eliminate the abstraction
58      penalty in C++ code (via a combination of inlining and early optimization)
59      and thus improve the quality of analysis done by the real IPA optimizers;
60      see the illustrative example after this comment.
61
62      Because of the lack of whole-unit knowledge, the pass cannot really make
63      good code size/performance tradeoffs.  It however does very simple
64      speculative inlining, allowing code size to grow by
65      EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
66      the optimizations performed later are very likely to eliminate the cost.
67
68 pass_ipa_inline
69
70 This is the real inliner able to handle inlining with whole program
71      knowledge.  It performs the following steps:
72
73      1) inlining of small functions.  This is implemented by a greedy
74      algorithm that orders all inlinable cgraph edges by their badness and
75      inlines them in this order as long as the inline limits allow doing so.
76
77      This heuristic is not very good at inlining recursive calls.  Recursive
78      calls can be inlined with results similar to loop unrolling.  To do so,
79      a special-purpose recursive inliner is executed on the function when a
80      recursive edge is met as a viable candidate.
81
82      2) Unreachable functions are removed from the callgraph.  Inlining leads
83      to devirtualization and other modifications of the callgraph, so functions
84      may become unreachable during the process.  Also, functions declared
85      extern inline and virtual functions are removed, since after inlining
86      we no longer need the offline bodies.
87
88 3) Functions called once and not exported from the unit are inlined.
89      This should almost always lead to a reduction in code size by eliminating
90      the need for an offline copy of the function.  */
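/* Illustrative example (added for exposition, not part of the pass): the kind
   of abstraction penalty pass_early_inlining is meant to remove.  Given a
   trivial C++ accessor such as

     struct vec { int *data; int n; int length () const { return n; } };

     int
     first_quarter (const vec &v)
     {
       return v.length () / 4;
     }

   early inlining of length () followed by the local optimizations collapses
   first_quarter into a handful of instructions, so the real IPA passes see
   the cheap body rather than an opaque call.  Similarly, a static function
   called exactly once is inlined by pass_ipa_inline so that its offline copy
   can be removed.  */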
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
122
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
125
126 /* Statistics we collect about inlining algorithm. */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
130
131 /* Return false when inlining edge E would lead to violating
132 limits on function unit growth or stack usage growth.
133
134 The relative function body growth limit is present generally
135 to avoid problems with non-linear behavior of the compiler.
136    To allow inlining huge functions into a tiny wrapper, the limit
137    is always based on the bigger of the two functions considered.
138
139    For stack growth limits we always base the growth on the stack usage
140    of the caller.  We want to prevent applications from segfaulting
141    on stack overflow when functions with huge stack frames get
142    inlined.  */
143
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
146 {
147 struct cgraph_node *to = e->caller;
148 struct cgraph_node *what = e->callee->ultimate_alias_target ();
149 int newsize;
150 int limit = 0;
151 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152 ipa_size_summary *outer_info = ipa_size_summaries->get (to);
153
154   /* Look for the function e->caller is inlined into.  While doing
155      so, work out the largest function body on the way.  As
156      described above, we want to base our function growth
157      limits on that - not on the self size of the
158      outer function, nor on the self size of the inline code
159      we immediately inline into.  This is the most relaxed
160      interpretation of the rule "do not grow large functions
161      too much in order to prevent the compiler from exploding". */
162 while (true)
163 {
164 ipa_size_summary *size_info = ipa_size_summaries->get (to);
165 if (limit < size_info->self_size)
166 limit = size_info->self_size;
167 if (stack_size_limit < size_info->estimated_self_stack_size)
168 stack_size_limit = size_info->estimated_self_stack_size;
169 if (to->inlined_to)
170 to = to->callers->caller;
171 else
172 break;
173 }
174
175 ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176 ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
177
178 if (limit < what_size_info->self_size)
179 limit = what_size_info->self_size;
180
181 limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
182
183 /* Check the size after inlining against the function limits. But allow
184 the function to shrink if it went over the limits by forced inlining. */
185 newsize = estimate_size_after_inlining (to, e);
186 if (newsize >= ipa_size_summaries->get (what)->size
187 && newsize > opt_for_fn (to->decl, param_large_function_insns)
188 && newsize > limit)
189 {
190 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191 return false;
192 }
193
194 if (!what_info->estimated_stack_size)
195 return true;
196
197   /* FIXME: The stack size limit often prevents inlining in Fortran programs
198      due to large I/O data structures used by the Fortran front-end.
199      We ought to ignore this limit when we know that the edge is executed
200      on every invocation of the caller (i.e. its call statement dominates
201      the exit block).  We do not track this information, yet.  */
202 stack_size_limit += ((gcov_type)stack_size_limit
203 * opt_for_fn (to->decl, param_stack_frame_growth)
204 / 100);
205
206 inlined_stack = (ipa_get_stack_frame_offset (to)
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209   /* Check the new stack consumption against the stack consumption at the
210      place the stack is used.  */
211 if (inlined_stack > stack_size_limit
212       /* If the function already has large stack usage from a sibling
213          inline call, we can inline, too.
214          This bit overoptimistically assumes that we are good at stack
215          packing.  */
216 && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217 && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
218 {
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
221 }
222 return true;
223 }
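/* Worked example for caller_growth_limits (hypothetical numbers, added for
   exposition).  Suppose the largest body on the inline chain has self_size
   500, the callee's self_size is 300 and param_large_function_growth is 100.
   The limit starts at 500 (the bigger of the two bodies) and becomes
   500 + 500 * 100 / 100 = 1000.  An estimated size after inlining of 750
   passes the check; a size that exceeds 1000, is no smaller than the callee
   and also exceeds param_large_function_insns fails with
   CIF_LARGE_FUNCTION_GROWTH_LIMIT.  The stack check is analogous: the
   combined frame is compared against the largest self frame on the chain
   scaled by param_stack_frame_growth, with param_large_stack_frame acting as
   a floor below which the stack check never fails.  */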
224
225 /* Dump info about why inlining has failed. */
226
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
229 {
230 if (dump_enabled_p ())
231 {
232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 " not inlinable: %C -> %C, %s\n",
234 e->caller, e->callee,
235 cgraph_inline_failed_string (e->inline_failed));
236 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 && e->caller->lto_file_data
239 && e->callee->ultimate_alias_target ()->lto_file_data)
240 {
241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 " LTO objects: %s, %s\n",
243 e->caller->lto_file_data->file_name,
244 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
245 }
246 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 if (dump_file)
248 cl_target_option_print_diff
249 (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 if (dump_file)
253 cl_optimization_print_diff
254 (dump_file, 2, opts_for_fn (e->caller->decl),
255 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
256 }
257 }
258
259 /* Decide whether sanitizer-related attributes allow inlining. */
260
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
263 {
264 if (!caller || !callee)
265 return true;
266
267 /* Follow clang and allow inlining for always_inline functions. */
268 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
269 return true;
270
271 const sanitize_code codes[] =
272 {
273 SANITIZE_ADDRESS,
274 SANITIZE_THREAD,
275 SANITIZE_UNDEFINED,
276 SANITIZE_UNDEFINED_NONDEFAULT,
277 SANITIZE_POINTER_COMPARE,
278 SANITIZE_POINTER_SUBTRACT
279 };
280
281 for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
282 if (sanitize_flags_p (codes[i], caller)
283 != sanitize_flags_p (codes[i], callee))
284 return false;
285
286 return true;
287 }
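/* Example (added for exposition): if the callee was compiled with
   -fsanitize=address while the caller was not, or the caller carries
   __attribute__((no_sanitize ("address"))), sanitize_flags_p disagrees for
   SANITIZE_ADDRESS, the predicate above returns false and can_inline_edge_p
   later records CIF_SANITIZE_ATTRIBUTE_MISMATCH.  Marking the callee
   always_inline is the one escape hatch, mirroring clang's behavior.  */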
288
289 /* Used for flags where it is safe to inline when the caller's value is
290    greater than the callee's.  */
291 #define check_maybe_up(flag) \
292 (opts_for_fn (caller->decl)->x_##flag \
293 != opts_for_fn (callee->decl)->x_##flag \
294 && (!always_inline \
295 || opts_for_fn (caller->decl)->x_##flag \
296 < opts_for_fn (callee->decl)->x_##flag))
297 /* Used for flags where it is safe to inline when the caller's value is
298    smaller than the callee's.  */
299 #define check_maybe_down(flag) \
300 (opts_for_fn (caller->decl)->x_##flag \
301 != opts_for_fn (callee->decl)->x_##flag \
302 && (!always_inline \
303 || opts_for_fn (caller->decl)->x_##flag \
304 > opts_for_fn (callee->decl)->x_##flag))
305 /* Used for flags where exact match is needed for correctness. */
306 #define check_match(flag) \
307 (opts_for_fn (caller->decl)->x_##flag \
308 != opts_for_fn (callee->decl)->x_##flag)
309
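/* Usage sketch (added for exposition): with CALLER, CALLEE and ALWAYS_INLINE
   in scope as in can_inline_edge_by_limits_p below, a test such as

     check_maybe_up (flag_trapping_math)

   compares opts_for_fn (caller->decl)->x_flag_trapping_math against the
   callee's value.  For ordinary functions any difference counts as a
   mismatch; for always_inline callees the mismatch is tolerated only when
   the caller's setting is the larger one, so inlining can never drop a
   -ftrapping-math requirement the callee was compiled with.  */
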
310 /* Decide if we can inline the edge and possibly update
311    the inline_failed reason.
312    We check whether inlining is possible at all (body availability,
313    attribute and flag compatibility); size limits are checked
314    separately by can_inline_edge_by_limits_p.
315    If REPORT is true, output the reason to the dump file.  */
316
317 static bool
318 can_inline_edge_p (struct cgraph_edge *e, bool report,
319 bool early = false)
320 {
321 gcc_checking_assert (e->inline_failed);
322
323 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
324 {
325 if (report)
326 report_inline_failed_reason (e);
327 return false;
328 }
329
330 bool inlinable = true;
331 enum availability avail;
332 cgraph_node *caller = (e->caller->inlined_to
333 ? e->caller->inlined_to : e->caller);
334 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
335
336 if (!callee->definition)
337 {
338 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
339 inlinable = false;
340 }
341 if (!early && (!opt_for_fn (callee->decl, optimize)
342 || !opt_for_fn (caller->decl, optimize)))
343 {
344 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
345 inlinable = false;
346 }
347 else if (callee->calls_comdat_local)
348 {
349 e->inline_failed = CIF_USES_COMDAT_LOCAL;
350 inlinable = false;
351 }
352 else if (avail <= AVAIL_INTERPOSABLE)
353 {
354 e->inline_failed = CIF_OVERWRITABLE;
355 inlinable = false;
356 }
357 /* All edges with call_stmt_cannot_inline_p should have inline_failed
358 initialized to one of FINAL_ERROR reasons. */
359 else if (e->call_stmt_cannot_inline_p)
360 gcc_unreachable ();
361 /* Don't inline if the functions have different EH personalities. */
362 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
363 && DECL_FUNCTION_PERSONALITY (callee->decl)
364 && (DECL_FUNCTION_PERSONALITY (caller->decl)
365 != DECL_FUNCTION_PERSONALITY (callee->decl)))
366 {
367 e->inline_failed = CIF_EH_PERSONALITY;
368 inlinable = false;
369 }
370 /* TM pure functions should not be inlined into non-TM_pure
371 functions. */
372 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
373 {
374 e->inline_failed = CIF_UNSPECIFIED;
375 inlinable = false;
376 }
377 /* Check compatibility of target optimization options. */
378 else if (!targetm.target_option.can_inline_p (caller->decl,
379 callee->decl))
380 {
381 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
382 inlinable = false;
383 }
384 else if (ipa_fn_summaries->get (callee) == NULL
385 || !ipa_fn_summaries->get (callee)->inlinable)
386 {
387 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
388 inlinable = false;
389 }
390 /* Don't inline a function with mismatched sanitization attributes. */
391 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
392 {
393 e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
394 inlinable = false;
395 }
396 if (!inlinable && report)
397 report_inline_failed_reason (e);
398 return inlinable;
399 }
400
401 /* Return the inlining_insns_single limit for function N.  If HINT or HINT2
402    is true, scale up the bound.  */
403
404 static int
405 inline_insns_single (cgraph_node *n, bool hint, bool hint2)
406 {
407 if (hint && hint2)
408 {
409 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
410 spd = spd * spd;
411 if (spd > 1000000)
412 spd = 1000000;
413 return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
414 }
415 if (hint || hint2)
416 return opt_for_fn (n->decl, param_max_inline_insns_single)
417 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
418 return opt_for_fn (n->decl, param_max_inline_insns_single);
419 }
420
421 /* Return the inlining_insns_auto limit for function N.  If HINT or HINT2
422    is true, scale up the bound.  */
423
424 static int
425 inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
426 {
427 int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
428 if (hint && hint2)
429 {
430 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
431 spd = spd * spd;
432 if (spd > 1000000)
433 spd = 1000000;
434 return max_inline_insns_auto * spd / 100;
435 }
436 if (hint || hint2)
437 return max_inline_insns_auto
438 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
439 return max_inline_insns_auto;
440 }
441
442 /* Decide if we can inline the edge and possibly update
443    the inline_failed reason.
444    We check whether inlining is possible at all and whether the
445    caller growth limits allow doing so.
446
447    If REPORT is true, output the reason to the dump file.
448
449    If DISREGARD_LIMITS is true, ignore size limits.  */
450
451 static bool
452 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
453 bool disregard_limits = false, bool early = false)
454 {
455 gcc_checking_assert (e->inline_failed);
456
457 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
458 {
459 if (report)
460 report_inline_failed_reason (e);
461 return false;
462 }
463
464 bool inlinable = true;
465 enum availability avail;
466 cgraph_node *caller = (e->caller->inlined_to
467 ? e->caller->inlined_to : e->caller);
468 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
469 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
470 tree callee_tree
471 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
472 /* Check if caller growth allows the inlining. */
473 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
474 && !disregard_limits
475 && !lookup_attribute ("flatten",
476 DECL_ATTRIBUTES (caller->decl))
477 && !caller_growth_limits (e))
478 inlinable = false;
479 else if (callee->externally_visible
480 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
481 && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
482 {
483 e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
484 inlinable = false;
485 }
486 /* Don't inline a function with a higher optimization level than the
487      caller.  FIXME: this is really just the tip of the iceberg of handling
488      the optimization attribute.  */
489 else if (caller_tree != callee_tree)
490 {
491 bool always_inline =
492 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
493 && lookup_attribute ("always_inline",
494 DECL_ATTRIBUTES (callee->decl)));
495 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
496 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
497
498 /* Until GCC 4.9 we did not check the semantics-altering flags
499 below and inlined across optimization boundaries.
500        Enabling the checks below breaks several packages by refusing
501        to inline library always_inline functions.  See PR65873.
502        Disable the check for early inlining for now until a better
503        solution is found.  */
504 if (always_inline && early)
505 ;
506     /* There are some options that change IL semantics, which means
507        we cannot inline in these cases for correctness reasons -
508        not even for functions declared always_inline.  */
509 else if (check_match (flag_wrapv)
510 || check_match (flag_trapv)
511 || check_match (flag_pcc_struct_return)
512 || check_maybe_down (optimize_debug)
513              /* When the caller or callee does FP math, be sure the FP codegen
514                 flags are compatible.  */
515 || ((caller_info->fp_expressions && callee_info->fp_expressions)
516 && (check_maybe_up (flag_rounding_math)
517 || check_maybe_up (flag_trapping_math)
518 || check_maybe_down (flag_unsafe_math_optimizations)
519 || check_maybe_down (flag_finite_math_only)
520 || check_maybe_up (flag_signaling_nans)
521 || check_maybe_down (flag_cx_limited_range)
522 || check_maybe_up (flag_signed_zeros)
523 || check_maybe_down (flag_associative_math)
524 || check_maybe_down (flag_reciprocal_math)
525 || check_maybe_down (flag_fp_int_builtin_inexact)
526 /* Strictly speaking only when the callee contains function
527 calls that may end up setting errno. */
528 || check_maybe_up (flag_errno_math)))
529              /* We do not want code compiled with exceptions to be
530 brought into a non-EH function unless we know that the callee
531 does not throw.
532 This is tracked by DECL_FUNCTION_PERSONALITY. */
533 || (check_maybe_up (flag_non_call_exceptions)
534 && DECL_FUNCTION_PERSONALITY (callee->decl))
535 || (check_maybe_up (flag_exceptions)
536 && DECL_FUNCTION_PERSONALITY (callee->decl))
537              /* When devirtualization is disabled for the callee, it is not safe
538                 to inline it as we may have mangled the type info.
539                 Allow early inlining of always_inlines.  */
540 || (!early && check_maybe_down (flag_devirtualize)))
541 {
542 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
543 inlinable = false;
544 }
545 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
546 else if (always_inline)
547 ;
548 /* When user added an attribute to the callee honor it. */
549 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
550 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
551 {
552 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
553 inlinable = false;
554 }
555       /* If explicit optimize attributes are not used, the mismatch is caused
556          by different command line options used to build different units.
557          Do not care about COMDAT functions - those are intended to be
558          optimized with the optimization flags of the module they are used in.
559          Also do not care about mixing up size/speed optimization when
560          DECL_DISREGARD_INLINE_LIMITS is set.  */
561 else if ((callee->merged_comdat
562 && !lookup_attribute ("optimize",
563 DECL_ATTRIBUTES (caller->decl)))
564 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
565 ;
566       /* If the mismatch is caused by merging two LTO units with different
567          optimization flags, we want to be a bit nicer.  However, never inline
568          if one of the functions is not optimized at all.  */
569 else if (!opt_for_fn (callee->decl, optimize)
570 || !opt_for_fn (caller->decl, optimize))
571 {
572 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
573 inlinable = false;
574 }
575       /* If the callee is optimized for size and the caller is not, allow
576          inlining if the code shrinks, or we are within the param_max_inline_insns_single
577          limit and the callee is declared inline (and thus likely a unified comdat).
578          This will allow the caller to run faster.  */
579 else if (opt_for_fn (callee->decl, optimize_size)
580 > opt_for_fn (caller->decl, optimize_size))
581 {
582 int growth = estimate_edge_growth (e);
583 if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
584 && (!DECL_DECLARED_INLINE_P (callee->decl)
585 && growth >= MAX (inline_insns_single (caller, false, false),
586 inline_insns_auto (caller, false, false))))
587 {
588 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
589 inlinable = false;
590 }
591 }
592   /* If the callee is more aggressively optimized for performance than the
593      caller, we generally want to inline only cheap (runtime-wise) functions.  */
594 else if (opt_for_fn (callee->decl, optimize_size)
595 < opt_for_fn (caller->decl, optimize_size)
596 || (opt_for_fn (callee->decl, optimize)
597 > opt_for_fn (caller->decl, optimize)))
598 {
599 if (estimate_edge_time (e)
600 >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
601 {
602 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
603 inlinable = false;
604 }
605 }
606
607 }
608
609 if (!inlinable && report)
610 report_inline_failed_reason (e);
611 return inlinable;
612 }
613
614
615 /* Return true if the edge E is inlinable during early inlining. */
616
617 static bool
618 can_early_inline_edge_p (struct cgraph_edge *e)
619 {
620 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
621   /* The early inliner might get called at WPA stage when an IPA pass adds a
622      new function.  In this case we cannot really do any early inlining
623      because the function bodies are missing.  */
624 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
625 return false;
626 if (!gimple_has_body_p (callee->decl))
627 {
628 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
629 return false;
630 }
631   /* In the early inliner some of the callees may not be in SSA form yet
632      (i.e. the callgraph is cyclic and we did not process
633      the callee with the early inliner yet).  We don't have a CIF code for
634      this case; later we will re-do the decision in the real inliner.  */
635 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
636 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
637 {
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
640 " edge not inlinable: not in SSA form\n");
641 return false;
642 }
643 if (!can_inline_edge_p (e, true, true)
644 || !can_inline_edge_by_limits_p (e, true, false, true))
645 return false;
646 return true;
647 }
648
649
650 /* Return number of calls in N. Ignore cheap builtins. */
651
652 static int
653 num_calls (struct cgraph_node *n)
654 {
655 struct cgraph_edge *e;
656 int num = 0;
657
658 for (e = n->callees; e; e = e->next_callee)
659 if (!is_inexpensive_builtin (e->callee->decl))
660 num++;
661 return num;
662 }
663
664
665 /* Return true if we are interested in inlining the small function called by E early.  */
666
667 static bool
668 want_early_inline_function_p (struct cgraph_edge *e)
669 {
670 bool want_inline = true;
671 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
672
673 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
674 ;
675   /* For AutoFDO, we need to make sure that, before the profile summary, the
676      IR of all hot paths looks exactly the same as in the profiled binary.  As
677      a result, the early inliner disregards the size limit and inlines those
678      callsites that are:
679        * inlined in the profiled binary, and
680        * whose cloned callee has enough samples to be considered "hot".  */
681 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
682 ;
683 else if (!DECL_DECLARED_INLINE_P (callee->decl)
684 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
685 {
686 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
687 report_inline_failed_reason (e);
688 want_inline = false;
689 }
690 else
691 {
692 /* First take care of very large functions. */
693 int min_growth = estimate_min_edge_growth (e), growth = 0;
694 int n;
695 int early_inlining_insns = param_early_inlining_insns;
696
697 if (min_growth > early_inlining_insns)
698 {
699 if (dump_enabled_p ())
700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
701 " will not early inline: %C->%C, "
702 "call is cold and code would grow "
703 "at least by %i\n",
704 e->caller, callee,
705 min_growth);
706 want_inline = false;
707 }
708 else
709 growth = estimate_edge_growth (e);
710
711
712 if (!want_inline || growth <= param_max_inline_insns_size)
713 ;
714 else if (!e->maybe_hot_p ())
715 {
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
718 " will not early inline: %C->%C, "
719 "call is cold and code would grow by %i\n",
720 e->caller, callee,
721 growth);
722 want_inline = false;
723 }
724 else if (growth > early_inlining_insns)
725 {
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
728 " will not early inline: %C->%C, "
729 "growth %i exceeds --param early-inlining-insns\n",
730 e->caller, callee, growth);
731 want_inline = false;
732 }
733 else if ((n = num_calls (callee)) != 0
734 && growth * (n + 1) > early_inlining_insns)
735 {
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
738 " will not early inline: %C->%C, "
739 "growth %i exceeds --param early-inlining-insns "
740 "divided by number of calls\n",
741 e->caller, callee, growth);
742 want_inline = false;
743 }
744 }
745 return want_inline;
746 }
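/* Worked example (hypothetical numbers, added for exposition).  With
   param_early_inlining_insns set to N, a callee whose estimated growth at
   this call site is 6 and which still contains 2 non-builtin calls of its
   own is accepted only if 6 * (2 + 1) <= N: the growth is charged once per
   remaining call so that iterated early inlining cannot blow up the caller.
   A cold call site is accepted only when the growth stays within
   param_max_inline_insns_size.  */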
747
748 /* Compute time of the edge->caller + edge->callee execution when inlining
749 does not happen. */
750
751 inline sreal
752 compute_uninlined_call_time (struct cgraph_edge *edge,
753 sreal uninlined_call_time,
754 sreal freq)
755 {
756 cgraph_node *caller = (edge->caller->inlined_to
757 ? edge->caller->inlined_to
758 : edge->caller);
759
760 if (freq > 0)
761 uninlined_call_time *= freq;
762 else
763 uninlined_call_time = uninlined_call_time >> 11;
764
765 sreal caller_time = ipa_fn_summaries->get (caller)->time;
766 return uninlined_call_time + caller_time;
767 }
768
769 /* Same as compute_uninlined_call_time but compute the time when inlining
770    does happen.  */
771
772 inline sreal
773 compute_inlined_call_time (struct cgraph_edge *edge,
774 sreal time,
775 sreal freq)
776 {
777 cgraph_node *caller = (edge->caller->inlined_to
778 ? edge->caller->inlined_to
779 : edge->caller);
780 sreal caller_time = ipa_fn_summaries->get (caller)->time;
781
782 if (freq > 0)
783 time *= freq;
784 else
785 time = time >> 11;
786
787 /* This calculation should match one in ipa-inline-analysis.c
788 (estimate_edge_size_and_time). */
789 time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
790 time += caller_time;
791 if (time <= 0)
792 time = ((sreal) 1) >> 8;
793 gcc_checking_assert (time >= 0);
794 return time;
795 }
796
797 /* Determine the time saved by inlining EDGE of frequency FREQ,
798    where the callee's runtime w/o inlining is UNINLINED_TIME
799    and with inlining is INLINED_TIME.  */
800
801 inline sreal
802 inlining_speedup (struct cgraph_edge *edge,
803 sreal freq,
804 sreal uninlined_time,
805 sreal inlined_time)
806 {
807 sreal speedup = uninlined_time - inlined_time;
808 /* Handling of call_time should match one in ipa-inline-fnsummary.c
809 (estimate_edge_size_and_time). */
810 sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
811
812 if (freq > 0)
813 {
814 speedup = (speedup + call_time);
815 if (freq != 1)
816 speedup = speedup * freq;
817 }
818 else if (freq == 0)
819 speedup = speedup >> 11;
820 gcc_checking_assert (speedup >= 0);
821 return speedup;
822 }
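/* In other words (added for exposition), for a call executed FREQ times per
   invocation of the caller the estimated saving is

     speedup = freq * (uninlined_time - inlined_time + call_stmt_time)

   i.e. the per-call difference in body time plus the removed call overhead,
   scaled by how often the call runs.  Calls with zero frequency only count
   the body-time difference, scaled down by 2^11, so that the comparison
   stays well defined.  */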
823
824 /* Return true if the speedup for inlining E is bigger than
825 param_inline_min_speedup. */
826
827 static bool
828 big_speedup_p (struct cgraph_edge *e)
829 {
830 sreal unspec_time;
831 sreal spec_time = estimate_edge_time (e, &unspec_time);
832 sreal freq = e->sreal_frequency ();
833 sreal time = compute_uninlined_call_time (e, unspec_time, freq);
834 sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
835 cgraph_node *caller = (e->caller->inlined_to
836 ? e->caller->inlined_to
837 : e->caller);
838 int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
839
840 if ((time - inlined_time) * 100 > time * limit)
841 return true;
842 return false;
843 }
844
845 /* Return true if we are interested in inlining the small function called
846    by E.  When REPORT is true, report the reason to the dump file.  */
847
848 static bool
849 want_inline_small_function_p (struct cgraph_edge *e, bool report)
850 {
851 bool want_inline = true;
852 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
853 cgraph_node *to = (e->caller->inlined_to
854 ? e->caller->inlined_to : e->caller);
855
856 /* Allow this function to be called before can_inline_edge_p,
857 since it's usually cheaper. */
858 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
859 want_inline = false;
860 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
861 ;
862 else if (!DECL_DECLARED_INLINE_P (callee->decl)
863 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
864 {
865 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
866 want_inline = false;
867 }
868   /* Do a fast and conservative check whether the function can be a good
869      inline candidate.  */
870 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
871 && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
872 && ipa_fn_summaries->get (callee)->min_size
873 - ipa_call_summaries->get (e)->call_stmt_size
874 > inline_insns_auto (e->caller, true, true))
875 {
876 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
877 want_inline = false;
878 }
879 else if ((DECL_DECLARED_INLINE_P (callee->decl)
880 || e->count.ipa ().nonzero_p ())
881 && ipa_fn_summaries->get (callee)->min_size
882 - ipa_call_summaries->get (e)->call_stmt_size
883 > inline_insns_single (e->caller, true, true))
884 {
885 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
886 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
887 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
888 want_inline = false;
889 }
890 else
891 {
892 int growth = estimate_edge_growth (e);
893 ipa_hints hints = estimate_edge_hints (e);
894       /* We have two independent groups of hints.  If one hint matches in
895          each of the groups, the limits are increased.  If both groups match,
896          the limit is increased even more.  */
897 bool apply_hints = (hints & (INLINE_HINT_indirect_call
898 | INLINE_HINT_known_hot
899 | INLINE_HINT_loop_iterations
900 | INLINE_HINT_loop_stride));
901 bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
902
903 if (growth <= opt_for_fn (to->decl,
904 param_max_inline_insns_size))
905 ;
906       /* Apply the param_max_inline_insns_single limit.  Do not do so when
907          hints suggest that inlining the given function is very profitable.
908          Avoid the computation of big_speedup_p when it is not necessary to
909          change the outcome of the decision.  */
910 else if (DECL_DECLARED_INLINE_P (callee->decl)
911 && growth >= inline_insns_single (e->caller, apply_hints,
912 apply_hints2)
913 && (apply_hints || apply_hints2
914 || growth >= inline_insns_single (e->caller, true,
915 apply_hints2)
916 || !big_speedup_p (e)))
917 {
918 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
919 want_inline = false;
920 }
921 else if (!DECL_DECLARED_INLINE_P (callee->decl)
922 && !opt_for_fn (e->caller->decl, flag_inline_functions)
923 && growth >= opt_for_fn (to->decl,
924 param_max_inline_insns_small))
925 {
926 /* growth_positive_p is expensive, always test it last. */
927 if (growth >= inline_insns_single (e->caller, false, false)
928 || growth_positive_p (callee, e, growth))
929 {
930 e->inline_failed = CIF_NOT_DECLARED_INLINED;
931 want_inline = false;
932 }
933 }
934       /* Apply the param_max_inline_insns_auto limit for functions not
935          declared inline.  Bypass the limit when the speedup seems big.  */
936 else if (!DECL_DECLARED_INLINE_P (callee->decl)
937 && growth >= inline_insns_auto (e->caller, apply_hints,
938 apply_hints2)
939 && (apply_hints || apply_hints2
940 || growth >= inline_insns_auto (e->caller, true,
941 apply_hints2)
942 || !big_speedup_p (e)))
943 {
944 /* growth_positive_p is expensive, always test it last. */
945 if (growth >= inline_insns_single (e->caller, false, false)
946 || growth_positive_p (callee, e, growth))
947 {
948 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
949 want_inline = false;
950 }
951 }
952       /* If the call is cold, do not inline when the function body would grow.  */
953 else if (!e->maybe_hot_p ()
954 && (growth >= inline_insns_single (e->caller, false, false)
955 || growth_positive_p (callee, e, growth)))
956 {
957 e->inline_failed = CIF_UNLIKELY_CALL;
958 want_inline = false;
959 }
960 }
961 if (!want_inline && report)
962 report_inline_failed_reason (e);
963 return want_inline;
964 }
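/* Example of the hint handling above (added for exposition).  A call that
   carries INLINE_HINT_loop_stride (first group) but not
   INLINE_HINT_builtin_constant_p (second group) is measured against
   inline_insns_single (caller, true, false), i.e. the base limit scaled by
   param_inline_heuristics_hint_percent; a call carrying hints from both
   groups gets the doubly scaled inline_insns_single (caller, true, true)
   bound.  big_speedup_p is consulted only when no hint applies and the
   growth falls between the unscaled and the hint-scaled limit.  */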
965
966 /* EDGE is a self-recursive edge.
967    We handle two cases - when function A is inlined into itself
968    or when function A is being inlined into another inline copy of function
969    A within function B.
970
971    In the first case OUTER_NODE points to the toplevel copy of A, while
972    in the second case OUTER_NODE points to the outermost copy of A in B.
973
974    In both cases we want to be extra selective, since
975    inlining the call will just cause new recursive calls to appear.  */
976
977 static bool
978 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
979 struct cgraph_node *outer_node,
980 bool peeling,
981 int depth)
982 {
983 char const *reason = NULL;
984 bool want_inline = true;
985 sreal caller_freq = 1;
986 int max_depth = opt_for_fn (outer_node->decl,
987 param_max_inline_recursive_depth_auto);
988
989 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
990 max_depth = opt_for_fn (outer_node->decl,
991 param_max_inline_recursive_depth);
992
993 if (!edge->maybe_hot_p ())
994 {
995 reason = "recursive call is cold";
996 want_inline = false;
997 }
998 else if (depth > max_depth)
999 {
1000 reason = "--param max-inline-recursive-depth exceeded.";
1001 want_inline = false;
1002 }
1003 else if (outer_node->inlined_to
1004 && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1005 {
1006 reason = "caller frequency is 0";
1007 want_inline = false;
1008 }
1009
1010 if (!want_inline)
1011 ;
1012   /* Inlining of a self-recursive function into a copy of itself within
1013      another function is a transformation similar to loop peeling.
1014
1015      Peeling is profitable if we can inline enough copies to make the
1016      probability of an actual call to the self-recursive function very small.
1017      Be sure that the probability of recursion is small.
1018
1019      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1020      This way the expected number of recursions is at most max_depth.  */
1021 else if (peeling)
1022 {
1023 sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1024 int i;
1025 for (i = 1; i < depth; i++)
1026 max_prob = max_prob * max_prob;
1027 if (edge->sreal_frequency () >= max_prob * caller_freq)
1028 {
1029 reason = "frequency of recursive call is too large";
1030 want_inline = false;
1031 }
1032 }
1033   /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
1034      the recursion depth is large.  We reduce function call overhead and
1035      increase the chances that things fit in the hardware return predictor.
1036
1037      Recursive inlining might however increase the cost of stack frame setup,
1038      actually slowing down functions whose recursion tree is wide rather than
1039      deep.
1040
1041      Deciding reliably on when to do recursive inlining without profile
1042      feedback is tricky.  For now we disable recursive inlining when the
1043      probability of self-recursion is low.
1044
1045      Recursive inlining of a self-recursive call within a loop also results in
1046      large loop depths that generally optimize badly.  We may want to throttle
1047      down inlining in those cases.  In particular this seems to happen in one
1048      of the libstdc++ rb tree methods.  */
1049 else
1050 {
1051 if (edge->sreal_frequency () * 100
1052 <= caller_freq
1053 * opt_for_fn (outer_node->decl,
1054 param_min_inline_recursive_probability))
1055 {
1056 reason = "frequency of recursive call is too small";
1057 want_inline = false;
1058 }
1059 }
1060 if (!want_inline && dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1062 " not inlining recursively: %s\n", reason);
1063 return want_inline;
1064 }
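/* Illustration (added for exposition).  For a self-recursive function such as

     static long
     walk (struct node *n, long acc)
     {
       if (!n)
         return acc;
       return walk (n->next, acc + n->val);
     }

   inlining walk into its own copy inside some caller peels one level of the
   recursion, much like peeling a loop iteration; that pays off only when
   most invocations terminate within the peeled copies, which is why the
   peeling branch above requires the recursive edge's frequency to stay below
   a bound derived from 1 - 1/max_depth.  The non-peeling branch is the
   unrolling analogue and instead requires the recursion probability not to
   be too small.  */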
1065
1066 /* Return true when NODE has an uninlinable caller;
1067    set HAS_HOT_CALL if it has a hot call.
1068    Worker for cgraph_for_node_and_aliases.  */
1069
1070 static bool
1071 check_callers (struct cgraph_node *node, void *has_hot_call)
1072 {
1073 struct cgraph_edge *e;
1074 for (e = node->callers; e; e = e->next_caller)
1075 {
1076 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1077 || !opt_for_fn (e->caller->decl, optimize))
1078 return true;
1079 if (!can_inline_edge_p (e, true))
1080 return true;
1081 if (e->recursive_p ())
1082 return true;
1083 if (!can_inline_edge_by_limits_p (e, true))
1084 return true;
1085 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1086 *(bool *)has_hot_call = true;
1087 }
1088 return false;
1089 }
1090
1091 /* If NODE has a caller, return true. */
1092
1093 static bool
1094 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1095 {
1096 if (node->callers)
1097 return true;
1098 return false;
1099 }
1100
1101 /* Decide if inlining NODE would reduce unit size by eliminating
1102    the offline copy of the function.
1103    When COLD is true, cold calls are considered, too.  */
1104
1105 static bool
1106 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1107 {
1108 bool has_hot_call = false;
1109
1110   /* Aliases get inlined along with the function they alias.  */
1111 if (node->alias)
1112 return false;
1113 /* Already inlined? */
1114 if (node->inlined_to)
1115 return false;
1116 /* Does it have callers? */
1117 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1118 return false;
1119 /* Inlining into all callers would increase size? */
1120 if (growth_positive_p (node, NULL, INT_MIN) > 0)
1121 return false;
1122 /* All inlines must be possible. */
1123 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1124 true))
1125 return false;
1126 if (!cold && !has_hot_call)
1127 return false;
1128 return true;
1129 }
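/* Example (added for exposition): a file-local helper such as

     static int scale (int x) { return 3 * x + 1; }

   with a few call sites qualifies here, provided inlining every call does
   not increase the unit size (growth_positive_p) and each edge passes the
   can_inline_edge_p checks; when COLD is false at least one call must also
   be hot.  The payoff is that the offline copy of scale can then be
   removed.  */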
1130
1131 /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1132 in estimate_edge_badness. */
1133
1134 static bool
1135 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1136 {
1137 return size < (DECL_DECLARED_INLINE_P (where->decl)
1138 ? inline_insns_single (where, false, false)
1139 : inline_insns_auto (where, false, false));
1140 }
1141
1142 /* A cost model driving the inlining heuristics in such a way that the edges
1143    with the smallest badness are inlined first.  After each inlining is
1144    performed, the costs of all caller edges of the nodes affected are
1145    recomputed, so the metrics may accurately depend on values such as the
1146    number of inlinable callers of the function or the function body size.  */
1147
1148 static sreal
1149 edge_badness (struct cgraph_edge *edge, bool dump)
1150 {
1151 sreal badness;
1152 int growth;
1153 sreal edge_time, unspec_edge_time;
1154 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1155 class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1156 ipa_hints hints;
1157 cgraph_node *caller = (edge->caller->inlined_to
1158 ? edge->caller->inlined_to
1159 : edge->caller);
1160
1161 growth = estimate_edge_growth (edge);
1162 edge_time = estimate_edge_time (edge, &unspec_edge_time);
1163 hints = estimate_edge_hints (edge);
1164 gcc_checking_assert (edge_time >= 0);
1165 /* Check that inlined time is better, but tolerate some roundoff issues.
1166 FIXME: When callee profile drops to 0 we account calls more. This
1167 should be fixed by never doing that. */
1168 gcc_checking_assert ((edge_time * 100
1169 - callee_info->time * 101).to_int () <= 0
1170 || callee->count.ipa ().initialized_p ());
1171 gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1172
1173 if (dump)
1174 {
1175 fprintf (dump_file, " Badness calculation for %s -> %s\n",
1176 edge->caller->dump_name (),
1177 edge->callee->dump_name ());
1178 fprintf (dump_file, " size growth %i, time %f unspec %f ",
1179 growth,
1180 edge_time.to_double (),
1181 unspec_edge_time.to_double ());
1182 ipa_dump_hints (dump_file, hints);
1183 if (big_speedup_p (edge))
1184 fprintf (dump_file, " big_speedup");
1185 fprintf (dump_file, "\n");
1186 }
1187
1188 /* Always prefer inlining saving code size. */
1189 if (growth <= 0)
1190 {
1191 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1192 if (dump)
1193 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1194 growth);
1195 }
1196 /* Inlining into EXTERNAL functions is not going to change anything unless
1197 they are themselves inlined. */
1198 else if (DECL_EXTERNAL (caller->decl))
1199 {
1200 if (dump)
1201 fprintf (dump_file, " max: function is external\n");
1202 return sreal::max ();
1203 }
1204   /* When the profile is available, compute badness as:
1205
1206                            time_saved * caller_count
1207      goodness = -------------------------------------------------
1208                 growth_of_caller * overall_growth * combined_size
1209
1210      badness = - goodness
1211
1212      Again use a negative value to make calls with a profile appear hotter
1213      than calls without.
1214    */
1215 else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1216 || caller->count.ipa ().nonzero_p ())
1217 {
1218 sreal numerator, denominator;
1219 int overall_growth;
1220 sreal freq = edge->sreal_frequency ();
1221
1222 numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1223 if (numerator <= 0)
1224 numerator = ((sreal) 1 >> 8);
1225 if (caller->count.ipa ().nonzero_p ())
1226 numerator *= caller->count.ipa ().to_gcov_type ();
1227 else if (caller->count.ipa ().initialized_p ())
1228 numerator = numerator >> 11;
1229 denominator = growth;
1230
1231 overall_growth = callee_info->growth;
1232
1233 /* Look for inliner wrappers of the form:
1234
1235 inline_caller ()
1236 {
1237 do_fast_job...
1238 if (need_more_work)
1239 noninline_callee ();
1240 }
1241 Without penalizing this case, we usually inline noninline_callee
1242 into the inline_caller because overall_growth is small preventing
1243 further inlining of inline_caller.
1244
1245 Penalize only callgraph edges to functions with small overall
1246 growth ...
1247 */
1248 if (growth > overall_growth
1249 /* ... and having only one caller which is not inlined ... */
1250 && callee_info->single_caller
1251 && !edge->caller->inlined_to
1252 /* ... and edges executed only conditionally ... */
1253 && freq < 1
1254 /* ... consider case where callee is not inline but caller is ... */
1255 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1256 && DECL_DECLARED_INLINE_P (caller->decl))
1257 /* ... or when early optimizers decided to split and edge
1258 frequency still indicates splitting is a win ... */
1259 || (callee->split_part && !caller->split_part
1260 && freq * 100
1261 < opt_for_fn (caller->decl,
1262 param_partial_inlining_entry_probability)
1263 /* ... and do not overwrite user specified hints. */
1264 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1265 || DECL_DECLARED_INLINE_P (caller->decl)))))
1266 {
1267 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1268 int caller_growth = caller_info->growth;
1269
1270          /* Only apply the penalty when the caller looks like an inline
1271             candidate, and it is not called once.  */
1272 if (!caller_info->single_caller && overall_growth < caller_growth
1273 && caller_info->inlinable
1274 && wrapper_heuristics_may_apply
1275 (caller, ipa_size_summaries->get (caller)->size))
1276 {
1277 if (dump)
1278 fprintf (dump_file,
1279 " Wrapper penalty. Increasing growth %i to %i\n",
1280 overall_growth, caller_growth);
1281 overall_growth = caller_growth;
1282 }
1283 }
1284 if (overall_growth > 0)
1285 {
1286          /* Strongly prefer functions with few callers that can be inlined
1287             fully.  Squaring the growth here leads to smaller binaries on
1288             average.  Watch however for extreme cases and return to a linear
1289             function when growth is large.  */
1290 if (overall_growth < 256)
1291 overall_growth *= overall_growth;
1292 else
1293 overall_growth += 256 * 256 - 256;
1294 denominator *= overall_growth;
1295 }
1296 denominator *= ipa_size_summaries->get (caller)->size + growth;
1297
1298 badness = - numerator / denominator;
1299
1300 if (dump)
1301 {
1302 fprintf (dump_file,
1303 " %f: guessed profile. frequency %f, count %" PRId64
1304 " caller count %" PRId64
1305 " time saved %f"
1306 " overall growth %i (current) %i (original)"
1307 " %i (compensated)\n",
1308 badness.to_double (),
1309 freq.to_double (),
1310 edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1311 caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1312 inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1313 estimate_growth (callee),
1314 callee_info->growth, overall_growth);
1315 }
1316 }
1317   /* When the function-local profile is not available or does not give
1318      useful information (i.e. the frequency is zero), base the cost on the
1319      loop nest and overall size growth, so we optimize for the overall number
1320      of functions fully inlined in the program.  */
1321 else
1322 {
1323 int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1324 badness = growth;
1325
1326 /* Decrease badness if call is nested. */
1327 if (badness > 0)
1328 badness = badness >> nest;
1329 else
1330 badness = badness << nest;
1331 if (dump)
1332 fprintf (dump_file, " %f: no profile. nest %i\n",
1333 badness.to_double (), nest);
1334 }
1335 gcc_checking_assert (badness != 0);
1336
1337 if (edge->recursive_p ())
1338 badness = badness.shift (badness > 0 ? 4 : -4);
1339 if ((hints & (INLINE_HINT_indirect_call
1340 | INLINE_HINT_loop_iterations
1341 | INLINE_HINT_loop_stride))
1342 || callee_info->growth <= 0)
1343 badness = badness.shift (badness > 0 ? -2 : 2);
1344 if (hints & INLINE_HINT_builtin_constant_p)
1345 badness = badness.shift (badness > 0 ? -4 : 4);
1346 if (hints & (INLINE_HINT_same_scc))
1347 badness = badness.shift (badness > 0 ? 3 : -3);
1348 else if (hints & (INLINE_HINT_in_scc))
1349 badness = badness.shift (badness > 0 ? 2 : -2);
1350 else if (hints & (INLINE_HINT_cross_module))
1351 badness = badness.shift (badness > 0 ? 1 : -1);
1352 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1353 badness = badness.shift (badness > 0 ? -4 : 4);
1354 else if ((hints & INLINE_HINT_declared_inline))
1355 badness = badness.shift (badness > 0 ? -3 : 3);
1356 if (dump)
1357 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1358 return badness;
1359 }
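/* Worked example for the guessed-profile case above (hypothetical numbers,
   added for exposition).  For an edge with estimated time saved 2, caller
   size 100, growth 10 and callee overall growth 20, the badness is

     - 2 / (10 * (20 * 20) * (100 + 10))

   the overall growth being squared because it is below 256.  An edge whose
   growth is not positive bypasses this formula entirely and is ordered
   first via the large negative constant computed at the top of the
   function; the hint-based shifts at the end then nudge the result in
   either direction.  */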
1360
1361 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1362 static inline void
1363 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1364 {
1365 sreal badness = edge_badness (edge, false);
1366 if (edge->aux)
1367 {
1368 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1369 gcc_checking_assert (n->get_data () == edge);
1370
1371 /* fibonacci_heap::replace_key does busy updating of the
1372 heap that is unnecessarily expensive.
1373         We do lazy increases: after extracting the minimum, if the key
1374         turns out to be out of date, it is re-inserted into the heap
1375         with the correct value.  */
1376 if (badness < n->get_key ())
1377 {
1378 if (dump_file && (dump_flags & TDF_DETAILS))
1379 {
1380 fprintf (dump_file,
1381 " decreasing badness %s -> %s, %f to %f\n",
1382 edge->caller->dump_name (),
1383 edge->callee->dump_name (),
1384 n->get_key ().to_double (),
1385 badness.to_double ());
1386 }
1387 heap->decrease_key (n, badness);
1388 }
1389 }
1390 else
1391 {
1392 if (dump_file && (dump_flags & TDF_DETAILS))
1393 {
1394 fprintf (dump_file,
1395 " enqueuing call %s -> %s, badness %f\n",
1396 edge->caller->dump_name (),
1397 edge->callee->dump_name (),
1398 badness.to_double ());
1399 }
1400 edge->aux = heap->insert (badness, edge);
1401 }
1402 }
1403
1404
1405 /* NODE was inlined.
1406    All caller edges need to be reset because
1407    size estimates change.  Similarly, callees need to be reset
1408    because a better context may be known.  */
1409
1410 static void
1411 reset_edge_caches (struct cgraph_node *node)
1412 {
1413 struct cgraph_edge *edge;
1414 struct cgraph_edge *e = node->callees;
1415 struct cgraph_node *where = node;
1416 struct ipa_ref *ref;
1417
1418 if (where->inlined_to)
1419 where = where->inlined_to;
1420
1421 reset_node_cache (where);
1422
1423 if (edge_growth_cache != NULL)
1424 for (edge = where->callers; edge; edge = edge->next_caller)
1425 if (edge->inline_failed)
1426 edge_growth_cache->remove (edge);
1427
1428 FOR_EACH_ALIAS (where, ref)
1429 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1430
1431 if (!e)
1432 return;
1433
1434 while (true)
1435 if (!e->inline_failed && e->callee->callees)
1436 e = e->callee->callees;
1437 else
1438 {
1439 if (edge_growth_cache != NULL && e->inline_failed)
1440 edge_growth_cache->remove (e);
1441 if (e->next_callee)
1442 e = e->next_callee;
1443 else
1444 {
1445 do
1446 {
1447 if (e->caller == node)
1448 return;
1449 e = e->caller->callers;
1450 }
1451 while (!e->next_callee);
1452 e = e->next_callee;
1453 }
1454 }
1455 }
1456
1457 /* Recompute HEAP nodes for each caller of NODE.
1458    UPDATED_NODES tracks nodes we have already visited, to avoid redundant work.
1459    When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1460    it is inlinable.  Otherwise check all edges.  */
1461
1462 static void
1463 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1464 bitmap updated_nodes,
1465 struct cgraph_edge *check_inlinablity_for)
1466 {
1467 struct cgraph_edge *edge;
1468 struct ipa_ref *ref;
1469
1470 if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1471 || node->inlined_to)
1472 return;
1473 if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1474 return;
1475
1476 FOR_EACH_ALIAS (node, ref)
1477 {
1478 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1479 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1480 }
1481
1482 for (edge = node->callers; edge; edge = edge->next_caller)
1483 if (edge->inline_failed)
1484 {
1485 if (!check_inlinablity_for
1486 || check_inlinablity_for == edge)
1487 {
1488 if (can_inline_edge_p (edge, false)
1489 && want_inline_small_function_p (edge, false)
1490 && can_inline_edge_by_limits_p (edge, false))
1491 update_edge_key (heap, edge);
1492 else if (edge->aux)
1493 {
1494 report_inline_failed_reason (edge);
1495 heap->delete_node ((edge_heap_node_t *) edge->aux);
1496 edge->aux = NULL;
1497 }
1498 }
1499 else if (edge->aux)
1500 update_edge_key (heap, edge);
1501 }
1502 }
1503
1504 /* Recompute HEAP nodes for each uninlined call in NODE.
1505    If UPDATE_SINCE is non-NULL, check whether edges called within that
1506    function are inlinable (typically UPDATE_SINCE is the inline clone we
1507    introduced where all edges have a new context).
1508
1509    This is used when we know that edge badnesses are only going to increase
1510    (we introduced a new call site) and thus all we need is to insert the newly
1511    created edges into the heap.  */
1512
1513 static void
1514 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1515 struct cgraph_node *update_since,
1516 bitmap updated_nodes)
1517 {
1518 struct cgraph_edge *e = node->callees;
1519 bool check_inlinability = update_since == node;
1520
1521 if (!e)
1522 return;
1523 while (true)
1524 if (!e->inline_failed && e->callee->callees)
1525 {
1526 if (e->callee == update_since)
1527 check_inlinability = true;
1528 e = e->callee->callees;
1529 }
1530 else
1531 {
1532 enum availability avail;
1533 struct cgraph_node *callee;
1534 if (!check_inlinability)
1535 {
1536 if (e->aux
1537 && !bitmap_bit_p (updated_nodes,
1538 e->callee->ultimate_alias_target
1539 (&avail, e->caller)->get_uid ()))
1540 update_edge_key (heap, e);
1541 }
1542       /* We do not reset the callee growth cache here.  Since we added a new
1543          call, the growth should have just increased and consequently the
1544          badness metric doesn't need updating.  */
1545 else if (e->inline_failed
1546 && (callee = e->callee->ultimate_alias_target (&avail,
1547 e->caller))
1548 && avail >= AVAIL_AVAILABLE
1549 && ipa_fn_summaries->get (callee) != NULL
1550 && ipa_fn_summaries->get (callee)->inlinable
1551 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1552 {
1553 if (can_inline_edge_p (e, false)
1554 && want_inline_small_function_p (e, false)
1555 && can_inline_edge_by_limits_p (e, false))
1556 {
1557 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1558 gcc_checking_assert (check_inlinability || e->aux);
1559 update_edge_key (heap, e);
1560 }
1561 else if (e->aux)
1562 {
1563 report_inline_failed_reason (e);
1564 heap->delete_node ((edge_heap_node_t *) e->aux);
1565 e->aux = NULL;
1566 }
1567 }
1568       /* In case we redirected to an unreachable node, we only need to remove
1569          the fibheap entry.  */
1570 else if (e->aux)
1571 {
1572 heap->delete_node ((edge_heap_node_t *) e->aux);
1573 e->aux = NULL;
1574 }
1575 if (e->next_callee)
1576 e = e->next_callee;
1577 else
1578 {
1579 do
1580 {
1581 if (e->caller == node)
1582 return;
1583 if (e->caller == update_since)
1584 check_inlinability = false;
1585 e = e->caller->callers;
1586 }
1587 while (!e->next_callee);
1588 e = e->next_callee;
1589 }
1590 }
1591 }
1592
1593 /* Enqueue all recursive calls from NODE found in WHERE into the priority
1594    queue HEAP, keyed by how much we want to recursively inline the call.  */
1595
1596 static void
1597 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1598 edge_heap_t *heap)
1599 {
1600 struct cgraph_edge *e;
1601 enum availability avail;
1602
1603 for (e = where->callees; e; e = e->next_callee)
1604 if (e->callee == node
1605 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1606 && avail > AVAIL_INTERPOSABLE))
1607 heap->insert (-e->sreal_frequency (), e);
1608 for (e = where->callees; e; e = e->next_callee)
1609 if (!e->inline_failed)
1610 lookup_recursive_calls (node, e->callee, heap);
1611 }
1612
1613 /* Decide on recursive inlining: in case the function has recursive calls,
1614    inline until the body size reaches the given limit.  If any new indirect
1615    edges are discovered in the process, add them to *NEW_EDGES, unless
1616    NEW_EDGES is NULL.  */
1617
1618 static bool
1619 recursive_inlining (struct cgraph_edge *edge,
1620 vec<cgraph_edge *> *new_edges)
1621 {
1622 cgraph_node *to = (edge->caller->inlined_to
1623 ? edge->caller->inlined_to : edge->caller);
1624 int limit = opt_for_fn (to->decl,
1625 param_max_inline_insns_recursive_auto);
1626 edge_heap_t heap (sreal::min ());
1627 struct cgraph_node *node;
1628 struct cgraph_edge *e;
1629 struct cgraph_node *master_clone = NULL, *next;
1630 int depth = 0;
1631 int n = 0;
1632
1633 node = edge->caller;
1634 if (node->inlined_to)
1635 node = node->inlined_to;
1636
1637 if (DECL_DECLARED_INLINE_P (node->decl))
1638 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1639
1640 /* Make sure that function is small enough to be considered for inlining. */
1641 if (estimate_size_after_inlining (node, edge) >= limit)
1642 return false;
1643 lookup_recursive_calls (node, node, &heap);
1644 if (heap.empty ())
1645 return false;
1646
1647 if (dump_file)
1648 fprintf (dump_file,
1649 " Performing recursive inlining on %s\n", node->dump_name ());
1650
1651 /* Do the inlining and update the list of recursive calls during the process. */
1652 while (!heap.empty ())
1653 {
1654 struct cgraph_edge *curr = heap.extract_min ();
1655 struct cgraph_node *cnode, *dest = curr->callee;
1656
1657 if (!can_inline_edge_p (curr, true)
1658 || !can_inline_edge_by_limits_p (curr, true))
1659 continue;
1660
1661 /* MASTER_CLONE is produced when we have already started modifying
1662 the function. Be sure to redirect the edge to the original body before
1663 estimating growth, otherwise we would be seeing growth after inlining
1664 the already modified body. */
1665 if (master_clone)
1666 {
1667 curr->redirect_callee (master_clone);
1668 if (edge_growth_cache != NULL)
1669 edge_growth_cache->remove (curr);
1670 }
1671
1672 if (estimate_size_after_inlining (node, curr) > limit)
1673 {
1674 curr->redirect_callee (dest);
1675 if (edge_growth_cache != NULL)
1676 edge_growth_cache->remove (curr);
1677 break;
1678 }
1679
1680 depth = 1;
1681 for (cnode = curr->caller;
1682 cnode->inlined_to; cnode = cnode->callers->caller)
1683 if (node->decl
1684 == curr->callee->ultimate_alias_target ()->decl)
1685 depth++;
1686
1687 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1688 {
1689 curr->redirect_callee (dest);
1690 if (edge_growth_cache != NULL)
1691 edge_growth_cache->remove (curr);
1692 continue;
1693 }
1694
1695 if (dump_file)
1696 {
1697 fprintf (dump_file,
1698 " Inlining call of depth %i", depth);
1699 if (node->count.nonzero_p () && curr->count.initialized_p ())
1700 {
1701 fprintf (dump_file, " called approx. %.2f times per call",
1702 (double)curr->count.to_gcov_type ()
1703 / node->count.to_gcov_type ());
1704 }
1705 fprintf (dump_file, "\n");
1706 }
1707 if (!master_clone)
1708 {
1709 /* We need the original clone to copy around. */
1710 master_clone = node->create_clone (node->decl, node->count,
1711 false, vNULL, true, NULL, NULL);
1712 for (e = master_clone->callees; e; e = e->next_callee)
1713 if (!e->inline_failed)
1714 clone_inlined_nodes (e, true, false, NULL);
1715 curr->redirect_callee (master_clone);
1716 if (edge_growth_cache != NULL)
1717 edge_growth_cache->remove (curr);
1718 }
1719
1720 inline_call (curr, false, new_edges, &overall_size, true);
1721 reset_node_cache (node);
1722 lookup_recursive_calls (node, curr->callee, &heap);
1723 n++;
1724 }
1725
1726 if (!heap.empty () && dump_file)
1727 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1728
1729 if (!master_clone)
1730 return false;
1731
1732 if (dump_enabled_p ())
1733 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1734 "\n Inlined %i times, "
1735 "body grown from size %i to %i, time %f to %f\n", n,
1736 ipa_size_summaries->get (master_clone)->size,
1737 ipa_size_summaries->get (node)->size,
1738 ipa_fn_summaries->get (master_clone)->time.to_double (),
1739 ipa_fn_summaries->get (node)->time.to_double ());
1740
1741 /* Remove the master clone we used for inlining. We rely on the fact that
1742 clones inlined into the master clone get queued just before the master
1743 clone, so we don't need recursion. */
1744 for (node = symtab->first_function (); node != master_clone;
1745 node = next)
1746 {
1747 next = symtab->next_function (node);
1748 if (node->inlined_to == master_clone)
1749 node->remove ();
1750 }
1751 master_clone->remove ();
1752 return true;
1753 }
1754
1755
1756 /* Given whole compilation unit estimate of INSNS, compute how large we can
1757 allow the unit to grow. */
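/* For example, assuming --param inline-unit-growth=40 and a unit of
   100000 instructions (larger than --param large-unit-insns), the limit
   is 100000 * (100 + 40) / 100 = 140000 instructions.  */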
1758
1759 static int64_t
1760 compute_max_insns (cgraph_node *node, int insns)
1761 {
1762 int max_insns = insns;
1763 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1764 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1765
1766 return ((int64_t) max_insns
1767 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1768 }
1769
1770
1771 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1772
1773 static void
1774 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1775 {
1776 while (new_edges.length () > 0)
1777 {
1778 struct cgraph_edge *edge = new_edges.pop ();
1779
1780 gcc_assert (!edge->aux);
1781 gcc_assert (edge->callee);
1782 if (edge->inline_failed
1783 && can_inline_edge_p (edge, true)
1784 && want_inline_small_function_p (edge, true)
1785 && can_inline_edge_by_limits_p (edge, true))
1786 edge->aux = heap->insert (edge_badness (edge, false), edge);
1787 }
1788 }
1789
1790 /* Remove EDGE from the fibheap. */
1791
1792 static void
1793 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1794 {
1795 if (e->aux)
1796 {
1797 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1798 e->aux = NULL;
1799 }
1800 }
1801
1802 /* Return true if speculation of edge E seems useful.
1803 If ANTICIPATE_INLINING is true, be conservative and hope that E
1804 may get inlined. */
1805
1806 bool
1807 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1808 {
1809 /* If we have already decided to inline the edge, it seems useful. */
1810 if (!e->inline_failed)
1811 return true;
1812
1813 enum availability avail;
1814 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1815 e->caller);
1816
1817 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1818
1819 if (!e->maybe_hot_p ())
1820 return false;
1821
1822 /* See if IP optimizations found something potentially useful about the
1823 function. For now we look only for CONST/PURE flags. Almost everything
1824 else we propagate is useless. */
1825 if (avail >= AVAIL_AVAILABLE)
1826 {
1827 int ecf_flags = flags_from_decl_or_type (target->decl);
1828 if (ecf_flags & ECF_CONST)
1829 {
1830 if (!(e->speculative_call_indirect_edge ()->indirect_info
1831 ->ecf_flags & ECF_CONST))
1832 return true;
1833 }
1834 else if (ecf_flags & ECF_PURE)
1835 {
1836 if (!(e->speculative_call_indirect_edge ()->indirect_info
1837 ->ecf_flags & ECF_PURE))
1838 return true;
1839 }
1840 }
1841 /* If we did not manage to inline the function nor redirect
1842 to an ipa-cp clone (those are recognized by the local flag being set),
1843 it is probably pointless to keep the speculation unless the hardware is
1844 missing an indirect call predictor. */
1845 if (!anticipate_inlining && !target->local)
1846 return false;
1847 /* For overwritable targets there is not much to do. */
1848 if (!can_inline_edge_p (e, false)
1849 || !can_inline_edge_by_limits_p (e, false, true))
1850 return false;
1851 /* OK, speculation seems interesting. */
1852 return true;
1853 }
1854
1855 /* We know that EDGE is not going to be inlined.
1856 See if we can remove speculation. */
1857
1858 static void
1859 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1860 {
1861 if (edge->speculative && !speculation_useful_p (edge, false))
1862 {
1863 struct cgraph_node *node = edge->caller;
1864 struct cgraph_node *where = node->inlined_to
1865 ? node->inlined_to : node;
1866 auto_bitmap updated_nodes;
1867
1868 if (edge->count.ipa ().initialized_p ())
1869 spec_rem += edge->count.ipa ();
1870 cgraph_edge::resolve_speculation (edge);
1871 reset_edge_caches (where);
1872 ipa_update_overall_fn_summary (where);
1873 update_caller_keys (edge_heap, where,
1874 updated_nodes, NULL);
1875 update_callee_keys (edge_heap, where, NULL,
1876 updated_nodes);
1877 }
1878 }
1879
1880 /* Return true if NODE should be accounted for in the overall size estimate.
1881 Skip all nodes optimized for size so we can measure the growth of the hot
1882 part of the program regardless of the padding. */
1883
1884 bool
1885 inline_account_function_p (struct cgraph_node *node)
1886 {
1887 return (!DECL_EXTERNAL (node->decl)
1888 && !opt_for_fn (node->decl, optimize_size)
1889 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1890 }
1891
1892 /* Count the number of callers of NODE and store it into DATA (which
1893 points to an int). Worker for cgraph_for_node_and_aliases. */
1894
1895 static bool
1896 sum_callers (struct cgraph_node *node, void *data)
1897 {
1898 struct cgraph_edge *e;
1899 int *num_calls = (int *)data;
1900
1901 for (e = node->callers; e; e = e->next_caller)
1902 (*num_calls)++;
1903 return false;
1904 }
1905
1906 /* We only propagate across edges with non-interposable callee. */
1907
1908 inline bool
1909 ignore_edge_p (struct cgraph_edge *e)
1910 {
1911 enum availability avail;
1912 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1913 return (avail <= AVAIL_INTERPOSABLE);
1914 }
1915
1916 /* We use a greedy algorithm for inlining small functions:
1917 all inline candidates are put into a prioritized heap ordered by
1918 increasing badness.
1919
1920 The inlining of small functions is bounded by the unit growth parameters. */
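/* The heap is keyed by edge badness, smaller being better.  Every queued
   edge keeps a pointer to its heap node in edge->aux so that its key can
   be updated or the edge removed when function summaries change.  */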
1921
1922 static void
1923 inline_small_functions (void)
1924 {
1925 struct cgraph_node *node;
1926 struct cgraph_edge *edge;
1927 edge_heap_t edge_heap (sreal::min ());
1928 auto_bitmap updated_nodes;
1929 int min_size;
1930 auto_vec<cgraph_edge *> new_indirect_edges;
1931 int initial_size = 0;
1932 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1933 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1934 new_indirect_edges.create (8);
1935
1936 edge_removal_hook_holder
1937 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1938
1939 /* Compute overall unit size and other global parameters used by badness
1940 metrics. */
1941
1942 max_count = profile_count::uninitialized ();
1943 ipa_reduced_postorder (order, true, ignore_edge_p);
1944 free (order);
1945
1946 FOR_EACH_DEFINED_FUNCTION (node)
1947 if (!node->inlined_to)
1948 {
1949 if (!node->alias && node->analyzed
1950 && (node->has_gimple_body_p () || node->thunk)
1951 && opt_for_fn (node->decl, optimize))
1952 {
1953 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1954 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1955
1956 /* Do not account external functions; they will be optimized out
1957 if not inlined. Also only count the non-cold portion of the program. */
1958 if (inline_account_function_p (node))
1959 initial_size += ipa_size_summaries->get (node)->size;
1960 info->growth = estimate_growth (node);
1961
1962 int num_calls = 0;
1963 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1964 true);
1965 if (num_calls == 1)
1966 info->single_caller = true;
1967 if (dfs && dfs->next_cycle)
1968 {
1969 struct cgraph_node *n2;
1970 int id = dfs->scc_no + 1;
1971 for (n2 = node; n2;
1972 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1973 if (opt_for_fn (n2->decl, optimize))
1974 {
1975 ipa_fn_summary *info2 = ipa_fn_summaries->get
1976 (n2->inlined_to ? n2->inlined_to : n2);
1977 if (info2->scc_no)
1978 break;
1979 info2->scc_no = id;
1980 }
1981 }
1982 }
1983
1984 for (edge = node->callers; edge; edge = edge->next_caller)
1985 max_count = max_count.max (edge->count.ipa ());
1986 }
1987 ipa_free_postorder_info ();
1988 initialize_growth_caches ();
1989
1990 if (dump_file)
1991 fprintf (dump_file,
1992 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1993 initial_size);
1994
1995 overall_size = initial_size;
1996 min_size = overall_size;
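/* MIN_SIZE tracks the smallest overall unit size seen so far; it serves
   as the base of the unit growth limit computed by compute_max_insns.  */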
1997
1998 /* Populate the heap with all edges we might inline. */
1999
2000 FOR_EACH_DEFINED_FUNCTION (node)
2001 {
2002 bool update = false;
2003 struct cgraph_edge *next = NULL;
2004 bool has_speculative = false;
2005
2006 if (!opt_for_fn (node->decl, optimize))
2007 continue;
2008
2009 if (dump_file)
2010 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2011
2012 for (edge = node->callees; edge; edge = edge->next_callee)
2013 {
2014 if (edge->inline_failed
2015 && !edge->aux
2016 && can_inline_edge_p (edge, true)
2017 && want_inline_small_function_p (edge, true)
2018 && can_inline_edge_by_limits_p (edge, true)
2019 && edge->inline_failed)
2020 {
2021 gcc_assert (!edge->aux);
2022 update_edge_key (&edge_heap, edge);
2023 }
2024 if (edge->speculative)
2025 has_speculative = true;
2026 }
2027 if (has_speculative)
2028 for (edge = node->callees; edge; edge = next)
2029 {
2030 next = edge->next_callee;
2031 if (edge->speculative
2032 && !speculation_useful_p (edge, edge->aux != NULL))
2033 {
2034 cgraph_edge::resolve_speculation (edge);
2035 update = true;
2036 }
2037 }
2038 if (update)
2039 {
2040 struct cgraph_node *where = node->inlined_to
2041 ? node->inlined_to : node;
2042 ipa_update_overall_fn_summary (where);
2043 reset_edge_caches (where);
2044 update_caller_keys (&edge_heap, where,
2045 updated_nodes, NULL);
2046 update_callee_keys (&edge_heap, where, NULL,
2047 updated_nodes);
2048 bitmap_clear (updated_nodes);
2049 }
2050 }
2051
2052 gcc_assert (in_lto_p
2053 || !(max_count > 0)
2054 || (profile_info && flag_branch_probabilities));
2055
2056 while (!edge_heap.empty ())
2057 {
2058 int old_size = overall_size;
2059 struct cgraph_node *where, *callee;
2060 sreal badness = edge_heap.min_key ();
2061 sreal current_badness;
2062 int growth;
2063
2064 edge = edge_heap.extract_min ();
2065 gcc_assert (edge->aux);
2066 edge->aux = NULL;
2067 if (!edge->inline_failed || !edge->callee->analyzed)
2068 continue;
2069
2070 /* Be sure that the caches are maintained consistent.
2071 This check is affected by scaling roundoff errors when compiling for
2072 IPA, thus we skip it in that case. */
2073 if (flag_checking && !edge->callee->count.ipa_p ()
2074 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2075 {
2076 sreal cached_badness = edge_badness (edge, false);
2077
2078 int old_size_est = estimate_edge_size (edge);
2079 sreal old_time_est = estimate_edge_time (edge);
2080 int old_hints_est = estimate_edge_hints (edge);
2081
2082 if (edge_growth_cache != NULL)
2083 edge_growth_cache->remove (edge);
2084 reset_node_cache (edge->caller->inlined_to
2085 ? edge->caller->inlined_to
2086 : edge->caller);
2087 gcc_assert (old_size_est == estimate_edge_size (edge));
2088 gcc_assert (old_time_est == estimate_edge_time (edge));
2089 /* FIXME:
2090
2091 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2092
2093 fails with profile feedback because some hints depend on the
2094 maybe_hot_edge_p predicate and, because the callee gets inlined into other
2095 calls, the edge may become cold.
2096 This ought to be fixed by computing relative probabilities
2097 for the given invocation, but that will be better done once the whole
2098 code is converted to sreals. Disable for now and revert to the "wrong"
2099 value so the enable/disable checking paths agree. */
2100 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2101
2102 /* When updating the edge costs, we only decrease badness in the keys.
2103 Increases of badness are handled lazily; when we see a key with an
2104 out-of-date value on it, we re-insert it now. */
2105 current_badness = edge_badness (edge, false);
2106 gcc_assert (cached_badness == current_badness);
2107 gcc_assert (current_badness >= badness);
2108 }
2109 else
2110 current_badness = edge_badness (edge, false);
2111 if (current_badness != badness)
2112 {
2113 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2114 {
2115 edge->aux = edge_heap.insert (current_badness, edge);
2116 continue;
2117 }
2118 else
2119 badness = current_badness;
2120 }
2121
2122 if (!can_inline_edge_p (edge, true)
2123 || !can_inline_edge_by_limits_p (edge, true))
2124 {
2125 resolve_noninline_speculation (&edge_heap, edge);
2126 continue;
2127 }
2128
2129 callee = edge->callee->ultimate_alias_target ();
2130 growth = estimate_edge_growth (edge);
2131 if (dump_file)
2132 {
2133 fprintf (dump_file,
2134 "\nConsidering %s with %i size\n",
2135 callee->dump_name (),
2136 ipa_size_summaries->get (callee)->size);
2137 fprintf (dump_file,
2138 " to be inlined into %s in %s:%i\n"
2139 " Estimated badness is %f, frequency %.2f.\n",
2140 edge->caller->dump_name (),
2141 edge->call_stmt
2142 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2143 edge->call_stmt))
2144 > BUILTINS_LOCATION)
2145 ? gimple_filename ((const gimple *) edge->call_stmt)
2146 : "unknown",
2147 edge->call_stmt
2148 ? gimple_lineno ((const gimple *) edge->call_stmt)
2149 : -1,
2150 badness.to_double (),
2151 edge->sreal_frequency ().to_double ());
2152 if (edge->count.ipa ().initialized_p ())
2153 {
2154 fprintf (dump_file, " Called ");
2155 edge->count.ipa ().dump (dump_file);
2156 fprintf (dump_file, " times\n");
2157 }
2158 if (dump_flags & TDF_DETAILS)
2159 edge_badness (edge, true);
2160 }
2161
2162 where = edge->caller;
2163
2164 if (overall_size + growth > compute_max_insns (where, min_size)
2165 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2166 {
2167 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2168 report_inline_failed_reason (edge);
2169 resolve_noninline_speculation (&edge_heap, edge);
2170 continue;
2171 }
2172
2173 if (!want_inline_small_function_p (edge, true))
2174 {
2175 resolve_noninline_speculation (&edge_heap, edge);
2176 continue;
2177 }
2178
2179 profile_count old_count = callee->count;
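/* Remember the callee's offline profile count; if it changes as a result
   of inlining, the heap keys that depend on it are recomputed below.  */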
2180
2181 /* Heuristics for inlining small functions work poorly for
2182 recursive calls where we have effects similar to loop unrolling.
2183 When inlining such an edge seems profitable, leave the decision to the
2184 special-purpose recursive inliner. */
2185 if (edge->recursive_p ())
2186 {
2187 if (where->inlined_to)
2188 where = where->inlined_to;
2189 if (!recursive_inlining (edge,
2190 opt_for_fn (edge->caller->decl,
2191 flag_indirect_inlining)
2192 ? &new_indirect_edges : NULL))
2193 {
2194 edge->inline_failed = CIF_RECURSIVE_INLINING;
2195 resolve_noninline_speculation (&edge_heap, edge);
2196 continue;
2197 }
2198 reset_edge_caches (where);
2199 /* The recursive inliner inlines all recursive calls of the function
2200 at once. Consequently we need to update all callee keys. */
2201 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2202 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2203 update_callee_keys (&edge_heap, where, where, updated_nodes);
2204 bitmap_clear (updated_nodes);
2205 }
2206 else
2207 {
2208 struct cgraph_node *outer_node = NULL;
2209 int depth = 0;
2210
2211 /* Consider the case where the self-recursive function A is inlined
2212 into B. This is a desired optimization in some cases, since it
2213 leads to an effect similar to loop peeling and we might completely
2214 optimize out the recursive call. However we must be extra
2215 selective. */
2216
2217 where = edge->caller;
2218 while (where->inlined_to)
2219 {
2220 if (where->decl == callee->decl)
2221 outer_node = where, depth++;
2222 where = where->callers->caller;
2223 }
2224 if (outer_node
2225 && !want_inline_self_recursive_call_p (edge, outer_node,
2226 true, depth))
2227 {
2228 edge->inline_failed
2229 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2230 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2231 resolve_noninline_speculation (&edge_heap, edge);
2232 continue;
2233 }
2234 else if (depth && dump_file)
2235 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2236
2237 gcc_checking_assert (!callee->inlined_to);
2238
2239 int old_size = ipa_size_summaries->get (where)->size;
2240 sreal old_time = ipa_fn_summaries->get (where)->time;
2241
2242 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2243 reset_edge_caches (edge->callee);
2244 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2245
2246 /* If caller's size and time increased we do not need to update
2247 all edges because badness is not going to decrease. */
2248 if (old_size <= ipa_size_summaries->get (where)->size
2249 && old_time <= ipa_fn_summaries->get (where)->time
2250 /* The wrapper penalty may be non-monotonic in this respect.
2251 Fortunately it only affects small functions. */
2252 && !wrapper_heuristics_may_apply (where, old_size))
2253 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2254 updated_nodes);
2255 else
2256 update_callee_keys (&edge_heap, where,
2257 edge->callee,
2258 updated_nodes);
2259 }
2260 where = edge->caller;
2261 if (where->inlined_to)
2262 where = where->inlined_to;
2263
2264 /* Our profitability metric can depend on local properties
2265 such as the number of inlinable calls and the size of the function body.
2266 After inlining these properties might change for the function we
2267 inlined into (since its body size changed) and for the functions
2268 called by the function we inlined (since the number of their inlinable
2269 callers might change). */
2270 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2271 /* Offline copy count has possibly changed, recompute if profile is
2272 available. */
2273 struct cgraph_node *n
2274 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2275 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2276 && n->count.ipa_p ())
2277 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2278 bitmap_clear (updated_nodes);
2279
2280 if (dump_enabled_p ())
2281 {
2282 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2283
2284 /* dump_printf can't handle %+i. */
2285 char buf_net_change[100];
2286 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2287 overall_size - old_size);
2288
2289 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2290 " Inlined %C into %C which now has time %f and "
2291 "size %i, net change of %s%s.\n",
2292 edge->callee, edge->caller,
2293 s->time.to_double (),
2294 ipa_size_summaries->get (edge->caller)->size,
2295 buf_net_change,
2296 cross_module_call_p (edge) ? " (cross module)":"");
2297 }
2298 if (min_size > overall_size)
2299 {
2300 min_size = overall_size;
2301
2302 if (dump_file)
2303 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2304 }
2305 }
2306
2307 free_growth_caches ();
2308 if (dump_enabled_p ())
2309 dump_printf (MSG_NOTE,
2310 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2311 initial_size, overall_size,
2312 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2313 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2314 }
2315
2316 /* Flatten NODE. Performed both during early inlining and
2317 at IPA inlining time. */
2318
2319 static void
2320 flatten_function (struct cgraph_node *node, bool early, bool update)
2321 {
2322 struct cgraph_edge *e;
2323
2324 /* We shouldn't be called recursively when we are being processed. */
2325 gcc_assert (node->aux == NULL);
2326
2327 node->aux = (void *) node;
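/* node->aux acts as a cycle marker: it stays set while NODE is being
   flattened so the recursion below can detect and refuse cycles.  */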
2328
2329 for (e = node->callees; e; e = e->next_callee)
2330 {
2331 struct cgraph_node *orig_callee;
2332 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2333
2334 /* Have we hit a cycle? It is time to give up. */
2335 if (callee->aux)
2336 {
2337 if (dump_enabled_p ())
2338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2339 "Not inlining %C into %C to avoid cycle.\n",
2340 callee, e->caller);
2341 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2342 e->inline_failed = CIF_RECURSIVE_INLINING;
2343 continue;
2344 }
2345
2346 /* When the edge is already inlined, we just need to recurse into
2347 it in order to fully flatten the leaves. */
2348 if (!e->inline_failed)
2349 {
2350 flatten_function (callee, early, false);
2351 continue;
2352 }
2353
2354 /* The flatten attribute needs to be processed during late inlining. For
2355 extra code quality we however do flattening during early optimization,
2356 too. */
2357 if (!early
2358 ? !can_inline_edge_p (e, true)
2359 && !can_inline_edge_by_limits_p (e, true)
2360 : !can_early_inline_edge_p (e))
2361 continue;
2362
2363 if (e->recursive_p ())
2364 {
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2367 "Not inlining: recursive call.\n");
2368 continue;
2369 }
2370
2371 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2372 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2373 {
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2376 "Not inlining: SSA form does not match.\n");
2377 continue;
2378 }
2379
2380 /* Inline the edge and flatten the inline clone. Avoid
2381 recursing through the original node if the node was cloned. */
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2384 " Inlining %C into %C.\n",
2385 callee, e->caller);
2386 orig_callee = callee;
2387 inline_call (e, true, NULL, NULL, false);
2388 if (e->callee != orig_callee)
2389 orig_callee->aux = (void *) node;
2390 flatten_function (e->callee, early, false);
2391 if (e->callee != orig_callee)
2392 orig_callee->aux = NULL;
2393 }
2394
2395 node->aux = NULL;
2396 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2397 if (update && opt_for_fn (where->decl, optimize))
2398 ipa_update_overall_fn_summary (where);
2399 }
2400
2401 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2402 DATA points to the number of calls originally found, so we avoid infinite
2403 recursion. */
2404
2405 static bool
2406 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2407 hash_set<cgraph_node *> *callers)
2408 {
2409 int *num_calls = (int *)data;
2410 bool callee_removed = false;
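/* Inline NODE into its callers one by one; inline_call consumes the edge
   just inlined from node->callers, so the loop terminates once all calls
   are inlined, *num_calls is exhausted, or the callee gets removed.  */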
2411
2412 while (node->callers && !node->inlined_to)
2413 {
2414 struct cgraph_node *caller = node->callers->caller;
2415
2416 if (!can_inline_edge_p (node->callers, true)
2417 || !can_inline_edge_by_limits_p (node->callers, true)
2418 || node->callers->recursive_p ())
2419 {
2420 if (dump_file)
2421 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2422 *num_calls = 0;
2423 return false;
2424 }
2425
2426 if (dump_file)
2427 {
2428 cgraph_node *ultimate = node->ultimate_alias_target ();
2429 fprintf (dump_file,
2430 "\nInlining %s size %i.\n",
2431 ultimate->dump_name (),
2432 ipa_size_summaries->get (ultimate)->size);
2433 fprintf (dump_file,
2434 " Called once from %s %i insns.\n",
2435 node->callers->caller->dump_name (),
2436 ipa_size_summaries->get (node->callers->caller)->size);
2437 }
2438
2439 /* Remember which callers we inlined to, delaying updating the
2440 overall summary. */
2441 callers->add (node->callers->caller);
2442 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2443 if (dump_file)
2444 fprintf (dump_file,
2445 " Inlined into %s which now has %i size\n",
2446 caller->dump_name (),
2447 ipa_size_summaries->get (caller)->size);
2448 if (!(*num_calls)--)
2449 {
2450 if (dump_file)
2451 fprintf (dump_file, "New calls found; giving up.\n");
2452 return callee_removed;
2453 }
2454 if (callee_removed)
2455 return true;
2456 }
2457 return false;
2458 }
2459
2460 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2461 update. */
2462
2463 static bool
2464 inline_to_all_callers (struct cgraph_node *node, void *data)
2465 {
2466 hash_set<cgraph_node *> callers;
2467 bool res = inline_to_all_callers_1 (node, data, &callers);
2468 /* Perform the delayed update of the overall summary of all callers
2469 processed. This avoids quadratic behavior in the cases where
2470 we have a lot of calls to the same function. */
2471 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2472 i != callers.end (); ++i)
2473 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2474 return res;
2475 }
2476
2477 /* Output overall time estimate. */
2478 static void
2479 dump_overall_stats (void)
2480 {
2481 sreal sum_weighted = 0, sum = 0;
2482 struct cgraph_node *node;
2483
2484 FOR_EACH_DEFINED_FUNCTION (node)
2485 if (!node->inlined_to
2486 && !node->alias)
2487 {
2488 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2489 if (s != NULL)
2490 {
2491 sum += s->time;
2492 if (node->count.ipa ().initialized_p ())
2493 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2494 }
2495 }
2496 fprintf (dump_file, "Overall time estimate: "
2497 "%f weighted by profile: "
2498 "%f\n", sum.to_double (), sum_weighted.to_double ());
2499 }
2500
2501 /* Output some useful stats about inlining. */
2502
2503 static void
2504 dump_inline_stats (void)
2505 {
2506 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2507 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2508 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2509 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2510 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2511 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2512 int64_t reason[CIF_N_REASONS][2];
2513 sreal reason_freq[CIF_N_REASONS];
2514 int i;
2515 struct cgraph_node *node;
2516
2517 memset (reason, 0, sizeof (reason));
2518 for (i=0; i < CIF_N_REASONS; i++)
2519 reason_freq[i] = 0;
2520 FOR_EACH_DEFINED_FUNCTION (node)
2521 {
2522 struct cgraph_edge *e;
2523 for (e = node->callees; e; e = e->next_callee)
2524 {
2525 if (e->inline_failed)
2526 {
2527 if (e->count.ipa ().initialized_p ())
2528 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2529 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2530 reason[(int) e->inline_failed][1] ++;
2531 if (DECL_VIRTUAL_P (e->callee->decl)
2532 && e->count.ipa ().initialized_p ())
2533 {
2534 if (e->indirect_inlining_edge)
2535 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2536 else
2537 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2538 }
2539 else if (e->count.ipa ().initialized_p ())
2540 {
2541 if (e->indirect_inlining_edge)
2542 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2543 else
2544 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2545 }
2546 }
2547 else if (e->count.ipa ().initialized_p ())
2548 {
2549 if (e->speculative)
2550 {
2551 if (DECL_VIRTUAL_P (e->callee->decl))
2552 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2553 else
2554 inlined_speculative += e->count.ipa ().to_gcov_type ();
2555 }
2556 else if (DECL_VIRTUAL_P (e->callee->decl))
2557 {
2558 if (e->indirect_inlining_edge)
2559 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2560 else
2561 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2562 }
2563 else
2564 {
2565 if (e->indirect_inlining_edge)
2566 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2567 else
2568 inlined_cnt += e->count.ipa ().to_gcov_type ();
2569 }
2570 }
2571 }
2572 for (e = node->indirect_calls; e; e = e->next_callee)
2573 if (e->indirect_info->polymorphic
2574 && e->count.ipa ().initialized_p ())
2575 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2576 else if (e->count.ipa ().initialized_p ())
2577 indirect_cnt += e->count.ipa ().to_gcov_type ();
2578 }
2579 if (max_count.initialized_p ())
2580 {
2581 fprintf (dump_file,
2582 "Inlined %" PRId64 " + speculative "
2583 "%" PRId64 " + speculative polymorphic "
2584 "%" PRId64 " + previously indirect "
2585 "%" PRId64 " + virtual "
2586 "%" PRId64 " + virtual and previously indirect "
2587 "%" PRId64 "\n" "Not inlined "
2588 "%" PRId64 " + previously indirect "
2589 "%" PRId64 " + virtual "
2590 "%" PRId64 " + virtual and previously indirect "
2591 "%" PRId64 " + still indirect "
2592 "%" PRId64 " + still indirect polymorphic "
2593 "%" PRId64 "\n", inlined_cnt,
2594 inlined_speculative, inlined_speculative_ply,
2595 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2596 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2597 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2598 fprintf (dump_file, "Removed speculations ");
2599 spec_rem.dump (dump_file);
2600 fprintf (dump_file, "\n");
2601 }
2602 dump_overall_stats ();
2603 fprintf (dump_file, "\nWhy inlining failed?\n");
2604 for (i = 0; i < CIF_N_REASONS; i++)
2605 if (reason[i][1])
2606 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2607 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2608 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2609 }
2610
2611 /* Called when node is removed. */
2612
2613 static void
2614 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2615 {
2616 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2617 return;
2618
2619 hash_set<struct cgraph_node *> *removed
2620 = (hash_set<struct cgraph_node *> *) data;
2621 removed->add (node);
2622 }
2623
2624 /* Decide on the inlining. We do so in the topological order to avoid
2625 the expense of updating the data structures. */
2626
2627 static unsigned int
2628 ipa_inline (void)
2629 {
2630 struct cgraph_node *node;
2631 int nnodes;
2632 struct cgraph_node **order;
2633 int i, j;
2634 int cold;
2635 bool remove_functions = false;
2636
2637 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2638
2639 if (dump_file)
2640 ipa_dump_fn_summaries (dump_file);
2641
2642 nnodes = ipa_reverse_postorder (order);
2643 spec_rem = profile_count::zero ();
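/* spec_rem accumulates the profile count of speculative calls we resolve
   away; it is reported by dump_inline_stats.  */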
2644
2645 FOR_EACH_FUNCTION (node)
2646 {
2647 node->aux = 0;
2648
2649 /* Recompute the default reasons for inlining because they may have
2650 changed during merging. */
2651 if (in_lto_p)
2652 {
2653 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2654 {
2655 gcc_assert (e->inline_failed);
2656 initialize_inline_failed (e);
2657 }
2658 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2659 initialize_inline_failed (e);
2660 }
2661 }
2662
2663 if (dump_file)
2664 fprintf (dump_file, "\nFlattening functions:\n");
2665
2666 /* First shrink the order array, so that it only contains nodes with
2667 the flatten attribute. */
2668 for (i = nnodes - 1, j = i; i >= 0; i--)
2669 {
2670 node = order[i];
2671 if (node->definition
2672 /* Do not try to flatten aliases. These may happen for example when
2673 creating local aliases. */
2674 && !node->alias
2675 && lookup_attribute ("flatten",
2676 DECL_ATTRIBUTES (node->decl)) != NULL)
2677 order[j--] = order[i];
2678 }
2679
2680 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2681 nodes with flatten attribute. If there is more than one such
2682 node, we need to register a node removal hook, as flatten_function
2683 could remove other nodes with flatten attribute. See PR82801. */
2684 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2685 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2686 if (j < nnodes - 2)
2687 {
2688 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2689 node_removal_hook_holder
2690 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2691 flatten_removed_nodes);
2692 }
2693
2694 /* In the first pass handle functions to be flattened. Do this with
2695 a priority so none of our later choices will make this impossible. */
2696 for (i = nnodes - 1; i > j; i--)
2697 {
2698 node = order[i];
2699 if (flatten_removed_nodes
2700 && flatten_removed_nodes->contains (node))
2701 continue;
2702
2703 /* Handle nodes to be flattened.
2704 Ideally, when processing callees we would stop inlining at the
2705 entry of cycles, possibly cloning that entry point and
2706 trying to flatten it, turning it into a self-recursive
2707 function. */
2708 if (dump_file)
2709 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2710 flatten_function (node, false, true);
2711 }
2712
2713 if (j < nnodes - 2)
2714 {
2715 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2716 delete flatten_removed_nodes;
2717 }
2718 free (order);
2719
2720 if (dump_file)
2721 dump_overall_stats ();
2722
2723 inline_small_functions ();
2724
2725 gcc_assert (symtab->state == IPA_SSA);
2726 symtab->state = IPA_SSA_AFTER_INLINING;
2727 /* Do first after-inlining removal. We want to remove all "stale" extern
2728 inline functions and virtual functions so we really know what is called
2729 once. */
2730 symtab->remove_unreachable_nodes (dump_file);
2731
2732 /* Inline functions with the property that after inlining into all callers
2733 the code size will shrink because the out-of-line copy is eliminated.
2734 We do this regardless of the callee size as long as the function growth
2735 limits are met. */
2736 if (dump_file)
2737 fprintf (dump_file,
2738 "\nDeciding on functions to be inlined into all callers and "
2739 "removing useless speculations:\n");
2740
2741 /* Inlining one function called once has a good chance of preventing
2742 inlining of other functions into the same callee. Ideally we should
2743 work in priority order, but inlining hot functions first is probably
2744 a good cut without the extra pain of maintaining the queue.
2745
2746 ??? This does not really fit the bill perfectly: inlining a function
2747 into its callee often leads to better optimization of the callee due
2748 to the increased context.
2749 For example, if the main() function calls a function that outputs help
2750 and then a function that does the main optimization, we should inline
2751 the second with priority even if both calls are cold by themselves.
2752
2753 We probably want to implement a new predicate replacing our use of
2754 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2755 to be hot. */
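/* The first iteration (cold == 0) handles functions that are not cold;
   the second iteration also considers cold functions.  */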
2756 for (cold = 0; cold <= 1; cold ++)
2757 {
2758 FOR_EACH_DEFINED_FUNCTION (node)
2759 {
2760 struct cgraph_edge *edge, *next;
2761 bool update=false;
2762
2763 if (!opt_for_fn (node->decl, optimize)
2764 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2765 continue;
2766
2767 for (edge = node->callees; edge; edge = next)
2768 {
2769 next = edge->next_callee;
2770 if (edge->speculative && !speculation_useful_p (edge, false))
2771 {
2772 if (edge->count.ipa ().initialized_p ())
2773 spec_rem += edge->count.ipa ();
2774 cgraph_edge::resolve_speculation (edge);
2775 update = true;
2776 remove_functions = true;
2777 }
2778 }
2779 if (update)
2780 {
2781 struct cgraph_node *where = node->inlined_to
2782 ? node->inlined_to : node;
2783 reset_edge_caches (where);
2784 ipa_update_overall_fn_summary (where);
2785 }
2786 if (want_inline_function_to_all_callers_p (node, cold))
2787 {
2788 int num_calls = 0;
2789 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2790 true);
2791 while (node->call_for_symbol_and_aliases
2792 (inline_to_all_callers, &num_calls, true))
2793 ;
2794 remove_functions = true;
2795 }
2796 }
2797 }
2798
2799 if (dump_enabled_p ())
2800 dump_printf (MSG_NOTE,
2801 "\nInlined %i calls, eliminated %i functions\n\n",
2802 ncalls_inlined, nfunctions_inlined);
2803 if (dump_file)
2804 dump_inline_stats ();
2805
2806 if (dump_file)
2807 ipa_dump_fn_summaries (dump_file);
2808 return remove_functions ? TODO_remove_functions : 0;
2809 }
2810
2811 /* Inline always-inline function calls in NODE. */
2812
2813 static bool
2814 inline_always_inline_functions (struct cgraph_node *node)
2815 {
2816 struct cgraph_edge *e;
2817 bool inlined = false;
2818
2819 for (e = node->callees; e; e = e->next_callee)
2820 {
2821 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2822 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2823 continue;
2824
2825 if (e->recursive_p ())
2826 {
2827 if (dump_enabled_p ())
2828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2829 " Not inlining recursive call to %C.\n",
2830 e->callee);
2831 e->inline_failed = CIF_RECURSIVE_INLINING;
2832 continue;
2833 }
2834
2835 if (!can_early_inline_edge_p (e))
2836 {
2837 /* Set inlined to true if the callee is marked "always_inline" but
2838 is not inlinable. This will allow flagging an error later in
2839 expand_call_inline in tree-inline.c. */
2840 if (lookup_attribute ("always_inline",
2841 DECL_ATTRIBUTES (callee->decl)) != NULL)
2842 inlined = true;
2843 continue;
2844 }
2845
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2848 " Inlining %C into %C (always_inline).\n",
2849 e->callee, e->caller);
2850 inline_call (e, true, NULL, NULL, false);
2851 inlined = true;
2852 }
2853 if (inlined)
2854 ipa_update_overall_fn_summary (node);
2855
2856 return inlined;
2857 }
2858
2859 /* Decide on the inlining. We do so in the topological order to avoid
2860 the expense of updating the data structures. */
2861
2862 static bool
2863 early_inline_small_functions (struct cgraph_node *node)
2864 {
2865 struct cgraph_edge *e;
2866 bool inlined = false;
2867
2868 for (e = node->callees; e; e = e->next_callee)
2869 {
2870 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2871
2872 /* We can encounter a not-yet-analyzed function during
2873 early inlining on callgraphs with strongly
2874 connected components. */
2875 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2876 if (s == NULL || !s->inlinable || !e->inline_failed)
2877 continue;
2878
2879 /* Do not consider functions not declared inline. */
2880 if (!DECL_DECLARED_INLINE_P (callee->decl)
2881 && !opt_for_fn (node->decl, flag_inline_small_functions)
2882 && !opt_for_fn (node->decl, flag_inline_functions))
2883 continue;
2884
2885 if (dump_enabled_p ())
2886 dump_printf_loc (MSG_NOTE, e->call_stmt,
2887 "Considering inline candidate %C.\n",
2888 callee);
2889
2890 if (!can_early_inline_edge_p (e))
2891 continue;
2892
2893 if (e->recursive_p ())
2894 {
2895 if (dump_enabled_p ())
2896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2897 " Not inlining: recursive call.\n");
2898 continue;
2899 }
2900
2901 if (!want_early_inline_function_p (e))
2902 continue;
2903
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2906 " Inlining %C into %C.\n",
2907 callee, e->caller);
2908 inline_call (e, true, NULL, NULL, false);
2909 inlined = true;
2910 }
2911
2912 if (inlined)
2913 ipa_update_overall_fn_summary (node);
2914
2915 return inlined;
2916 }
2917
2918 unsigned int
2919 early_inliner (function *fun)
2920 {
2921 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2922 struct cgraph_edge *edge;
2923 unsigned int todo = 0;
2924 int iterations = 0;
2925 bool inlined = false;
2926
2927 if (seen_error ())
2928 return 0;
2929
2930 /* Do nothing if the data structures for the IPA inliner are already computed.
2931 This happens when some pass decides to construct a new function and
2932 cgraph_add_new_function calls lowering passes and early optimization on
2933 it. This may confuse us when the early inliner decides to inline a call to
2934 a function clone, because function clones don't have a parameter list in
2935 ipa-prop matching their signature. */
2936 if (ipa_node_params_sum)
2937 return 0;
2938
2939 if (flag_checking)
2940 node->verify ();
2941 node->remove_all_references ();
2942
2943 /* Even when not optimizing or not inlining, inline always-inline
2944 functions. */
2945 inlined = inline_always_inline_functions (node);
2946
2947 if (!optimize
2948 || flag_no_inline
2949 || !flag_early_inlining
2950 /* Never inline regular functions into always-inline functions
2951 during incremental inlining. This sucks as functions calling
2952 always inline functions will get less optimized, but at the
2953 same time inlining of functions calling always inline
2954 function into an always inline function might introduce
2955 cycles of edges to be always inlined in the callgraph.
2956
2957 We might want to be smarter and just avoid this type of inlining. */
2958 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2959 && lookup_attribute ("always_inline",
2960 DECL_ATTRIBUTES (node->decl))))
2961 ;
2962 else if (lookup_attribute ("flatten",
2963 DECL_ATTRIBUTES (node->decl)) != NULL)
2964 {
2965 /* When the function is marked to be flattened, recursively inline
2966 all calls in it. */
2967 if (dump_enabled_p ())
2968 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2969 "Flattening %C\n", node);
2970 flatten_function (node, true, true);
2971 inlined = true;
2972 }
2973 else
2974 {
2975 /* If some always_inline functions were inlined, apply the changes.
2976 This way we will not account always-inline functions in the growth limits
2977 and moreover we will inline calls from always-inline functions that we
2978 previously skipped because of the conditional above. */
2979 if (inlined)
2980 {
2981 timevar_push (TV_INTEGRATION);
2982 todo |= optimize_inline_calls (current_function_decl);
2983 /* optimize_inline_calls call above might have introduced new
2984 statements that don't have inline parameters computed. */
2985 for (edge = node->callees; edge; edge = edge->next_callee)
2986 {
2987 /* We can encounter a not-yet-analyzed function during
2988 early inlining on callgraphs with strongly
2989 connected components. */
2990 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2991 es->call_stmt_size
2992 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2993 es->call_stmt_time
2994 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2995 }
2996 ipa_update_overall_fn_summary (node);
2997 inlined = false;
2998 timevar_pop (TV_INTEGRATION);
2999 }
3000 /* We iterate incremental inlining to get trivial cases of indirect
3001 inlining. */
3002 while (iterations < opt_for_fn (node->decl,
3003 param_early_inliner_max_iterations)
3004 && early_inline_small_functions (node))
3005 {
3006 timevar_push (TV_INTEGRATION);
3007 todo |= optimize_inline_calls (current_function_decl);
3008
3009 /* Technically we ought to recompute inline parameters so the new
3010 iteration of the early inliner works as expected. We however have the
3011 values approximately right and thus we only need to update the edge
3012 info that might be cleared out for newly discovered edges. */
3013 for (edge = node->callees; edge; edge = edge->next_callee)
3014 {
3015 /* We have no summary for new bound store calls yet. */
3016 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3017 es->call_stmt_size
3018 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3019 es->call_stmt_time
3020 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3021 }
3022 if (iterations < opt_for_fn (node->decl,
3023 param_early_inliner_max_iterations) - 1)
3024 ipa_update_overall_fn_summary (node);
3025 timevar_pop (TV_INTEGRATION);
3026 iterations++;
3027 inlined = false;
3028 }
3029 if (dump_file)
3030 fprintf (dump_file, "Iterations: %i\n", iterations);
3031 }
3032
3033 if (inlined)
3034 {
3035 timevar_push (TV_INTEGRATION);
3036 todo |= optimize_inline_calls (current_function_decl);
3037 timevar_pop (TV_INTEGRATION);
3038 }
3039
3040 fun->always_inline_functions_inlined = true;
3041
3042 return todo;
3043 }
3044
3045 /* Do inlining of small functions. Doing so early helps profiling and other
3046 passes to be somewhat more effective and avoids some code duplication in
3047 later real inlining pass for testcases with very many function calls. */
3048
3049 namespace {
3050
3051 const pass_data pass_data_early_inline =
3052 {
3053 GIMPLE_PASS, /* type */
3054 "einline", /* name */
3055 OPTGROUP_INLINE, /* optinfo_flags */
3056 TV_EARLY_INLINING, /* tv_id */
3057 PROP_ssa, /* properties_required */
3058 0, /* properties_provided */
3059 0, /* properties_destroyed */
3060 0, /* todo_flags_start */
3061 0, /* todo_flags_finish */
3062 };
3063
3064 class pass_early_inline : public gimple_opt_pass
3065 {
3066 public:
3067 pass_early_inline (gcc::context *ctxt)
3068 : gimple_opt_pass (pass_data_early_inline, ctxt)
3069 {}
3070
3071 /* opt_pass methods: */
3072 virtual unsigned int execute (function *);
3073
3074 }; // class pass_early_inline
3075
3076 unsigned int
3077 pass_early_inline::execute (function *fun)
3078 {
3079 return early_inliner (fun);
3080 }
3081
3082 } // anon namespace
3083
3084 gimple_opt_pass *
3085 make_pass_early_inline (gcc::context *ctxt)
3086 {
3087 return new pass_early_inline (ctxt);
3088 }
3089
3090 namespace {
3091
3092 const pass_data pass_data_ipa_inline =
3093 {
3094 IPA_PASS, /* type */
3095 "inline", /* name */
3096 OPTGROUP_INLINE, /* optinfo_flags */
3097 TV_IPA_INLINING, /* tv_id */
3098 0, /* properties_required */
3099 0, /* properties_provided */
3100 0, /* properties_destroyed */
3101 0, /* todo_flags_start */
3102 ( TODO_dump_symtab ), /* todo_flags_finish */
3103 };
3104
3105 class pass_ipa_inline : public ipa_opt_pass_d
3106 {
3107 public:
3108 pass_ipa_inline (gcc::context *ctxt)
3109 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3110 NULL, /* generate_summary */
3111 NULL, /* write_summary */
3112 NULL, /* read_summary */
3113 NULL, /* write_optimization_summary */
3114 NULL, /* read_optimization_summary */
3115 NULL, /* stmt_fixup */
3116 0, /* function_transform_todo_flags_start */
3117 inline_transform, /* function_transform */
3118 NULL) /* variable_transform */
3119 {}
3120
3121 /* opt_pass methods: */
3122 virtual unsigned int execute (function *) { return ipa_inline (); }
3123
3124 }; // class pass_ipa_inline
3125
3126 } // anon namespace
3127
3128 ipa_opt_pass_d *
3129 make_pass_ipa_inline (gcc::context *ctxt)
3130 {
3131 return new pass_ipa_inline (ctxt);
3132 }