gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Inlining decision heuristics
22
23    The implementation of the inliner is organized as follows:
24
25 inlining heuristics limits
26
27        can_inline_edge_p allows checking that a particular inlining is allowed
28        by the limits specified by the user (allowed function body growth,
29        stack growth and so on).
30
31        Functions are inlined when it is obvious that the result is profitable (such
32        as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
35
36 inlining heuristics
37
38 The inliner itself is split into two passes:
39
40 pass_early_inlining
41
42      A simple local inlining pass that inlines callees into the current
43      function.  This pass makes no use of whole-unit analysis and thus can
44      make only very simple decisions based on local properties.
45
46      The strength of the pass is that it is run in topological order
47      (reverse postorder) on the callgraph.  Functions are converted into SSA
48      form just before this pass and optimized subsequently.  As a result, the
49      callees of the function seen by the early inliner have already been
50      optimized, and the results of early inlining add a lot of optimization
51      opportunities for local optimization.
52
53      The pass handles the obvious inlining decisions within the compilation
54      unit - inlining auto-inline functions, inlining for size and
55      flattening.
56
57      The main strength of the pass is its ability to eliminate the abstraction
58      penalty in C++ code (via a combination of inlining and early optimization)
59      and thus improve the quality of analysis done by the real IPA optimizers;
60      see the illustrative example after this comment.
61
62      Because of the lack of whole-unit knowledge, the pass cannot really make
63      good code size/performance tradeoffs.  It however does very simple
64      speculative inlining, allowing code size to grow by
65      EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
66      the optimizations performed later are very likely to eliminate the cost.
67
68 pass_ipa_inline
69
70 This is the real inliner able to handle inlining with whole program
71      knowledge.  It performs the following steps:
72
73      1) inlining of small functions.  This is implemented by a greedy
74      algorithm that orders all inlinable cgraph edges by their badness and
75      inlines them in this order as long as the inline limits allow doing so.
76
77      This heuristic is not very good at inlining recursive calls.  Recursive
78      calls can be inlined with results similar to loop unrolling.  To do so,
79      a special-purpose recursive inliner is executed on the function when a
80      recursive edge is met as a viable candidate.
81
82      2) Unreachable functions are removed from the callgraph.  Inlining leads
83      to devirtualization and other modifications of the callgraph, so functions
84      may become unreachable during the process.  Also, functions declared
85      extern inline and virtual functions are removed, since after inlining
86      we no longer need the offline bodies.
87
88 3) Functions called once and not exported from the unit are inlined.
89      This should almost always lead to a reduction in code size by eliminating
90      the need for an offline copy of the function.  */
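/* Illustrative example (added for exposition, not part of the pass): the kind
   of abstraction penalty pass_early_inlining is meant to remove.  Given a
   trivial C++ accessor such as

     struct vec { int *data; int n; int length () const { return n; } };

     int
     first_quarter (const vec &v)
     {
       return v.length () / 4;
     }

   early inlining of length () followed by the local optimizations collapses
   first_quarter into a handful of instructions, so the real IPA passes see
   the cheap body rather than an opaque call.  Similarly, a static function
   called exactly once is inlined by pass_ipa_inline so that its offline copy
   can be removed.  */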
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
122
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
125
126 /* Statistics we collect about inlining algorithm. */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
130
131 /* Return false when inlining edge E would lead to violating
132 limits on function unit growth or stack usage growth.
133
134 The relative function body growth limit is present generally
135 to avoid problems with non-linear behavior of the compiler.
136    To allow inlining huge functions into a tiny wrapper, the limit
137    is always based on the bigger of the two functions considered.
138
139    For stack growth limits we always base the growth on the stack usage
140    of the caller.  We want to prevent applications from segfaulting
141    on stack overflow when functions with huge stack frames get
142    inlined.  */
143
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
146 {
147 struct cgraph_node *to = e->caller;
148 struct cgraph_node *what = e->callee->ultimate_alias_target ();
149 int newsize;
150 int limit = 0;
151 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152 ipa_size_summary *outer_info = ipa_size_summaries->get (to);
153
154   /* Look for the function e->caller is inlined into.  While doing
155      so, work out the largest function body on the way.  As
156      described above, we want to base our function growth
157      limits on that - not on the self size of the
158      outer function, nor on the self size of the inline code
159      we immediately inline into.  This is the most relaxed
160      interpretation of the rule "do not grow large functions
161      too much in order to prevent the compiler from exploding". */
162 while (true)
163 {
164 ipa_size_summary *size_info = ipa_size_summaries->get (to);
165 if (limit < size_info->self_size)
166 limit = size_info->self_size;
167 if (stack_size_limit < size_info->estimated_self_stack_size)
168 stack_size_limit = size_info->estimated_self_stack_size;
169 if (to->inlined_to)
170 to = to->callers->caller;
171 else
172 break;
173 }
174
175 ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176 ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
177
178 if (limit < what_size_info->self_size)
179 limit = what_size_info->self_size;
180
181 limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
182
183 /* Check the size after inlining against the function limits. But allow
184 the function to shrink if it went over the limits by forced inlining. */
185 newsize = estimate_size_after_inlining (to, e);
186 if (newsize >= ipa_size_summaries->get (what)->size
187 && newsize > opt_for_fn (to->decl, param_large_function_insns)
188 && newsize > limit)
189 {
190 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191 return false;
192 }
193
194 if (!what_info->estimated_stack_size)
195 return true;
196
197   /* FIXME: The stack size limit often prevents inlining in Fortran programs
198      due to large I/O data structures used by the Fortran front-end.
199      We ought to ignore this limit when we know that the edge is executed
200      on every invocation of the caller (i.e. its call statement dominates
201      the exit block).  We do not track this information, yet.  */
202 stack_size_limit += ((gcov_type)stack_size_limit
203 * opt_for_fn (to->decl, param_stack_frame_growth)
204 / 100);
205
206 inlined_stack = (ipa_get_stack_frame_offset (to)
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209   /* Check the new stack consumption against the stack consumption at the
210      place the stack is used.  */
211 if (inlined_stack > stack_size_limit
212       /* If the function already has large stack usage from a sibling
213          inline call, we can inline, too.
214          This bit overoptimistically assumes that we are good at stack
215          packing.  */
216 && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217 && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
218 {
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
221 }
222 return true;
223 }
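/* Worked example for caller_growth_limits (hypothetical numbers, added for
   exposition).  Suppose the largest body on the inline chain has self_size
   500, the callee's self_size is 300 and param_large_function_growth is 100.
   The limit starts at 500 (the bigger of the two bodies) and becomes
   500 + 500 * 100 / 100 = 1000.  An estimated size after inlining of 750
   passes the check; a size that exceeds 1000, is no smaller than the callee
   and also exceeds param_large_function_insns fails with
   CIF_LARGE_FUNCTION_GROWTH_LIMIT.  The stack check is analogous: the
   combined frame is compared against the largest self frame on the chain
   scaled by param_stack_frame_growth, with param_large_stack_frame acting as
   a floor below which the stack check never fails.  */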
224
225 /* Dump info about why inlining has failed. */
226
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
229 {
230 if (dump_enabled_p ())
231 {
232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 " not inlinable: %C -> %C, %s\n",
234 e->caller, e->callee,
235 cgraph_inline_failed_string (e->inline_failed));
236 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 && e->caller->lto_file_data
239 && e->callee->ultimate_alias_target ()->lto_file_data)
240 {
241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 " LTO objects: %s, %s\n",
243 e->caller->lto_file_data->file_name,
244 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
245 }
246 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 if (dump_file)
248 cl_target_option_print_diff
249 (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 if (dump_file)
253 cl_optimization_print_diff
254 (dump_file, 2, opts_for_fn (e->caller->decl),
255 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
256 }
257 }
258
259 /* Decide whether sanitizer-related attributes allow inlining. */
260
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
263 {
264 if (!caller || !callee)
265 return true;
266
267 /* Follow clang and allow inlining for always_inline functions. */
268 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
269 return true;
270
271 const sanitize_code codes[] =
272 {
273 SANITIZE_ADDRESS,
274 SANITIZE_THREAD,
275 SANITIZE_UNDEFINED,
276 SANITIZE_UNDEFINED_NONDEFAULT,
277 SANITIZE_POINTER_COMPARE,
278 SANITIZE_POINTER_SUBTRACT
279 };
280
281 for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
282 if (sanitize_flags_p (codes[i], caller)
283 != sanitize_flags_p (codes[i], callee))
284 return false;
285
286 return true;
287 }
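/* Example (added for exposition): if the callee was compiled with
   -fsanitize=address while the caller was not, or the caller carries
   __attribute__((no_sanitize ("address"))), sanitize_flags_p disagrees for
   SANITIZE_ADDRESS, the predicate above returns false and can_inline_edge_p
   later records CIF_SANITIZE_ATTRIBUTE_MISMATCH.  Marking the callee
   always_inline is the one escape hatch, mirroring clang's behavior.  */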
288
289 /* Used for flags where it is safe to inline when the caller's value is
290    greater than the callee's.  */
291 #define check_maybe_up(flag) \
292 (opts_for_fn (caller->decl)->x_##flag \
293 != opts_for_fn (callee->decl)->x_##flag \
294 && (!always_inline \
295 || opts_for_fn (caller->decl)->x_##flag \
296 < opts_for_fn (callee->decl)->x_##flag))
297 /* Used for flags where it is safe to inline when the caller's value is
298    smaller than the callee's.  */
299 #define check_maybe_down(flag) \
300 (opts_for_fn (caller->decl)->x_##flag \
301 != opts_for_fn (callee->decl)->x_##flag \
302 && (!always_inline \
303 || opts_for_fn (caller->decl)->x_##flag \
304 > opts_for_fn (callee->decl)->x_##flag))
305 /* Used for flags where exact match is needed for correctness. */
306 #define check_match(flag) \
307 (opts_for_fn (caller->decl)->x_##flag \
308 != opts_for_fn (callee->decl)->x_##flag)
309
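/* Usage sketch (added for exposition): with CALLER, CALLEE and ALWAYS_INLINE
   in scope as in can_inline_edge_by_limits_p below, a test such as

     check_maybe_up (flag_trapping_math)

   compares opts_for_fn (caller->decl)->x_flag_trapping_math against the
   callee's value.  For ordinary functions any difference counts as a
   mismatch; for always_inline callees the mismatch is tolerated only when
   the caller's setting is the larger one, so inlining can never drop a
   -ftrapping-math requirement the callee was compiled with.  */
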
310 /* Decide if we can inline the edge and possibly update
311    the inline_failed reason.
312    We check whether inlining is possible at all (body availability,
313    attribute and flag compatibility); size limits are checked
314    separately by can_inline_edge_by_limits_p.
315    If REPORT is true, output the reason to the dump file.  */
316
317 static bool
318 can_inline_edge_p (struct cgraph_edge *e, bool report,
319 bool early = false)
320 {
321 gcc_checking_assert (e->inline_failed);
322
323 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
324 {
325 if (report)
326 report_inline_failed_reason (e);
327 return false;
328 }
329
330 bool inlinable = true;
331 enum availability avail;
332 cgraph_node *caller = (e->caller->inlined_to
333 ? e->caller->inlined_to : e->caller);
334 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
335
336 if (!callee->definition)
337 {
338 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
339 inlinable = false;
340 }
341 if (!early && (!opt_for_fn (callee->decl, optimize)
342 || !opt_for_fn (caller->decl, optimize)))
343 {
344 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
345 inlinable = false;
346 }
347 else if (callee->calls_comdat_local)
348 {
349 e->inline_failed = CIF_USES_COMDAT_LOCAL;
350 inlinable = false;
351 }
352 else if (avail <= AVAIL_INTERPOSABLE)
353 {
354 e->inline_failed = CIF_OVERWRITABLE;
355 inlinable = false;
356 }
357 /* All edges with call_stmt_cannot_inline_p should have inline_failed
358 initialized to one of FINAL_ERROR reasons. */
359 else if (e->call_stmt_cannot_inline_p)
360 gcc_unreachable ();
361 /* Don't inline if the functions have different EH personalities. */
362 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
363 && DECL_FUNCTION_PERSONALITY (callee->decl)
364 && (DECL_FUNCTION_PERSONALITY (caller->decl)
365 != DECL_FUNCTION_PERSONALITY (callee->decl)))
366 {
367 e->inline_failed = CIF_EH_PERSONALITY;
368 inlinable = false;
369 }
370 /* TM pure functions should not be inlined into non-TM_pure
371 functions. */
372 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
373 {
374 e->inline_failed = CIF_UNSPECIFIED;
375 inlinable = false;
376 }
377 /* Check compatibility of target optimization options. */
378 else if (!targetm.target_option.can_inline_p (caller->decl,
379 callee->decl))
380 {
381 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
382 inlinable = false;
383 }
384 else if (ipa_fn_summaries->get (callee) == NULL
385 || !ipa_fn_summaries->get (callee)->inlinable)
386 {
387 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
388 inlinable = false;
389 }
390 /* Don't inline a function with mismatched sanitization attributes. */
391 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
392 {
393 e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
394 inlinable = false;
395 }
396 if (!inlinable && report)
397 report_inline_failed_reason (e);
398 return inlinable;
399 }
400
401 /* Return the inlining_insns_single limit for function N.  If HINT or HINT2
402    is true, scale up the bound.  */
403
404 static int
405 inline_insns_single (cgraph_node *n, bool hint, bool hint2)
406 {
407 if (hint && hint2)
408 {
409 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
410 spd = spd * spd;
411 if (spd > 1000000)
412 spd = 1000000;
413 return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
414 }
415 if (hint || hint2)
416 return opt_for_fn (n->decl, param_max_inline_insns_single)
417 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
418 return opt_for_fn (n->decl, param_max_inline_insns_single);
419 }
420
421 /* Return the inlining_insns_auto limit for function N.  If HINT or HINT2
422    is true, scale up the bound.  */
423
424 static int
425 inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
426 {
427 int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
428 if (hint && hint2)
429 {
430 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
431 spd = spd * spd;
432 if (spd > 1000000)
433 spd = 1000000;
434 return max_inline_insns_auto * spd / 100;
435 }
436 if (hint || hint2)
437 return max_inline_insns_auto
438 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
439 return max_inline_insns_auto;
440 }
441
442 /* Decide if we can inline the edge and possibly update
443    the inline_failed reason.
444    We check whether inlining is possible at all and whether the
445    caller growth limits allow doing so.
446
447    If REPORT is true, output the reason to the dump file.
448
449    If DISREGARD_LIMITS is true, ignore size limits.  */
450
451 static bool
452 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
453 bool disregard_limits = false, bool early = false)
454 {
455 gcc_checking_assert (e->inline_failed);
456
457 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
458 {
459 if (report)
460 report_inline_failed_reason (e);
461 return false;
462 }
463
464 bool inlinable = true;
465 enum availability avail;
466 cgraph_node *caller = (e->caller->inlined_to
467 ? e->caller->inlined_to : e->caller);
468 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
469 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
470 tree callee_tree
471 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
472 /* Check if caller growth allows the inlining. */
473 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
474 && !disregard_limits
475 && !lookup_attribute ("flatten",
476 DECL_ATTRIBUTES (caller->decl))
477 && !caller_growth_limits (e))
478 inlinable = false;
479 else if (callee->externally_visible
480 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
481 && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
482 {
483 e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
484 inlinable = false;
485 }
486 /* Don't inline a function with a higher optimization level than the
487      caller.  FIXME: this is really just the tip of the iceberg of handling
488      the optimization attribute.  */
489 else if (caller_tree != callee_tree)
490 {
491 bool always_inline =
492 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
493 && lookup_attribute ("always_inline",
494 DECL_ATTRIBUTES (callee->decl)));
495 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
496 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
497
498 /* Until GCC 4.9 we did not check the semantics-altering flags
499 below and inlined across optimization boundaries.
500        Enabling the checks below breaks several packages by refusing
501        to inline library always_inline functions.  See PR65873.
502        Disable the check for early inlining for now until a better
503        solution is found.  */
504 if (always_inline && early)
505 ;
506     /* There are some options that change IL semantics, which means
507        we cannot inline in these cases for correctness reasons -
508        not even for functions declared always_inline.  */
509 else if (check_match (flag_wrapv)
510 || check_match (flag_trapv)
511 || check_match (flag_pcc_struct_return)
512 || check_maybe_down (optimize_debug)
513              /* When the caller or callee does FP math, be sure the FP codegen
514                 flags are compatible.  */
515 || ((caller_info->fp_expressions && callee_info->fp_expressions)
516 && (check_maybe_up (flag_rounding_math)
517 || check_maybe_up (flag_trapping_math)
518 || check_maybe_down (flag_unsafe_math_optimizations)
519 || check_maybe_down (flag_finite_math_only)
520 || check_maybe_up (flag_signaling_nans)
521 || check_maybe_down (flag_cx_limited_range)
522 || check_maybe_up (flag_signed_zeros)
523 || check_maybe_down (flag_associative_math)
524 || check_maybe_down (flag_reciprocal_math)
525 || check_maybe_down (flag_fp_int_builtin_inexact)
526 /* Strictly speaking only when the callee contains function
527 calls that may end up setting errno. */
528 || check_maybe_up (flag_errno_math)))
529              /* We do not want code compiled with exceptions to be
530 brought into a non-EH function unless we know that the callee
531 does not throw.
532 This is tracked by DECL_FUNCTION_PERSONALITY. */
533 || (check_maybe_up (flag_non_call_exceptions)
534 && DECL_FUNCTION_PERSONALITY (callee->decl))
535 || (check_maybe_up (flag_exceptions)
536 && DECL_FUNCTION_PERSONALITY (callee->decl))
537              /* When devirtualization is disabled for the callee, it is not safe
538                 to inline it as we may have mangled the type info.
539                 Allow early inlining of always_inlines.  */
540 || (!early && check_maybe_down (flag_devirtualize)))
541 {
542 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
543 inlinable = false;
544 }
545 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
546 else if (always_inline)
547 ;
548 /* When user added an attribute to the callee honor it. */
549 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
550 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
551 {
552 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
553 inlinable = false;
554 }
555       /* If explicit optimize attributes are not used, the mismatch is caused
556          by different command line options used to build different units.
557          Do not care about COMDAT functions - those are intended to be
558          optimized with the optimization flags of the module they are used in.
559          Also do not care about mixing up size/speed optimization when
560          DECL_DISREGARD_INLINE_LIMITS is set.  */
561 else if ((callee->merged_comdat
562 && !lookup_attribute ("optimize",
563 DECL_ATTRIBUTES (caller->decl)))
564 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
565 ;
566       /* If the mismatch is caused by merging two LTO units with different
567          optimization flags, we want to be a bit nicer.  However, never inline
568          if one of the functions is not optimized at all.  */
569 else if (!opt_for_fn (callee->decl, optimize)
570 || !opt_for_fn (caller->decl, optimize))
571 {
572 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
573 inlinable = false;
574 }
575       /* If the callee is optimized for size and the caller is not, allow
576          inlining if the code shrinks, or we are within the param_max_inline_insns_single
577          limit and the callee is declared inline (and thus likely a unified comdat).
578          This will allow the caller to run faster.  */
579 else if (opt_for_fn (callee->decl, optimize_size)
580 > opt_for_fn (caller->decl, optimize_size))
581 {
582 int growth = estimate_edge_growth (e);
583 if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
584 && (!DECL_DECLARED_INLINE_P (callee->decl)
585 && growth >= MAX (inline_insns_single (caller, false, false),
586 inline_insns_auto (caller, false, false))))
587 {
588 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
589 inlinable = false;
590 }
591 }
592   /* If the callee is more aggressively optimized for performance than the
593      caller, we generally want to inline only cheap (runtime-wise) functions.  */
594 else if (opt_for_fn (callee->decl, optimize_size)
595 < opt_for_fn (caller->decl, optimize_size)
596 || (opt_for_fn (callee->decl, optimize)
597 > opt_for_fn (caller->decl, optimize)))
598 {
599 if (estimate_edge_time (e)
600 >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
601 {
602 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
603 inlinable = false;
604 }
605 }
606
607 }
608
609 if (!inlinable && report)
610 report_inline_failed_reason (e);
611 return inlinable;
612 }
613
614
615 /* Return true if the edge E is inlinable during early inlining. */
616
617 static bool
618 can_early_inline_edge_p (struct cgraph_edge *e)
619 {
620 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
621   /* The early inliner might get called at WPA stage when an IPA pass adds a
622      new function.  In this case we cannot really do any early inlining
623      because the function bodies are missing.  */
624 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
625 return false;
626 if (!gimple_has_body_p (callee->decl))
627 {
628 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
629 return false;
630 }
631   /* In the early inliner some of the callees may not be in SSA form yet
632      (i.e. the callgraph is cyclic and we did not process
633      the callee with the early inliner yet).  We don't have a CIF code for
634      this case; later we will re-do the decision in the real inliner.  */
635 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
636 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
637 {
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
640 " edge not inlinable: not in SSA form\n");
641 return false;
642 }
643 if (!can_inline_edge_p (e, true, true)
644 || !can_inline_edge_by_limits_p (e, true, false, true))
645 return false;
646 return true;
647 }
648
649
650 /* Return number of calls in N. Ignore cheap builtins. */
651
652 static int
653 num_calls (struct cgraph_node *n)
654 {
655 struct cgraph_edge *e;
656 int num = 0;
657
658 for (e = n->callees; e; e = e->next_callee)
659 if (!is_inexpensive_builtin (e->callee->decl))
660 num++;
661 return num;
662 }
663
664
665 /* Return true if we are interested in inlining the small function called by E early.  */
666
667 static bool
668 want_early_inline_function_p (struct cgraph_edge *e)
669 {
670 bool want_inline = true;
671 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
672
673 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
674 ;
675   /* For AutoFDO, we need to make sure that, before the profile summary, the
676      IR of all hot paths looks exactly the same as in the profiled binary.  As
677      a result, the early inliner disregards the size limit and inlines those
678      callsites that are:
679        * inlined in the profiled binary, and
680        * whose cloned callee has enough samples to be considered "hot".  */
681 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
682 ;
683 else if (!DECL_DECLARED_INLINE_P (callee->decl)
684 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
685 {
686 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
687 report_inline_failed_reason (e);
688 want_inline = false;
689 }
690 else
691 {
692 /* First take care of very large functions. */
693 int min_growth = estimate_min_edge_growth (e), growth = 0;
694 int n;
695 int early_inlining_insns = param_early_inlining_insns;
696
697 if (min_growth > early_inlining_insns)
698 {
699 if (dump_enabled_p ())
700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
701 " will not early inline: %C->%C, "
702 "call is cold and code would grow "
703 "at least by %i\n",
704 e->caller, callee,
705 min_growth);
706 want_inline = false;
707 }
708 else
709 growth = estimate_edge_growth (e);
710
711
712 if (!want_inline || growth <= param_max_inline_insns_size)
713 ;
714 else if (!e->maybe_hot_p ())
715 {
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
718 " will not early inline: %C->%C, "
719 "call is cold and code would grow by %i\n",
720 e->caller, callee,
721 growth);
722 want_inline = false;
723 }
724 else if (growth > early_inlining_insns)
725 {
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
728 " will not early inline: %C->%C, "
729 "growth %i exceeds --param early-inlining-insns\n",
730 e->caller, callee, growth);
731 want_inline = false;
732 }
733 else if ((n = num_calls (callee)) != 0
734 && growth * (n + 1) > early_inlining_insns)
735 {
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
738 " will not early inline: %C->%C, "
739 "growth %i exceeds --param early-inlining-insns "
740 "divided by number of calls\n",
741 e->caller, callee, growth);
742 want_inline = false;
743 }
744 }
745 return want_inline;
746 }
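/* Worked example (hypothetical numbers, added for exposition).  With
   param_early_inlining_insns set to N, a callee whose estimated growth at
   this call site is 6 and which still contains 2 non-builtin calls of its
   own is accepted only if 6 * (2 + 1) <= N: the growth is charged once per
   remaining call so that iterated early inlining cannot blow up the caller.
   A cold call site is accepted only when the growth stays within
   param_max_inline_insns_size.  */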
747
748 /* Compute time of the edge->caller + edge->callee execution when inlining
749 does not happen. */
750
751 inline sreal
752 compute_uninlined_call_time (struct cgraph_edge *edge,
753 sreal uninlined_call_time,
754 sreal freq)
755 {
756 cgraph_node *caller = (edge->caller->inlined_to
757 ? edge->caller->inlined_to
758 : edge->caller);
759
760 if (freq > 0)
761 uninlined_call_time *= freq;
762 else
763 uninlined_call_time = uninlined_call_time >> 11;
764
765 sreal caller_time = ipa_fn_summaries->get (caller)->time;
766 return uninlined_call_time + caller_time;
767 }
768
769 /* Same as compute_uninlined_call_time but compute the time when inlining
770    does happen.  */
771
772 inline sreal
773 compute_inlined_call_time (struct cgraph_edge *edge,
774 sreal time,
775 sreal freq)
776 {
777 cgraph_node *caller = (edge->caller->inlined_to
778 ? edge->caller->inlined_to
779 : edge->caller);
780 sreal caller_time = ipa_fn_summaries->get (caller)->time;
781
782 if (freq > 0)
783 time *= freq;
784 else
785 time = time >> 11;
786
787 /* This calculation should match one in ipa-inline-analysis.c
788 (estimate_edge_size_and_time). */
789 time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
790 time += caller_time;
791 if (time <= 0)
792 time = ((sreal) 1) >> 8;
793 gcc_checking_assert (time >= 0);
794 return time;
795 }
796
797 /* Determine the time saved by inlining EDGE of frequency FREQ,
798    where the callee's runtime w/o inlining is UNINLINED_TIME
799    and with inlining is INLINED_TIME.  */
800
801 inline sreal
802 inlining_speedup (struct cgraph_edge *edge,
803 sreal freq,
804 sreal uninlined_time,
805 sreal inlined_time)
806 {
807 sreal speedup = uninlined_time - inlined_time;
808 /* Handling of call_time should match one in ipa-inline-fnsummary.c
809 (estimate_edge_size_and_time). */
810 sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
811
812 if (freq > 0)
813 {
814 speedup = (speedup + call_time);
815 if (freq != 1)
816 speedup = speedup * freq;
817 }
818 else if (freq == 0)
819 speedup = speedup >> 11;
820 gcc_checking_assert (speedup >= 0);
821 return speedup;
822 }
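/* In other words (added for exposition), for a call executed FREQ times per
   invocation of the caller the estimated saving is

     speedup = freq * (uninlined_time - inlined_time + call_stmt_time)

   i.e. the per-call difference in body time plus the removed call overhead,
   scaled by how often the call runs.  Calls with zero frequency only count
   the body-time difference, scaled down by 2^11, so that the comparison
   stays well defined.  */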
823
824 /* Return true if the speedup for inlining E is bigger than
825 param_inline_min_speedup. */
826
827 static bool
828 big_speedup_p (struct cgraph_edge *e)
829 {
830 sreal unspec_time;
831 sreal spec_time = estimate_edge_time (e, &unspec_time);
832 sreal freq = e->sreal_frequency ();
833 sreal time = compute_uninlined_call_time (e, unspec_time, freq);
834 sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
835 cgraph_node *caller = (e->caller->inlined_to
836 ? e->caller->inlined_to
837 : e->caller);
838 int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
839
840 if ((time - inlined_time) * 100 > time * limit)
841 return true;
842 return false;
843 }
844
845 /* Return true if we are interested in inlining the small function called
846    by E.  When REPORT is true, report the reason to the dump file.  */
847
848 static bool
849 want_inline_small_function_p (struct cgraph_edge *e, bool report)
850 {
851 bool want_inline = true;
852 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
853 cgraph_node *to = (e->caller->inlined_to
854 ? e->caller->inlined_to : e->caller);
855
856 /* Allow this function to be called before can_inline_edge_p,
857 since it's usually cheaper. */
858 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
859 want_inline = false;
860 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
861 ;
862 else if (!DECL_DECLARED_INLINE_P (callee->decl)
863 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
864 {
865 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
866 want_inline = false;
867 }
868   /* Do a fast and conservative check whether the function can be a good
869      inline candidate.  */
870 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
871 && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
872 && ipa_fn_summaries->get (callee)->min_size
873 - ipa_call_summaries->get (e)->call_stmt_size
874 > inline_insns_auto (e->caller, true, true))
875 {
876 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
877 want_inline = false;
878 }
879 else if ((DECL_DECLARED_INLINE_P (callee->decl)
880 || e->count.ipa ().nonzero_p ())
881 && ipa_fn_summaries->get (callee)->min_size
882 - ipa_call_summaries->get (e)->call_stmt_size
883 > inline_insns_single (e->caller, true, true))
884 {
885 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
886 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
887 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
888 want_inline = false;
889 }
890 else
891 {
892 int growth = estimate_edge_growth (e);
893 ipa_hints hints = estimate_edge_hints (e);
894       /* We have two independent groups of hints.  If one hint matches in
895          each of the groups, the limits are increased.  If both groups match,
896          the limit is increased even more.  */
897 bool apply_hints = (hints & (INLINE_HINT_indirect_call
898 | INLINE_HINT_known_hot
899 | INLINE_HINT_loop_iterations
900 | INLINE_HINT_loop_stride));
901 bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
902
903 if (growth <= opt_for_fn (to->decl,
904 param_max_inline_insns_size))
905 ;
906       /* Apply the param_max_inline_insns_single limit.  Do not do so when
907          hints suggest that inlining the given function is very profitable.
908          Avoid the computation of big_speedup_p when it is not necessary to
909          change the outcome of the decision.  */
910 else if (DECL_DECLARED_INLINE_P (callee->decl)
911 && growth >= inline_insns_single (e->caller, apply_hints,
912 apply_hints2)
913 && (apply_hints || apply_hints2
914 || growth >= inline_insns_single (e->caller, true,
915 apply_hints2)
916 || !big_speedup_p (e)))
917 {
918 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
919 want_inline = false;
920 }
921 else if (!DECL_DECLARED_INLINE_P (callee->decl)
922 && !opt_for_fn (e->caller->decl, flag_inline_functions)
923 && growth >= opt_for_fn (to->decl,
924 param_max_inline_insns_small))
925 {
926 /* growth_positive_p is expensive, always test it last. */
927 if (growth >= inline_insns_single (e->caller, false, false)
928 || growth_positive_p (callee, e, growth))
929 {
930 e->inline_failed = CIF_NOT_DECLARED_INLINED;
931 want_inline = false;
932 }
933 }
934       /* Apply the param_max_inline_insns_auto limit for functions not
935          declared inline.  Bypass the limit when the speedup seems big.  */
936 else if (!DECL_DECLARED_INLINE_P (callee->decl)
937 && growth >= inline_insns_auto (e->caller, apply_hints,
938 apply_hints2)
939 && (apply_hints || apply_hints2
940 || growth >= inline_insns_auto (e->caller, true,
941 apply_hints2)
942 || !big_speedup_p (e)))
943 {
944 /* growth_positive_p is expensive, always test it last. */
945 if (growth >= inline_insns_single (e->caller, false, false)
946 || growth_positive_p (callee, e, growth))
947 {
948 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
949 want_inline = false;
950 }
951 }
952       /* If the call is cold, do not inline when the function body would grow.  */
953 else if (!e->maybe_hot_p ()
954 && (growth >= inline_insns_single (e->caller, false, false)
955 || growth_positive_p (callee, e, growth)))
956 {
957 e->inline_failed = CIF_UNLIKELY_CALL;
958 want_inline = false;
959 }
960 }
961 if (!want_inline && report)
962 report_inline_failed_reason (e);
963 return want_inline;
964 }
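/* Example of the hint handling above (added for exposition).  A call that
   carries INLINE_HINT_loop_stride (first group) but not
   INLINE_HINT_builtin_constant_p (second group) is measured against
   inline_insns_single (caller, true, false), i.e. the base limit scaled by
   param_inline_heuristics_hint_percent; a call carrying hints from both
   groups gets the doubly scaled inline_insns_single (caller, true, true)
   bound.  big_speedup_p is consulted only when no hint applies and the
   growth falls between the unscaled and the hint-scaled limit.  */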
965
966 /* EDGE is a self-recursive edge.
967    We handle two cases - when function A is inlined into itself
968    or when function A is being inlined into another inline copy of function
969    A within function B.
970
971    In the first case OUTER_NODE points to the toplevel copy of A, while
972    in the second case OUTER_NODE points to the outermost copy of A in B.
973
974    In both cases we want to be extra selective, since
975    inlining the call will just cause new recursive calls to appear.  */
976
977 static bool
978 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
979 struct cgraph_node *outer_node,
980 bool peeling,
981 int depth)
982 {
983 char const *reason = NULL;
984 bool want_inline = true;
985 sreal caller_freq = 1;
986 int max_depth = opt_for_fn (outer_node->decl,
987 param_max_inline_recursive_depth_auto);
988
989 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
990 max_depth = opt_for_fn (outer_node->decl,
991 param_max_inline_recursive_depth);
992
993 if (!edge->maybe_hot_p ())
994 {
995 reason = "recursive call is cold";
996 want_inline = false;
997 }
998 else if (depth > max_depth)
999 {
1000 reason = "--param max-inline-recursive-depth exceeded.";
1001 want_inline = false;
1002 }
1003 else if (outer_node->inlined_to
1004 && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1005 {
1006 reason = "caller frequency is 0";
1007 want_inline = false;
1008 }
1009
1010 if (!want_inline)
1011 ;
1012   /* Inlining of a self-recursive function into a copy of itself within
1013      another function is a transformation similar to loop peeling.
1014
1015      Peeling is profitable if we can inline enough copies to make the
1016      probability of an actual call to the self-recursive function very small.
1017      Be sure that the probability of recursion is small.
1018
1019      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1020      This way the expected number of recursions is at most max_depth.  */
1021 else if (peeling)
1022 {
1023 sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1024 int i;
1025 for (i = 1; i < depth; i++)
1026 max_prob = max_prob * max_prob;
1027 if (edge->sreal_frequency () >= max_prob * caller_freq)
1028 {
1029 reason = "frequency of recursive call is too large";
1030 want_inline = false;
1031 }
1032 }
1033   /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
1034      the recursion depth is large.  We reduce function call overhead and
1035      increase the chances that things fit in the hardware return predictor.
1036
1037      Recursive inlining might however increase the cost of stack frame setup,
1038      actually slowing down functions whose recursion tree is wide rather than
1039      deep.
1040
1041      Deciding reliably on when to do recursive inlining without profile
1042      feedback is tricky.  For now we disable recursive inlining when the
1043      probability of self-recursion is low.
1044
1045      Recursive inlining of a self-recursive call within a loop also results in
1046      large loop depths that generally optimize badly.  We may want to throttle
1047      down inlining in those cases.  In particular this seems to happen in one
1048      of the libstdc++ rb tree methods.  */
1049 else
1050 {
1051 if (edge->sreal_frequency () * 100
1052 <= caller_freq
1053 * opt_for_fn (outer_node->decl,
1054 param_min_inline_recursive_probability))
1055 {
1056 reason = "frequency of recursive call is too small";
1057 want_inline = false;
1058 }
1059 }
1060 if (!want_inline && dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1062 " not inlining recursively: %s\n", reason);
1063 return want_inline;
1064 }
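/* Illustration (added for exposition).  For a self-recursive function such as

     static long
     walk (struct node *n, long acc)
     {
       if (!n)
         return acc;
       return walk (n->next, acc + n->val);
     }

   inlining walk into its own copy inside some caller peels one level of the
   recursion, much like peeling a loop iteration; that pays off only when
   most invocations terminate within the peeled copies, which is why the
   peeling branch above requires the recursive edge's frequency to stay below
   a bound derived from 1 - 1/max_depth.  The non-peeling branch is the
   unrolling analogue and instead requires the recursion probability not to
   be too small.  */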
1065
1066 /* Return true when NODE has an uninlinable caller;
1067    set HAS_HOT_CALL if it has a hot call.
1068    Worker for cgraph_for_node_and_aliases.  */
1069
1070 static bool
1071 check_callers (struct cgraph_node *node, void *has_hot_call)
1072 {
1073 struct cgraph_edge *e;
1074 for (e = node->callers; e; e = e->next_caller)
1075 {
1076 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1077 || !opt_for_fn (e->caller->decl, optimize))
1078 return true;
1079 if (!can_inline_edge_p (e, true))
1080 return true;
1081 if (e->recursive_p ())
1082 return true;
1083 if (!can_inline_edge_by_limits_p (e, true))
1084 return true;
1085 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1086 *(bool *)has_hot_call = true;
1087 }
1088 return false;
1089 }
1090
1091 /* If NODE has a caller, return true. */
1092
1093 static bool
1094 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1095 {
1096 if (node->callers)
1097 return true;
1098 return false;
1099 }
1100
1101 /* Decide if inlining NODE would reduce unit size by eliminating
1102    the offline copy of the function.
1103    When COLD is true, cold calls are considered, too.  */
1104
1105 static bool
1106 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1107 {
1108 bool has_hot_call = false;
1109
1110   /* Aliases get inlined along with the function they alias.  */
1111 if (node->alias)
1112 return false;
1113 /* Already inlined? */
1114 if (node->inlined_to)
1115 return false;
1116 /* Does it have callers? */
1117 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1118 return false;
1119 /* Inlining into all callers would increase size? */
1120 if (growth_positive_p (node, NULL, INT_MIN) > 0)
1121 return false;
1122 /* All inlines must be possible. */
1123 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1124 true))
1125 return false;
1126 if (!cold && !has_hot_call)
1127 return false;
1128 return true;
1129 }
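/* Example (added for exposition): a file-local helper such as

     static int scale (int x) { return 3 * x + 1; }

   with a few call sites qualifies here, provided inlining every call does
   not increase the unit size (growth_positive_p) and each edge passes the
   can_inline_edge_p checks; when COLD is false at least one call must also
   be hot.  The payoff is that the offline copy of scale can then be
   removed.  */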
1130
1131 /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1132 in estimate_edge_badness. */
1133
1134 static bool
1135 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1136 {
1137 return size < (DECL_DECLARED_INLINE_P (where->decl)
1138 ? inline_insns_single (where, false, false)
1139 : inline_insns_auto (where, false, false));
1140 }
1141
1142 /* A cost model driving the inlining heuristics in such a way that the edges
1143    with the smallest badness are inlined first.  After each inlining is
1144    performed, the costs of all caller edges of the nodes affected are
1145    recomputed, so the metrics may accurately depend on values such as the
1146    number of inlinable callers of the function or the function body size.  */
1147
1148 static sreal
1149 edge_badness (struct cgraph_edge *edge, bool dump)
1150 {
1151 sreal badness;
1152 int growth;
1153 sreal edge_time, unspec_edge_time;
1154 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1155 class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1156 ipa_hints hints;
1157 cgraph_node *caller = (edge->caller->inlined_to
1158 ? edge->caller->inlined_to
1159 : edge->caller);
1160
1161 growth = estimate_edge_growth (edge);
1162 edge_time = estimate_edge_time (edge, &unspec_edge_time);
1163 hints = estimate_edge_hints (edge);
1164 gcc_checking_assert (edge_time >= 0);
1165 /* Check that inlined time is better, but tolerate some roundoff issues.
1166 FIXME: When callee profile drops to 0 we account calls more. This
1167 should be fixed by never doing that. */
1168 gcc_checking_assert ((edge_time * 100
1169 - callee_info->time * 101).to_int () <= 0
1170 || callee->count.ipa ().initialized_p ());
1171 gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1172
1173 if (dump)
1174 {
1175 fprintf (dump_file, " Badness calculation for %s -> %s\n",
1176 edge->caller->dump_name (),
1177 edge->callee->dump_name ());
1178 fprintf (dump_file, " size growth %i, time %f unspec %f ",
1179 growth,
1180 edge_time.to_double (),
1181 unspec_edge_time.to_double ());
1182 ipa_dump_hints (dump_file, hints);
1183 if (big_speedup_p (edge))
1184 fprintf (dump_file, " big_speedup");
1185 fprintf (dump_file, "\n");
1186 }
1187
1188 /* Always prefer inlining saving code size. */
1189 if (growth <= 0)
1190 {
1191 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1192 if (dump)
1193 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1194 growth);
1195 }
1196 /* Inlining into EXTERNAL functions is not going to change anything unless
1197 they are themselves inlined. */
1198 else if (DECL_EXTERNAL (caller->decl))
1199 {
1200 if (dump)
1201 fprintf (dump_file, " max: function is external\n");
1202 return sreal::max ();
1203 }
1204   /* When the profile is available, compute badness as:
1205
1206                            time_saved * caller_count
1207      goodness = -------------------------------------------------
1208                 growth_of_caller * overall_growth * combined_size
1209
1210      badness = - goodness
1211
1212      Again use a negative value to make calls with a profile appear hotter
1213      than calls without.
1214    */
1215 else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1216 || caller->count.ipa ().nonzero_p ())
1217 {
1218 sreal numerator, denominator;
1219 int overall_growth;
1220 sreal freq = edge->sreal_frequency ();
1221
1222 numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1223 if (numerator <= 0)
1224 numerator = ((sreal) 1 >> 8);
1225 if (caller->count.ipa ().nonzero_p ())
1226 numerator *= caller->count.ipa ().to_gcov_type ();
1227 else if (caller->count.ipa ().initialized_p ())
1228 numerator = numerator >> 11;
1229 denominator = growth;
1230
1231 overall_growth = callee_info->growth;
1232
1233 /* Look for inliner wrappers of the form:
1234
1235 inline_caller ()
1236 {
1237 do_fast_job...
1238 if (need_more_work)
1239 noninline_callee ();
1240 }
1241 Without penalizing this case, we usually inline noninline_callee
1242 into the inline_caller because overall_growth is small preventing
1243 further inlining of inline_caller.
1244
1245 Penalize only callgraph edges to functions with small overall
1246 growth ...
1247 */
1248 if (growth > overall_growth
1249 /* ... and having only one caller which is not inlined ... */
1250 && callee_info->single_caller
1251 && !edge->caller->inlined_to
1252 /* ... and edges executed only conditionally ... */
1253 && freq < 1
1254 /* ... consider case where callee is not inline but caller is ... */
1255 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1256 && DECL_DECLARED_INLINE_P (caller->decl))
1257 /* ... or when early optimizers decided to split and edge
1258 frequency still indicates splitting is a win ... */
1259 || (callee->split_part && !caller->split_part
1260 && freq * 100
1261 < opt_for_fn (caller->decl,
1262 param_partial_inlining_entry_probability)
1263 /* ... and do not overwrite user specified hints. */
1264 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1265 || DECL_DECLARED_INLINE_P (caller->decl)))))
1266 {
1267 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1268 int caller_growth = caller_info->growth;
1269
1270          /* Only apply the penalty when the caller looks like an inline
1271             candidate, and it is not called once.  */
1272 if (!caller_info->single_caller && overall_growth < caller_growth
1273 && caller_info->inlinable
1274 && wrapper_heuristics_may_apply
1275 (caller, ipa_size_summaries->get (caller)->size))
1276 {
1277 if (dump)
1278 fprintf (dump_file,
1279 " Wrapper penalty. Increasing growth %i to %i\n",
1280 overall_growth, caller_growth);
1281 overall_growth = caller_growth;
1282 }
1283 }
1284 if (overall_growth > 0)
1285 {
1286          /* Strongly prefer functions with few callers that can be inlined
1287             fully.  Squaring the growth here leads to smaller binaries on
1288             average.  Watch however for extreme cases and return to a linear
1289             function when growth is large.  */
1290 if (overall_growth < 256)
1291 overall_growth *= overall_growth;
1292 else
1293 overall_growth += 256 * 256 - 256;
1294 denominator *= overall_growth;
1295 }
1296 denominator *= ipa_size_summaries->get (caller)->size + growth;
1297
1298 badness = - numerator / denominator;
1299
1300 if (dump)
1301 {
1302 fprintf (dump_file,
1303 " %f: guessed profile. frequency %f, count %" PRId64
1304 " caller count %" PRId64
1305 " time saved %f"
1306 " overall growth %i (current) %i (original)"
1307 " %i (compensated)\n",
1308 badness.to_double (),
1309 freq.to_double (),
1310 edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1311 caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1312 inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1313 estimate_growth (callee),
1314 callee_info->growth, overall_growth);
1315 }
1316 }
1317   /* When the function-local profile is not available or does not give
1318      useful information (i.e. the frequency is zero), base the cost on the
1319      loop nest and overall size growth, so we optimize for the overall number
1320      of functions fully inlined in the program.  */
1321 else
1322 {
1323 int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1324 badness = growth;
1325
1326 /* Decrease badness if call is nested. */
1327 if (badness > 0)
1328 badness = badness >> nest;
1329 else
1330 badness = badness << nest;
1331 if (dump)
1332 fprintf (dump_file, " %f: no profile. nest %i\n",
1333 badness.to_double (), nest);
1334 }
1335 gcc_checking_assert (badness != 0);
1336
1337 if (edge->recursive_p ())
1338 badness = badness.shift (badness > 0 ? 4 : -4);
1339 if ((hints & (INLINE_HINT_indirect_call
1340 | INLINE_HINT_loop_iterations
1341 | INLINE_HINT_loop_stride))
1342 || callee_info->growth <= 0)
1343 badness = badness.shift (badness > 0 ? -2 : 2);
1344 if (hints & INLINE_HINT_builtin_constant_p)
1345 badness = badness.shift (badness > 0 ? -4 : 4);
1346 if (hints & (INLINE_HINT_same_scc))
1347 badness = badness.shift (badness > 0 ? 3 : -3);
1348 else if (hints & (INLINE_HINT_in_scc))
1349 badness = badness.shift (badness > 0 ? 2 : -2);
1350 else if (hints & (INLINE_HINT_cross_module))
1351 badness = badness.shift (badness > 0 ? 1 : -1);
1352 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1353 badness = badness.shift (badness > 0 ? -4 : 4);
1354 else if ((hints & INLINE_HINT_declared_inline))
1355 badness = badness.shift (badness > 0 ? -3 : 3);
1356 if (dump)
1357 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1358 return badness;
1359 }
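/* Worked example for the guessed-profile case above (hypothetical numbers,
   added for exposition).  For an edge with estimated time saved 2, caller
   size 100, growth 10 and callee overall growth 20, the badness is

     - 2 / (10 * (20 * 20) * (100 + 10))

   the overall growth being squared because it is below 256.  An edge whose
   growth is not positive bypasses this formula entirely and is ordered
   first via the large negative constant computed at the top of the
   function; the hint-based shifts at the end then nudge the result in
   either direction.  */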
1360
1361 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1362 static inline void
1363 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1364 {
1365 sreal badness = edge_badness (edge, false);
1366 if (edge->aux)
1367 {
1368 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1369 gcc_checking_assert (n->get_data () == edge);
1370
1371 /* fibonacci_heap::replace_key does busy updating of the
1372 heap that is unnecessarily expensive.
1373         We do lazy increases: after extracting the minimum, if the key
1374         turns out to be out of date, it is re-inserted into the heap
1375         with the correct value.  */
1376 if (badness < n->get_key ())
1377 {
1378 if (dump_file && (dump_flags & TDF_DETAILS))
1379 {
1380 fprintf (dump_file,
1381 " decreasing badness %s -> %s, %f to %f\n",
1382 edge->caller->dump_name (),
1383 edge->callee->dump_name (),
1384 n->get_key ().to_double (),
1385 badness.to_double ());
1386 }
1387 heap->decrease_key (n, badness);
1388 }
1389 }
1390 else
1391 {
1392 if (dump_file && (dump_flags & TDF_DETAILS))
1393 {
1394 fprintf (dump_file,
1395 " enqueuing call %s -> %s, badness %f\n",
1396 edge->caller->dump_name (),
1397 edge->callee->dump_name (),
1398 badness.to_double ());
1399 }
1400 edge->aux = heap->insert (badness, edge);
1401 }
1402 }
1403
1404
1405 /* NODE was inlined.
1406    All caller edges need to be reset because
1407    size estimates change.  Similarly, callees need to be reset
1408    because a better context may be known.  */
1409
1410 static void
1411 reset_edge_caches (struct cgraph_node *node)
1412 {
1413 struct cgraph_edge *edge;
1414 struct cgraph_edge *e = node->callees;
1415 struct cgraph_node *where = node;
1416 struct ipa_ref *ref;
1417
1418 if (where->inlined_to)
1419 where = where->inlined_to;
1420
1421 reset_node_cache (where);
1422
1423 if (edge_growth_cache != NULL)
1424 for (edge = where->callers; edge; edge = edge->next_caller)
1425 if (edge->inline_failed)
1426 edge_growth_cache->remove (edge);
1427
1428 FOR_EACH_ALIAS (where, ref)
1429 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1430
1431 if (!e)
1432 return;
1433
1434 while (true)
1435 if (!e->inline_failed && e->callee->callees)
1436 e = e->callee->callees;
1437 else
1438 {
1439 if (edge_growth_cache != NULL && e->inline_failed)
1440 edge_growth_cache->remove (e);
1441 if (e->next_callee)
1442 e = e->next_callee;
1443 else
1444 {
1445 do
1446 {
1447 if (e->caller == node)
1448 return;
1449 e = e->caller->callers;
1450 }
1451 while (!e->next_callee);
1452 e = e->next_callee;
1453 }
1454 }
1455 }
1456
1457 /* Recompute HEAP nodes for each caller of NODE.
1458    UPDATED_NODES tracks nodes we have already visited, to avoid redundant work.
1459    When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1460    it is inlinable.  Otherwise check all edges.  */
1461
1462 static void
1463 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1464 bitmap updated_nodes,
1465 struct cgraph_edge *check_inlinablity_for)
1466 {
1467 struct cgraph_edge *edge;
1468 struct ipa_ref *ref;
1469
1470 if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1471 || node->inlined_to)
1472 return;
1473 if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1474 return;
1475
1476 FOR_EACH_ALIAS (node, ref)
1477 {
1478 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1479 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1480 }
1481
1482 for (edge = node->callers; edge; edge = edge->next_caller)
1483 if (edge->inline_failed)
1484 {
1485 if (!check_inlinablity_for
1486 || check_inlinablity_for == edge)
1487 {
1488 if (can_inline_edge_p (edge, false)
1489 && want_inline_small_function_p (edge, false)
1490 && can_inline_edge_by_limits_p (edge, false))
1491 update_edge_key (heap, edge);
1492 else if (edge->aux)
1493 {
1494 report_inline_failed_reason (edge);
1495 heap->delete_node ((edge_heap_node_t *) edge->aux);
1496 edge->aux = NULL;
1497 }
1498 }
1499 else if (edge->aux)
1500 update_edge_key (heap, edge);
1501 }
1502 }
1503
1504 /* Recompute HEAP nodes for each uninlined call in NODE.
1505    If UPDATE_SINCE is non-NULL, check whether edges called within that
1506    function are inlinable (typically UPDATE_SINCE is the inline clone we
1507    introduced where all edges have a new context).
1508
1509    This is used when we know that edge badnesses are only going to increase
1510    (we introduced a new call site) and thus all we need is to insert the newly
1511    created edges into the heap.  */
1512
1513 static void
1514 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1515 struct cgraph_node *update_since,
1516 bitmap updated_nodes)
1517 {
1518 struct cgraph_edge *e = node->callees;
1519 bool check_inlinability = update_since == node;
1520
1521 if (!e)
1522 return;
1523 while (true)
1524 if (!e->inline_failed && e->callee->callees)
1525 {
1526 if (e->callee == update_since)
1527 check_inlinability = true;
1528 e = e->callee->callees;
1529 }
1530 else
1531 {
1532 enum availability avail;
1533 struct cgraph_node *callee;
1534 if (!check_inlinability)
1535 {
1536 if (e->aux
1537 && !bitmap_bit_p (updated_nodes,
1538 e->callee->ultimate_alias_target
1539 (&avail, e->caller)->get_uid ()))
1540 update_edge_key (heap, e);
1541 }
1542       /* We do not reset the callee growth cache here.  Since we added a new
1543          call, the growth should have just increased and consequently the
1544          badness metric doesn't need updating.  */
1545 else if (e->inline_failed
1546 && (callee = e->callee->ultimate_alias_target (&avail,
1547 e->caller))
1548 && avail >= AVAIL_AVAILABLE
1549 && ipa_fn_summaries->get (callee) != NULL
1550 && ipa_fn_summaries->get (callee)->inlinable
1551 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1552 {
1553 if (can_inline_edge_p (e, false)
1554 && want_inline_small_function_p (e, false)
1555 && can_inline_edge_by_limits_p (e, false))
1556 {
1557 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1558 gcc_checking_assert (check_inlinability || e->aux);
1559 update_edge_key (heap, e);
1560 }
1561 else if (e->aux)
1562 {
1563 report_inline_failed_reason (e);
1564 heap->delete_node ((edge_heap_node_t *) e->aux);
1565 e->aux = NULL;
1566 }
1567 }
1568       /* In case we redirected to an unreachable node, we only need to remove
1569          the fibheap entry.  */
1570 else if (e->aux)
1571 {
1572 heap->delete_node ((edge_heap_node_t *) e->aux);
1573 e->aux = NULL;
1574 }
1575 if (e->next_callee)
1576 e = e->next_callee;
1577 else
1578 {
1579 do
1580 {
1581 if (e->caller == node)
1582 return;
1583 if (e->caller == update_since)
1584 check_inlinability = false;
1585 e = e->caller->callers;
1586 }
1587 while (!e->next_callee);
1588 e = e->next_callee;
1589 }
1590 }
1591 }
1592
1593 /* Enqueue all recursive calls from NODE found in WHERE into the priority
1594    queue HEAP, keyed by how much we want to recursively inline the call.  */
1595
1596 static void
1597 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1598 edge_heap_t *heap)
1599 {
1600 struct cgraph_edge *e;
1601 enum availability avail;
1602
1603 for (e = where->callees; e; e = e->next_callee)
1604 if (e->callee == node
1605 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1606 && avail > AVAIL_INTERPOSABLE))
1607 heap->insert (-e->sreal_frequency (), e);
1608 for (e = where->callees; e; e = e->next_callee)
1609 if (!e->inline_failed)
1610 lookup_recursive_calls (node, e->callee, heap);
1611 }
1612
1613 /* Decide on recursive inlining: in case the function has recursive calls,
1614    inline until the body size reaches the given limit.  If any new indirect
1615    edges are discovered in the process, add them to *NEW_EDGES, unless
1616    NEW_EDGES is NULL.  */
1617
1618 static bool
1619 recursive_inlining (struct cgraph_edge *edge,
1620 vec<cgraph_edge *> *new_edges)
1621 {
1622 cgraph_node *to = (edge->caller->inlined_to
1623 ? edge->caller->inlined_to : edge->caller);
1624 int limit = opt_for_fn (to->decl,
1625 param_max_inline_insns_recursive_auto);
1626 edge_heap_t heap (sreal::min ());
1627 struct cgraph_node *node;
1628 struct cgraph_edge *e;
1629 struct cgraph_node *master_clone = NULL, *next;
1630 int depth = 0;
1631 int n = 0;
1632
1633 node = edge->caller;
1634 if (node->inlined_to)
1635 node = node->inlined_to;
1636
1637 if (DECL_DECLARED_INLINE_P (node->decl))
1638 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1639
1640 /* Make sure that function is small enough to be considered for inlining. */
1641 if (estimate_size_after_inlining (node, edge) >= limit)
1642 return false;
1643 lookup_recursive_calls (node, node, &heap);
1644 if (heap.empty ())
1645 return false;
1646
1647 if (dump_file)
1648 fprintf (dump_file,
1649 " Performing recursive inlining on %s\n", node->dump_name ());
1650
1651 /* Do the inlining and update the list of recursive calls during the process. */
1652 while (!heap.empty ())
1653 {
1654 struct cgraph_edge *curr = heap.extract_min ();
1655 struct cgraph_node *cnode, *dest = curr->callee;
1656
1657 if (!can_inline_edge_p (curr, true)
1658 || !can_inline_edge_by_limits_p (curr, true))
1659 continue;
1660
1661 /* MASTER_CLONE is produced when we have already started modifying
1662 the function. Be sure to redirect the edge to the original body before
1663 estimating growth, otherwise we would be seeing growth after inlining
1664 the already modified body. */
1665 if (master_clone)
1666 {
1667 curr->redirect_callee (master_clone);
1668 if (edge_growth_cache != NULL)
1669 edge_growth_cache->remove (curr);
1670 }
1671
1672 if (estimate_size_after_inlining (node, curr) > limit)
1673 {
1674 curr->redirect_callee (dest);
1675 if (edge_growth_cache != NULL)
1676 edge_growth_cache->remove (curr);
1677 break;
1678 }
1679
1680 depth = 1;
1681 for (cnode = curr->caller;
1682 cnode->inlined_to; cnode = cnode->callers->caller)
1683 if (node->decl
1684 == curr->callee->ultimate_alias_target ()->decl)
1685 depth++;
1686
1687 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1688 {
1689 curr->redirect_callee (dest);
1690 if (edge_growth_cache != NULL)
1691 edge_growth_cache->remove (curr);
1692 continue;
1693 }
1694
1695 if (dump_file)
1696 {
1697 fprintf (dump_file,
1698 " Inlining call of depth %i", depth);
1699 if (node->count.nonzero_p () && curr->count.initialized_p ())
1700 {
1701 fprintf (dump_file, " called approx. %.2f times per call",
1702 (double)curr->count.to_gcov_type ()
1703 / node->count.to_gcov_type ());
1704 }
1705 fprintf (dump_file, "\n");
1706 }
1707 if (!master_clone)
1708 {
1709 /* We need the original clone to copy around. */
1710 master_clone = node->create_clone (node->decl, node->count,
1711 false, vNULL, true, NULL, NULL);
1712 for (e = master_clone->callees; e; e = e->next_callee)
1713 if (!e->inline_failed)
1714 clone_inlined_nodes (e, true, false, NULL);
1715 curr->redirect_callee (master_clone);
1716 if (edge_growth_cache != NULL)
1717 edge_growth_cache->remove (curr);
1718 }
1719
1720 inline_call (curr, false, new_edges, &overall_size, true);
1721 reset_node_cache (node);
1722 lookup_recursive_calls (node, curr->callee, &heap);
1723 n++;
1724 }
1725
1726 if (!heap.empty () && dump_file)
1727 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1728
1729 if (!master_clone)
1730 return false;
1731
1732 if (dump_enabled_p ())
1733 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1734 "\n Inlined %i times, "
1735 "body grown from size %i to %i, time %f to %f\n", n,
1736 ipa_size_summaries->get (master_clone)->size,
1737 ipa_size_summaries->get (node)->size,
1738 ipa_fn_summaries->get (master_clone)->time.to_double (),
1739 ipa_fn_summaries->get (node)->time.to_double ());
1740
1741 /* Remove the master clone we used for inlining. We rely on the fact that
1742 clones inlined into the master clone get queued just before the master
1743 clone, so we don't need recursion. */
1744 for (node = symtab->first_function (); node != master_clone;
1745 node = next)
1746 {
1747 next = symtab->next_function (node);
1748 if (node->inlined_to == master_clone)
1749 node->remove ();
1750 }
1751 master_clone->remove ();
1752 return true;
1753 }
1754
1755
1756 /* Given whole compilation unit estimate of INSNS, compute how large we can
1757 allow the unit to grow. */
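/* For example, assuming --param inline-unit-growth=40 and a unit of
   100000 instructions (larger than --param large-unit-insns), the limit
   is 100000 * (100 + 40) / 100 = 140000 instructions.  */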
1758
1759 static int64_t
1760 compute_max_insns (cgraph_node *node, int insns)
1761 {
1762 int max_insns = insns;
1763 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1764 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1765
1766 return ((int64_t) max_insns
1767 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1768 }
1769
1770
1771 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1772
1773 static void
1774 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1775 {
1776 while (new_edges.length () > 0)
1777 {
1778 struct cgraph_edge *edge = new_edges.pop ();
1779
1780 gcc_assert (!edge->aux);
1781 gcc_assert (edge->callee);
1782 if (edge->inline_failed
1783 && can_inline_edge_p (edge, true)
1784 && want_inline_small_function_p (edge, true)
1785 && can_inline_edge_by_limits_p (edge, true))
1786 edge->aux = heap->insert (edge_badness (edge, false), edge);
1787 }
1788 }
1789
1790 /* Remove EDGE from the fibheap. */
1791
1792 static void
1793 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1794 {
1795 if (e->aux)
1796 {
1797 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1798 e->aux = NULL;
1799 }
1800 }
1801
1802 /* Return true if speculation of edge E seems useful.
1803 If ANTICIPATE_INLINING is true, be conservative and hope that E
1804 may get inlined. */
1805
1806 bool
1807 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1808 {
1809 /* If we have already decided to inline the edge, it seems useful. */
1810 if (!e->inline_failed)
1811 return true;
1812
1813 enum availability avail;
1814 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1815 e->caller);
1816
1817 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1818
1819 if (!e->maybe_hot_p ())
1820 return false;
1821
1822 /* See if IP optimizations found something potentially useful about the
1823 function. For now we look only for CONST/PURE flags. Almost everything
1824 else we propagate is useless. */
1825 if (avail >= AVAIL_AVAILABLE)
1826 {
1827 int ecf_flags = flags_from_decl_or_type (target->decl);
1828 if (ecf_flags & ECF_CONST)
1829 {
1830 if (!(e->speculative_call_indirect_edge ()->indirect_info
1831 ->ecf_flags & ECF_CONST))
1832 return true;
1833 }
1834 else if (ecf_flags & ECF_PURE)
1835 {
1836 if (!(e->speculative_call_indirect_edge ()->indirect_info
1837 ->ecf_flags & ECF_PURE))
1838 return true;
1839 }
1840 }
1841 /* If we did not manage to inline the function nor redirect
1842 to an ipa-cp clone (those are recognized by the local flag being set),
1843 it is probably pointless to keep the speculation unless the hardware is
1844 missing an indirect call predictor. */
1845 if (!anticipate_inlining && !target->local)
1846 return false;
1847 /* For overwritable targets there is not much to do. */
1848 if (!can_inline_edge_p (e, false)
1849 || !can_inline_edge_by_limits_p (e, false, true))
1850 return false;
1851 /* OK, speculation seems interesting. */
1852 return true;
1853 }
1854
1855 /* We know that EDGE is not going to be inlined.
1856 See if we can remove speculation. */
1857
1858 static void
1859 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1860 {
1861 if (edge->speculative && !speculation_useful_p (edge, false))
1862 {
1863 struct cgraph_node *node = edge->caller;
1864 struct cgraph_node *where = node->inlined_to
1865 ? node->inlined_to : node;
1866 auto_bitmap updated_nodes;
1867
1868 if (edge->count.ipa ().initialized_p ())
1869 spec_rem += edge->count.ipa ();
1870 cgraph_edge::resolve_speculation (edge);
1871 reset_edge_caches (where);
1872 ipa_update_overall_fn_summary (where);
1873 update_caller_keys (edge_heap, where,
1874 updated_nodes, NULL);
1875 update_callee_keys (edge_heap, where, NULL,
1876 updated_nodes);
1877 }
1878 }
1879
1880 /* Return true if NODE should be accounted for in the overall size estimate.
1881 Skip all nodes optimized for size so we can measure the growth of the hot
1882 part of the program regardless of the padding. */
1883
1884 bool
1885 inline_account_function_p (struct cgraph_node *node)
1886 {
1887 return (!DECL_EXTERNAL (node->decl)
1888 && !opt_for_fn (node->decl, optimize_size)
1889 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1890 }
1891
1892 /* Count the number of callers of NODE and store it into DATA (which
1893 points to an int). Worker for cgraph_for_node_and_aliases. */
1894
1895 static bool
1896 sum_callers (struct cgraph_node *node, void *data)
1897 {
1898 struct cgraph_edge *e;
1899 int *num_calls = (int *)data;
1900
1901 for (e = node->callers; e; e = e->next_caller)
1902 (*num_calls)++;
1903 return false;
1904 }
1905
1906 /* We only propagate across edges with non-interposable callee. */
1907
1908 inline bool
1909 ignore_edge_p (struct cgraph_edge *e)
1910 {
1911 enum availability avail;
1912 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1913 return (avail <= AVAIL_INTERPOSABLE);
1914 }
1915
1916 /* We use a greedy algorithm for inlining small functions:
1917 all inline candidates are put into a prioritized heap ordered by
1918 increasing badness.
1919
1920 The inlining of small functions is bounded by the unit growth parameters. */
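/* The heap is keyed by edge badness, smaller being better.  Every queued
   edge keeps a pointer to its heap node in edge->aux so that its key can
   be updated or the edge removed when function summaries change.  */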
1921
1922 static void
1923 inline_small_functions (void)
1924 {
1925 struct cgraph_node *node;
1926 struct cgraph_edge *edge;
1927 edge_heap_t edge_heap (sreal::min ());
1928 auto_bitmap updated_nodes;
1929 int min_size;
1930 auto_vec<cgraph_edge *> new_indirect_edges;
1931 int initial_size = 0;
1932 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1933 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1934 new_indirect_edges.create (8);
1935
1936 edge_removal_hook_holder
1937 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1938
1939 /* Compute overall unit size and other global parameters used by badness
1940 metrics. */
1941
1942 max_count = profile_count::uninitialized ();
1943 ipa_reduced_postorder (order, true, ignore_edge_p);
1944 free (order);
1945
1946 FOR_EACH_DEFINED_FUNCTION (node)
1947 if (!node->inlined_to)
1948 {
1949 if (!node->alias && node->analyzed
1950 && (node->has_gimple_body_p () || node->thunk)
1951 && opt_for_fn (node->decl, optimize))
1952 {
1953 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1954 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1955
1956 /* Do not account external functions; they will be optimized out
1957 if not inlined. Also only count the non-cold portion of the program. */
1958 if (inline_account_function_p (node))
1959 initial_size += ipa_size_summaries->get (node)->size;
1960 info->growth = estimate_growth (node);
1961
1962 int num_calls = 0;
1963 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1964 true);
1965 if (num_calls == 1)
1966 info->single_caller = true;
1967 if (dfs && dfs->next_cycle)
1968 {
1969 struct cgraph_node *n2;
1970 int id = dfs->scc_no + 1;
1971 for (n2 = node; n2;
1972 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1973 if (opt_for_fn (n2->decl, optimize))
1974 {
1975 ipa_fn_summary *info2 = ipa_fn_summaries->get
1976 (n2->inlined_to ? n2->inlined_to : n2);
1977 if (info2->scc_no)
1978 break;
1979 info2->scc_no = id;
1980 }
1981 }
1982 }
1983
1984 for (edge = node->callers; edge; edge = edge->next_caller)
1985 max_count = max_count.max (edge->count.ipa ());
1986 }
1987 ipa_free_postorder_info ();
1988 initialize_growth_caches ();
1989
1990 if (dump_file)
1991 fprintf (dump_file,
1992 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1993 initial_size);
1994
1995 overall_size = initial_size;
1996 min_size = overall_size;
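/* MIN_SIZE tracks the smallest overall unit size seen so far; it serves
   as the base of the unit growth limit computed by compute_max_insns.  */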
1997
1998 /* Populate the heap with all edges we might inline. */
1999
2000 FOR_EACH_DEFINED_FUNCTION (node)
2001 {
2002 bool update = false;
2003 struct cgraph_edge *next = NULL;
2004 bool has_speculative = false;
2005
2006 if (!opt_for_fn (node->decl, optimize))
2007 continue;
2008
2009 if (dump_file)
2010 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2011
2012 for (edge = node->callees; edge; edge = edge->next_callee)
2013 {
2014 if (edge->inline_failed
2015 && !edge->aux
2016 && can_inline_edge_p (edge, true)
2017 && want_inline_small_function_p (edge, true)
2018 && can_inline_edge_by_limits_p (edge, true)
2019 && edge->inline_failed)
2020 {
2021 gcc_assert (!edge->aux);
2022 update_edge_key (&edge_heap, edge);
2023 }
2024 if (edge->speculative)
2025 has_speculative = true;
2026 }
2027 if (has_speculative)
2028 for (edge = node->callees; edge; edge = next)
2029 {
2030 next = edge->next_callee;
2031 if (edge->speculative
2032 && !speculation_useful_p (edge, edge->aux != NULL))
2033 {
2034 cgraph_edge::resolve_speculation (edge);
2035 update = true;
2036 }
2037 }
2038 if (update)
2039 {
2040 struct cgraph_node *where = node->inlined_to
2041 ? node->inlined_to : node;
2042 ipa_update_overall_fn_summary (where);
2043 reset_edge_caches (where);
2044 update_caller_keys (&edge_heap, where,
2045 updated_nodes, NULL);
2046 update_callee_keys (&edge_heap, where, NULL,
2047 updated_nodes);
2048 bitmap_clear (updated_nodes);
2049 }
2050 }
2051
2052 gcc_assert (in_lto_p
2053 || !(max_count > 0)
2054 || (profile_info && flag_branch_probabilities));
2055
2056 while (!edge_heap.empty ())
2057 {
2058 int old_size = overall_size;
2059 struct cgraph_node *where, *callee;
2060 sreal badness = edge_heap.min_key ();
2061 sreal current_badness;
2062 int growth;
2063
2064 edge = edge_heap.extract_min ();
2065 gcc_assert (edge->aux);
2066 edge->aux = NULL;
2067 if (!edge->inline_failed || !edge->callee->analyzed)
2068 continue;
2069
2070 /* Be sure that the caches are maintained consistent.
2071 This check is affected by scaling roundoff errors when compiling for
2072 IPA, thus we skip it in that case. */
2073 if (flag_checking && !edge->callee->count.ipa_p ()
2074 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2075 {
2076 sreal cached_badness = edge_badness (edge, false);
2077
2078 int old_size_est = estimate_edge_size (edge);
2079 sreal old_time_est = estimate_edge_time (edge);
2080 int old_hints_est = estimate_edge_hints (edge);
2081
2082 if (edge_growth_cache != NULL)
2083 edge_growth_cache->remove (edge);
2084 reset_node_cache (edge->caller->inlined_to
2085 ? edge->caller->inlined_to
2086 : edge->caller);
2087 gcc_assert (old_size_est == estimate_edge_size (edge));
2088 gcc_assert (old_time_est == estimate_edge_time (edge));
2089 /* FIXME:
2090
2091 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2092
2093 fails with profile feedback because some hints depend on the
2094 maybe_hot_edge_p predicate and, because the callee gets inlined into other
2095 calls, the edge may become cold.
2096 This ought to be fixed by computing relative probabilities
2097 for the given invocation, but that will be better done once the whole
2098 code is converted to sreals. Disable for now and revert to the "wrong"
2099 value so the enable/disable checking paths agree. */
2100 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2101
2102 /* When updating the edge costs, we only decrease badness in the keys.
2103 Increases of badness are handled lazily; when we see a key with an
2104 out-of-date value on it, we re-insert it now. */
2105 current_badness = edge_badness (edge, false);
2106 gcc_assert (cached_badness == current_badness);
2107 gcc_assert (current_badness >= badness);
2108 }
2109 else
2110 current_badness = edge_badness (edge, false);
2111 if (current_badness != badness)
2112 {
2113 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2114 {
2115 edge->aux = edge_heap.insert (current_badness, edge);
2116 continue;
2117 }
2118 else
2119 badness = current_badness;
2120 }
2121
2122 if (!can_inline_edge_p (edge, true)
2123 || !can_inline_edge_by_limits_p (edge, true))
2124 {
2125 resolve_noninline_speculation (&edge_heap, edge);
2126 continue;
2127 }
2128
2129 callee = edge->callee->ultimate_alias_target ();
2130 growth = estimate_edge_growth (edge);
2131 if (dump_file)
2132 {
2133 fprintf (dump_file,
2134 "\nConsidering %s with %i size\n",
2135 callee->dump_name (),
2136 ipa_size_summaries->get (callee)->size);
2137 fprintf (dump_file,
2138 " to be inlined into %s in %s:%i\n"
2139 " Estimated badness is %f, frequency %.2f.\n",
2140 edge->caller->dump_name (),
2141 edge->call_stmt
2142 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2143 edge->call_stmt))
2144 > BUILTINS_LOCATION)
2145 ? gimple_filename ((const gimple *) edge->call_stmt)
2146 : "unknown",
2147 edge->call_stmt
2148 ? gimple_lineno ((const gimple *) edge->call_stmt)
2149 : -1,
2150 badness.to_double (),
2151 edge->sreal_frequency ().to_double ());
2152 if (edge->count.ipa ().initialized_p ())
2153 {
2154 fprintf (dump_file, " Called ");
2155 edge->count.ipa ().dump (dump_file);
2156 fprintf (dump_file, " times\n");
2157 }
2158 if (dump_flags & TDF_DETAILS)
2159 edge_badness (edge, true);
2160 }
2161
2162 where = edge->caller;
2163
2164 if (overall_size + growth > compute_max_insns (where, min_size)
2165 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2166 {
2167 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2168 report_inline_failed_reason (edge);
2169 resolve_noninline_speculation (&edge_heap, edge);
2170 continue;
2171 }
2172
2173 if (!want_inline_small_function_p (edge, true))
2174 {
2175 resolve_noninline_speculation (&edge_heap, edge);
2176 continue;
2177 }
2178
2179 profile_count old_count = callee->count;
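/* Remember the callee's offline profile count; if it changes as a result
   of inlining, the heap keys that depend on it are recomputed below.  */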
2180
2181 /* Heuristics for inlining small functions work poorly for
2182 recursive calls where we have effects similar to loop unrolling.
2183 When inlining such an edge seems profitable, leave the decision to the
2184 special-purpose recursive inliner. */
2185 if (edge->recursive_p ())
2186 {
2187 if (where->inlined_to)
2188 where = where->inlined_to;
2189 if (!recursive_inlining (edge,
2190 opt_for_fn (edge->caller->decl,
2191 flag_indirect_inlining)
2192 ? &new_indirect_edges : NULL))
2193 {
2194 edge->inline_failed = CIF_RECURSIVE_INLINING;
2195 resolve_noninline_speculation (&edge_heap, edge);
2196 continue;
2197 }
2198 reset_edge_caches (where);
2199 /* The recursive inliner inlines all recursive calls of the function
2200 at once. Consequently we need to update all callee keys. */
2201 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2202 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2203 update_callee_keys (&edge_heap, where, where, updated_nodes);
2204 bitmap_clear (updated_nodes);
2205 }
2206 else
2207 {
2208 struct cgraph_node *outer_node = NULL;
2209 int depth = 0;
2210
2211 /* Consider the case where the self-recursive function A is inlined
2212 into B. This is a desired optimization in some cases, since it
2213 leads to an effect similar to loop peeling and we might completely
2214 optimize out the recursive call. However we must be extra
2215 selective. */
2216
2217 where = edge->caller;
2218 while (where->inlined_to)
2219 {
2220 if (where->decl == callee->decl)
2221 outer_node = where, depth++;
2222 where = where->callers->caller;
2223 }
2224 if (outer_node
2225 && !want_inline_self_recursive_call_p (edge, outer_node,
2226 true, depth))
2227 {
2228 edge->inline_failed
2229 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2230 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2231 resolve_noninline_speculation (&edge_heap, edge);
2232 continue;
2233 }
2234 else if (depth && dump_file)
2235 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2236
2237 gcc_checking_assert (!callee->inlined_to);
2238
2239 int old_size = ipa_size_summaries->get (where)->size;
2240 sreal old_time = ipa_fn_summaries->get (where)->time;
2241
2242 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2243 reset_edge_caches (edge->callee);
2244 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2245
2246 /* If caller's size and time increased we do not need to update
2247 all edges because badness is not going to decrease. */
2248 if (old_size <= ipa_size_summaries->get (where)->size
2249 && old_time <= ipa_fn_summaries->get (where)->time
2250 /* The wrapper penalty may be non-monotonic in this respect.
2251 Fortunately it only affects small functions. */
2252 && !wrapper_heuristics_may_apply (where, old_size))
2253 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2254 updated_nodes);
2255 else
2256 update_callee_keys (&edge_heap, where,
2257 edge->callee,
2258 updated_nodes);
2259 }
2260 where = edge->caller;
2261 if (where->inlined_to)
2262 where = where->inlined_to;
2263
2264 /* Our profitability metric can depend on local properties
2265 such as the number of inlinable calls and the size of the function body.
2266 After inlining these properties might change for the function we
2267 inlined into (since its body size changed) and for the functions
2268 called by the function we inlined (since the number of their inlinable
2269 callers might change). */
2270 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2271 /* Offline copy count has possibly changed, recompute if profile is
2272 available. */
2273 struct cgraph_node *n
2274 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2275 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2276 && n->count.ipa_p ())
2277 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2278 bitmap_clear (updated_nodes);
2279
2280 if (dump_enabled_p ())
2281 {
2282 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2283
2284 /* dump_printf can't handle %+i. */
2285 char buf_net_change[100];
2286 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2287 overall_size - old_size);
2288
2289 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2290 " Inlined %C into %C which now has time %f and "
2291 "size %i, net change of %s%s.\n",
2292 edge->callee, edge->caller,
2293 s->time.to_double (),
2294 ipa_size_summaries->get (edge->caller)->size,
2295 buf_net_change,
2296 cross_module_call_p (edge) ? " (cross module)":"");
2297 }
2298 if (min_size > overall_size)
2299 {
2300 min_size = overall_size;
2301
2302 if (dump_file)
2303 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2304 }
2305 }
2306
2307 free_growth_caches ();
2308 if (dump_enabled_p ())
2309 dump_printf (MSG_NOTE,
2310 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2311 initial_size, overall_size,
2312 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2313 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2314 }
2315
2316 /* Flatten NODE. Performed both during early inlining and
2317 at IPA inlining time. */
2318
2319 static void
2320 flatten_function (struct cgraph_node *node, bool early, bool update)
2321 {
2322 struct cgraph_edge *e;
2323
2324 /* We shouldn't be called recursively when we are being processed. */
2325 gcc_assert (node->aux == NULL);
2326
2327 node->aux = (void *) node;
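/* node->aux acts as a cycle marker: it stays set while NODE is being
   flattened so the recursion below can detect and refuse cycles.  */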
2328
2329 for (e = node->callees; e; e = e->next_callee)
2330 {
2331 struct cgraph_node *orig_callee;
2332 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2333
2334 /* Have we hit a cycle? It is time to give up. */
2335 if (callee->aux)
2336 {
2337 if (dump_enabled_p ())
2338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2339 "Not inlining %C into %C to avoid cycle.\n",
2340 callee, e->caller);
2341 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2342 e->inline_failed = CIF_RECURSIVE_INLINING;
2343 continue;
2344 }
2345
2346 /* When the edge is already inlined, we just need to recurse into
2347 it in order to fully flatten the leaves. */
2348 if (!e->inline_failed)
2349 {
2350 flatten_function (callee, early, false);
2351 continue;
2352 }
2353
2354 /* The flatten attribute needs to be processed during late inlining. For
2355 extra code quality we however do flattening during early optimization,
2356 too. */
2357 if (!early
2358 ? !can_inline_edge_p (e, true)
2359 && !can_inline_edge_by_limits_p (e, true)
2360 : !can_early_inline_edge_p (e))
2361 continue;
2362
2363 if (e->recursive_p ())
2364 {
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2367 "Not inlining: recursive call.\n");
2368 continue;
2369 }
2370
2371 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2372 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2373 {
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2376 "Not inlining: SSA form does not match.\n");
2377 continue;
2378 }
2379
2380 /* Inline the edge and flatten the inline clone. Avoid
2381 recursing through the original node if the node was cloned. */
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2384 " Inlining %C into %C.\n",
2385 callee, e->caller);
2386 orig_callee = callee;
2387 inline_call (e, true, NULL, NULL, false);
2388 if (e->callee != orig_callee)
2389 orig_callee->aux = (void *) node;
2390 flatten_function (e->callee, early, false);
2391 if (e->callee != orig_callee)
2392 orig_callee->aux = NULL;
2393 }
2394
2395 node->aux = NULL;
2396 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2397 if (update && opt_for_fn (where->decl, optimize))
2398 ipa_update_overall_fn_summary (where);
2399 }
2400
2401 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2402 DATA points to the number of calls originally found, so we avoid infinite
2403 recursion. */
2404
2405 static bool
2406 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2407 hash_set<cgraph_node *> *callers)
2408 {
2409 int *num_calls = (int *)data;
2410 bool callee_removed = false;
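/* Inline NODE into its callers one by one; inline_call consumes the edge
   just inlined from node->callers, so the loop terminates once all calls
   are inlined, *num_calls is exhausted, or the callee gets removed.  */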
2411
2412 while (node->callers && !node->inlined_to)
2413 {
2414 struct cgraph_node *caller = node->callers->caller;
2415
2416 if (!can_inline_edge_p (node->callers, true)
2417 || !can_inline_edge_by_limits_p (node->callers, true)
2418 || node->callers->recursive_p ())
2419 {
2420 if (dump_file)
2421 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2422 *num_calls = 0;
2423 return false;
2424 }
2425
2426 if (dump_file)
2427 {
2428 cgraph_node *ultimate = node->ultimate_alias_target ();
2429 fprintf (dump_file,
2430 "\nInlining %s size %i.\n",
2431 ultimate->dump_name (),
2432 ipa_size_summaries->get (ultimate)->size);
2433 fprintf (dump_file,
2434 " Called once from %s %i insns.\n",
2435 node->callers->caller->dump_name (),
2436 ipa_size_summaries->get (node->callers->caller)->size);
2437 }
2438
2439 /* Remember which callers we inlined to, delaying updating the
2440 overall summary. */
2441 callers->add (node->callers->caller);
2442 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2443 if (dump_file)
2444 fprintf (dump_file,
2445 " Inlined into %s which now has %i size\n",
2446 caller->dump_name (),
2447 ipa_size_summaries->get (caller)->size);
2448 if (!(*num_calls)--)
2449 {
2450 if (dump_file)
2451 fprintf (dump_file, "New calls found; giving up.\n");
2452 return callee_removed;
2453 }
2454 if (callee_removed)
2455 return true;
2456 }
2457 return false;
2458 }
2459
2460 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2461 update. */
2462
2463 static bool
2464 inline_to_all_callers (struct cgraph_node *node, void *data)
2465 {
2466 hash_set<cgraph_node *> callers;
2467 bool res = inline_to_all_callers_1 (node, data, &callers);
2468 /* Perform the delayed update of the overall summary of all callers
2469 processed. This avoids quadratic behavior in the cases where
2470 we have a lot of calls to the same function. */
2471 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2472 i != callers.end (); ++i)
2473 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2474 return res;
2475 }
2476
2477 /* Output overall time estimate. */
2478 static void
2479 dump_overall_stats (void)
2480 {
2481 sreal sum_weighted = 0, sum = 0;
2482 struct cgraph_node *node;
2483
2484 FOR_EACH_DEFINED_FUNCTION (node)
2485 if (!node->inlined_to
2486 && !node->alias)
2487 {
2488 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2489 if (s != NULL)
2490 {
2491 sum += s->time;
2492 if (node->count.ipa ().initialized_p ())
2493 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2494 }
2495 }
2496 fprintf (dump_file, "Overall time estimate: "
2497 "%f weighted by profile: "
2498 "%f\n", sum.to_double (), sum_weighted.to_double ());
2499 }
2500
2501 /* Output some useful stats about inlining. */
2502
2503 static void
2504 dump_inline_stats (void)
2505 {
2506 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2507 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2508 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2509 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2510 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2511 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2512 int64_t reason[CIF_N_REASONS][2];
2513 sreal reason_freq[CIF_N_REASONS];
2514 int i;
2515 struct cgraph_node *node;
2516
2517 memset (reason, 0, sizeof (reason));
2518 for (i=0; i < CIF_N_REASONS; i++)
2519 reason_freq[i] = 0;
2520 FOR_EACH_DEFINED_FUNCTION (node)
2521 {
2522 struct cgraph_edge *e;
2523 for (e = node->callees; e; e = e->next_callee)
2524 {
2525 if (e->inline_failed)
2526 {
2527 if (e->count.ipa ().initialized_p ())
2528 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2529 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2530 reason[(int) e->inline_failed][1] ++;
2531 if (DECL_VIRTUAL_P (e->callee->decl)
2532 && e->count.ipa ().initialized_p ())
2533 {
2534 if (e->indirect_inlining_edge)
2535 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2536 else
2537 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2538 }
2539 else if (e->count.ipa ().initialized_p ())
2540 {
2541 if (e->indirect_inlining_edge)
2542 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2543 else
2544 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2545 }
2546 }
2547 else if (e->count.ipa ().initialized_p ())
2548 {
2549 if (e->speculative)
2550 {
2551 if (DECL_VIRTUAL_P (e->callee->decl))
2552 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2553 else
2554 inlined_speculative += e->count.ipa ().to_gcov_type ();
2555 }
2556 else if (DECL_VIRTUAL_P (e->callee->decl))
2557 {
2558 if (e->indirect_inlining_edge)
2559 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2560 else
2561 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2562 }
2563 else
2564 {
2565 if (e->indirect_inlining_edge)
2566 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2567 else
2568 inlined_cnt += e->count.ipa ().to_gcov_type ();
2569 }
2570 }
2571 }
2572 for (e = node->indirect_calls; e; e = e->next_callee)
2573 if (e->indirect_info->polymorphic
2574 && e->count.ipa ().initialized_p ())
2575 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2576 else if (e->count.ipa ().initialized_p ())
2577 indirect_cnt += e->count.ipa ().to_gcov_type ();
2578 }
2579 if (max_count.initialized_p ())
2580 {
2581 fprintf (dump_file,
2582 "Inlined %" PRId64 " + speculative "
2583 "%" PRId64 " + speculative polymorphic "
2584 "%" PRId64 " + previously indirect "
2585 "%" PRId64 " + virtual "
2586 "%" PRId64 " + virtual and previously indirect "
2587 "%" PRId64 "\n" "Not inlined "
2588 "%" PRId64 " + previously indirect "
2589 "%" PRId64 " + virtual "
2590 "%" PRId64 " + virtual and previously indirect "
2591 "%" PRId64 " + still indirect "
2592 "%" PRId64 " + still indirect polymorphic "
2593 "%" PRId64 "\n", inlined_cnt,
2594 inlined_speculative, inlined_speculative_ply,
2595 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2596 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2597 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2598 fprintf (dump_file, "Removed speculations ");
2599 spec_rem.dump (dump_file);
2600 fprintf (dump_file, "\n");
2601 }
2602 dump_overall_stats ();
2603 fprintf (dump_file, "\nWhy inlining failed?\n");
2604 for (i = 0; i < CIF_N_REASONS; i++)
2605 if (reason[i][1])
2606 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2607 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2608 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2609 }
2610
2611 /* Called when node is removed. */
2612
2613 static void
2614 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2615 {
2616 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2617 return;
2618
2619 hash_set<struct cgraph_node *> *removed
2620 = (hash_set<struct cgraph_node *> *) data;
2621 removed->add (node);
2622 }
2623
2624 /* Decide on the inlining. We do so in the topological order to avoid
2625 the expense of updating the data structures. */
2626
2627 static unsigned int
2628 ipa_inline (void)
2629 {
2630 struct cgraph_node *node;
2631 int nnodes;
2632 struct cgraph_node **order;
2633 int i, j;
2634 int cold;
2635 bool remove_functions = false;
2636
2637 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2638
2639 if (dump_file)
2640 ipa_dump_fn_summaries (dump_file);
2641
2642 nnodes = ipa_reverse_postorder (order);
2643 spec_rem = profile_count::zero ();
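/* spec_rem accumulates the profile count of speculative calls we resolve
   away; it is reported by dump_inline_stats.  */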
2644
2645 FOR_EACH_FUNCTION (node)
2646 {
2647 node->aux = 0;
2648
2649 /* Recompute the default reasons for inlining because they may have
2650 changed during merging. */
2651 if (in_lto_p)
2652 {
2653 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2654 {
2655 gcc_assert (e->inline_failed);
2656 initialize_inline_failed (e);
2657 }
2658 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2659 initialize_inline_failed (e);
2660 }
2661 }
2662
2663 if (dump_file)
2664 fprintf (dump_file, "\nFlattening functions:\n");
2665
2666 /* First shrink the order array, so that it only contains nodes with
2667 the flatten attribute. */
2668 for (i = nnodes - 1, j = i; i >= 0; i--)
2669 {
2670 node = order[i];
2671 if (node->definition
2672 /* Do not try to flatten aliases. These may happen for example when
2673 creating local aliases. */
2674 && !node->alias
2675 && lookup_attribute ("flatten",
2676 DECL_ATTRIBUTES (node->decl)) != NULL)
2677 order[j--] = order[i];
2678 }
2679
2680 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2681 nodes with flatten attribute. If there is more than one such
2682 node, we need to register a node removal hook, as flatten_function
2683 could remove other nodes with flatten attribute. See PR82801. */
2684 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2685 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2686 if (j < nnodes - 2)
2687 {
2688 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2689 node_removal_hook_holder
2690 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2691 flatten_removed_nodes);
2692 }
2693
2694 /* In the first pass handle functions to be flattened. Do this with
2695 a priority so none of our later choices will make this impossible. */
2696 for (i = nnodes - 1; i > j; i--)
2697 {
2698 node = order[i];
2699 if (flatten_removed_nodes
2700 && flatten_removed_nodes->contains (node))
2701 continue;
2702
2703 /* Handle nodes to be flattened.
2704 Ideally, when processing callees we would stop inlining at the
2705 entry of cycles, possibly cloning that entry point and
2706 trying to flatten it, turning it into a self-recursive
2707 function. */
2708 if (dump_file)
2709 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2710 flatten_function (node, false, true);
2711 }
2712
2713 if (j < nnodes - 2)
2714 {
2715 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2716 delete flatten_removed_nodes;
2717 }
2718 free (order);
2719
2720 if (dump_file)
2721 dump_overall_stats ();
2722
2723 inline_small_functions ();
2724
2725 gcc_assert (symtab->state == IPA_SSA);
2726 symtab->state = IPA_SSA_AFTER_INLINING;
2727 /* Do first after-inlining removal. We want to remove all "stale" extern
2728 inline functions and virtual functions so we really know what is called
2729 once. */
2730 symtab->remove_unreachable_nodes (dump_file);
2731
2732 /* Inline functions with the property that after inlining into all callers
2733 the code size will shrink because the out-of-line copy is eliminated.
2734 We do this regardless of the callee size as long as the function growth
2735 limits are met. */
2736 if (dump_file)
2737 fprintf (dump_file,
2738 "\nDeciding on functions to be inlined into all callers and "
2739 "removing useless speculations:\n");
2740
2741 /* Inlining one function called once has a good chance of preventing
2742 inlining of other functions into the same callee. Ideally we should
2743 work in priority order, but inlining hot functions first is probably
2744 a good cut without the extra pain of maintaining the queue.
2745
2746 ??? This does not really fit the bill perfectly: inlining a function
2747 into its callee often leads to better optimization of the callee due
2748 to the increased context.
2749 For example, if the main() function calls a function that outputs help
2750 and then a function that does the main optimization, we should inline
2751 the second with priority even if both calls are cold by themselves.
2752
2753 We probably want to implement a new predicate replacing our use of
2754 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2755 to be hot. */
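/* The first iteration (cold == 0) handles functions that are not cold;
   the second iteration also considers cold functions.  */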
2756 for (cold = 0; cold <= 1; cold ++)
2757 {
2758 FOR_EACH_DEFINED_FUNCTION (node)
2759 {
2760 struct cgraph_edge *edge, *next;
2761 bool update=false;
2762
2763 if (!opt_for_fn (node->decl, optimize)
2764 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2765 continue;
2766
2767 for (edge = node->callees; edge; edge = next)
2768 {
2769 next = edge->next_callee;
2770 if (edge->speculative && !speculation_useful_p (edge, false))
2771 {
2772 if (edge->count.ipa ().initialized_p ())
2773 spec_rem += edge->count.ipa ();
2774 cgraph_edge::resolve_speculation (edge);
2775 update = true;
2776 remove_functions = true;
2777 }
2778 }
2779 if (update)
2780 {
2781 struct cgraph_node *where = node->inlined_to
2782 ? node->inlined_to : node;
2783 reset_edge_caches (where);
2784 ipa_update_overall_fn_summary (where);
2785 }
2786 if (want_inline_function_to_all_callers_p (node, cold))
2787 {
2788 int num_calls = 0;
2789 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2790 true);
2791 while (node->call_for_symbol_and_aliases
2792 (inline_to_all_callers, &num_calls, true))
2793 ;
2794 remove_functions = true;
2795 }
2796 }
2797 }
2798
2799 if (dump_enabled_p ())
2800 dump_printf (MSG_NOTE,
2801 "\nInlined %i calls, eliminated %i functions\n\n",
2802 ncalls_inlined, nfunctions_inlined);
2803 if (dump_file)
2804 dump_inline_stats ();
2805
2806 if (dump_file)
2807 ipa_dump_fn_summaries (dump_file);
2808 return remove_functions ? TODO_remove_functions : 0;
2809 }
2810
2811 /* Inline always-inline function calls in NODE. */
2812
2813 static bool
2814 inline_always_inline_functions (struct cgraph_node *node)
2815 {
2816 struct cgraph_edge *e;
2817 bool inlined = false;
2818
2819 for (e = node->callees; e; e = e->next_callee)
2820 {
2821 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2822 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2823 continue;
2824
2825 if (e->recursive_p ())
2826 {
2827 if (dump_enabled_p ())
2828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2829 " Not inlining recursive call to %C.\n",
2830 e->callee);
2831 e->inline_failed = CIF_RECURSIVE_INLINING;
2832 continue;
2833 }
2834
2835 if (!can_early_inline_edge_p (e))
2836 {
2837 /* Set inlined to true if the callee is marked "always_inline" but
2838 is not inlinable. This will allow flagging an error later in
2839 expand_call_inline in tree-inline.c. */
2840 if (lookup_attribute ("always_inline",
2841 DECL_ATTRIBUTES (callee->decl)) != NULL)
2842 inlined = true;
2843 continue;
2844 }
2845
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2848 " Inlining %C into %C (always_inline).\n",
2849 e->callee, e->caller);
2850 inline_call (e, true, NULL, NULL, false);
2851 inlined = true;
2852 }
2853 if (inlined)
2854 ipa_update_overall_fn_summary (node);
2855
2856 return inlined;
2857 }
2858
2859 /* Decide on the inlining. We do so in the topological order to avoid
2860 the expense of updating the data structures. */
2861
2862 static bool
2863 early_inline_small_functions (struct cgraph_node *node)
2864 {
2865 struct cgraph_edge *e;
2866 bool inlined = false;
2867
2868 for (e = node->callees; e; e = e->next_callee)
2869 {
2870 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2871
2872 /* We can encounter a not-yet-analyzed function during
2873 early inlining on callgraphs with strongly
2874 connected components. */
2875 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2876 if (s == NULL || !s->inlinable || !e->inline_failed)
2877 continue;
2878
2879 /* Do not consider functions not declared inline. */
2880 if (!DECL_DECLARED_INLINE_P (callee->decl)
2881 && !opt_for_fn (node->decl, flag_inline_small_functions)
2882 && !opt_for_fn (node->decl, flag_inline_functions))
2883 continue;
2884
2885 if (dump_enabled_p ())
2886 dump_printf_loc (MSG_NOTE, e->call_stmt,
2887 "Considering inline candidate %C.\n",
2888 callee);
2889
2890 if (!can_early_inline_edge_p (e))
2891 continue;
2892
2893 if (e->recursive_p ())
2894 {
2895 if (dump_enabled_p ())
2896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2897 " Not inlining: recursive call.\n");
2898 continue;
2899 }
2900
2901 if (!want_early_inline_function_p (e))
2902 continue;
2903
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2906 " Inlining %C into %C.\n",
2907 callee, e->caller);
2908 inline_call (e, true, NULL, NULL, false);
2909 inlined = true;
2910 }
2911
2912 if (inlined)
2913 ipa_update_overall_fn_summary (node);
2914
2915 return inlined;
2916 }
2917
2918 unsigned int
2919 early_inliner (function *fun)
2920 {
2921 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2922 struct cgraph_edge *edge;
2923 unsigned int todo = 0;
2924 int iterations = 0;
2925 bool inlined = false;
2926
2927 if (seen_error ())
2928 return 0;
2929
2930 /* Do nothing if the data structures for the IPA inliner are already computed.
2931 This happens when some pass decides to construct a new function and
2932 cgraph_add_new_function calls lowering passes and early optimization on
2933 it. This may confuse us when the early inliner decides to inline a call to
2934 a function clone, because function clones don't have a parameter list in
2935 ipa-prop matching their signature. */
2936 if (ipa_node_params_sum)
2937 return 0;
2938
2939 if (flag_checking)
2940 node->verify ();
2941 node->remove_all_references ();
2942
2943 /* Even when not optimizing or not inlining, inline always-inline
2944 functions. */
2945 inlined = inline_always_inline_functions (node);
2946
2947 if (!optimize
2948 || flag_no_inline
2949 || !flag_early_inlining
2950 /* Never inline regular functions into always-inline functions
2951 during incremental inlining. This sucks as functions calling
2952 always inline functions will get less optimized, but at the
2953 same time inlining of functions calling always inline
2954 function into an always inline function might introduce
2955 cycles of edges to be always inlined in the callgraph.
2956
2957 We might want to be smarter and just avoid this type of inlining. */
2958 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2959 && lookup_attribute ("always_inline",
2960 DECL_ATTRIBUTES (node->decl))))
2961 ;
2962 else if (lookup_attribute ("flatten",
2963 DECL_ATTRIBUTES (node->decl)) != NULL)
2964 {
2965 /* When the function is marked to be flattened, recursively inline
2966 all calls in it. */
2967 if (dump_enabled_p ())
2968 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2969 "Flattening %C\n", node);
2970 flatten_function (node, true, true);
2971 inlined = true;
2972 }
2973 else
2974 {
2975 /* If some always_inline functions were inlined, apply the changes.
2976 This way we will not account always-inline functions in the growth limits
2977 and moreover we will inline calls from always-inline functions that we
2978 previously skipped because of the conditional above. */
2979 if (inlined)
2980 {
2981 timevar_push (TV_INTEGRATION);
2982 todo |= optimize_inline_calls (current_function_decl);
2983 /* optimize_inline_calls call above might have introduced new
2984 statements that don't have inline parameters computed. */
2985 for (edge = node->callees; edge; edge = edge->next_callee)
2986 {
2987 /* We can encounter a not-yet-analyzed function during
2988 early inlining on callgraphs with strongly
2989 connected components. */
2990 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2991 es->call_stmt_size
2992 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2993 es->call_stmt_time
2994 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2995 }
2996 ipa_update_overall_fn_summary (node);
2997 inlined = false;
2998 timevar_pop (TV_INTEGRATION);
2999 }
3000 /* We iterate incremental inlining to get trivial cases of indirect
3001 inlining. */
3002 while (iterations < opt_for_fn (node->decl,
3003 param_early_inliner_max_iterations)
3004 && early_inline_small_functions (node))
3005 {
3006 timevar_push (TV_INTEGRATION);
3007 todo |= optimize_inline_calls (current_function_decl);
3008
3009 /* Technically we ought to recompute inline parameters so the new
3010 iteration of the early inliner works as expected. We however have the
3011 values approximately right and thus we only need to update the edge
3012 info that might be cleared out for newly discovered edges. */
3013 for (edge = node->callees; edge; edge = edge->next_callee)
3014 {
3015 /* We have no summary for new bound store calls yet. */
3016 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3017 es->call_stmt_size
3018 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3019 es->call_stmt_time
3020 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3021 }
3022 if (iterations < opt_for_fn (node->decl,
3023 param_early_inliner_max_iterations) - 1)
3024 ipa_update_overall_fn_summary (node);
3025 timevar_pop (TV_INTEGRATION);
3026 iterations++;
3027 inlined = false;
3028 }
3029 if (dump_file)
3030 fprintf (dump_file, "Iterations: %i\n", iterations);
3031 }
3032
3033 if (inlined)
3034 {
3035 timevar_push (TV_INTEGRATION);
3036 todo |= optimize_inline_calls (current_function_decl);
3037 timevar_pop (TV_INTEGRATION);
3038 }
3039
3040 fun->always_inline_functions_inlined = true;
3041
3042 return todo;
3043 }
3044
3045 /* Do inlining of small functions. Doing so early helps profiling and other
3046 passes to be somewhat more effective and avoids some code duplication in
3047 later real inlining pass for testcases with very many function calls. */
3048
3049 namespace {
3050
3051 const pass_data pass_data_early_inline =
3052 {
3053 GIMPLE_PASS, /* type */
3054 "einline", /* name */
3055 OPTGROUP_INLINE, /* optinfo_flags */
3056 TV_EARLY_INLINING, /* tv_id */
3057 PROP_ssa, /* properties_required */
3058 0, /* properties_provided */
3059 0, /* properties_destroyed */
3060 0, /* todo_flags_start */
3061 0, /* todo_flags_finish */
3062 };
3063
3064 class pass_early_inline : public gimple_opt_pass
3065 {
3066 public:
3067 pass_early_inline (gcc::context *ctxt)
3068 : gimple_opt_pass (pass_data_early_inline, ctxt)
3069 {}
3070
3071 /* opt_pass methods: */
3072 virtual unsigned int execute (function *);
3073
3074 }; // class pass_early_inline
3075
3076 unsigned int
3077 pass_early_inline::execute (function *fun)
3078 {
3079 return early_inliner (fun);
3080 }
3081
3082 } // anon namespace
3083
3084 gimple_opt_pass *
3085 make_pass_early_inline (gcc::context *ctxt)
3086 {
3087 return new pass_early_inline (ctxt);
3088 }
3089
3090 namespace {
3091
3092 const pass_data pass_data_ipa_inline =
3093 {
3094 IPA_PASS, /* type */
3095 "inline", /* name */
3096 OPTGROUP_INLINE, /* optinfo_flags */
3097 TV_IPA_INLINING, /* tv_id */
3098 0, /* properties_required */
3099 0, /* properties_provided */
3100 0, /* properties_destroyed */
3101 0, /* todo_flags_start */
3102 ( TODO_dump_symtab ), /* todo_flags_finish */
3103 };
3104
3105 class pass_ipa_inline : public ipa_opt_pass_d
3106 {
3107 public:
3108 pass_ipa_inline (gcc::context *ctxt)
3109 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3110 NULL, /* generate_summary */
3111 NULL, /* write_summary */
3112 NULL, /* read_summary */
3113 NULL, /* write_optimization_summary */
3114 NULL, /* read_optimization_summary */
3115 NULL, /* stmt_fixup */
3116 0, /* function_transform_todo_flags_start */
3117 inline_transform, /* function_transform */
3118 NULL) /* variable_transform */
3119 {}
3120
3121 /* opt_pass methods: */
3122 virtual unsigned int execute (function *) { return ipa_inline (); }
3123
3124 }; // class pass_ipa_inline
3125
3126 } // anon namespace
3127
3128 ipa_opt_pass_d *
3129 make_pass_ipa_inline (gcc::context *ctxt)
3130 {
3131 return new pass_ipa_inline (ctxt);
3132 }