ipa-sra: Do not remove return values needed because of non-call EH
[gcc.git] / gcc / ipa-sra.c
/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2008-2021 Free Software Foundation, Inc.

   Contributed by Martin Jambor <mjambor@suse.cz>

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about functions
   and all calls are generated.  Function summaries (often called call graph
   node summaries) contain mainly information about which parameters are
   potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might not be necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments so that if two parameters are only
   used in a sum which is then passed to another function which, however, does
   not use that parameter, all three parameters of the two functions can be
   eliminated.  Edge summaries also have flags indicating whether the return
   value is used or whether it is only returned further by the caller itself.
   In LTO mode these summaries are then streamed to the object file in the
   compilation phase and streamed back in during the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The pass from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
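
/* As a purely illustrative sketch (the functions below are made up for this
   comment only), the pass can turn

     struct pair { int x, y; };

     static int
     get_x (struct pair *p)
     {
       return p->x;
     }

   into a clone roughly equivalent to

     static int
     get_x_isra (int p_x)
     {
       return p_x;
     }

   with all callers redirected to pass p->x by value, provided the analysis
   proves that *p cannot be modified through an alias.  */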



#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"
#include "symtab-clones.h"

static void ipa_sra_summarize_function (cgraph_node *);

/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7

/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Smaller accesses that fall
   within another access form a tree.  The pass cannot analyze parameters with
   only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has a reversed scalar storage order.  */
  unsigned reverse : 1;
};

/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Whether there have been writes to or reads from this exact location
     other than as arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has a reversed scalar storage order.  */
  bool reverse;
};
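
/* For illustration, if a parameter declared as

     struct outer { struct inner { int x, y; } i; } w;

   has the whole of w.i passed to a callee while w.i.x and w.i.y are also read
   directly, the result is a tree: the access covering w.i is the root, its
   first_child describes w.i.x, and the next_sibling of that child describes
   w.i.y.  */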

/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameter, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing data by reference?  */
  unsigned by_ref : 1;
};

/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in the field accesses
     above.  */
  unsigned access_count;

  /* If locally_unused below is set, this is the number of uses as actual
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been passed to a callee as a verbatim
     pointer (pointer pass-throughs).  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing data
     by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing data by reference?  */
  bool by_ref;

  /* The number of this parameter as parameters are ordered in the function
     declaration.  */
  int param_number;
  /* For parameters passing data by reference, this is the index used to
     compute offsets into bb_dereferences.  */
  int deref_index;
};

/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   not in GC memory, this is not necessary and we can consider removing the
   function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}

/* Class used to convey information about functions from the
   intra-procedural analysis stage to inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if no call statement actually uses the returned value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in the IPA-SRA stack during processing
     of call graph SCCs.  */

  unsigned m_queued : 1;
};

/* Clean up and deallocate what isra_func_summary points to.  TODO: Since this
   data structure is not in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}


/* Mark the function as not a candidate for any IPA-SRA transformation.  Return
   true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}

/* Structure to describe which formal parameters feed into a particular actual
   argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual argument.
     If aggregate_pass_through or pointer_pass_through below are true, it must
     contain exactly one element which is passed through from a formal
     parameter of the given number.  Otherwise, the array contains indices of
     the caller's formal parameters which are used to calculate the value of
     this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* Size of the portion of the formal parameter that is being passed.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass through of
     an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee, which
     would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
};
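
/* As a small made-up example, in

     int foo (int a, int b) { return bar (a + b); }

   the isra_param_flow for the only actual argument of the call to bar would
   end up with length 2 and inputs {0, 1}, meaning that formal parameters 0
   and 1 of foo feed into it, with none of the pass-through flags set.  */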

/* Structure used to convey information about calls from the intra-procedural
   analysis stage to inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to compute
     individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of the call statement is only used to construct
     the return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments is not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;
};

/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  virtual void duplicate (cgraph_node *, cgraph_node *,
                          isra_func_summary *old_sum,
                          isra_func_summary *new_sum);
  virtual void insert (cgraph_node *, isra_func_summary *);
};

/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
                                       isra_func_summary *old_sum,
                                       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
        {
          param_access *from = (*s->accesses)[j];
          param_access *to = ggc_cleared_alloc<param_access> ();
          to->type = from->type;
          to->alias_ptr_type = from->alias_ptr_type;
          to->unit_offset = from->unit_offset;
          to->unit_size = from->unit_size;
          to->certain = from->certain;
          d->accesses->quick_push (to);
        }
    }
}

/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Hook that is called by summary when a new node appears.  */

void
ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
{
  if (opt_for_fn (node->decl, flag_ipa_sra))
    {
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
      ipa_sra_summarize_function (node);
      pop_cfun ();
    }
  else
    func_sums->remove (node);
}

/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  virtual void duplicate (cgraph_edge *, cgraph_edge *,
                          isra_call_summary *old_sum,
                          isra_call_summary *new_sum);
};

static ipa_sra_call_summaries *call_sums;


/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}

/* Dump all information in this call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
        {
          bool first = true;
          fprintf (f, "      Scalar param sources: ");
          for (int j = 0; j < ipf->length; j++)
            {
              if (!first)
                fprintf (f, ", ");
              else
                first = false;
              fprintf (f, "%i", (int) ipf->inputs[j]);
            }
          fprintf (f, "\n");
        }
      if (ipf->aggregate_pass_through)
        fprintf (f, "      Aggregate pass through from the param given above, "
                 "unit offset: %u, unit size: %u\n",
                 ipf->unit_offset, ipf->unit_size);
      if (ipf->pointer_pass_through)
        fprintf (f, "      Pointer pass through from the param given above, "
                 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
    }
}

/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
                                   isra_call_summary *old_sum,
                                   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
}


/* With all GTY stuff done, we can move to an anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* Countdown of allowed alias analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which, for each basic block and each by-reference
   parameter, we record the maximum distance (offset + size) into that
   parameter which is dereferenced and accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;

/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
        fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
        fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
                 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
        fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
    {
      if (dump_file)
        fprintf (dump_file, "Function type has attributes.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Always inline function will be inlined "
                 "anyway.\n");
      return false;
    }

  return true;
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "  * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
           access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}


/* Print access ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not-certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump access ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}

/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}

/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
   describe FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
                               vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "  Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}


/* Dump DESC to F.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
           desc->param_size_limit, desc->size_reached,
           desc->by_ref ? ", by_ref" : "");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}

/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl,
                             isra_func_summary *ifs)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "  parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "  Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
    }
}

/* Add SRC to the inputs of PARAM_FLOW unless doing so would exceed their
   storage, in which case return false; return true on success.  SRC must fit
   into an unsigned char.  Used for purposes of transitive unused parameter
   removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}

/* Add SRC to the inputs of PARAM_FLOW unless it is already there and assert
   it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}

/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}
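
/* A hypothetical usage sketch: when an analyzed call passes the pointer in
   formal parameter number 3 of the caller verbatim as an argument, the
   summary-building code records

     set_single_param_flow_source (&csum->m_arg_flow[i], 3);

   and the IPA analysis stage later uses get_single_param_flow_source to map
   the argument back to descriptor 3 in the caller's summary.  */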

/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN represented with NODE and return a negative number if any of them is
   used for something other than either an actual call argument, simple
   arithmetic operation or debug statement.  If there are no such uses, return
   the number of actual arguments that this parameter eventually feeds to (or
   zero if there is none).  For any such argument, mark PARM_NUM as one of its
   sources.  ANALYZED is a bitmap that tracks which SSA names we have already
   started investigating.  */

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
                              int parm_num, bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
        continue;

      /* TODO: We could handle at least const builtin functions like arithmetic
         operations below.  */
      if (is_gimple_call (stmt))
        {
          int all_uses = 0;
          use_operand_p use_p;
          FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
            all_uses++;

          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              res = -1;
              break;
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          int simple_uses = 0;
          for (unsigned i = 0; i < arg_count; i++)
            if (gimple_call_arg (call, i) == name)
              {
                if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
                  {
                    simple_uses = -1;
                    break;
                  }
                simple_uses++;
              }

          if (simple_uses < 0
              || all_uses != simple_uses)
            {
              res = -1;
              break;
            }
          res += all_uses;
        }
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
               && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
                   || gimple_code (stmt) == GIMPLE_PHI))
        {
          tree lhs;
          if (gimple_code (stmt) == GIMPLE_PHI)
            lhs = gimple_phi_result (stmt);
          else
            lhs = gimple_assign_lhs (stmt);

          if (TREE_CODE (lhs) != SSA_NAME)
            {
              res = -1;
              break;
            }
          gcc_assert (!gimple_vdef (stmt));
          if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
            {
              int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
                                                      analyzed);
              if (tmp < 0)
                {
                  res = tmp;
                  break;
                }
              res += tmp;
            }
        }
      else
        {
          res = -1;
          break;
        }
    }
  return res;
}
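
/* As a made-up example, in

     int foo (int a) { int t = a + 1; return bar (t, a); }

   following the uses of a (and of t, which is derived from it by simple
   arithmetic) finds only the two actual arguments of the call to bar, so the
   function returns 2 and parameter 0 of foo is recorded as a source of both
   arguments in the edge summary.  */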

/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for something other than either an
   actual call argument, simple arithmetic operation or debug statement.  If
   there are no such uses, return false and store the number of actual
   arguments that this parameter eventually feeds to (or zero if there is
   none) to *CALL_USES_P.  For any such argument, mark PARM_NUM as one of its
   sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
                                    int parm_num, int *call_uses_p)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      *call_uses_p = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
                                                analyzed);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  *call_uses_p = call_uses;
  return false;
}

/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any uses that are not actual call arguments or
   are otherwise infeasible.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses passed as call
   arguments and, if returning false, store their number into *PT_COUNT_P.
   NODE and FUN must represent the function that is currently analyzed,
   PARM_NUM must be the index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
                          int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
        continue;

      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          while (handled_component_p (rhs))
            rhs = TREE_OPERAND (rhs, 0);
          if (TREE_CODE (rhs) == MEM_REF
              && TREE_OPERAND (rhs, 0) == name
              && integer_zerop (TREE_OPERAND (rhs, 1))
              && types_compatible_p (TREE_TYPE (rhs),
                                     TREE_TYPE (TREE_TYPE (name)))
              && !TREE_THIS_VOLATILE (rhs))
            uses_ok++;
        }
      else if (is_gimple_call (stmt))
        {
          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              ret = true;
              break;
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          for (unsigned i = 0; i < arg_count; ++i)
            {
              tree arg = gimple_call_arg (stmt, i);

              if (arg == name)
                {
                  /* TODO: Allow &MEM_REF[name + offset] here,
                     ipa_param_body_adjustments::modify_call_stmt has to be
                     adjusted too.  */
                  csum->m_arg_flow[i].pointer_pass_through = true;
                  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
                  pt_count++;
                  uses_ok++;
                  continue;
                }

              while (handled_component_p (arg))
                arg = TREE_OPERAND (arg, 0);
              if (TREE_CODE (arg) == MEM_REF
                  && TREE_OPERAND (arg, 0) == name
                  && integer_zerop (TREE_OPERAND (arg, 1))
                  && types_compatible_p (TREE_TYPE (arg),
                                         TREE_TYPE (TREE_TYPE (name)))
                  && !TREE_THIS_VOLATILE (arg))
                uses_ok++;
            }
        }

      /* If the number of valid uses does not match the number of
         uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
        all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
        {
          ret = true;
          break;
        }
    }

  *pt_count_p = pt_count;
  return ret;
}
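
/* As a made-up illustration, in

     int foo (struct s *p) { return p->a + bar (p); }

   both the load p->a and the verbatim pass of p to bar are acceptable, the
   latter creating a pointer pass-through record, so the function returns
   false and stores 1 into *PT_COUNT_P.  Storing p itself into memory or
   comparing it with another pointer, on the other hand, would count as a
   nonarg use and make the function return true.  */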

/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal, in
   which case the function body must be scanned (the function may return false
   even when there are candidates only for removal of unused register
   parameters, which require no scan).  */

static bool
create_parameter_descriptors (cgraph_node *node,
                              vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* The param_descriptions vector has been grown cleared in the
         caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
        print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is volatile\n");
          continue;
        }
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is a va_list type\n");
          continue;
        }
      if (TREE_ADDRESSABLE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is addressable\n");
          continue;
        }
      if (TREE_ADDRESSABLE (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, type cannot be split\n");
          continue;
        }

      if (is_gimple_reg (parm)
          && !isra_track_scalar_param_local_uses (fun, node, parm, num,
                                                  &scalar_call_uses))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " is a scalar with only %i call uses\n",
                     scalar_call_uses);

          desc->locally_unused = true;
          desc->call_uses = scalar_call_uses;
        }

      if (POINTER_TYPE_P (type))
        {
          type = TREE_TYPE (type);

          if (TREE_CODE (type) == FUNCTION_TYPE
              || TREE_CODE (type) == METHOD_TYPE)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a function\n");
              continue;
            }
          if (TYPE_VOLATILE (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a volatile type\n");
              continue;
            }
          if (TREE_CODE (type) == ARRAY_TYPE
              && TYPE_NONALIASED_COMPONENT (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a nonaliased component array\n");
              continue;
            }
          if (!is_gimple_reg (parm))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, a reference which is "
                         "not a gimple register (probably addressable)\n");
              continue;
            }
          if (is_va_list_type (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a va list\n");
              continue;
            }
          if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
                                        &desc->ptr_pt_count))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference has "
                         "nonarg uses\n");
              continue;
            }
          desc->by_ref = true;
        }
      else if (!AGGREGATE_TYPE_P (type))
        {
          /* This is in an else branch because scalars passed by reference are
             still candidates to be passed by value.  */
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not an aggregate\n");
          continue;
        }

      if (!COMPLETE_TYPE_P (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not a complete type\n");
          continue;
        }
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, size not representable\n");
          continue;
        }
      unsigned HOST_WIDE_INT type_size
        = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
          || type_size >= ISRA_ARG_SIZE_LIMIT)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, has zero or huge size\n");
          continue;
        }
      if (type_internals_preclude_sra_p (type, &msg))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, %s\n", msg);
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref)
        desc->deref_index = by_ref_count++;
    }
  return ret;
}

/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}


/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
             desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}

/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
                 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
                                             sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  return access;
}

/* The context in which scan_expr_access has been called: whether it deals
   with a load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear whether the access is a load or a store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer to the linked list
   of accesses at a certain tree level FIRST.  Attempt to create the access
   and put it into the appropriate place in the access tree if it does not
   exist, but fail and return NULL if there are already too many accesses, if
   it would create a partially overlapping access or if an access would end up
   within a pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
              HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
        return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
         it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
         && access->next_sibling
         && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
         something below access.  But we only allow nesting in call
         arguments.  */
      if (access->nonarg)
        return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
         what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
        return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->first_child = access;

      while (access->next_sibling
             && access->next_sibling->offset < offset + size)
        access = access->next_sibling;
      if (access->offset + access->size > offset + size)
        {
          /* This must be a different access, which are sorted, so the
             following must be true and this signals a partial overlap.  */
          gcc_assert (access->offset > offset);
          return NULL;
        }

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
         super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}
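
/* To illustrate the cases above (offsets and sizes being in bits, as returned
   by get_ref_base_and_extent): with an existing non-call access covering
   [0, 64) of a parameter, a request for [0, 64) returns the existing access,
   a request for [64, 128) creates a new sibling, a request for [0, 128) in
   ISRA_CTX_ARG context creates a new parent with the old access as its child,
   and a request for [32, 96) fails because the two accesses would only
   partially overlap.  */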

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE, and mark it as used in context CTX.  Attempt to create
   the access if it does not exist, but fail and return NULL if there are
   already too many accesses, if it would create a partially overlapping access
   or if an access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
            isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
                                              size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
                                  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}

/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
                      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size >= 0);

      if (parent_size != 0)
        {
          if (access->offset < parent_offset)
            {
              error ("Access offset before parent offset");
              return true;
            }
          if (access->size >= parent_size)
            {
              error ("Access size greater or equal to its parent size");
              return true;
            }
          if (access->offset + access->size > parent_offset + parent_size)
            {
              error ("Access terminates outside of its parent");
              return true;
            }
        }

      if (verify_access_tree_1 (access->first_child, access->offset,
                                access->size))
        return true;

      if (access->next_sibling
          && (access->next_sibling->offset < access->offset + access->size))
        {
          error ("Access overlaps with its sibling");
          return true;
        }

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
        dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}


/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}

/* Mark a dereference of distance DIST of the parameter identified by DESC in
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
                        basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}
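
/* In effect, bb_dereferences is a two-dimensional array indexed by basic
   block and by-reference parameter.  For example, with by_ref_count being 3,
   the distance recorded for the parameter with deref_index 1 in the basic
   block with index 4 is stored at bb_dereferences[4 * 3 + 1].  */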

/* Return true if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
          || TREE_CODE (new_type) == VECTOR_TYPE))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || TREE_CODE (old_type) == VECTOR_TYPE)
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
          != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
          != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}
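
/* For example, if one access to a location was recorded with type
   unsigned char and a later one with type int, the int access prevails
   because of its bigger precision, while a _Complex float access would
   prevail over a plain float one.  The final TYPE_UID comparison merely makes
   the choice stable when no other rule decides.  */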

/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};

/* Record use of ACCESS which belongs to a parameter described by DESC in a
   call argument described by CALL_INFO.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
                             scan_call_info *call_info,
                             unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
  desc->call_uses++;
}

/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}

/* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
   specifies whether EXPR is used in a load, a store or as a call argument.
   BB must be the basic block in which EXPR resides.  If CTX specifies call
   argument context, CALL_INFO must describe that call and argument position,
   otherwise it is ignored.  */

static void
scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
                  basic_block bb, scan_call_info *call_info = NULL)
{
  poly_int64 poffset, psize, pmax_size;
  HOST_WIDE_INT offset, size, max_size;
  tree base;
  bool deref = false;
  bool reverse;

  if (TREE_CODE (expr) == BIT_FIELD_REF
      || TREE_CODE (expr) == IMAGPART_EXPR
      || TREE_CODE (expr) == REALPART_EXPR)
    expr = TREE_OPERAND (expr, 0);

  base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);

  if (TREE_CODE (base) == MEM_REF)
    {
      tree op = TREE_OPERAND (base, 0);
      if (TREE_CODE (op) != SSA_NAME
          || !SSA_NAME_IS_DEFAULT_DEF (op))
        return;
      base = SSA_NAME_VAR (op);
      if (!base)
        return;
      deref = true;
    }
  if (TREE_CODE (base) != PARM_DECL)
    return;

  gensum_param_desc *desc = get_gensum_param_desc (base);
  if (!desc || !desc->split_candidate)
    return;

  if (!poffset.is_constant (&offset)
      || !psize.is_constant (&size)
      || !pmax_size.is_constant (&max_size))
    {
      disqualify_split_candidate (desc, "Encountered a polynomial-sized "
                                  "access.");
      return;
    }
  if (size < 0 || size != max_size)
    {
      disqualify_split_candidate (desc, "Encountered a variable sized access.");
      return;
    }
  if (TREE_CODE (expr) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
    {
      disqualify_split_candidate (desc, "Encountered a bit-field access.");
      return;
    }
  if (offset < 0)
    {
      disqualify_split_candidate (desc, "Encountered an access at a "
                                  "negative offset.");
      return;
    }
  gcc_assert ((offset % BITS_PER_UNIT) == 0);
  gcc_assert ((size % BITS_PER_UNIT) == 0);
  if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
      || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
    {
      disqualify_split_candidate (desc, "Encountered an access with too big "
                                  "offset or size");
      return;
    }

  tree type = TREE_TYPE (expr);
  unsigned int exp_align = get_object_alignment (expr);

  if (exp_align < TYPE_ALIGN (type))
    {
      disqualify_split_candidate (desc, "Underaligned access.");
      return;
    }

  if (deref)
    {
      if (!desc->by_ref)
        {
          disqualify_split_candidate (desc, "Dereferencing a non-reference.");
          return;
        }
      else if (ctx == ISRA_CTX_STORE)
        {
          disqualify_split_candidate (desc, "Storing to data passed by "
                                      "reference.");
          return;
        }

      if (!aa_walking_limit)
        {
          disqualify_split_candidate (desc, "Out of alias analysis step "
                                      "limit.");
          return;
        }

      gcc_checking_assert (gimple_vuse (stmt));
      bool maybe_modified = false;
      ao_ref ar;

      ao_ref_init (&ar, expr);
      bitmap visited = BITMAP_ALLOC (NULL);
      int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
                                       mark_maybe_modified, &maybe_modified,
                                       &visited, NULL, aa_walking_limit);
      BITMAP_FREE (visited);
      if (walked > 0)
        {
          gcc_assert (aa_walking_limit > walked);
          aa_walking_limit = aa_walking_limit - walked;
        }
      if (walked < 0)
        aa_walking_limit = 0;
      if (maybe_modified || walked < 0)
        {
          disqualify_split_candidate (desc, "Data passed by reference possibly "
                                      "modified through an alias.");
          return;
        }
      else
        mark_param_dereference (desc, offset + size, bb);
    }
  else
    /* Pointer parameters with direct uses should have been ruled out by
       analyzing SSA default def when looking at the parameters.  */
    gcc_assert (!desc->by_ref);

  gensum_param_access *access = get_access (desc, offset, size, ctx);
  if (!access)
    return;

  if (ctx == ISRA_CTX_ARG)
    {
      gcc_checking_assert (call_info);

      if (!deref)
        record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
                                     size / BITS_PER_UNIT);
      else
        /* This is not a pass-through of a pointer, this is a use like any
           other.  */
        access->nonarg = true;
    }

  if (!access->type)
    {
      access->type = type;
      access->alias_ptr_type = reference_alias_ptr_type (expr);
      access->reverse = reverse;
    }
  else
    {
      if (exp_align < TYPE_ALIGN (access->type))
        {
          disqualify_split_candidate (desc, "Reference has lower alignment "
                                      "than a previous one.");
          return;
        }
      if (access->alias_ptr_type != reference_alias_ptr_type (expr))
        {
          disqualify_split_candidate (desc, "Multiple alias pointer types.");
          return;
        }
      if (access->reverse != reverse)
        {
          disqualify_split_candidate (desc, "Both normal and reverse "
                                      "scalar storage order.");
          return;
        }
      if (!deref
          && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
          && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
        {
          /* We need the same aggregate type on all accesses to be able to
             distinguish transformation spots from pass-through arguments in
             the transformation phase.  */
          disqualify_split_candidate (desc, "We do not support aggregate "
                                      "type punning.");
          return;
        }

      if (type_prevails_p (access->type, type))
        access->type = type;
    }
}
1853
1854 /* Scan the body of the function described by NODE and FUN and create access
1855 trees for its parameters. */
1856
1857 static void
1858 scan_function (cgraph_node *node, struct function *fun)
1859 {
1860 basic_block bb;
1861
1862 FOR_EACH_BB_FN (bb, fun)
1863 {
1864 gimple_stmt_iterator gsi;
1865 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1866 {
1867 gimple *stmt = gsi_stmt (gsi);
1868
1869 if (stmt_can_throw_external (fun, stmt))
1870 bitmap_set_bit (final_bbs, bb->index);
1871 switch (gimple_code (stmt))
1872 {
1873 case GIMPLE_RETURN:
1874 {
1875 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1876 if (t != NULL_TREE)
1877 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1878 bitmap_set_bit (final_bbs, bb->index);
1879 }
1880 break;
1881
1882 case GIMPLE_ASSIGN:
1883 if (gimple_assign_single_p (stmt)
1884 && !gimple_clobber_p (stmt))
1885 {
1886 tree rhs = gimple_assign_rhs1 (stmt);
1887 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1888 tree lhs = gimple_assign_lhs (stmt);
1889 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1890 }
1891 break;
1892
1893 case GIMPLE_CALL:
1894 {
1895 unsigned argument_count = gimple_call_num_args (stmt);
1896 isra_scan_context ctx = ISRA_CTX_ARG;
1897 scan_call_info call_info, *call_info_p = &call_info;
1898 if (gimple_call_internal_p (stmt))
1899 {
1900 call_info_p = NULL;
1901 ctx = ISRA_CTX_LOAD;
1902 internal_fn ifn = gimple_call_internal_fn (stmt);
1903 if (internal_store_fn_p (ifn))
1904 ctx = ISRA_CTX_STORE;
1905 }
1906 else
1907 {
1908 call_info.cs = node->get_edge (stmt);
1909 call_info.argument_count = argument_count;
1910 }
1911
1912 for (unsigned i = 0; i < argument_count; i++)
1913 {
1914 call_info.arg_idx = i;
1915 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1916 ctx, bb, call_info_p);
1917 }
1918
1919 tree lhs = gimple_call_lhs (stmt);
1920 if (lhs)
1921 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1922 int flags = gimple_call_flags (stmt);
1923 if ((flags & (ECF_CONST | ECF_PURE)) == 0)
1924 bitmap_set_bit (final_bbs, bb->index);
1925 }
1926 break;
1927
1928 case GIMPLE_ASM:
1929 {
1930 gasm *asm_stmt = as_a <gasm *> (stmt);
1931 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1932 asm_visit_addr);
1933 bitmap_set_bit (final_bbs, bb->index);
1934
1935 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1936 {
1937 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1938 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1939 }
1940 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1941 {
1942 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1943 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1944 }
1945 }
1946 break;
1947
1948 default:
1949 break;
1950 }
1951 }
1952 }
1953 }
1954
1955 /* Return true if SSA_NAME NAME of function described by FUN is only used in
1956 return statements, or if results of any operations it is involved in are
1957 only used in return statements. ANALYZED is a bitmap that tracks which SSA
1958 names we have already started investigating. */
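
/* For illustration only (hypothetical code, not part of GCC):

 int bar (void)
 {
 int t = baz ();
 return t + 1;
 }

 The SSA name holding the result of baz () satisfies this predicate: it
 only feeds an addition whose result is in turn only returned. */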
1959
1960 static bool
1961 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
1962 {
1963 bool res = true;
1964 imm_use_iterator imm_iter;
1965 gimple *stmt;
1966
1967 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1968 {
1969 if (is_gimple_debug (stmt))
1970 continue;
1971
1972 if (gimple_code (stmt) == GIMPLE_RETURN)
1973 {
1974 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1975 if (t != name)
1976 {
1977 res = false;
1978 break;
1979 }
1980 }
1981 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1982 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1983 || gimple_code (stmt) == GIMPLE_PHI))
1984 {
1985 /* TODO: And perhaps for const function calls too? */
1986 tree lhs;
1987 if (gimple_code (stmt) == GIMPLE_PHI)
1988 lhs = gimple_phi_result (stmt);
1989 else
1990 lhs = gimple_assign_lhs (stmt);
1991
1992 if (TREE_CODE (lhs) != SSA_NAME)
1993 {
1994 res = false;
1995 break;
1996 }
1997 gcc_assert (!gimple_vdef (stmt));
1998 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
1999 && !ssa_name_only_returned_p (fun, lhs, analyzed))
2000 {
2001 res = false;
2002 break;
2003 }
2004 }
2005 else
2006 {
2007 res = false;
2008 break;
2009 }
2010 }
2011 return res;
2012 }
2013
2014 /* Inspect the uses of the return value of the call associated with CS, and if
2015 it is not used or if it is only used to construct the return value of the
2016 caller, mark it as such in call or caller summary. Also check for
2017 misaligned arguments. */
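
/* For illustration only (hypothetical calls, not from GCC sources):

 g (x); - m_return_ignored will be set
 return g (x); - m_return_returned will be set
 y = g (x) + 1; - neither flag will be set

 assuming in the second case that the LHS SSA name of the call is used by
 nothing but the return statement. */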
2018
2019 static void
2020 isra_analyze_call (cgraph_edge *cs)
2021 {
2022 gcall *call_stmt = cs->call_stmt;
2023 unsigned count = gimple_call_num_args (call_stmt);
2024 isra_call_summary *csum = call_sums->get_create (cs);
2025
2026 for (unsigned i = 0; i < count; i++)
2027 {
2028 tree arg = gimple_call_arg (call_stmt, i);
2029 if (is_gimple_reg (arg))
2030 continue;
2031
2032 tree offset;
2033 poly_int64 bitsize, bitpos;
2034 machine_mode mode;
2035 int unsignedp, reversep, volatilep = 0;
2036 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2037 &unsignedp, &reversep, &volatilep);
2038 if (!multiple_p (bitpos, BITS_PER_UNIT))
2039 {
2040 csum->m_bit_aligned_arg = true;
2041 break;
2042 }
2043 }
2044
2045 tree lhs = gimple_call_lhs (call_stmt);
2046 if (lhs)
2047 {
2048 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2049 from this function (without being read anywhere). */
2050 if (TREE_CODE (lhs) == SSA_NAME)
2051 {
2052 bitmap analyzed = BITMAP_ALLOC (NULL);
2053 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2054 lhs, analyzed))
2055 csum->m_return_returned = true;
2056 BITMAP_FREE (analyzed);
2057 }
2058 }
2059 else
2060 csum->m_return_ignored = true;
2061 }
2062
2063 /* Look at all calls going out of NODE and perform all analyses necessary for
2064 IPA-SRA that are not done at body scan time, or that must be done even
2065 when the body is not scanned because the function is not a candidate. */
2066
2067 static void
2068 isra_analyze_all_outgoing_calls (cgraph_node *node)
2069 {
2070 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2071 isra_analyze_call (cs);
2072 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2073 isra_analyze_call (cs);
2074 }
2075
2076 /* Dump a dereferences table with heading STR to file F. */
2077
2078 static void
2079 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2080 {
2081 basic_block bb;
2082
2083 fprintf (f, "%s", str);
2084 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2085 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2086 {
2087 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2088 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2089 {
2090 int i;
2091 for (i = 0; i < by_ref_count; i++)
2092 {
2093 int idx = bb->index * by_ref_count + i;
2094 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2095 }
2096 }
2097 fprintf (f, "\n");
2098 }
2099 fprintf (f, "\n");
2100 }
2101
2102 /* Propagate distances in bb_dereferences in the direction opposite to that
2103 of the control flow edges, in each step storing the maximum of the current
2104 value and the minimum over all successors. These steps are repeated until
2105 the table stabilizes. Note that BBs which might terminate the function
2106 (according to the final_bbs bitmap) are never updated in this way. */
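
/* For illustration only (a hypothetical CFG, not from GCC sources): if block
 S branches to blocks A and B which both continue to J, and a by-reference
 parameter is known to be dereferenced up to 16 bytes in A but only up to 8
 bytes in B, then S inherits max (deref[S], min (16, 8)) = 8. Only the
 first 8 bytes are therefore dereferenced on every path from S and only
 they may be safely loaded early in callers. */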
2107
2108 static void
2109 propagate_dereference_distances (struct function *fun)
2110 {
2111 basic_block bb;
2112
2113 if (dump_file && (dump_flags & TDF_DETAILS))
2114 dump_dereferences_table (dump_file, fun,
2115 "Dereference table before propagation:\n");
2116
2117 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2118 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2119 FOR_EACH_BB_FN (bb, fun)
2120 {
2121 queue.quick_push (bb);
2122 bb->aux = bb;
2123 }
2124
2125 while (!queue.is_empty ())
2126 {
2127 edge_iterator ei;
2128 edge e;
2129 bool change = false;
2130 int i;
2131
2132 bb = queue.pop ();
2133 bb->aux = NULL;
2134
2135 if (bitmap_bit_p (final_bbs, bb->index))
2136 continue;
2137
2138 for (i = 0; i < by_ref_count; i++)
2139 {
2140 int idx = bb->index * by_ref_count + i;
2141 bool first = true;
2142 HOST_WIDE_INT inh = 0;
2143
2144 FOR_EACH_EDGE (e, ei, bb->succs)
2145 {
2146 int succ_idx = e->dest->index * by_ref_count + i;
2147
2148 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2149 continue;
2150
2151 if (first)
2152 {
2153 first = false;
2154 inh = bb_dereferences [succ_idx];
2155 }
2156 else if (bb_dereferences [succ_idx] < inh)
2157 inh = bb_dereferences [succ_idx];
2158 }
2159
2160 if (!first && bb_dereferences[idx] < inh)
2161 {
2162 bb_dereferences[idx] = inh;
2163 change = true;
2164 }
2165 }
2166
2167 if (change)
2168 FOR_EACH_EDGE (e, ei, bb->preds)
2169 {
2170 if (e->src->aux)
2171 continue;
2172
2173 e->src->aux = e->src;
2174 queue.quick_push (e->src);
2175 }
2176 }
2177
2178 if (dump_file && (dump_flags & TDF_DETAILS))
2179 dump_dereferences_table (dump_file, fun,
2180 "Dereference table after propagation:\n");
2181 }
2182
2183 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2184 children, return true if the parameter cannot be split, otherwise return
2185 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2186 the index of the entry BB in the function of PARM. */
2187
2188 static bool
2189 check_gensum_access (tree parm, gensum_param_desc *desc,
2190 gensum_param_access *access,
2191 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2192 int entry_bb_index)
2193 {
2194 if (access->nonarg)
2195 {
2196 *only_calls = false;
2197 *nonarg_acc_size += access->size;
2198
2199 if (access->first_child)
2200 {
2201 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2202 return true;
2203 }
2204 }
2205 /* Do not decompose a non-BLKmode param in a way that would create
2206 BLKmode params. Especially for by-reference passing (thus,
2207 pointer-type param) this is hardly worthwhile. */
2208 if (DECL_MODE (parm) != BLKmode
2209 && TYPE_MODE (access->type) == BLKmode)
2210 {
2211 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2212 return true;
2213 }
2214
2215 if (desc->by_ref)
2216 {
2217 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2218 if ((access->offset + access->size) > bb_dereferences[idx])
2219 {
2220 disqualify_split_candidate (desc, "Would create a possibly "
2221 "illegal dereference in a caller.");
2222 return true;
2223 }
2224 }
2225
2226 for (gensum_param_access *ch = access->first_child;
2227 ch;
2228 ch = ch->next_sibling)
2229 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2230 entry_bb_index))
2231 return true;
2232
2233 return false;
2234 }
2235
2236 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2237 descriptor DESC. */
2238
2239 static void
2240 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2241 {
2242 param_access *to = ggc_cleared_alloc<param_access> ();
2243 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2244 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2245 to->unit_offset = from->offset / BITS_PER_UNIT;
2246 to->unit_size = from->size / BITS_PER_UNIT;
2247 to->type = from->type;
2248 to->alias_ptr_type = from->alias_ptr_type;
2249 to->certain = from->nonarg;
2250 to->reverse = from->reverse;
2251 vec_safe_push (desc->accesses, to);
2252
2253 for (gensum_param_access *ch = from->first_child;
2254 ch;
2255 ch = ch->next_sibling)
2256 copy_accesses_to_ipa_desc (ch, desc);
2257 }
2258
2259 /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect
2260 possible transformations and store information about those in the function
2261 summary. NODE, FUN and IFS are all various structures describing the
2262 currently analyzed function. */
2263
2264 static void
2265 process_scan_results (cgraph_node *node, struct function *fun,
2266 isra_func_summary *ifs,
2267 vec<gensum_param_desc> *param_descriptions)
2268 {
2269 bool check_pass_throughs = false;
2270 bool dereferences_propagated = false;
2271 tree parm = DECL_ARGUMENTS (node->decl);
2272 unsigned param_count = param_descriptions->length();
2273
2274 for (unsigned desc_index = 0;
2275 desc_index < param_count;
2276 desc_index++, parm = DECL_CHAIN (parm))
2277 {
2278 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2279 if (!desc->split_candidate)
2280 continue;
2281
2282 if (flag_checking)
2283 isra_verify_access_tree (desc->accesses);
2284
2285 if (!dereferences_propagated
2286 && desc->by_ref
2287 && desc->accesses)
2288 {
2289 propagate_dereference_distances (fun);
2290 dereferences_propagated = true;
2291 }
2292
2293 HOST_WIDE_INT nonarg_acc_size = 0;
2294 bool only_calls = true;
2295 bool check_failed = false;
2296
2297 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2298 for (gensum_param_access *acc = desc->accesses;
2299 acc;
2300 acc = acc->next_sibling)
2301 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2302 entry_bb_index))
2303 {
2304 check_failed = true;
2305 break;
2306 }
2307 if (check_failed)
2308 continue;
2309
2310 if (only_calls)
2311 desc->locally_unused = true;
2312
2313 HOST_WIDE_INT cur_param_size
2314 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2315 HOST_WIDE_INT param_size_limit;
2316 if (!desc->by_ref || optimize_function_for_size_p (fun))
2317 param_size_limit = cur_param_size;
2318 else
2319 param_size_limit
2320 = opt_for_fn (node->decl,
2321 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2322 if (nonarg_acc_size > param_size_limit
2323 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2324 {
2325 disqualify_split_candidate (desc, "Would result into a too big set "
2326 "of replacements.");
2327 }
2328 else
2329 {
2330 /* create_parameter_descriptors makes sure unit sizes of all
2331 candidate parameters fit unsigned integers restricted to
2332 ISRA_ARG_SIZE_LIMIT. */
2333 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2334 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2335 if (desc->split_candidate && desc->ptr_pt_count)
2336 {
2337 gcc_assert (desc->by_ref);
2338 check_pass_throughs = true;
2339 }
2340 }
2341 }
2342
2343 /* When a pointer parameter is passed through to a callee in which it is only
2344 used to read one or a few items, we can attempt to transform it to
2345 obtaining and passing through the items instead of the pointer. But we
2346 must take extra care that 1) we do not introduce any segfault by moving
2347 dereferences above control flow and that 2) the data is not modified
2348 through an alias in this function. The IPA analysis must not introduce
2349 any access candidates unless it can prove both.
2350 
2351 The current solution is very crude as it consists of ensuring that the
2352 call postdominates the entry BB and that the definition of the VUSE of the
2353 call is a default definition. TODO: For non-recursive callees in the same
2354 compilation unit we could do better by doing the analysis in topological
2355 order and looking into access candidates of callees, using their
2356 alias_ptr_types to attempt real AA. We could also use the maximum known
2357 dereferenced offset in this function at the IPA level.
2358 
2359 TODO: Measure the overhead and the effect of just being pessimistic.
2360 Maybe this is only -O3 material?
2361 */
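
/* For illustration only (hypothetical code, not from GCC sources): in

 void caller (struct S *p) { if (p) callee (p); }

 the call does not postdominate the entry BB, so replacing the pointer
 argument of callee with pre-loaded items would hoist a dereference of p
 above the NULL check, exactly the kind of segfault the requirement above
 rules out. */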
2362 bool pdoms_calculated = false;
2363 if (check_pass_throughs)
2364 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2365 {
2366 gcall *call_stmt = cs->call_stmt;
2367 tree vuse = gimple_vuse (call_stmt);
2368
2369 /* If the callee is a const function, we don't get a VUSE. In such
2370 case there will be no memory accesses in the called function (or the
2371 const attribute is wrong) and then we just don't care. */
2372 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2373
2374 unsigned count = gimple_call_num_args (call_stmt);
2375 isra_call_summary *csum = call_sums->get_create (cs);
2376 csum->init_inputs (count);
2377 for (unsigned argidx = 0; argidx < count; argidx++)
2378 {
2379 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2380 continue;
2381 unsigned pidx
2382 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2383 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2384 if (!desc->split_candidate)
2385 {
2386 csum->m_arg_flow[argidx].pointer_pass_through = false;
2387 continue;
2388 }
2389 if (!uses_memory_as_obtained)
2390 continue;
2391
2392 /* Post-dominator check placed last, hoping that it usually won't
2393 be needed. */
2394 if (!pdoms_calculated)
2395 {
2396 gcc_checking_assert (cfun);
2397 add_noreturn_fake_exit_edges ();
2398 connect_infinite_loops_to_exit ();
2399 calculate_dominance_info (CDI_POST_DOMINATORS);
2400 pdoms_calculated = true;
2401 }
2402 if (dominated_by_p (CDI_POST_DOMINATORS,
2403 gimple_bb (call_stmt),
2404 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2405 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2406 }
2407
2408 }
2409 if (pdoms_calculated)
2410 {
2411 free_dominance_info (CDI_POST_DOMINATORS);
2412 remove_fake_exit_edges ();
2413 }
2414
2415 /* TODO: Add early exit if we disqualified everything. This also requires
2416 that we either relax the restriction that
2417 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2418 or store the number of parameters to IPA-SRA function summary and use that
2419 when just removing params. */
2420
2421 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2422 ifs->m_parameters->quick_grow_cleared (param_count);
2423 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2424 {
2425 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2426 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2427
2428 d->param_size_limit = s->param_size_limit;
2429 d->size_reached = s->nonarg_acc_size;
2430 d->locally_unused = s->locally_unused;
2431 d->split_candidate = s->split_candidate;
2432 d->by_ref = s->by_ref;
2433
2434 for (gensum_param_access *acc = s->accesses;
2435 acc;
2436 acc = acc->next_sibling)
2437 copy_accesses_to_ipa_desc (acc, d);
2438 }
2439
2440 if (dump_file)
2441 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2442 }
2443
2444 /* Return true if there are any overlaps among certain accesses of DESC. If
2445 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2446 too. DESC is assumed to be a split candidate that is not locally
2447 unused. */
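
/* For illustration only: certain accesses at unit offsets/sizes (0, 8) and
 (4, 4) overlap because 0 < 4 + 4 and 0 + 8 > 4, whereas (0, 4) and (4, 4)
 merely touch and therefore do not. */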
2448
2449 static bool
2450 overlapping_certain_accesses_p (isra_param_desc *desc,
2451 bool *certain_access_present_p)
2452 {
2453 unsigned pclen = vec_safe_length (desc->accesses);
2454 for (unsigned i = 0; i < pclen; i++)
2455 {
2456 param_access *a1 = (*desc->accesses)[i];
2457
2458 if (!a1->certain)
2459 continue;
2460 if (certain_access_present_p)
2461 *certain_access_present_p = true;
2462 for (unsigned j = i + 1; j < pclen; j++)
2463 {
2464 param_access *a2 = (*desc->accesses)[j];
2465 if (a2->certain
2466 && a1->unit_offset < a2->unit_offset + a2->unit_size
2467 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2468 return true;
2469 }
2470 }
2471 return false;
2472 }
2473
2474 /* Check for any overlaps of certain param accesses among splitting candidates
2475 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2476 check that used splitting candidates have at least one certain access. */
2477
2478 static void
2479 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2480 {
2481 isra_func_summary *ifs = func_sums->get (node);
2482 if (!ifs || !ifs->m_candidate)
2483 return;
2484 unsigned param_count = vec_safe_length (ifs->m_parameters);
2485 for (unsigned pidx = 0; pidx < param_count; pidx++)
2486 {
2487 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2488 if (!desc->split_candidate || desc->locally_unused)
2489 continue;
2490
2491 bool certain_access_present = !certain_must_exist;
2492 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2493 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2494 "which overlap", node->dump_name (), pidx);
2495 if (!certain_access_present)
2496 internal_error ("Function %qs, parameter %u, is used but does not "
2497 "have any certain IPA-SRA access",
2498 node->dump_name (), pidx);
2499 }
2500 }
2501
2502 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2503 this compilation unit and create summary structures describing IPA-SRA
2504 opportunities and constraints in them. */
2505
2506 static void
2507 ipa_sra_generate_summary (void)
2508 {
2509 struct cgraph_node *node;
2510
2511 gcc_checking_assert (!func_sums);
2512 gcc_checking_assert (!call_sums);
2513 func_sums
2514 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2515 ipa_sra_function_summaries (symtab, true));
2516 call_sums = new ipa_sra_call_summaries (symtab);
2517
2518 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2519 ipa_sra_summarize_function (node);
2520 return;
2521 }
2522
2523 /* Write intraprocedural analysis information about call graph edge E into a
2524 stream for LTO WPA. */
2525
2526 static void
2527 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2528 {
2529 isra_call_summary *csum = call_sums->get (e);
2530 unsigned input_count = csum->m_arg_flow.length ();
2531 streamer_write_uhwi (ob, input_count);
2532 for (unsigned i = 0; i < input_count; i++)
2533 {
2534 isra_param_flow *ipf = &csum->m_arg_flow[i];
2535 streamer_write_hwi (ob, ipf->length);
2536 bitpack_d bp = bitpack_create (ob->main_stream);
2537 for (int j = 0; j < ipf->length; j++)
2538 bp_pack_value (&bp, ipf->inputs[j], 8);
2539 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2540 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2541 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2542 streamer_write_bitpack (&bp);
2543 streamer_write_uhwi (ob, ipf->unit_offset);
2544 streamer_write_uhwi (ob, ipf->unit_size);
2545 }
2546 bitpack_d bp = bitpack_create (ob->main_stream);
2547 bp_pack_value (&bp, csum->m_return_ignored, 1);
2548 bp_pack_value (&bp, csum->m_return_returned, 1);
2549 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2550 streamer_write_bitpack (&bp);
2551 }
2552
2553 /* Write intraprocedural analysis information about NODE and all of its outgoing
2554 edges into a stream for LTO WPA. */
2555
2556 static void
2557 isra_write_node_summary (output_block *ob, cgraph_node *node)
2558 {
2559 isra_func_summary *ifs = func_sums->get (node);
2560 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2561 int node_ref = lto_symtab_encoder_encode (encoder, node);
2562 streamer_write_uhwi (ob, node_ref);
2563
2564 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2565 streamer_write_uhwi (ob, param_desc_count);
2566 for (unsigned i = 0; i < param_desc_count; i++)
2567 {
2568 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2569 unsigned access_count = vec_safe_length (desc->accesses);
2570 streamer_write_uhwi (ob, access_count);
2571 for (unsigned j = 0; j < access_count; j++)
2572 {
2573 param_access *acc = (*desc->accesses)[j];
2574 stream_write_tree (ob, acc->type, true);
2575 stream_write_tree (ob, acc->alias_ptr_type, true);
2576 streamer_write_uhwi (ob, acc->unit_offset);
2577 streamer_write_uhwi (ob, acc->unit_size);
2578 bitpack_d bp = bitpack_create (ob->main_stream);
2579 bp_pack_value (&bp, acc->certain, 1);
2580 streamer_write_bitpack (&bp);
2581 }
2582 streamer_write_uhwi (ob, desc->param_size_limit);
2583 streamer_write_uhwi (ob, desc->size_reached);
2584 bitpack_d bp = bitpack_create (ob->main_stream);
2585 bp_pack_value (&bp, desc->locally_unused, 1);
2586 bp_pack_value (&bp, desc->split_candidate, 1);
2587 bp_pack_value (&bp, desc->by_ref, 1);
2588 streamer_write_bitpack (&bp);
2589 }
2590 bitpack_d bp = bitpack_create (ob->main_stream);
2591 bp_pack_value (&bp, ifs->m_candidate, 1);
2592 bp_pack_value (&bp, ifs->m_returns_value, 1);
2593 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2594 gcc_assert (!ifs->m_queued);
2595 streamer_write_bitpack (&bp);
2596
2597 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2598 isra_write_edge_summary (ob, e);
2599 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2600 isra_write_edge_summary (ob, e);
2601 }
2602
2603 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2604
2605 static void
2606 ipa_sra_write_summary (void)
2607 {
2608 if (!func_sums || !call_sums)
2609 return;
2610
2611 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2612 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2613 ob->symbol = NULL;
2614
2615 unsigned int count = 0;
2616 lto_symtab_encoder_iterator lsei;
2617 for (lsei = lsei_start_function_in_partition (encoder);
2618 !lsei_end_p (lsei);
2619 lsei_next_function_in_partition (&lsei))
2620 {
2621 cgraph_node *node = lsei_cgraph_node (lsei);
2622 if (node->has_gimple_body_p ()
2623 && func_sums->get (node) != NULL)
2624 count++;
2625 }
2626 streamer_write_uhwi (ob, count);
2627
2628 /* Process all of the functions. */
2629 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2630 lsei_next_function_in_partition (&lsei))
2631 {
2632 cgraph_node *node = lsei_cgraph_node (lsei);
2633 if (node->has_gimple_body_p ()
2634 && func_sums->get (node) != NULL)
2635 isra_write_node_summary (ob, node);
2636 }
2637 streamer_write_char_stream (ob->main_stream, 0);
2638 produce_asm (ob, NULL);
2639 destroy_output_block (ob);
2640 }
2641
2642 /* Read intraprocedural analysis information about call graph edge CS from a
2643 stream for LTO WPA. */
2644
2645 static void
2646 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2647 {
2648 isra_call_summary *csum = call_sums->get_create (cs);
2649 unsigned input_count = streamer_read_uhwi (ib);
2650 csum->init_inputs (input_count);
2651 for (unsigned i = 0; i < input_count; i++)
2652 {
2653 isra_param_flow *ipf = &csum->m_arg_flow[i];
2654 ipf->length = streamer_read_hwi (ib);
2655 bitpack_d bp = streamer_read_bitpack (ib);
2656 for (int j = 0; j < ipf->length; j++)
2657 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2658 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2659 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2660 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2661 ipf->unit_offset = streamer_read_uhwi (ib);
2662 ipf->unit_size = streamer_read_uhwi (ib);
2663 }
2664 bitpack_d bp = streamer_read_bitpack (ib);
2665 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2666 csum->m_return_returned = bp_unpack_value (&bp, 1);
2667 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2668 }
2669
2670 /* Read intraprocedural analysis information about NODE and all of its
2671 outgoing edges from a stream for LTO WPA. */
2672
2673 static void
2674 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2675 struct data_in *data_in)
2676 {
2677 isra_func_summary *ifs = func_sums->get_create (node);
2678 unsigned param_desc_count = streamer_read_uhwi (ib);
2679 if (param_desc_count > 0)
2680 {
2681 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2682 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2683 }
2684 for (unsigned i = 0; i < param_desc_count; i++)
2685 {
2686 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2687 unsigned access_count = streamer_read_uhwi (ib);
2688 for (unsigned j = 0; j < access_count; j++)
2689 {
2690 param_access *acc = ggc_cleared_alloc<param_access> ();
2691 acc->type = stream_read_tree (ib, data_in);
2692 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2693 acc->unit_offset = streamer_read_uhwi (ib);
2694 acc->unit_size = streamer_read_uhwi (ib);
2695 bitpack_d bp = streamer_read_bitpack (ib);
2696 acc->certain = bp_unpack_value (&bp, 1);
2697 vec_safe_push (desc->accesses, acc);
2698 }
2699 desc->param_size_limit = streamer_read_uhwi (ib);
2700 desc->size_reached = streamer_read_uhwi (ib);
2701 bitpack_d bp = streamer_read_bitpack (ib);
2702 desc->locally_unused = bp_unpack_value (&bp, 1);
2703 desc->split_candidate = bp_unpack_value (&bp, 1);
2704 desc->by_ref = bp_unpack_value (&bp, 1);
2705 }
2706 bitpack_d bp = streamer_read_bitpack (ib);
2707 ifs->m_candidate = bp_unpack_value (&bp, 1);
2708 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2709 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2710 ifs->m_queued = 0;
2711
2712 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2713 isra_read_edge_summary (ib, e);
2714 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2715 isra_read_edge_summary (ib, e);
2716 }
2717
2718 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2719 data DATA. TODO: This function was copied almost verbatim from ipa-prop.c,
2720 it should be possible to unify them somehow. */
2721
2722 static void
2723 isra_read_summary_section (struct lto_file_decl_data *file_data,
2724 const char *data, size_t len)
2725 {
2726 const struct lto_function_header *header =
2727 (const struct lto_function_header *) data;
2728 const int cfg_offset = sizeof (struct lto_function_header);
2729 const int main_offset = cfg_offset + header->cfg_size;
2730 const int string_offset = main_offset + header->main_size;
2731 struct data_in *data_in;
2732 unsigned int i;
2733 unsigned int count;
2734
2735 lto_input_block ib_main ((const char *) data + main_offset,
2736 header->main_size, file_data->mode_table);
2737
2738 data_in =
2739 lto_data_in_create (file_data, (const char *) data + string_offset,
2740 header->string_size, vNULL);
2741 count = streamer_read_uhwi (&ib_main);
2742
2743 for (i = 0; i < count; i++)
2744 {
2745 unsigned int index;
2746 struct cgraph_node *node;
2747 lto_symtab_encoder_t encoder;
2748
2749 index = streamer_read_uhwi (&ib_main);
2750 encoder = file_data->symtab_node_encoder;
2751 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2752 index));
2753 gcc_assert (node->definition);
2754 isra_read_node_info (&ib_main, node, data_in);
2755 }
2756 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2757 len);
2758 lto_data_in_delete (data_in);
2759 }
2760
2761 /* Read intraprocedural analysis information from a stream for LTO WPA. */
2762
2763 static void
2764 ipa_sra_read_summary (void)
2765 {
2766 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2767 struct lto_file_decl_data *file_data;
2768 unsigned int j = 0;
2769
2770 gcc_checking_assert (!func_sums);
2771 gcc_checking_assert (!call_sums);
2772 func_sums
2773 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2774 ipa_sra_function_summaries (symtab, true));
2775 call_sums = new ipa_sra_call_summaries (symtab);
2776
2777 while ((file_data = file_data_vec[j++]))
2778 {
2779 size_t len;
2780 const char *data
2781 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2782 if (data)
2783 isra_read_summary_section (file_data, data, len);
2784 }
2785 }
2786
2787 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2788
2789 static void
2790 ipa_sra_dump_all_summaries (FILE *f)
2791 {
2792 cgraph_node *node;
2793 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2794 {
2795 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2796
2797 isra_func_summary *ifs = func_sums->get (node);
2798 if (!ifs)
2799 {
2800 fprintf (f, " Function does not have any associated IPA-SRA "
2801 "summary\n");
2802 continue;
2803 }
2804 if (!ifs->m_candidate)
2805 {
2806 fprintf (f, " Not a candidate function\n");
2807 continue;
2808 }
2809 if (ifs->m_returns_value)
2810 fprintf (f, " Returns value\n");
2811 if (vec_safe_is_empty (ifs->m_parameters))
2812 fprintf (f, " No parameter information.\n");
2813 else
2814 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2815 {
2816 fprintf (f, " Descriptor for parameter %i:\n", i);
2817 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2818 }
2819 fprintf (f, "\n");
2820
2821 struct cgraph_edge *cs;
2822 for (cs = node->callees; cs; cs = cs->next_callee)
2823 {
2824 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2825 cs->callee->dump_name ());
2826 isra_call_summary *csum = call_sums->get (cs);
2827 if (csum)
2828 csum->dump (f);
2829 else
2830 fprintf (f, " Call summary is MISSING!\n");
2831 }
2832
2833 }
2834 fprintf (f, "\n\n");
2835 }
2836
2837 /* Perform function-scope viability tests that can only be made at the IPA
2838 level and return false if the function is deemed unsuitable for IPA-SRA. */
2839
2840 static bool
2841 ipa_sra_ipa_function_checks (cgraph_node *node)
2842 {
2843 if (!node->can_be_local_p ())
2844 {
2845 if (dump_file)
2846 fprintf (dump_file, "Function %s disqualified because it cannot be "
2847 "made local.\n", node->dump_name ());
2848 return false;
2849 }
2850 if (!node->can_change_signature)
2851 {
2852 if (dump_file)
2853 fprintf (dump_file, "Function cannot change signature.\n");
2854 return false;
2855 }
2856
2857 return true;
2858 }
2859
2860 /* Issues found out by check_all_callers_for_issues. */
2861
2862 struct caller_issues
2863 {
2864 /* The candidate being considered. */
2865 cgraph_node *candidate;
2866 /* There is a thunk among callers. */
2867 bool thunk;
2868 /* Call site with no available information. */
2869 bool unknown_callsite;
2870 /* Call from outside the candidate's comdat group. */
2871 bool call_from_outside_comdat;
2872 /* There is a bit-aligned load into one of the non-gimple-typed arguments. */
2873 bool bit_aligned_aggregate_argument;
2874 };
2875
2876 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2877 that apply. */
2878
2879 static bool
2880 check_for_caller_issues (struct cgraph_node *node, void *data)
2881 {
2882 struct caller_issues *issues = (struct caller_issues *) data;
2883
2884 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2885 {
2886 if (cs->caller->thunk)
2887 {
2888 issues->thunk = true;
2889 /* TODO: We should be able to process at least some types of
2890 thunks. */
2891 return true;
2892 }
2893 if (issues->candidate->calls_comdat_local
2894 && issues->candidate->same_comdat_group
2895 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2896 {
2897 issues->call_from_outside_comdat = true;
2898 return true;
2899 }
2900
2901 isra_call_summary *csum = call_sums->get (cs);
2902 if (!csum)
2903 {
2904 issues->unknown_callsite = true;
2905 return true;
2906 }
2907
2908 if (csum->m_bit_aligned_arg)
2909 issues->bit_aligned_aggregate_argument = true;
2910 }
2911 return false;
2912 }
2913
2914 /* Look at all incoming edges to NODE, including those to aliases and thunks,
2915 and look for problems. Return true if NODE's type should not be modified. */
2916
2917 static bool
2918 check_all_callers_for_issues (cgraph_node *node)
2919 {
2920 struct caller_issues issues;
2921 memset (&issues, 0, sizeof (issues));
2922 issues.candidate = node;
2923
2924 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2925 if (issues.unknown_callsite)
2926 {
2927 if (dump_file && (dump_flags & TDF_DETAILS))
2928 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2929 "all modifications.\n", node->dump_name ());
2930 return true;
2931 }
2932 /* TODO: We should be able to process at least some types of thunks. */
2933 if (issues.thunk)
2934 {
2935 if (dump_file && (dump_flags & TDF_DETAILS))
2936 fprintf (dump_file, "A call of %s goes through a thunk; thunks are"
2937 " not handled yet. Disabling all modifications.\n",
2938 node->dump_name ());
2939 return true;
2940 }
2941 if (issues.call_from_outside_comdat)
2942 {
2943 if (dump_file)
2944 fprintf (dump_file, "Function would become private comdat called "
2945 "outside of its comdat group.\n");
2946 return true;
2947 }
2948
2949 if (issues.bit_aligned_aggregate_argument)
2950 {
2951 /* Let's only remove parameters/return values from such functions.
2952 TODO: We could only prevent splitting the problematic parameters if
2953 anybody thinks it is worth it. */
2954 if (dump_file && (dump_flags & TDF_DETAILS))
2955 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
2956 " disabling parameter splitting.\n", node->dump_name ());
2957
2958 isra_func_summary *ifs = func_sums->get (node);
2959 gcc_checking_assert (ifs);
2960 unsigned param_count = vec_safe_length (ifs->m_parameters);
2961 for (unsigned i = 0; i < param_count; i++)
2962 (*ifs->m_parameters)[i].split_candidate = false;
2963 }
2964 return false;
2965 }
2966
2967 /* Find the access with corresponding OFFSET and SIZE among accesses in
2968 PARAM_DESC and return it or NULL if such an access is not there. */
2969
2970 static param_access *
2971 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2972 {
2973 unsigned pclen = vec_safe_length (param_desc->accesses);
2974
2975 /* The search is linear but the number of stored accesses is bound by
2976 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2977
2978 for (unsigned i = 0; i < pclen; i++)
2979 if ((*param_desc->accesses)[i]->unit_offset == offset
2980 && (*param_desc->accesses)[i]->unit_size == size)
2981 return (*param_desc->accesses)[i];
2982
2983 return NULL;
2984 }
2985
2986 /* Return true iff the total size SIZE of definite replacements would violate
2987 the limit set for it in DESC. */
2988
2989 static bool
2990 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
2991 {
2992 unsigned limit = desc->param_size_limit;
2993 if (size > limit
2994 || (!desc->by_ref && size == limit))
2995 return true;
2996 return false;
2997 }
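
/* For illustration only (an assumed reading of the rule above): with a
 param_size_limit of 16 units, a by-value parameter can accumulate
 replacements totalling at most 15 units, because replacing all 16 would
 gain nothing over keeping the parameter, while a by-reference parameter
 may use the full 16. */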
2998
2999 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3000 the set limit. IDX is the parameter number which is dumped when
3001 disqualifying. */
3002
3003 static void
3004 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3005 {
3006 unsigned after = desc->size_reached + size;
3007 if (size_would_violate_limit_p (desc, after))
3008 {
3009 if (dump_file && (dump_flags & TDF_DETAILS))
3010 fprintf (dump_file, " ...size limit reached, disqualifying "
3011 "candidate parameter %u\n", idx);
3012 desc->split_candidate = false;
3013 return;
3014 }
3015 desc->size_reached = after;
3016 }
3017
3018 /* Take all actions required to deal with an edge CS that represents a call to
3019 an unknown or un-analyzed function, for both parameter removal and
3020 splitting. */
3021
3022 static void
3023 process_edge_to_unknown_caller (cgraph_edge *cs)
3024 {
3025 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3026 gcc_checking_assert (from_ifs);
3027 isra_call_summary *csum = call_sums->get (cs);
3028
3029 if (dump_file && (dump_flags & TDF_DETAILS))
3030 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3031 cs->caller->dump_name ());
3032
3033 unsigned args_count = csum->m_arg_flow.length ();
3034 for (unsigned i = 0; i < args_count; i++)
3035 {
3036 isra_param_flow *ipf = &csum->m_arg_flow[i];
3037
3038 if (ipf->pointer_pass_through)
3039 {
3040 isra_param_desc *param_desc
3041 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3042 param_desc->locally_unused = false;
3043 param_desc->split_candidate = false;
3044 continue;
3045 }
3046 if (ipf->aggregate_pass_through)
3047 {
3048 unsigned idx = get_single_param_flow_source (ipf);
3049 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3050
3051 param_desc->locally_unused = false;
3052 if (!param_desc->split_candidate)
3053 continue;
3054 gcc_assert (!param_desc->by_ref);
3055 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3056 ipf->unit_size);
3057 gcc_checking_assert (pacc);
3058 pacc->certain = true;
3059 if (overlapping_certain_accesses_p (param_desc, NULL))
3060 {
3061 if (dump_file && (dump_flags & TDF_DETAILS))
3062 fprintf (dump_file, " ...leading to overlap, "
3063 " disqualifying candidate parameter %u\n",
3064 idx);
3065 param_desc->split_candidate = false;
3066 }
3067 else
3068 bump_reached_size (param_desc, pacc->unit_size, idx);
3069 ipf->aggregate_pass_through = false;
3070 continue;
3071 }
3072
3073 for (int j = 0; j < ipf->length; j++)
3074 {
3075 int input_idx = ipf->inputs[j];
3076 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3077 }
3078 }
3079 }
3080
3081 /* Propagate parameter removal information through cross-SCC edge CS, i.e.
3082 mark each caller parameter that feeds an argument used by the callee as
3083 not locally unused. */
3084
3085 static void
3086 param_removal_cross_scc_edge (cgraph_edge *cs)
3087 {
3088 enum availability availability;
3089 cgraph_node *callee = cs->callee->function_symbol (&availability);
3090 isra_func_summary *to_ifs = func_sums->get (callee);
3091 if (!to_ifs || !to_ifs->m_candidate
3092 || (availability < AVAIL_AVAILABLE)
3093 || vec_safe_is_empty (to_ifs->m_parameters))
3094 {
3095 process_edge_to_unknown_caller (cs);
3096 return;
3097 }
3098 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3099 gcc_checking_assert (from_ifs);
3100
3101 isra_call_summary *csum = call_sums->get (cs);
3102 unsigned args_count = csum->m_arg_flow.length ();
3103 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3104
3105 for (unsigned i = 0; i < args_count; i++)
3106 {
3107 bool unused_in_callee;
3108 if (i < param_count)
3109 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3110 else
3111 unused_in_callee = false;
3112
3113 if (!unused_in_callee)
3114 {
3115 isra_param_flow *ipf = &csum->m_arg_flow[i];
3116 for (int j = 0; j < ipf->length; j++)
3117 {
3118 int input_idx = ipf->inputs[j];
3119 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3120 }
3121 }
3122 }
3123 }
3124
3125 /* Unless it is already there, push NODE which is also described by IFS to
3126 STACK. */
3127
3128 static void
3129 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3130 vec<cgraph_node *> *stack)
3131 {
3132 if (!ifs->m_queued)
3133 {
3134 ifs->m_queued = true;
3135 stack->safe_push (node);
3136 }
3137 }
3138
3139 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3140 used and push CALLER on STACK. */
3141
3142 static void
3143 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3144 cgraph_node *caller, vec<cgraph_node *> *stack)
3145 {
3146 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3147 {
3148 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3149 isra_push_node_to_stack (caller, from_ifs, stack);
3150 }
3151 }
3152
3153
3154 /* Propagate the information that a parameter is not used only locally within
3155 an SCC across CS to the caller, which must be in the same SCC as the
3156 callee. Push any callers that need to be re-processed to STACK. */
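
/* For illustration only (a hypothetical SCC, not from GCC sources): if f and
 g call each other and a parameter of f merely forwards its value into a
 parameter that g actually uses, the parameter of f must also be marked as
 used; such marking can ripple around the cycle, which is why callers are
 pushed onto the work-list STACK and re-processed. */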
3157
3158 static void
3159 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3160 {
3161 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3162 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3163 return;
3164
3165 isra_call_summary *csum = call_sums->get (cs);
3166 gcc_checking_assert (csum);
3167 unsigned args_count = csum->m_arg_flow.length ();
3168 enum availability availability;
3169 cgraph_node *callee = cs->callee->function_symbol (&availability);
3170 isra_func_summary *to_ifs = func_sums->get (callee);
3171
3172 unsigned param_count
3173 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3174 ? vec_safe_length (to_ifs->m_parameters) : 0;
3175 for (unsigned i = 0; i < args_count; i++)
3176 {
3177 if (i < param_count
3178 && (*to_ifs->m_parameters)[i].locally_unused)
3179 continue;
3180
3181 /* The argument is needed in the callee, so we must mark the parameter as
3182 used also in the caller and its callers within this SCC. */
3183 isra_param_flow *ipf = &csum->m_arg_flow[i];
3184 for (int j = 0; j < ipf->length; j++)
3185 {
3186 int input_idx = ipf->inputs[j];
3187 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3188 }
3189 }
3190 }
3191
3192 /* Propagate the information that a parameter is not used only locally within
3193 an SCC (i.e. is used also elsewhere) to all callers of NODE that are in
3194 the same SCC. Push any callers that need to be re-processed to STACK. */
3195
3196 static bool
3197 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3198 {
3199 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3200 cgraph_edge *cs;
3201 for (cs = node->callers; cs; cs = cs->next_caller)
3202 if (ipa_edge_within_scc (cs))
3203 propagate_used_across_scc_edge (cs, stack);
3204 return false;
3205 }
3206
3207 /* Return true iff all certain accesses in ARG_DESC are also present as
3208 certain accesses in PARAM_DESC. */
3209
3210 static bool
3211 all_callee_accesses_present_p (isra_param_desc *param_desc,
3212 isra_param_desc *arg_desc)
3213 {
3214 unsigned aclen = vec_safe_length (arg_desc->accesses);
3215 for (unsigned j = 0; j < aclen; j++)
3216 {
3217 param_access *argacc = (*arg_desc->accesses)[j];
3218 if (!argacc->certain)
3219 continue;
3220 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3221 argacc->unit_size);
3222 if (!pacc
3223 || !pacc->certain
3224 || !types_compatible_p (argacc->type, pacc->type))
3225 return false;
3226 }
3227 return true;
3228 }
3229
3230 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3231 does not allow instantiating an auto_vec with a type defined within a
3232 function so it is a global type. */
3233 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3234
3235
3236 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC
3237 (which belongs to CALLER) if they would not violate some constraint there.
3238 If successful, return NULL, otherwise return the string reason for failure
3239 (which can be written to the dump file). DELTA_OFFSET is the known offset
3240 of the actual argument within the formal parameter (so of ARG_DESC within
3241 PARAM_DESC), ARG_SIZE is the size of the actual argument or zero if not
3242 known. In case of success, set *CHANGE_P to true if propagation actually
3243 changed anything. */
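
/* For illustration only (a hypothetical situation, not from GCC sources):
 if the caller passes, as the actual argument, the piece of its own
 aggregate parameter starting at unit offset 4, then DELTA_OFFSET is 4 and
 a certain callee access at unit offset 0 of size 8 is entered into the
 caller descriptor at unit offset 4, size 8. */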
3244
3245 static const char *
3246 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3247 isra_param_desc *arg_desc,
3248 unsigned delta_offset, unsigned arg_size,
3249 bool *change_p)
3250 {
3251 unsigned pclen = vec_safe_length (param_desc->accesses);
3252 unsigned aclen = vec_safe_length (arg_desc->accesses);
3253 unsigned prop_count = 0;
3254 unsigned prop_size = 0;
3255 bool change = false;
3256
3257 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3258 for (unsigned j = 0; j < aclen; j++)
3259 {
3260 param_access *argacc = (*arg_desc->accesses)[j];
3261 prop_kinds.safe_push (ACC_PROP_DONT);
3262
3263 if (arg_size > 0
3264 && argacc->unit_offset + argacc->unit_size > arg_size)
3265 return "callee access outside size boundary";
3266
3267 if (!argacc->certain)
3268 continue;
3269
3270 unsigned offset = argacc->unit_offset + delta_offset;
3271 /* Given that accesses are initially stored according to increasing
3272 offset and decreasing size in case of equal offsets, the following
3273 searches could be written more efficiently if we kept the ordering
3274 when copying. But the number of accesses is capped at
3275 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3276 messy quickly, so let's improve on that only if necessary. */
3277
3278 bool exact_match = false;
3279 for (unsigned i = 0; i < pclen; i++)
3280 {
3281 /* Check for overlaps. */
3282 param_access *pacc = (*param_desc->accesses)[i];
3283 if (pacc->unit_offset == offset
3284 && pacc->unit_size == argacc->unit_size)
3285 {
3286 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3287 || !types_compatible_p (argacc->type, pacc->type))
3288 return "propagated access types would not match existing ones";
3289
3290 exact_match = true;
3291 if (!pacc->certain)
3292 {
3293 prop_kinds[j] = ACC_PROP_CERTAIN;
3294 prop_size += argacc->unit_size;
3295 change = true;
3296 }
3297 continue;
3298 }
3299
3300 if (offset < pacc->unit_offset + pacc->unit_size
3301 && offset + argacc->unit_size > pacc->unit_offset)
3302 {
3303 /* Overlapping a certain (load) access is not permissible;
3304 fitting entirely within an argument one is. */
3305 if (pacc->certain
3306 || offset < pacc->unit_offset
3307 || (offset + argacc->unit_size
3308 > pacc->unit_offset + pacc->unit_size))
3309 return "a propagated access would conflict in caller";
3310 }
3311 }
3312
3313 if (!exact_match)
3314 {
3315 prop_kinds[j] = ACC_PROP_COPY;
3316 prop_count++;
3317 prop_size += argacc->unit_size;
3318 change = true;
3319 }
3320 }
3321
3322 if (!change)
3323 return NULL;
3324
3325 if ((prop_count + pclen
3326 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3327 || size_would_violate_limit_p (param_desc,
3328 param_desc->size_reached + prop_size))
3329 return "propagating accesses would violate the count or size limit";
3330
3331 *change_p = true;
3332 for (unsigned j = 0; j < aclen; j++)
3333 {
3334 if (prop_kinds[j] == ACC_PROP_COPY)
3335 {
3336 param_access *argacc = (*arg_desc->accesses)[j];
3337
3338 param_access *copy = ggc_cleared_alloc<param_access> ();
3339 copy->unit_offset = argacc->unit_offset + delta_offset;
3340 copy->unit_size = argacc->unit_size;
3341 copy->type = argacc->type;
3342 copy->alias_ptr_type = argacc->alias_ptr_type;
3343 copy->certain = true;
3344 vec_safe_push (param_desc->accesses, copy);
3345 }
3346 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3347 {
3348 param_access *argacc = (*arg_desc->accesses)[j];
3349 param_access *csp
3350 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3351 argacc->unit_size);
3352 csp->certain = true;
3353 }
3354 }
3355
3356 param_desc->size_reached += prop_size;
3357
3358 return NULL;
3359 }
3360
3361 /* Propagate parameter splitting information through call graph edge CS.
3362 Return true if any changes that might need to be propagated within SCCs
3363 have been made. The function also clears the aggregate_pass_through and
3364 pointer_pass_through flags in call summaries which do not need to be
3365 processed again if this CS is revisited when iterating while changes are
3366 propagated within an SCC. */
3367
3368 static bool
3369 param_splitting_across_edge (cgraph_edge *cs)
3370 {
3371 bool res = false;
3372 bool cross_scc = !ipa_edge_within_scc (cs);
3373 enum availability availability;
3374 cgraph_node *callee = cs->callee->function_symbol (&availability);
3375 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3376 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3377
3378 isra_call_summary *csum = call_sums->get (cs);
3379 gcc_checking_assert (csum);
3380 unsigned args_count = csum->m_arg_flow.length ();
3381 isra_func_summary *to_ifs = func_sums->get (callee);
3382 unsigned param_count
3383 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3384 ? vec_safe_length (to_ifs->m_parameters)
3385 : 0);
3386
3387 if (dump_file && (dump_flags & TDF_DETAILS))
3388 fprintf (dump_file, "Splitting across %s->%s:\n",
3389 cs->caller->dump_name (), callee->dump_name ());
3390
3391 unsigned i;
3392 for (i = 0; (i < args_count) && (i < param_count); i++)
3393 {
3394 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3395 isra_param_flow *ipf = &csum->m_arg_flow[i];
3396
3397 if (arg_desc->locally_unused)
3398 {
3399 if (dump_file && (dump_flags & TDF_DETAILS))
3400 fprintf (dump_file, " ->%u: unused in callee\n", i);
3401 ipf->pointer_pass_through = false;
3402 continue;
3403 }
3404
3405 if (ipf->pointer_pass_through)
3406 {
3407 int idx = get_single_param_flow_source (ipf);
3408 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3409 if (!param_desc->split_candidate)
3410 continue;
3411 gcc_assert (param_desc->by_ref);
3412
3413 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3414 {
3415 if (dump_file && (dump_flags & TDF_DETAILS))
3416 fprintf (dump_file, " %u->%u: not candidate or not by "
3417 "reference in callee\n", idx, i);
3418 param_desc->split_candidate = false;
3419 ipf->pointer_pass_through = false;
3420 res = true;
3421 }
3422 else if (!ipf->safe_to_import_accesses)
3423 {
3424 if (!all_callee_accesses_present_p (param_desc, arg_desc))
3425 {
3426 if (dump_file && (dump_flags & TDF_DETAILS))
3427 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3428 idx, i);
3429 param_desc->split_candidate = false;
3430 ipf->pointer_pass_through = false;
3431 res = true;
3432
3433 }
3434 else
3435 {
3436 if (dump_file && (dump_flags & TDF_DETAILS))
3437 fprintf (dump_file, " %u->%u: verified callee accesses "
3438 "present.\n", idx, i);
3439 if (cross_scc)
3440 ipf->pointer_pass_through = false;
3441 }
3442 }
3443 else
3444 {
3445 const char *pull_failure
3446 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3447 0, 0, &res);
3448 if (pull_failure)
3449 {
3450 if (dump_file && (dump_flags & TDF_DETAILS))
3451 fprintf (dump_file, " %u->%u: by_ref access pull "
3452 "failed: %s.\n", idx, i, pull_failure);
3453 param_desc->split_candidate = false;
3454 ipf->pointer_pass_through = false;
3455 res = true;
3456 }
3457 else
3458 {
3459 if (dump_file && (dump_flags & TDF_DETAILS))
3460 fprintf (dump_file, " %u->%u: by_ref access pull "
3461 "succeeded.\n", idx, i);
3462 if (cross_scc)
3463 ipf->pointer_pass_through = false;
3464 }
3465 }
3466 }
3467 else if (ipf->aggregate_pass_through)
3468 {
3469 int idx = get_single_param_flow_source (ipf);
3470 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3471 if (!param_desc->split_candidate)
3472 continue;
3473 gcc_assert (!param_desc->by_ref);
3474 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3475 ipf->unit_size);
3476 gcc_checking_assert (pacc);
3477
3478 if (pacc->certain)
3479 {
3480 if (dump_file && (dump_flags & TDF_DETAILS))
3481 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3482 ipf->aggregate_pass_through = false;
3483 }
3484 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3485 {
3486 if (dump_file && (dump_flags & TDF_DETAILS))
3487 fprintf (dump_file, " %u->%u: not candidate or by "
3488 "reference in callee\n", idx, i);
3489
3490 pacc->certain = true;
3491 if (overlapping_certain_accesses_p (param_desc, NULL))
3492 {
3493 if (dump_file && (dump_flags & TDF_DETAILS))
3494 fprintf (dump_file, " ...leading to overlap, "
3495 " disqualifying candidate parameter %u\n",
3496 idx);
3497 param_desc->split_candidate = false;
3498 }
3499 else
3500 bump_reached_size (param_desc, pacc->unit_size, idx);
3501
3502 ipf->aggregate_pass_through = false;
3503 res = true;
3504 }
3505 else
3506 {
3507 const char *pull_failure
3508 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3509 ipf->unit_offset,
3510 ipf->unit_size, &res);
3511 if (pull_failure)
3512 {
3513 if (dump_file && (dump_flags & TDF_DETAILS))
3514 fprintf (dump_file, " %u->%u: arg access pull "
3515 "failed: %s.\n", idx, i, pull_failure);
3516
3517 ipf->aggregate_pass_through = false;
3518 pacc->certain = true;
3519
3520 if (overlapping_certain_accesses_p (param_desc, NULL))
3521 {
3522 if (dump_file && (dump_flags & TDF_DETAILS))
3523 fprintf (dump_file, " ...leading to overlap, "
3524 " disqualifying candidate parameter %u\n",
3525 idx);
3526 param_desc->split_candidate = false;
3527 }
3528 else
3529 bump_reached_size (param_desc, pacc->unit_size, idx);
3530
3531 res = true;
3532 }
3533 else
3534 {
3535 if (dump_file && (dump_flags & TDF_DETAILS))
3536 fprintf (dump_file, " %u->%u: arg access pull "
3537 "succeeded.\n", idx, i);
3538 if (cross_scc)
3539 ipf->aggregate_pass_through = false;
3540 }
3541 }
3542 }
3543 }
3544
3545 /* Handle argument-parameter count mismatches. */
3546 for (; (i < args_count); i++)
3547 {
3548 isra_param_flow *ipf = &csum->m_arg_flow[i];
3549
3550 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3551 {
3552 int idx = get_single_param_flow_source (ipf);
3553 ipf->pointer_pass_through = false;
3554 ipf->aggregate_pass_through = false;
3555 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3556 if (!param_desc->split_candidate)
3557 continue;
3558
3559 if (dump_file && (dump_flags & TDF_DETAILS))
3560 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3561 idx, i);
3562 param_desc->split_candidate = false;
3563 res = true;
3564 }
3565 }
3566 return res;
3567 }
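
/* As a concrete illustration of the propagation implemented above, consider
   this hypothetical source (not taken from the GCC testsuite):

     struct S { int a, b; };

     static int callee (struct S *p)
     {
       return p->a;
     }

     int caller (struct S *p)
     {
       return callee (p);
     }

   The argument of the call in caller is a pure pointer pass-through of
   caller's own parameter, so the access to p->a recorded in callee's summary
   can be pulled into caller's summary by pull_accesses_from_callee above,
   and both by-reference parameters can eventually be replaced with a scalar
   carrying the value of p->a.  */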
3568
3569 /* Worker for call_for_symbol_and_aliases, look at all callers of NODE and if
3570 all of them ignore the return value, or come from the same SCC and use the
3571 return value only to compute their own return value, return false; otherwise
3572 return true. */
3573
3574 static bool
3575 retval_used_p (cgraph_node *node, void *)
3576 {
3577 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3578 {
3579 isra_call_summary *csum = call_sums->get (cs);
3580 gcc_checking_assert (csum);
3581 if (csum->m_return_ignored)
3582 continue;
3583 if (!csum->m_return_returned)
3584 return true;
3585
3586 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3587 if (!from_ifs || !from_ifs->m_candidate)
3588 return true;
3589
3590 if (!ipa_edge_within_scc (cs)
3591 && !from_ifs->m_return_ignored)
3592 return true;
3593 }
3594
3595 return false;
3596 }
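
/* For example, in the following hypothetical program (assuming all three
   functions are analyzed and are candidates), the calls in entry ignore both
   return values and g merely forwards f's return value as its own:

     static int f (int i) { return i * 2; }

     static int g (int i) { return f (i); }

     void entry (int i)
     {
       f (i);
       g (i);
     }

   Consequently retval_used_p returns false first for g and then, because g's
   m_return_ignored has been set by that point of the caller-to-callee sweep,
   also for f, and both functions can be turned into void functions.  */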
3597
3598 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3599 modify the parameter which originally had index BASE_INDEX, which had index
3600 PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any), and
3601 which was described by PREV_ADJUSTMENT. If the parent clone is the original
3602 function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX equals BASE_INDEX. */
3603
3604
3605 static void
3606 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3607 unsigned prev_clone_index,
3608 ipa_adjusted_param *prev_adjustment,
3609 vec<ipa_adjusted_param, va_gc> **new_params)
3610 {
3611 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3612 if (desc->locally_unused)
3613 {
3614 if (dump_file)
3615 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3616 return;
3617 }
3618
3619 if (!desc->split_candidate)
3620 {
3621 ipa_adjusted_param adj;
3622 if (prev_adjustment)
3623 {
3624 adj = *prev_adjustment;
3625 adj.prev_clone_adjustment = true;
3626 adj.prev_clone_index = prev_clone_index;
3627 }
3628 else
3629 {
3630 memset (&adj, 0, sizeof (adj));
3631 adj.op = IPA_PARAM_OP_COPY;
3632 adj.base_index = base_index;
3633 adj.prev_clone_index = prev_clone_index;
3634 }
3635 vec_safe_push ((*new_params), adj);
3636 return;
3637 }
3638
3639 if (dump_file)
3640 fprintf (dump_file, " Will split parameter %u\n", base_index);
3641
3642 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3643 unsigned aclen = vec_safe_length (desc->accesses);
3644 for (unsigned j = 0; j < aclen; j++)
3645 {
3646 param_access *pa = (*desc->accesses)[j];
3647 if (!pa->certain)
3648 continue;
3649 if (dump_file)
3650 fprintf (dump_file, " - component at byte offset %u, "
3651 "size %u\n", pa->unit_offset, pa->unit_size);
3652
3653 ipa_adjusted_param adj;
3654 memset (&adj, 0, sizeof (adj));
3655 adj.op = IPA_PARAM_OP_SPLIT;
3656 adj.base_index = base_index;
3657 adj.prev_clone_index = prev_clone_index;
3658 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3659 adj.reverse = pa->reverse;
3660 adj.type = pa->type;
3661 adj.alias_ptr_type = pa->alias_ptr_type;
3662 adj.unit_offset = pa->unit_offset;
3663 vec_safe_push ((*new_params), adj);
3664 }
3665 }
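
/* As a hypothetical example of the splitting case above, assume
   struct S { int a, b; } with 4-byte ints and a split candidate parameter at
   BASE_INDEX 0 of

     int sum (struct S *p)
     {
       return p->a + p->b;
     }

   with certain accesses at unit offsets 0 and 4. The loop above then pushes
   two IPA_PARAM_OP_SPLIT entries and the created clone has a signature
   equivalent to

     int sum_isra (int a, int b);

   (the real clone is named sum.isra.N, see below).  */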
3666
3667 /* Worker for call_for_symbol_thunks_and_aliases. Set the calls_comdat_local
3668 flag of all callers of NODE. */
3669
3670 static bool
3671 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3672 {
3673 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3674 cs->caller->calls_comdat_local = true;
3675 return false;
3676 }
3677
3678
3679 /* Do final processing of the results of IPA propagation regarding NODE and
3680 clone it if appropriate. */
3681
3682 static void
3683 process_isra_node_results (cgraph_node *node,
3684 hash_map<const char *, unsigned> *clone_num_suffixes)
3685 {
3686 isra_func_summary *ifs = func_sums->get (node);
3687 if (!ifs || !ifs->m_candidate)
3688 return;
3689
3690 auto_vec<bool, 16> surviving_params;
3691 bool check_surviving = false;
3692 clone_info *cinfo = clone_info::get (node);
3693 if (cinfo && cinfo->param_adjustments)
3694 {
3695 check_surviving = true;
3696 cinfo->param_adjustments->get_surviving_params (&surviving_params);
3697 }
3698
3699 unsigned param_count = vec_safe_length (ifs->m_parameters);
3700 bool will_change_function = false;
3701 if (ifs->m_returns_value && ifs->m_return_ignored)
3702 will_change_function = true;
3703 else
3704 for (unsigned i = 0; i < param_count; i++)
3705 {
3706 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3707 if ((desc->locally_unused || desc->split_candidate)
3708 /* Make sure we do not clone just to attempt to remove an already
3709 removed unused argument. */
3710 && (!check_surviving
3711 || (i < surviving_params.length ()
3712 && surviving_params[i])))
3713 {
3714 will_change_function = true;
3715 break;
3716 }
3717 }
3718 if (!will_change_function)
3719 return;
3720
3721 if (dump_file)
3722 {
3723 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
3724 node->dump_name ());
3725 if (ifs->m_returns_value && ifs->m_return_ignored)
3726 fprintf (dump_file, " Will remove return value.\n");
3727 }
3728
3729 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
3730 if (ipa_param_adjustments *old_adjustments
3731 = cinfo ? cinfo->param_adjustments : NULL)
3732 {
3733 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
3734 for (unsigned i = 0; i < old_adj_len; i++)
3735 {
3736 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
3737 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
3738 old_adj, &new_params);
3739 }
3740 }
3741 else
3742 for (unsigned i = 0; i < param_count; i++)
3743 push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);
3744
3745 ipa_param_adjustments *new_adjustments
3746 = (new (ggc_alloc <ipa_param_adjustments> ())
3747 ipa_param_adjustments (new_params, param_count,
3748 ifs->m_returns_value && ifs->m_return_ignored));
3749
3750 if (dump_file && (dump_flags & TDF_DETAILS))
3751 {
3752 fprintf (dump_file, "\n Created adjustments:\n");
3753 new_adjustments->dump (dump_file);
3754 }
3755
3756 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
3757 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3758 node->decl)));
3759 vec<cgraph_edge *> callers = node->collect_callers ();
3760 cgraph_node *new_node
3761 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
3762 suffix_counter);
3763 suffix_counter++;
3764 if (node->calls_comdat_local && node->same_comdat_group)
3765 {
3766 new_node->add_to_same_comdat_group (node);
3767 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
3768 NULL, true);
3769 }
3770 new_node->calls_comdat_local = node->calls_comdat_local;
3771
3772 if (dump_file)
3773 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
3774 callers.release ();
3775 }
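
/* The clones created above are named using the "isra" suffix and a counter
   shared by all clones of the same original assembler name, so a transformed
   function foo typically shows up in dumps and in the symbol table as
   foo.isra.0, foo.isra.1 and so on.  */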
3776
3777 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
3778 and disable transformations for those which have not or which should not be
3779 transformed because the associated debug counter reached its limit. Return
3780 true if none survived or if there were no candidates to begin with. */
3781
3782 static bool
3783 disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
3784 {
3785 bool ret = true;
3786 unsigned len = vec_safe_length (ifs->m_parameters);
3787 if (!len)
3788 return true;
3789
3790 auto_vec<bool, 16> surviving_params;
3791 bool check_surviving = false;
3792 clone_info *cinfo = clone_info::get (node);
3793 if (cinfo && cinfo->param_adjustments)
3794 {
3795 check_surviving = true;
3796 cinfo->param_adjustments->get_surviving_params (&surviving_params);
3797 }
3798 bool dumped_first = false;
3799 for (unsigned i = 0; i < len; i++)
3800 {
3801 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3802 if (!dbg_cnt (ipa_sra_params))
3803 {
3804 desc->locally_unused = false;
3805 desc->split_candidate = false;
3806 }
3807 else if (check_surviving
3808 && (i >= surviving_params.length ()
3809 || !surviving_params[i]))
3810 {
3811 /* Even if the parameter was removed by a previous IPA pass, we do
3812 not clear locally_unused because if it really is unused, this
3813 information might be useful in callers. */
3814 desc->split_candidate = false;
3815
3816 if (dump_file && (dump_flags & TDF_DETAILS))
3817 {
3818 if (!dumped_first)
3819 {
3820 fprintf (dump_file,
3821 "The following parameters of %s are dead on "
3822 "arrival:", node->dump_name ());
3823 dumped_first = true;
3824 }
3825 fprintf (dump_file, " %u", i);
3826 }
3827 }
3828 else if (desc->locally_unused || desc->split_candidate)
3829 ret = false;
3830 }
3831
3832 if (dumped_first)
3833 fprintf (dump_file, "\n");
3834
3835 return ret;
3836 }
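
/* The debug counter queried above can be used to bisect which particular
   IPA-SRA parameter decision breaks a miscompiled program, for instance with
   a hypothetical invocation such as

     gcc -O2 -fdbg-cnt=ipa_sra_params:1-7 test.c

   (see the documentation of -fdbg-cnt for the exact range syntax of the GCC
   version at hand); the ipa_sra_retvalues counter used during the IPA sweep
   below limits return value removal in the same way.  */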
3837
3838
3839 /* Run the interprocedural part of IPA-SRA. */
3840
3841 static unsigned int
3842 ipa_sra_analysis (void)
3843 {
3844 if (dump_file)
3845 {
3846 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
3847 ipa_sra_dump_all_summaries (dump_file);
3848 }
3849
3850 gcc_checking_assert (func_sums);
3851 gcc_checking_assert (call_sums);
3852 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
3853 auto_vec <cgraph_node *, 16> stack;
3854 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
3855
3856 /* One sweep from callees to callers for parameter removal and splitting. */
3857 for (int i = 0; i < node_scc_count; i++)
3858 {
3859 cgraph_node *scc_rep = order[i];
3860 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3861 unsigned j;
3862
3863 /* Preliminary IPA function level checks and first step of parameter
3864 removal. */
3865 cgraph_node *v;
3866 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3867 {
3868 isra_func_summary *ifs = func_sums->get (v);
3869 if (!ifs || !ifs->m_candidate)
3870 continue;
3871 if (!ipa_sra_ipa_function_checks (v)
3872 || check_all_callers_for_issues (v))
3873 {
3874 ifs->zap ();
3875 continue;
3876 }
3877 if (disable_unavailable_parameters (v, ifs))
3878 continue;
3879 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
3880 process_edge_to_unknown_caller (cs);
3881 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3882 if (!ipa_edge_within_scc (cs))
3883 param_removal_cross_scc_edge (cs);
3884 }
3885
3886 /* Look at edges within the current SCC and propagate used-ness across
3887 them, pushing onto the stack all nodes which might need to be
3888 revisited. */
3889 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3890 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3891 &stack, true);
3892
3893 /* Keep revisiting and pushing until nothing changes. */
3894 while (!stack.is_empty ())
3895 {
3896 cgraph_node *v = stack.pop ();
3897 isra_func_summary *ifs = func_sums->get (v);
3898 gcc_checking_assert (ifs && ifs->m_queued);
3899 ifs->m_queued = false;
3900
3901 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3902 &stack, true);
3903 }
3904
3905 /* Parameter splitting. */
3906 bool repeat_scc_access_propagation;
3907 do
3908 {
3909 repeat_scc_access_propagation = false;
3910 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3911 {
3912 isra_func_summary *ifs = func_sums->get (v);
3913 if (!ifs
3914 || !ifs->m_candidate
3915 || vec_safe_is_empty (ifs->m_parameters))
3916 continue;
3917 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3918 if (param_splitting_across_edge (cs))
3919 repeat_scc_access_propagation = true;
3920 }
3921 }
3922 while (repeat_scc_access_propagation);
3923
3924 if (flag_checking)
3925 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3926 verify_splitting_accesses (v, true);
3927
3928 cycle_nodes.release ();
3929 }
3930
3931 /* One sweep from callers to callees for result removal. */
3932 for (int i = node_scc_count - 1; i >= 0; i--)
3933 {
3934 cgraph_node *scc_rep = order[i];
3935 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3936 unsigned j;
3937
3938 cgraph_node *v;
3939 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3940 {
3941 isra_func_summary *ifs = func_sums->get (v);
3942 if (!ifs || !ifs->m_candidate)
3943 continue;
3944
3945 bool return_needed
3946 = (ifs->m_returns_value
3947 && (!dbg_cnt (ipa_sra_retvalues)
3948 || v->call_for_symbol_and_aliases (retval_used_p,
3949 NULL, true)));
3950 ifs->m_return_ignored = !return_needed;
3951 if (return_needed)
3952 isra_push_node_to_stack (v, ifs, &stack);
3953 }
3954
3955 while (!stack.is_empty ())
3956 {
3957 cgraph_node *node = stack.pop ();
3958 isra_func_summary *ifs = func_sums->get (node);
3959 gcc_checking_assert (ifs && ifs->m_queued);
3960 ifs->m_queued = false;
3961
3962 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3963 if (ipa_edge_within_scc (cs)
3964 && call_sums->get (cs)->m_return_returned)
3965 {
3966 enum availability av;
3967 cgraph_node *callee = cs->callee->function_symbol (&av);
3968 isra_func_summary *to_ifs = func_sums->get (callee);
3969 if (to_ifs && to_ifs->m_return_ignored)
3970 {
3971 to_ifs->m_return_ignored = false;
3972 isra_push_node_to_stack (callee, to_ifs, &stack);
3973 }
3974 }
3975 }
3976 cycle_nodes.release ();
3977 }
3978
3979 ipa_free_postorder_info ();
3980 free (order);
3981
3982 if (dump_file)
3983 {
3984 if (dump_flags & TDF_DETAILS)
3985 {
3986 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
3987 " ==========\n");
3988 ipa_sra_dump_all_summaries (dump_file);
3989 }
3990 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
3991 }
3992
3993 hash_map<const char *, unsigned> *clone_num_suffixes
3994 = new hash_map<const char *, unsigned>;
3995
3996 cgraph_node *node;
3997 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
3998 process_isra_node_results (node, clone_num_suffixes);
3999
4000 delete clone_num_suffixes;
4001 ggc_delete (func_sums);
4002 func_sums = NULL;
4003 delete call_sums;
4004 call_sums = NULL;
4005
4006 if (dump_file)
4007 fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
4008 "==========\n\n");
4009 return 0;
4010 }
4011
4012
4013 const pass_data pass_data_ipa_sra =
4014 {
4015 IPA_PASS, /* type */
4016 "sra", /* name */
4017 OPTGROUP_NONE, /* optinfo_flags */
4018 TV_IPA_SRA, /* tv_id */
4019 0, /* properties_required */
4020 0, /* properties_provided */
4021 0, /* properties_destroyed */
4022 0, /* todo_flags_start */
4023 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4024 };
4025
4026 class pass_ipa_sra : public ipa_opt_pass_d
4027 {
4028 public:
4029 pass_ipa_sra (gcc::context *ctxt)
4030 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4031 ipa_sra_generate_summary, /* generate_summary */
4032 ipa_sra_write_summary, /* write_summary */
4033 ipa_sra_read_summary, /* read_summary */
4034 NULL, /* write_optimization_summary */
4035 NULL, /* read_optimization_summary */
4036 NULL, /* stmt_fixup */
4037 0, /* function_transform_todo_flags_start */
4038 NULL, /* function_transform */
4039 NULL) /* variable_transform */
4040 {}
4041
4042 /* opt_pass methods: */
4043 virtual bool gate (function *)
4044 {
4045 /* TODO: We should remove the optimize check after we ensure we never run
4046 IPA passes when not optimizing. */
4047 return (flag_ipa_sra && optimize);
4048 }
4049
4050 virtual unsigned int execute (function *) { return ipa_sra_analysis (); }
4051
4052 }; // class pass_ipa_sra
4053
4054 } // anon namespace
4055
4056 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4057 create a summary structure describing IPA-SRA opportunities and constraints
4058 in it. */
4059
4060 static void
4061 ipa_sra_summarize_function (cgraph_node *node)
4062 {
4063 if (dump_file)
4064 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4065 node->order);
4066 if (!ipa_sra_preliminary_function_checks (node))
4067 return;
4068 gcc_obstack_init (&gensum_obstack);
4069 isra_func_summary *ifs = func_sums->get_create (node);
4070 ifs->m_candidate = true;
4071 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4072 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4073
4074 decl2desc = new hash_map<tree, gensum_param_desc *>;
4075 unsigned count = 0;
4076 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
4077 count++;
4078
4079 if (count > 0)
4080 {
4081 auto_vec<gensum_param_desc, 16> param_descriptions (count);
4082 param_descriptions.reserve_exact (count);
4083 param_descriptions.quick_grow_cleared (count);
4084
4085 bool cfun_pushed = false;
4086 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4087 if (create_parameter_descriptors (node, &param_descriptions))
4088 {
4089 push_cfun (fun);
4090 cfun_pushed = true;
4091 final_bbs = BITMAP_ALLOC (NULL);
4092 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
4093 by_ref_count
4094 * last_basic_block_for_fn (fun));
4095 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
4096 scan_function (node, fun);
4097
4098 if (dump_file)
4099 {
4100 dump_gensum_param_descriptors (dump_file, node->decl,
4101 &param_descriptions);
4102 fprintf (dump_file, "----------------------------------------\n");
4103 }
4104 }
4105 process_scan_results (node, fun, ifs, &param_descriptions);
4106
4107 if (cfun_pushed)
4108 pop_cfun ();
4109 if (bb_dereferences)
4110 {
4111 free (bb_dereferences);
4112 bb_dereferences = NULL;
4113 BITMAP_FREE (final_bbs);
4114 final_bbs = NULL;
4115 }
4116 }
4117 isra_analyze_all_outgoing_calls (node);
4118
4119 delete decl2desc;
4120 decl2desc = NULL;
4121 obstack_free (&gensum_obstack, NULL);
4122 if (dump_file)
4123 fprintf (dump_file, "\n\n");
4124 if (flag_checking)
4125 verify_splitting_accesses (node, false);
4126 return;
4127 }
4128
4129 ipa_opt_pass_d *
4130 make_pass_ipa_sra (gcc::context *ctxt)
4131 {
4132 return new pass_ipa_sra (ctxt);
4133 }
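
/* The summaries, the propagation results and the final decisions described
   throughout this file can be inspected by requesting the detailed dump of
   this pass, e.g. with a hypothetical invocation like

     gcc -O2 -fipa-sra -fdump-ipa-sra-details test.c

   which writes them into a dump file whose name ends in ".sra" (the flag is
   normally enabled by default at -O2, -O3 and -Os, so spelling it out is
   only for emphasis).  */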
4134
4135
4136 #include "gt-ipa-sra.h"