/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2021 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "ssa.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,   /* Use divide estimate.  */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE       = 0,
  RECIP_ALL        = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                      | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                      | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;   /* option name.  */
  unsigned int mask;    /* mask bits to set.  */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
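/* Worked example (illustrative command line, not required by this table):
   -mrecip=divf,rsqrtd ORs together the "divf" and "rsqrtd" masks above,
   giving RECIP_SF_DIV | RECIP_V4SF_DIV | RECIP_DF_RSQRT | RECIP_V2DF_RSQRT,
   so only single-precision divide and double-precision rsqrt estimates are
   generated automatically.  */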

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,            /* default clone.  */
  CLONE_ISA_2_05,               /* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,               /* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,               /* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,               /* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,                /* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;       /* rs6000_isa mask.  */
  const char *name;             /* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },                                /* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },        /* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },     /* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },   /* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },   /* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10, "arch_3_1" },      /* ISA 3.1 (power10).  */
};
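/* As an illustration (hypothetical user code, not part of this file), a
   function declared with
     __attribute__ ((target_clones ("cpu=power9", "default")))
   gets one clone per requested option; at load time the resolver picks the
   best clone whose HWCAP name above (e.g. "arch_3_00") is accepted by
   __builtin_cpu_supports on the running machine.  */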


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
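/* For example, ALTIVEC_REG_TYPE satisfies both macros, GPR_REG_TYPE satisfies
   only IS_STD_REG_TYPE, and SPR_REG_TYPE satisfies neither, because the enum
   above places the standard types in the range GPR_REG_TYPE..FPR_REG_TYPE and
   the floating/vector types in VSX_REG_TYPE..FPR_REG_TYPE.  */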


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX.  */
  { "Any", -1 },                        /* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET  0x80    /* quad offset is limited.  */

/* Per-mode addressing information: the reload insns needed, plus masks of the
   valid addressing modes for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;           /* INSN to reload for loading.  */
  enum insn_code reload_store;          /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;        /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;        /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;        /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];  /* Valid address masks.  */
  bool scalar_in_vmx_p;                 /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}
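/* Illustrative sketch only (the concrete mask values depend on -mcpu and
   other flags set during option override): on an ISA 3.0 (power9) target,
   reg_addr[V16QImode].addr_mask[RELOAD_REG_VMX] would typically contain
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET plus
   RELOAD_REG_QUAD_OFFSET in the ANY mask, so both mode_supports_vmx_dform
   and mode_supports_dq_form return true for V16QImode there.  */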

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  return store_data_bypass_p (out_insn, in_insn);
}
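/* A concrete (hypothetical) example of the decision being made: if OUT_INSN
   is "r10 = r3 + r4" and IN_INSN is "[r9] = r10", the produced value is the
   store *data* and the bypass can apply; if instead IN_INSN is "[r10] = r5",
   r10 feeds the store *address* and store_data_bypass_p must return false.
   The checks above merely filter out PARALLEL shapes that would make the
   generic routine assert before it can answer that question.  */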

\f
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
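/* One of these tables is selected while processing the command line: the
   option-override code later in this file points rs6000_cost at the entry
   matching -mcpu (for instance -mcpu=power9 uses power9_cost, and
   optimizing for size uses size32_cost or size64_cost), so all of the cost
   queries in this backend go through that single pointer.  */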

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
                                                  reg_class_t,
                                                  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
                                                machine_mode,
                                                reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
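/* Sketch of how the table is used (behavior summarized from the TOC-output
   code elsewhere in this backend): when two functions reference the same
   constant, say the double 3.0, the (key, key_mode) pair hashes to the same
   toc_hash_struct, so both references share one TOC label instead of
   emitting a duplicate entry.  */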


\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
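/* Example of the difference (assuming -mregnames is what selects
   alt_reg_names, as the macro name suggests): an add that is emitted as
   "add 3,3,4" with the default names above comes out as "add %r3,%r3,%r4"
   with the alternate names, making registers visually distinct from
   immediate operands in the assembly output.  */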

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

   addis tmp,anchor,high
   add dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1671
1672 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1673 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1674
1675 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1676 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1677
1678 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1679 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1680 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1681 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1682 #undef TARGET_UNWIND_WORD_MODE
1683 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1684
1685 #undef TARGET_OFFLOAD_OPTIONS
1686 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1687
1688 #undef TARGET_C_MODE_FOR_SUFFIX
1689 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1690
1691 #undef TARGET_INVALID_BINARY_OP
1692 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1693
1694 #undef TARGET_OPTAB_SUPPORTED_P
1695 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1696
1697 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1698 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1699
1700 #undef TARGET_COMPARE_VERSION_PRIORITY
1701 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1702
1703 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1704 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1705 rs6000_generate_version_dispatcher_body
1706
1707 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1708 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1709 rs6000_get_function_versions_dispatcher
1710
1711 #undef TARGET_OPTION_FUNCTION_VERSIONS
1712 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1713
1714 #undef TARGET_HARD_REGNO_NREGS
1715 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1716 #undef TARGET_HARD_REGNO_MODE_OK
1717 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1718
1719 #undef TARGET_MODES_TIEABLE_P
1720 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1721
1722 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1723 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1724 rs6000_hard_regno_call_part_clobbered
1725
1726 #undef TARGET_SLOW_UNALIGNED_ACCESS
1727 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1728
1729 #undef TARGET_CAN_CHANGE_MODE_CLASS
1730 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1731
1732 #undef TARGET_CONSTANT_ALIGNMENT
1733 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1734
1735 #undef TARGET_STARTING_FRAME_OFFSET
1736 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1737
1738 #if TARGET_ELF && RS6000_WEAK
1739 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1740 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1741 #endif
1742
1743 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1744 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1745
1746 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1747 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1748
1749 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1750 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1751 rs6000_cannot_substitute_mem_equiv_p
1752
1753 #undef TARGET_INVALID_CONVERSION
1754 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1755 \f
1756
1757 /* Processor table. */
1758 struct rs6000_ptt
1759 {
1760 const char *const name; /* Canonical processor name. */
1761 const enum processor_type processor; /* Processor type enum value. */
1762 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1763 };
1764
1765 static struct rs6000_ptt const processor_target_table[] =
1766 {
1767 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1768 #include "rs6000-cpus.def"
1769 #undef RS6000_CPU
1770 };
1771
1772 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1773 name is invalid. */
1774
1775 static int
1776 rs6000_cpu_name_lookup (const char *name)
1777 {
1778 size_t i;
1779
1780 if (name != NULL)
1781 {
1782 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1783 if (! strcmp (name, processor_target_table[i].name))
1784 return (int)i;
1785 }
1786
1787 return -1;
1788 }
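/* Editorial sketch (not from the original source): assuming rs6000-cpus.def
   defines an entry named "power9", the lookup behaves like:

     rs6000_cpu_name_lookup ("power9")  => table index of the "power9" entry
     rs6000_cpu_name_lookup ("power99") => -1 (no such entry)
     rs6000_cpu_name_lookup (NULL)      => -1

   The comparison is an exact strcmp, so the name must match the table
   spelling exactly.  */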
1789
1790 \f
1791 /* Return number of consecutive hard regs needed starting at reg REGNO
1792 to hold something of mode MODE.
1793 This is ordinarily the length in words of a value of mode MODE
1794 but can be less for certain modes in special long registers.
1795
1796 POWER and PowerPC GPRs hold 32 bits worth;
1797 	   PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1798
1799 static int
1800 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1801 {
1802 unsigned HOST_WIDE_INT reg_size;
1803
1804 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1805 128-bit floating point that can go in vector registers, which has VSX
1806 memory addressing. */
1807 if (FP_REGNO_P (regno))
1808 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1809 ? UNITS_PER_VSX_WORD
1810 : UNITS_PER_FP_WORD);
1811
1812 else if (ALTIVEC_REGNO_P (regno))
1813 reg_size = UNITS_PER_ALTIVEC_WORD;
1814
1815 else
1816 reg_size = UNITS_PER_WORD;
1817
1818 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1819 }
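/* Editorial worked example: the return value is a round-up divide,
   (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size.  For instance:

     DFmode   (8 bytes)  in an FPR (reg_size 8)           => 1 register
     IFmode   (16 bytes) in an FPR (reg_size 8)           => 2 registers
     V4SImode (16 bytes) in an Altivec reg (reg_size 16)  => 1 register
     DImode   (8 bytes)  in a 32-bit GPR (reg_size 4)     => 2 registers  */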
1820
1821 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1822 MODE. */
1823 static int
1824 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1825 {
1826 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1827
1828 if (COMPLEX_MODE_P (mode))
1829 mode = GET_MODE_INNER (mode);
1830
1831 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1832 registers. */
1833 if (mode == OOmode)
1834 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1835
1836 	  /* MMA accumulator modes need an FPR register number divisible by 4.  */
1837 if (mode == XOmode)
1838 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1839
1840 	  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
1841 	     register combinations, and we use PTImode where we need to deal with quad
1842 	     word memory operations.  Don't allow quad words in the argument or frame
1843 	     pointer registers, just registers 0..31.  */
1844 if (mode == PTImode)
1845 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1846 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1847 && ((regno & 1) == 0));
1848
1849 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1850 implementations. Don't allow an item to be split between a FP register
1851 and an Altivec register. Allow TImode in all VSX registers if the user
1852 asked for it. */
1853 if (TARGET_VSX && VSX_REGNO_P (regno)
1854 && (VECTOR_MEM_VSX_P (mode)
1855 || VECTOR_ALIGNMENT_P (mode)
1856 || reg_addr[mode].scalar_in_vmx_p
1857 || mode == TImode
1858 || (TARGET_VADDUQM && mode == V1TImode)))
1859 {
1860 if (FP_REGNO_P (regno))
1861 return FP_REGNO_P (last_regno);
1862
1863 if (ALTIVEC_REGNO_P (regno))
1864 {
1865 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1866 return 0;
1867
1868 return ALTIVEC_REGNO_P (last_regno);
1869 }
1870 }
1871
1872 /* The GPRs can hold any mode, but values bigger than one register
1873 cannot go past R31. */
1874 if (INT_REGNO_P (regno))
1875 return INT_REGNO_P (last_regno);
1876
1877 /* The float registers (except for VSX vector modes) can only hold floating
1878 modes and DImode. */
1879 if (FP_REGNO_P (regno))
1880 {
1881 if (VECTOR_ALIGNMENT_P (mode))
1882 return false;
1883
1884 if (SCALAR_FLOAT_MODE_P (mode)
1885 && (mode != TDmode || (regno % 2) == 0)
1886 && FP_REGNO_P (last_regno))
1887 return 1;
1888
1889 if (GET_MODE_CLASS (mode) == MODE_INT)
1890 {
1891 	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1892 return 1;
1893
1894 if (TARGET_P8_VECTOR && (mode == SImode))
1895 return 1;
1896
1897 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1898 return 1;
1899 }
1900
1901 return 0;
1902 }
1903
1904 /* The CR register can only hold CC modes. */
1905 if (CR_REGNO_P (regno))
1906 return GET_MODE_CLASS (mode) == MODE_CC;
1907
1908 if (CA_REGNO_P (regno))
1909 return mode == Pmode || mode == SImode;
1910
1911 	  /* AltiVec modes only in AltiVec registers.  */
1912 if (ALTIVEC_REGNO_P (regno))
1913 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1914 || mode == V1TImode);
1915
1916 	  /* We cannot put non-VSX TImode or PTImode anywhere except the general
1917 	     registers, and the value must fit within the register set.  */
1918
1919 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1920 }
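/* Editorial examples of the checks above:

     - TDmode requires an even FPR, so (FPR 32, TDmode) is allowed while
       (FPR 33, TDmode) fails the (regno % 2) == 0 test.
     - SImode fits in an FPR only when TARGET_P8_VECTOR is enabled, since
       its 4-byte size is smaller than UNITS_PER_FP_WORD.
     - A CR register accepts only condition-code (MODE_CC) modes.  */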
1921
1922 /* Implement TARGET_HARD_REGNO_NREGS. */
1923
1924 static unsigned int
1925 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1926 {
1927 return rs6000_hard_regno_nregs[mode][regno];
1928 }
1929
1930 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1931
1932 static bool
1933 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1934 {
1935 return rs6000_hard_regno_mode_ok_p[mode][regno];
1936 }
1937
1938 /* Implement TARGET_MODES_TIEABLE_P.
1939
1940 PTImode cannot tie with other modes because PTImode is restricted to even
1941 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1942 57744).
1943
1944 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1945 registers) or XOmode (vector quad, restricted to FPR registers divisible
1946 by 4) to tie with other modes.
1947
1948 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1949 128-bit floating point on VSX systems ties with other vectors. */
1950
1951 static bool
1952 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1953 {
1954 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1955 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1956 return mode1 == mode2;
1957
1958 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1959 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1960 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1961 return false;
1962
1963 if (SCALAR_FLOAT_MODE_P (mode1))
1964 return SCALAR_FLOAT_MODE_P (mode2);
1965 if (SCALAR_FLOAT_MODE_P (mode2))
1966 return false;
1967
1968 if (GET_MODE_CLASS (mode1) == MODE_CC)
1969 return GET_MODE_CLASS (mode2) == MODE_CC;
1970 if (GET_MODE_CLASS (mode2) == MODE_CC)
1971 return false;
1972
1973 return true;
1974 }
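/* Editorial examples:

     rs6000_modes_tieable_p (SFmode, DFmode)     => true (both scalar float)
     rs6000_modes_tieable_p (V4SImode, V2DFmode) => true (both vector)
     rs6000_modes_tieable_p (SImode, SFmode)     => false (integer vs. float)
     rs6000_modes_tieable_p (TImode, PTImode)    => false (PTImode ties only
						   with itself)  */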
1975
1976 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1977
1978 static bool
1979 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1980 machine_mode mode)
1981 {
1982 if (TARGET_32BIT
1983 && TARGET_POWERPC64
1984 && GET_MODE_SIZE (mode) > 4
1985 && INT_REGNO_P (regno))
1986 return true;
1987
1988 if (TARGET_VSX
1989 && FP_REGNO_P (regno)
1990 && GET_MODE_SIZE (mode) > 8
1991 && !FLOAT128_2REG_P (mode))
1992 return true;
1993
1994 return false;
1995 }
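/* Editorial example: with -m32 -mpowerpc64, a DImode value sits in one
   64-bit GPR, but the 32-bit ABI only preserves the low 32 bits of that
   GPR across calls, so the register is only partly preserved.  Similarly,
   under VSX only the traditional 64-bit FPR half of a call-saved VSX
   register is preserved, so any value wider than 8 bytes in FPRs is part
   clobbered; IBM 128-bit float (FLOAT128_2REG_P) is exempt because its
   two halves live entirely in the preserved FPR halves.  */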
1996
1997 /* Print interesting facts about registers. */
1998 static void
1999 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2000 {
2001 int r, m;
2002
2003 for (r = first_regno; r <= last_regno; ++r)
2004 {
2005 const char *comma = "";
2006 int len;
2007
2008 if (first_regno == last_regno)
2009 fprintf (stderr, "%s:\t", reg_name);
2010 else
2011 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2012
2013 len = 8;
2014 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2015 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2016 {
2017 if (len > 70)
2018 {
2019 fprintf (stderr, ",\n\t");
2020 len = 8;
2021 comma = "";
2022 }
2023
2024 if (rs6000_hard_regno_nregs[m][r] > 1)
2025 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2026 rs6000_hard_regno_nregs[m][r]);
2027 else
2028 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2029
2030 comma = ", ";
2031 }
2032
2033 if (call_used_or_fixed_reg_p (r))
2034 {
2035 if (len > 70)
2036 {
2037 fprintf (stderr, ",\n\t");
2038 len = 8;
2039 comma = "";
2040 }
2041
2042 len += fprintf (stderr, "%s%s", comma, "call-used");
2043 comma = ", ";
2044 }
2045
2046 if (fixed_regs[r])
2047 {
2048 if (len > 70)
2049 {
2050 fprintf (stderr, ",\n\t");
2051 len = 8;
2052 comma = "";
2053 }
2054
2055 len += fprintf (stderr, "%s%s", comma, "fixed");
2056 comma = ", ";
2057 }
2058
2059 if (len > 70)
2060 {
2061 fprintf (stderr, ",\n\t");
2062 comma = "";
2063 }
2064
2065 len += fprintf (stderr, "%sreg-class = %s", comma,
2066 reg_class_names[(int)rs6000_regno_regclass[r]]);
2067 comma = ", ";
2068
2069 if (len > 70)
2070 {
2071 fprintf (stderr, ",\n\t");
2072 comma = "";
2073 }
2074
2075 fprintf (stderr, "%sregno = %d\n", comma, r);
2076 }
2077 }
2078
2079 static const char *
2080 rs6000_debug_vector_unit (enum rs6000_vector v)
2081 {
2082 const char *ret;
2083
2084 switch (v)
2085 {
2086 case VECTOR_NONE: ret = "none"; break;
2087 case VECTOR_ALTIVEC: ret = "altivec"; break;
2088 case VECTOR_VSX: ret = "vsx"; break;
2089 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2090 default: ret = "unknown"; break;
2091 }
2092
2093 return ret;
2094 }
2095
2096 /* Inner function printing just the address mask for a particular reload
2097 register class. */
2098 DEBUG_FUNCTION char *
2099 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2100 {
2101 static char ret[8];
2102 char *p = ret;
2103
2104 if ((mask & RELOAD_REG_VALID) != 0)
2105 *p++ = 'v';
2106 else if (keep_spaces)
2107 *p++ = ' ';
2108
2109 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2110 *p++ = 'm';
2111 else if (keep_spaces)
2112 *p++ = ' ';
2113
2114 if ((mask & RELOAD_REG_INDEXED) != 0)
2115 *p++ = 'i';
2116 else if (keep_spaces)
2117 *p++ = ' ';
2118
2119 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2120 *p++ = 'O';
2121 else if ((mask & RELOAD_REG_OFFSET) != 0)
2122 *p++ = 'o';
2123 else if (keep_spaces)
2124 *p++ = ' ';
2125
2126 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2127 *p++ = '+';
2128 else if (keep_spaces)
2129 *p++ = ' ';
2130
2131 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2132 *p++ = '+';
2133 else if (keep_spaces)
2134 *p++ = ' ';
2135
2136 if ((mask & RELOAD_REG_AND_M16) != 0)
2137 *p++ = '&';
2138 else if (keep_spaces)
2139 *p++ = ' ';
2140
2141 *p = '\0';
2142
2143 return ret;
2144 }
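/* Editorial example: a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET set prints as "vio" when KEEP_SPACES is false, or as
   "v io   " when KEEP_SPACES is true (one column per flag, in the order
   v, m, i, O/o, +, +, &).  */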
2145
2146 	/* Print the address masks in a human readable fashion.  */
2147 DEBUG_FUNCTION void
2148 rs6000_debug_print_mode (ssize_t m)
2149 {
2150 ssize_t rc;
2151 int spaces = 0;
2152
2153 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2154 for (rc = 0; rc < N_RELOAD_REG; rc++)
2155 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2156 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2157
2158 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2159 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2160 {
2161 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2162 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2163 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2164 spaces = 0;
2165 }
2166 else
2167 spaces += strlen (" Reload=sl");
2168
2169 if (reg_addr[m].scalar_in_vmx_p)
2170 {
2171 fprintf (stderr, "%*s Upper=y", spaces, "");
2172 spaces = 0;
2173 }
2174 else
2175 spaces += strlen (" Upper=y");
2176
2177 if (rs6000_vector_unit[m] != VECTOR_NONE
2178 || rs6000_vector_mem[m] != VECTOR_NONE)
2179 {
2180 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2181 spaces, "",
2182 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2183 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2184 }
2185
2186 fputs ("\n", stderr);
2187 }
2188
2189 #define DEBUG_FMT_ID "%-32s= "
2190 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2191 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2192 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2193
2194 /* Print various interesting information with -mdebug=reg. */
2195 static void
2196 rs6000_debug_reg_global (void)
2197 {
2198 static const char *const tf[2] = { "false", "true" };
2199 const char *nl = (const char *)0;
2200 int m;
2201 size_t m1, m2, v;
2202 char costly_num[20];
2203 char nop_num[20];
2204 char flags_buffer[40];
2205 const char *costly_str;
2206 const char *nop_str;
2207 const char *trace_str;
2208 const char *abi_str;
2209 const char *cmodel_str;
2210 struct cl_target_option cl_opts;
2211
2212 /* Modes we want tieable information on. */
2213 static const machine_mode print_tieable_modes[] = {
2214 QImode,
2215 HImode,
2216 SImode,
2217 DImode,
2218 TImode,
2219 PTImode,
2220 SFmode,
2221 DFmode,
2222 TFmode,
2223 IFmode,
2224 KFmode,
2225 SDmode,
2226 DDmode,
2227 TDmode,
2228 V2SImode,
2229 V2SFmode,
2230 V16QImode,
2231 V8HImode,
2232 V4SImode,
2233 V2DImode,
2234 V1TImode,
2235 V32QImode,
2236 V16HImode,
2237 V8SImode,
2238 V4DImode,
2239 V2TImode,
2240 V4SFmode,
2241 V2DFmode,
2242 V8SFmode,
2243 V4DFmode,
2244 OOmode,
2245 XOmode,
2246 CCmode,
2247 CCUNSmode,
2248 CCEQmode,
2249 CCFPmode,
2250 };
2251
2252 /* Virtual regs we are interested in. */
2253 	  static const struct {
2254 int regno; /* register number. */
2255 const char *name; /* register name. */
2256 } virtual_regs[] = {
2257 { STACK_POINTER_REGNUM, "stack pointer:" },
2258 { TOC_REGNUM, "toc: " },
2259 { STATIC_CHAIN_REGNUM, "static chain: " },
2260 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2261 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2262 { ARG_POINTER_REGNUM, "arg pointer: " },
2263 { FRAME_POINTER_REGNUM, "frame pointer:" },
2264 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2265 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2266 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2267 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2268 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2269 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2270 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2271 	    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2272 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2273 };
2274
2275 fputs ("\nHard register information:\n", stderr);
2276 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2277 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2278 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2279 LAST_ALTIVEC_REGNO,
2280 "vs");
2281 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2282 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2283 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2284 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2285 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2286 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2287
2288 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2289 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2290 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2291
2292 fprintf (stderr,
2293 "\n"
2294 "d reg_class = %s\n"
2295 "f reg_class = %s\n"
2296 "v reg_class = %s\n"
2297 "wa reg_class = %s\n"
2298 "we reg_class = %s\n"
2299 "wr reg_class = %s\n"
2300 "wx reg_class = %s\n"
2301 "wA reg_class = %s\n"
2302 "\n",
2303 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2304 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2305 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2306 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2307 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2308 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2309 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2310 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2311
2312 nl = "\n";
2313 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2314 rs6000_debug_print_mode (m);
2315
2316 fputs ("\n", stderr);
2317
2318 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2319 {
2320 machine_mode mode1 = print_tieable_modes[m1];
2321 bool first_time = true;
2322
2323 nl = (const char *)0;
2324 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2325 {
2326 machine_mode mode2 = print_tieable_modes[m2];
2327 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2328 {
2329 if (first_time)
2330 {
2331 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2332 nl = "\n";
2333 first_time = false;
2334 }
2335
2336 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2337 }
2338 }
2339
2340 if (!first_time)
2341 fputs ("\n", stderr);
2342 }
2343
2344 if (nl)
2345 fputs (nl, stderr);
2346
2347 if (rs6000_recip_control)
2348 {
2349 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2350
2351 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2352 if (rs6000_recip_bits[m])
2353 {
2354 fprintf (stderr,
2355 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2356 GET_MODE_NAME (m),
2357 (RS6000_RECIP_AUTO_RE_P (m)
2358 ? "auto"
2359 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2360 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2361 ? "auto"
2362 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2363 }
2364
2365 fputs ("\n", stderr);
2366 }
2367
2368 if (rs6000_cpu_index >= 0)
2369 {
2370 const char *name = processor_target_table[rs6000_cpu_index].name;
2371 HOST_WIDE_INT flags
2372 = processor_target_table[rs6000_cpu_index].target_enable;
2373
2374 sprintf (flags_buffer, "-mcpu=%s flags", name);
2375 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2376 }
2377 else
2378 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2379
2380 if (rs6000_tune_index >= 0)
2381 {
2382 const char *name = processor_target_table[rs6000_tune_index].name;
2383 HOST_WIDE_INT flags
2384 = processor_target_table[rs6000_tune_index].target_enable;
2385
2386 sprintf (flags_buffer, "-mtune=%s flags", name);
2387 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2388 }
2389 else
2390 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2391
2392 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2393 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2394 rs6000_isa_flags);
2395
2396 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2397 rs6000_isa_flags_explicit);
2398
2399 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2400 rs6000_builtin_mask);
2401
2402 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2403
2404 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2405 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2406
2407 switch (rs6000_sched_costly_dep)
2408 {
2409 case max_dep_latency:
2410 costly_str = "max_dep_latency";
2411 break;
2412
2413 case no_dep_costly:
2414 costly_str = "no_dep_costly";
2415 break;
2416
2417 case all_deps_costly:
2418 costly_str = "all_deps_costly";
2419 break;
2420
2421 case true_store_to_load_dep_costly:
2422 costly_str = "true_store_to_load_dep_costly";
2423 break;
2424
2425 case store_to_load_dep_costly:
2426 costly_str = "store_to_load_dep_costly";
2427 break;
2428
2429 default:
2430 costly_str = costly_num;
2431 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2432 break;
2433 }
2434
2435 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2436
2437 switch (rs6000_sched_insert_nops)
2438 {
2439 case sched_finish_regroup_exact:
2440 nop_str = "sched_finish_regroup_exact";
2441 break;
2442
2443 case sched_finish_pad_groups:
2444 nop_str = "sched_finish_pad_groups";
2445 break;
2446
2447 case sched_finish_none:
2448 nop_str = "sched_finish_none";
2449 break;
2450
2451 default:
2452 nop_str = nop_num;
2453 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2454 break;
2455 }
2456
2457 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2458
2459 switch (rs6000_sdata)
2460 {
2461 default:
2462 case SDATA_NONE:
2463 break;
2464
2465 case SDATA_DATA:
2466 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2467 break;
2468
2469 case SDATA_SYSV:
2470 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2471 break;
2472
2473 case SDATA_EABI:
2474 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2475 break;
2476
2477 }
2478
2479 switch (rs6000_traceback)
2480 {
2481 case traceback_default: trace_str = "default"; break;
2482 case traceback_none: trace_str = "none"; break;
2483 case traceback_part: trace_str = "part"; break;
2484 case traceback_full: trace_str = "full"; break;
2485 default: trace_str = "unknown"; break;
2486 }
2487
2488 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2489
2490 switch (rs6000_current_cmodel)
2491 {
2492 case CMODEL_SMALL: cmodel_str = "small"; break;
2493 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2494 case CMODEL_LARGE: cmodel_str = "large"; break;
2495 default: cmodel_str = "unknown"; break;
2496 }
2497
2498 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2499
2500 switch (rs6000_current_abi)
2501 {
2502 case ABI_NONE: abi_str = "none"; break;
2503 case ABI_AIX: abi_str = "aix"; break;
2504 case ABI_ELFv2: abi_str = "ELFv2"; break;
2505 case ABI_V4: abi_str = "V4"; break;
2506 case ABI_DARWIN: abi_str = "darwin"; break;
2507 default: abi_str = "unknown"; break;
2508 }
2509
2510 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2511
2512 if (rs6000_altivec_abi)
2513 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2514
2515 if (rs6000_darwin64_abi)
2516 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2517
2518 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2519 (TARGET_SOFT_FLOAT ? "true" : "false"));
2520
2521 if (TARGET_LINK_STACK)
2522 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2523
2524 if (TARGET_P8_FUSION)
2525 {
2526 char options[80];
2527
2528 strcpy (options, "power8");
2529 if (TARGET_P8_FUSION_SIGN)
2530 strcat (options, ", sign");
2531
2532 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2533 }
2534
2535 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2536 TARGET_SECURE_PLT ? "secure" : "bss");
2537 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2538 aix_struct_return ? "aix" : "sysv");
2539 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2540 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2541 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2542 tf[!!rs6000_align_branch_targets]);
2543 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2544 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2545 rs6000_long_double_type_size);
2546 if (rs6000_long_double_type_size > 64)
2547 {
2548 fprintf (stderr, DEBUG_FMT_S, "long double type",
2549 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2550 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2551 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2552 }
2553 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2554 (int)rs6000_sched_restricted_insns_priority);
2555 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2556 (int)END_BUILTINS);
2557 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2558 (int)RS6000_BUILTIN_COUNT);
2559
2560 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2561 (int)TARGET_FLOAT128_ENABLE_TYPE);
2562
2563 if (TARGET_VSX)
2564 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2565 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2566
2567 if (TARGET_DIRECT_MOVE_128)
2568 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2569 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2570 }
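/* Editorial note: this dump is reached with the rs6000 -mdebug=reg option,
   e.g. "gcc -S -mcpu=power9 -mdebug=reg foo.c" writes the hard register,
   tieable mode and ISA flag tables above to stderr.  */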
2571
2572 \f
2573 	/* Update the addr mask bits in reg_addr to help the secondary reload and
2574 	   legitimate address support figure out the appropriate addressing to
2575 	   use.  */
2576
2577 static void
2578 rs6000_setup_reg_addr_masks (void)
2579 {
2580 ssize_t rc, reg, m, nregs;
2581 addr_mask_type any_addr_mask, addr_mask;
2582
2583 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2584 {
2585 machine_mode m2 = (machine_mode) m;
2586 bool complex_p = false;
2587 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2588 size_t msize;
2589
2590 if (COMPLEX_MODE_P (m2))
2591 {
2592 complex_p = true;
2593 m2 = GET_MODE_INNER (m2);
2594 }
2595
2596 msize = GET_MODE_SIZE (m2);
2597
2598 /* SDmode is special in that we want to access it only via REG+REG
2599 addressing on power7 and above, since we want to use the LFIWZX and
2600 STFIWZX instructions to load it. */
2601 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2602
2603 any_addr_mask = 0;
2604 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2605 {
2606 addr_mask = 0;
2607 reg = reload_reg_map[rc].reg;
2608
2609 /* Can mode values go in the GPR/FPR/Altivec registers? */
2610 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2611 {
2612 bool small_int_vsx_p = (small_int_p
2613 && (rc == RELOAD_REG_FPR
2614 || rc == RELOAD_REG_VMX));
2615
2616 nregs = rs6000_hard_regno_nregs[m][reg];
2617 addr_mask |= RELOAD_REG_VALID;
2618
2619 /* Indicate if the mode takes more than 1 physical register. If
2620 it takes a single register, indicate it can do REG+REG
2621 addressing. Small integers in VSX registers can only do
2622 REG+REG addressing. */
2623 if (small_int_vsx_p)
2624 addr_mask |= RELOAD_REG_INDEXED;
2625 else if (nregs > 1 || m == BLKmode || complex_p)
2626 addr_mask |= RELOAD_REG_MULTIPLE;
2627 else
2628 addr_mask |= RELOAD_REG_INDEXED;
2629
2630 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2631 addressing. If we allow scalars into Altivec registers,
2632 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2633
2634 For VSX systems, we don't allow update addressing for
2635 DFmode/SFmode if those registers can go in both the
2636 traditional floating point registers and Altivec registers.
2637 The load/store instructions for the Altivec registers do not
2638 have update forms. If we allowed update addressing, it seems
2639 to break IV-OPT code using floating point if the index type is
2640 int instead of long (PR target/81550 and target/84042). */
2641
2642 if (TARGET_UPDATE
2643 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2644 && msize <= 8
2645 && !VECTOR_MODE_P (m2)
2646 && !VECTOR_ALIGNMENT_P (m2)
2647 && !complex_p
2648 && (m != E_DFmode || !TARGET_VSX)
2649 && (m != E_SFmode || !TARGET_P8_VECTOR)
2650 && !small_int_vsx_p)
2651 {
2652 addr_mask |= RELOAD_REG_PRE_INCDEC;
2653
2654 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2655 we don't allow PRE_MODIFY for some multi-register
2656 operations. */
2657 switch (m)
2658 {
2659 default:
2660 addr_mask |= RELOAD_REG_PRE_MODIFY;
2661 break;
2662
2663 case E_DImode:
2664 if (TARGET_POWERPC64)
2665 addr_mask |= RELOAD_REG_PRE_MODIFY;
2666 break;
2667
2668 case E_DFmode:
2669 case E_DDmode:
2670 if (TARGET_HARD_FLOAT)
2671 addr_mask |= RELOAD_REG_PRE_MODIFY;
2672 break;
2673 }
2674 }
2675 }
2676
2677 /* GPR and FPR registers can do REG+OFFSET addressing, except
2678 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2679 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2680 if ((addr_mask != 0) && !indexed_only_p
2681 && msize <= 8
2682 && (rc == RELOAD_REG_GPR
2683 || ((msize == 8 || m2 == SFmode)
2684 && (rc == RELOAD_REG_FPR
2685 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2686 addr_mask |= RELOAD_REG_OFFSET;
2687
2688 	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2689 	     instructions are enabled.  The offset field for 128-bit VSX registers is
2690 	     only 12 bits.  While GPRs can handle the full offset range, VSX
2691 registers can only handle the restricted range. */
2692 else if ((addr_mask != 0) && !indexed_only_p
2693 && msize == 16 && TARGET_P9_VECTOR
2694 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2695 || (m2 == TImode && TARGET_VSX)))
2696 {
2697 addr_mask |= RELOAD_REG_OFFSET;
2698 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2699 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2700 }
2701
2702 	  /* Vector pairs can do both indexed and offset loads if the
2703 	     instructions are enabled; otherwise they can only do offset loads,
2704 	     since the access will be broken into two vector moves.  Vector quads
2705 	     can only do offset loads.  */
2706 else if ((addr_mask != 0) && TARGET_MMA
2707 && (m2 == OOmode || m2 == XOmode))
2708 {
2709 addr_mask |= RELOAD_REG_OFFSET;
2710 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2711 {
2712 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2713 if (m2 == OOmode)
2714 addr_mask |= RELOAD_REG_INDEXED;
2715 }
2716 }
2717
2718 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2719 addressing on 128-bit types. */
2720 if (rc == RELOAD_REG_VMX && msize == 16
2721 && (addr_mask & RELOAD_REG_VALID) != 0)
2722 addr_mask |= RELOAD_REG_AND_M16;
2723
2724 reg_addr[m].addr_mask[rc] = addr_mask;
2725 any_addr_mask |= addr_mask;
2726 }
2727
2728 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2729 }
2730 }
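/* Editorial sketch of the net effect: for a small integer mode such as
   SImode, the GPR column typically ends up valid + indexed + offset, plus
   pre-increment/pre-modify when TARGET_UPDATE is set, while the FPR/VMX
   columns (when SImode is allowed there at all, i.e. with TARGET_P8_VECTOR)
   get only RELOAD_REG_VALID and RELOAD_REG_INDEXED, matching the rule above
   that small integers in VSX registers are REG+REG addressed.  */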
2731
2732 \f
2733 /* Initialize the various global tables that are based on register size. */
2734 static void
2735 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2736 {
2737 ssize_t r, m, c;
2738 int align64;
2739 int align32;
2740
2741 /* Precalculate REGNO_REG_CLASS. */
2742 rs6000_regno_regclass[0] = GENERAL_REGS;
2743 for (r = 1; r < 32; ++r)
2744 rs6000_regno_regclass[r] = BASE_REGS;
2745
2746 for (r = 32; r < 64; ++r)
2747 rs6000_regno_regclass[r] = FLOAT_REGS;
2748
2749 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2750 rs6000_regno_regclass[r] = NO_REGS;
2751
2752 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2753 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2754
2755 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2756 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2757 rs6000_regno_regclass[r] = CR_REGS;
2758
2759 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2760 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2761 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2762 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2763 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2764 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2765 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2766
2767 	  /* Precalculate the mapping from register class to simpler reload register
2768 	     class.  We don't need all of the register classes that are combinations
2769 	     of different classes, just the simple ones that have constraint letters.  */
2770 for (c = 0; c < N_REG_CLASSES; c++)
2771 reg_class_to_reg_type[c] = NO_REG_TYPE;
2772
2773 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2774 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2775 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2776 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2777 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2778 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2779 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2780 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2781 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2782 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2783
2784 if (TARGET_VSX)
2785 {
2786 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2787 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2788 }
2789 else
2790 {
2791 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2792 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2793 }
2794
2795 	  /* Precalculate the valid memory formats as well as the vector information;
2796 	     this must be set up before the rs6000_hard_regno_nregs_internal calls
2797 	     below.  */
2798 gcc_assert ((int)VECTOR_NONE == 0);
2799 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2800 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2801
2802 gcc_assert ((int)CODE_FOR_nothing == 0);
2803 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2804
2805 gcc_assert ((int)NO_REGS == 0);
2806 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2807
2808 	  /* The VSX hardware allows native alignment for vectors; control whether the compiler
2809 	     believes it can use native alignment or must still use 128-bit alignment.  */
2810 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2811 {
2812 align64 = 64;
2813 align32 = 32;
2814 }
2815 else
2816 {
2817 align64 = 128;
2818 align32 = 128;
2819 }
2820
2821 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2822 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2823 if (TARGET_FLOAT128_TYPE)
2824 {
2825 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2826 rs6000_vector_align[KFmode] = 128;
2827
2828 if (FLOAT128_IEEE_P (TFmode))
2829 {
2830 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2831 rs6000_vector_align[TFmode] = 128;
2832 }
2833 }
2834
2835 /* V2DF mode, VSX only. */
2836 if (TARGET_VSX)
2837 {
2838 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2839 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2840 rs6000_vector_align[V2DFmode] = align64;
2841 }
2842
2843 /* V4SF mode, either VSX or Altivec. */
2844 if (TARGET_VSX)
2845 {
2846 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2847 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2848 rs6000_vector_align[V4SFmode] = align32;
2849 }
2850 else if (TARGET_ALTIVEC)
2851 {
2852 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2853 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2854 rs6000_vector_align[V4SFmode] = align32;
2855 }
2856
2857 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2858 and stores. */
2859 if (TARGET_ALTIVEC)
2860 {
2861 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2862 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2863 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2864 rs6000_vector_align[V4SImode] = align32;
2865 rs6000_vector_align[V8HImode] = align32;
2866 rs6000_vector_align[V16QImode] = align32;
2867
2868 if (TARGET_VSX)
2869 {
2870 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2871 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2872 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2873 }
2874 else
2875 {
2876 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2877 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2878 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2879 }
2880 }
2881
2882 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2883 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2884 if (TARGET_VSX)
2885 {
2886 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2887 rs6000_vector_unit[V2DImode]
2888 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2889 rs6000_vector_align[V2DImode] = align64;
2890
2891 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2892 rs6000_vector_unit[V1TImode]
2893 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2894 rs6000_vector_align[V1TImode] = 128;
2895 }
2896
2897 /* DFmode, see if we want to use the VSX unit. Memory is handled
2898 differently, so don't set rs6000_vector_mem. */
2899 if (TARGET_VSX)
2900 {
2901 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2902 rs6000_vector_align[DFmode] = 64;
2903 }
2904
2905 /* SFmode, see if we want to use the VSX unit. */
2906 if (TARGET_P8_VECTOR)
2907 {
2908 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2909 rs6000_vector_align[SFmode] = 32;
2910 }
2911
2912 	  /* Allow TImode in VSX registers and set the VSX memory macros.  */
2913 if (TARGET_VSX)
2914 {
2915 rs6000_vector_mem[TImode] = VECTOR_VSX;
2916 rs6000_vector_align[TImode] = align64;
2917 }
2918
2919 /* Add support for vector pairs and vector quad registers. */
2920 if (TARGET_MMA)
2921 {
2922 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2923 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2924 rs6000_vector_align[OOmode] = 256;
2925
2926 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2927 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2928 rs6000_vector_align[XOmode] = 512;
2929 }
2930
2931 /* Register class constraints for the constraints that depend on compile
2932 switches. When the VSX code was added, different constraints were added
2933 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2934 of the VSX registers are used. The register classes for scalar floating
2935 	     point types are set, based on whether we allow that type into the upper
2936 (Altivec) registers. GCC has register classes to target the Altivec
2937 registers for load/store operations, to select using a VSX memory
2938 operation instead of the traditional floating point operation. The
2939 constraints are:
2940
2941 d - Register class to use with traditional DFmode instructions.
2942 f - Register class to use with traditional SFmode instructions.
2943 v - Altivec register.
2944 wa - Any VSX register.
2945 wc - Reserved to represent individual CR bits (used in LLVM).
2946 wn - always NO_REGS.
2947 wr - GPR if 64-bit mode is permitted.
2948 wx - Float register if we can do 32-bit int stores. */
2949
2950 if (TARGET_HARD_FLOAT)
2951 {
2952 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2953 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2954 }
2955
2956 if (TARGET_VSX)
2957 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2958
2959 /* Add conditional constraints based on various options, to allow us to
2960 collapse multiple insn patterns. */
2961 if (TARGET_ALTIVEC)
2962 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2963
2964 if (TARGET_POWERPC64)
2965 {
2966 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2967 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2968 }
2969
2970 if (TARGET_STFIWX)
2971 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2972
2973 	  /* Support for new direct moves (ISA 3.0 + 64-bit).  */
2974 if (TARGET_DIRECT_MOVE_128)
2975 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2976
2977 /* Set up the reload helper and direct move functions. */
2978 if (TARGET_VSX || TARGET_ALTIVEC)
2979 {
2980 if (TARGET_64BIT)
2981 {
2982 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2983 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2984 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2985 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2986 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2987 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2988 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2989 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2990 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2991 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2992 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2993 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2994 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2995 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2996 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2997 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2998 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2999 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3000 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3001 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3002
3003 if (FLOAT128_VECTOR_P (KFmode))
3004 {
3005 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3006 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3007 }
3008
3009 if (FLOAT128_VECTOR_P (TFmode))
3010 {
3011 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3012 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3013 }
3014
3015 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3016 available. */
3017 if (TARGET_NO_SDMODE_STACK)
3018 {
3019 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3020 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3021 }
3022
3023 if (TARGET_VSX)
3024 {
3025 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3026 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3027 }
3028
3029 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3030 {
3031 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3032 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3033 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3034 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3035 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3036 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3037 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3038 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3039 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3040
3041 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3042 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3043 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3044 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3045 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3046 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3047 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3048 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3049 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3050
3051 if (FLOAT128_VECTOR_P (KFmode))
3052 {
3053 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3054 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3055 }
3056
3057 if (FLOAT128_VECTOR_P (TFmode))
3058 {
3059 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3060 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3061 }
3062
3063 if (TARGET_MMA)
3064 {
3065 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3066 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3067 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3068 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3069 }
3070 }
3071 }
3072 else
3073 {
3074 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3075 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3076 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3077 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3078 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3079 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3080 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3081 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3082 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3083 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3084 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3085 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3086 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3087 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3088 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3089 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3090 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3091 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3092 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3093 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3094
3095 if (FLOAT128_VECTOR_P (KFmode))
3096 {
3097 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3098 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3099 }
3100
3101 if (FLOAT128_IEEE_P (TFmode))
3102 {
3103 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3104 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3105 }
3106
3107 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3108 available. */
3109 if (TARGET_NO_SDMODE_STACK)
3110 {
3111 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3112 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3113 }
3114
3115 if (TARGET_VSX)
3116 {
3117 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3118 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3119 }
3120
3121 if (TARGET_DIRECT_MOVE)
3122 {
3123 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3124 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3125 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3126 }
3127 }
3128
3129 reg_addr[DFmode].scalar_in_vmx_p = true;
3130 reg_addr[DImode].scalar_in_vmx_p = true;
3131
3132 if (TARGET_P8_VECTOR)
3133 {
3134 reg_addr[SFmode].scalar_in_vmx_p = true;
3135 reg_addr[SImode].scalar_in_vmx_p = true;
3136
3137 if (TARGET_P9_VECTOR)
3138 {
3139 reg_addr[HImode].scalar_in_vmx_p = true;
3140 reg_addr[QImode].scalar_in_vmx_p = true;
3141 }
3142 }
3143 }
3144
3145 /* Precalculate HARD_REGNO_NREGS. */
3146 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3147 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3148 rs6000_hard_regno_nregs[m][r]
3149 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3150
3151 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3152 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3153 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3154 rs6000_hard_regno_mode_ok_p[m][r]
3155 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3156
3157 /* Precalculate CLASS_MAX_NREGS sizes. */
3158 for (c = 0; c < LIM_REG_CLASSES; ++c)
3159 {
3160 int reg_size;
3161
3162 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3163 reg_size = UNITS_PER_VSX_WORD;
3164
3165 else if (c == ALTIVEC_REGS)
3166 reg_size = UNITS_PER_ALTIVEC_WORD;
3167
3168 else if (c == FLOAT_REGS)
3169 reg_size = UNITS_PER_FP_WORD;
3170
3171 else
3172 reg_size = UNITS_PER_WORD;
3173
3174 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3175 {
3176 machine_mode m2 = (machine_mode)m;
3177 int reg_size2 = reg_size;
3178
3179 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3180 in VSX. */
3181 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3182 reg_size2 = UNITS_PER_FP_WORD;
3183
3184 rs6000_class_max_nregs[m][c]
3185 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3186 }
3187 }
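/* Editorial worked examples of the round-up divide above:

     V2DFmode (16 bytes) in VSX_REGS (reg_size 16)          => 1 register
     IFmode   (16 bytes) in VSX_REGS (reg_size forced to 8) => 2 registers
     DImode   (8 bytes)  in GENERAL_REGS on 32-bit (4)      => 2 registers  */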
3188
3189 	  /* Calculate which modes to automatically generate code to use the
3190 	     reciprocal divide and square root instructions.  In the future, possibly
3191 	     automatically generate the instructions even if the user did not specify
3192 	     -mrecip.  The older machines' double-precision reciprocal sqrt estimate is
3193 	     not accurate enough.  */
3194 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3195 if (TARGET_FRES)
3196 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3197 if (TARGET_FRE)
3198 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3199 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3200 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3201 if (VECTOR_UNIT_VSX_P (V2DFmode))
3202 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3203
3204 if (TARGET_FRSQRTES)
3205 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3206 if (TARGET_FRSQRTE)
3207 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3208 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3209 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3210 if (VECTOR_UNIT_VSX_P (V2DFmode))
3211 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3212
3213 if (rs6000_recip_control)
3214 {
3215 if (!flag_finite_math_only)
3216 	      warning (0, "%qs requires %qs or %qs", "-mrecip",
3217 		       "-ffinite-math-only", "-ffast-math");
3218 if (flag_trapping_math)
3219 warning (0, "%qs requires %qs or %qs", "-mrecip",
3220 "-fno-trapping-math", "-ffast-math");
3221 if (!flag_reciprocal_math)
3222 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3223 "-ffast-math");
3224 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3225 {
3226 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3227 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3228 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3229
3230 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3231 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3232 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3233
3234 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3235 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3236 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3237
3238 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3239 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3240 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3241
3242 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3243 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3244 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3245
3246 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3247 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3248 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3249
3250 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3251 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3252 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3253
3254 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3255 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3256 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3257 }
3258 }
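/* Editorial usage note: the AUTO bits above are only set when all three
   prerequisites hold, e.g. when compiling with -ffast-math (which implies
   -ffinite-math-only, -fno-trapping-math and -freciprocal-math) together
   with -mrecip or -mrecip=<option list>.  */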
3259
3260 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3261 legitimate address support to figure out the appropriate addressing to
3262 use. */
3263 rs6000_setup_reg_addr_masks ();
3264
3265 if (global_init_p || TARGET_DEBUG_TARGET)
3266 {
3267 if (TARGET_DEBUG_REG)
3268 rs6000_debug_reg_global ();
3269
3270 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3271 fprintf (stderr,
3272 "SImode variable mult cost = %d\n"
3273 "SImode constant mult cost = %d\n"
3274 "SImode short constant mult cost = %d\n"
3275 		 "DImode multiplication cost = %d\n"
3276 "SImode division cost = %d\n"
3277 "DImode division cost = %d\n"
3278 "Simple fp operation cost = %d\n"
3279 "DFmode multiplication cost = %d\n"
3280 "SFmode division cost = %d\n"
3281 "DFmode division cost = %d\n"
3282 "cache line size = %d\n"
3283 "l1 cache size = %d\n"
3284 "l2 cache size = %d\n"
3285 "simultaneous prefetches = %d\n"
3286 "\n",
3287 rs6000_cost->mulsi,
3288 rs6000_cost->mulsi_const,
3289 rs6000_cost->mulsi_const9,
3290 rs6000_cost->muldi,
3291 rs6000_cost->divsi,
3292 rs6000_cost->divdi,
3293 rs6000_cost->fp,
3294 rs6000_cost->dmul,
3295 rs6000_cost->sdiv,
3296 rs6000_cost->ddiv,
3297 rs6000_cost->cache_line_size,
3298 rs6000_cost->l1_cache_size,
3299 rs6000_cost->l2_cache_size,
3300 rs6000_cost->simultaneous_prefetches);
3301 }
3302 }
3303
3304 #if TARGET_MACHO
3305 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3306
3307 static void
3308 darwin_rs6000_override_options (void)
3309 {
3310 /* The Darwin ABI always includes AltiVec; it cannot be (validly) turned
3311 off. */
3312 rs6000_altivec_abi = 1;
3313 TARGET_ALTIVEC_VRSAVE = 1;
3314 rs6000_current_abi = ABI_DARWIN;
3315
3316 if (DEFAULT_ABI == ABI_DARWIN
3317 && TARGET_64BIT)
3318 darwin_one_byte_bool = 1;
3319
3320 if (TARGET_64BIT && ! TARGET_POWERPC64)
3321 {
3322 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3323 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3324 }
3325
3326 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3327 optimisation, and will not work with the most generic case (where the
3328 symbol is undefined external, but there is no symbol stub). */
3329 if (TARGET_64BIT)
3330 rs6000_default_long_calls = 0;
3331
3332 /* ld_classic is (so far) still used for kernel (static) code, and supports
3333 the JBSR longcall / branch islands. */
3334 if (flag_mkernel)
3335 {
3336 rs6000_default_long_calls = 1;
3337
3338 /* Allow a kext author to do -mkernel -mhard-float. */
3339 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3340 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3341 }
3342
3343 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3344 Altivec. */
3345 if (!flag_mkernel && !flag_apple_kext
3346 && TARGET_64BIT
3347 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3348 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3349
3350 /* Unless the user (not the configurer) has explicitly overridden it
3351 with -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4 unless
3352 targeting the kernel. */
3353 if (!flag_mkernel
3354 && !flag_apple_kext
3355 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3356 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3357 && ! global_options_set.x_rs6000_cpu_index)
3358 {
3359 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3360 }
3361 }
3362 #endif
3363
3364 /* If not otherwise specified by a target, make 'long double' equivalent to
3365 'double'. */
3366
3367 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3368 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3369 #endif
3370
3371 /* Return the builtin mask for the various options that affect which
3372 builtins are available. In the past we used target_flags, but we've run
3373 out of bits, and some options are no longer in target_flags. */
3374
3375 HOST_WIDE_INT
3376 rs6000_builtin_mask_calculate (void)
3377 {
3378 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3379 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3380 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3381 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3382 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3383 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3384 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3385 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3386 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3387 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3388 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3389 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3390 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3391 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3392 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3393 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3394 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3395 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3396 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3397 | ((TARGET_LONG_DOUBLE_128
3398 && TARGET_HARD_FLOAT
3399 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3400 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3401 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
3402 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
3403 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0));
3404 }
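/* Editorial sketch: the mask computed above is consumed roughly as

if ((bif_mask & rs6000_builtin_mask) != bif_mask)
... the builtin is unavailable ...

where bif_mask (a hypothetical name here) is the set of RS6000_BTM_* bits a
particular builtin requires; a builtin is usable only when every bit it
needs is present in rs6000_builtin_mask. */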
3405
3406 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3407 to clobber the XER[CA] bit because clobbering that bit without telling
3408 the compiler worked just fine with versions of GCC before GCC 5, and
3409 breaking a lot of older code in ways that are hard to track down is
3410 not such a great idea. */
3411
3412 static rtx_insn *
3413 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3414 vec<const char *> &/*constraints*/,
3415 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3416 {
3417 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3418 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3419 return NULL;
3420 }
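/* Editorial example of why this hook matters: inline asm such as

asm ("addc %0,%2,%3\n\tadde %1,%4,%5"
: "=r" (lo), "=r" (hi)
: "r" (a), "r" (b), "r" (c), "r" (d));

silently clobbers the carry bit XER[CA]; the implicit clobber added above
keeps such pre-GCC 5 style code working. */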
3421
3422 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3423 but is called when the optimize level is changed via an attribute or
3424 pragma or when it is reset at the end of the code affected by the
3425 attribute or pragma. It is not called at the beginning of compilation
3426 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3427 actions then, you should have TARGET_OPTION_OVERRIDE call
3428 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3429
3430 static void
3431 rs6000_override_options_after_change (void)
3432 {
3433 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3434 turns -frename-registers on. */
3435 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
3436 || (global_options_set.x_flag_unroll_all_loops
3437 && flag_unroll_all_loops))
3438 {
3439 if (!global_options_set.x_unroll_only_small_loops)
3440 unroll_only_small_loops = 0;
3441 if (!global_options_set.x_flag_rename_registers)
3442 flag_rename_registers = 1;
3443 if (!global_options_set.x_flag_cunroll_grow_size)
3444 flag_cunroll_grow_size = 1;
3445 }
3446 else if (!global_options_set.x_flag_cunroll_grow_size)
3447 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3448 }
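/* Editorial note: with the logic above, for example "gcc -O2 -funroll-loops"
on this target also turns off -munroll-only-small-loops and turns on
-frename-registers, unless the user set those options explicitly. */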
3449
3450 #ifdef TARGET_USES_LINUX64_OPT
3451 static void
3452 rs6000_linux64_override_options ()
3453 {
3454 if (!global_options_set.x_rs6000_alignment_flags)
3455 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3456 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3457 {
3458 if (DEFAULT_ABI != ABI_AIX)
3459 {
3460 rs6000_current_abi = ABI_AIX;
3461 error (INVALID_64BIT, "call");
3462 }
3463 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3464 if (ELFv2_ABI_CHECK)
3465 {
3466 rs6000_current_abi = ABI_ELFv2;
3467 if (dot_symbols)
3468 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3469 }
3470 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3471 {
3472 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3473 error (INVALID_64BIT, "relocatable");
3474 }
3475 if (rs6000_isa_flags & OPTION_MASK_EABI)
3476 {
3477 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3478 error (INVALID_64BIT, "eabi");
3479 }
3480 if (TARGET_PROTOTYPE)
3481 {
3482 target_prototype = 0;
3483 error (INVALID_64BIT, "prototype");
3484 }
3485 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3486 {
3487 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3488 error ("%<-m64%> requires a PowerPC64 cpu");
3489 }
3490 if (!global_options_set.x_rs6000_current_cmodel)
3491 SET_CMODEL (CMODEL_MEDIUM);
3492 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3493 {
3494 if (global_options_set.x_rs6000_current_cmodel
3495 && rs6000_current_cmodel != CMODEL_SMALL)
3496 error ("%<-mcmodel incompatible with other toc options%>");
3497 if (TARGET_MINIMAL_TOC)
3498 SET_CMODEL (CMODEL_SMALL);
3499 else if (TARGET_PCREL
3500 || (PCREL_SUPPORTED_BY_OS
3501 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3502 /* Ignore -mno-minimal-toc. */
3503 ;
3504 else
3505 SET_CMODEL (CMODEL_SMALL);
3506 }
3507 if (rs6000_current_cmodel != CMODEL_SMALL)
3508 {
3509 if (!global_options_set.x_TARGET_NO_FP_IN_TOC)
3510 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3511 if (!global_options_set.x_TARGET_NO_SUM_IN_TOC)
3512 TARGET_NO_SUM_IN_TOC = 0;
3513 }
3514 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3515 {
3516 if (global_options_set.x_rs6000_pltseq)
3517 warning (0, "%qs unsupported for this ABI",
3518 "-mpltseq");
3519 rs6000_pltseq = false;
3520 }
3521 }
3522 else if (TARGET_64BIT)
3523 error (INVALID_32BIT, "32");
3524 else
3525 {
3526 if (TARGET_PROFILE_KERNEL)
3527 {
3528 profile_kernel = 0;
3529 error (INVALID_32BIT, "profile-kernel");
3530 }
3531 if (global_options_set.x_rs6000_current_cmodel)
3532 {
3533 SET_CMODEL (CMODEL_SMALL);
3534 error (INVALID_32BIT, "cmodel");
3535 }
3536 }
3537 }
3538 #endif
3539
3540 /* Return true if we are using GLIBC and it supports the IEEE 128-bit long
3541 double type. This support is only in little-endian GLIBC 2.32 or newer. */
3542 static bool
3543 glibc_supports_ieee_128bit (void)
3544 {
3545 #ifdef OPTION_GLIBC
3546 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3547 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3548 return true;
3549 #endif /* OPTION_GLIBC. */
3550
3551 return false;
3552 }
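/* Editorial note: the check above encodes a glibc version M.m as
M * 1000 + m, so 2.32 becomes 2032 and 2.31 becomes 2031; only little-endian
glibc 2.32 and newer pass. */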
3553
3554 /* Override command line options.
3555
3556 Combine build-specific configuration information with options
3557 specified on the command line to set various state variables which
3558 influence code generation, optimization, and expansion of built-in
3559 functions. Assure that command-line configuration preferences are
3560 compatible with each other and with the build configuration; issue
3561 warnings while adjusting configuration or error messages while
3562 rejecting configuration.
3563
3564 Upon entry to this function:
3565
3566 This function is called once at the beginning of
3567 compilation, and then again at the start and end of compiling
3568 each section of code that has a different configuration, as
3569 indicated, for example, by adding the
3570
3571 __attribute__((__target__("cpu=power9")))
3572
3573 qualifier to a function definition or, for example, by bracketing
3574 code between
3575
3576 #pragma GCC target("altivec")
3577
3578 and
3579
3580 #pragma GCC reset_options
3581
3582 directives. Parameter global_init_p is true for the initial
3583 invocation, which initializes global variables, and false for all
3584 subsequent invocations.
3585
3586
3587 Various global state information is assumed to be valid. This
3588 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3589 default CPU specified at build configure time, TARGET_DEFAULT,
3590 representing the default set of option flags for the default
3591 target, and global_options_set.x_rs6000_isa_flags, representing
3592 which options were requested on the command line.
3593
3594 Upon return from this function:
3595
3596 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3597 was set by name on the command line. Additionally, if certain
3598 attributes are automatically enabled or disabled by this function
3599 in order to assure compatibility between options and
3600 configuration, the flags associated with those attributes are
3601 also set. By setting these "explicit bits", we avoid the risk
3602 that other code might accidentally overwrite these particular
3603 attributes with "default values".
3604
3605 The various bits of rs6000_isa_flags are set to indicate the
3606 target options that have been selected for the most current
3607 compilation efforts. This has the effect of also turning on the
3608 associated TARGET_XXX values since these are macros which are
3609 generally defined to test the corresponding bit of the
3610 rs6000_isa_flags variable.
3611
3612 The variable rs6000_builtin_mask is set to represent the target
3613 options for the most current compilation efforts, consistent with
3614 the current contents of rs6000_isa_flags. This variable controls
3615 expansion of built-in functions.
3616
3617 Various other global variables and fields of global structures
3618 (over 50 in all) are initialized to reflect the desired options
3619 for the most current compilation efforts. */
3620
3621 static bool
3622 rs6000_option_override_internal (bool global_init_p)
3623 {
3624 bool ret = true;
3625
3626 HOST_WIDE_INT set_masks;
3627 HOST_WIDE_INT ignore_masks;
3628 int cpu_index = -1;
3629 int tune_index;
3630 struct cl_target_option *main_target_opt
3631 = ((global_init_p || target_option_default_node == NULL)
3632 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3633
3634 /* Print defaults. */
3635 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3636 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3637
3638 /* Remember the explicit arguments. */
3639 if (global_init_p)
3640 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3641
3642 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3643 library functions, so warn about it. The flag may be useful for
3644 performance studies from time to time though, so don't disable it
3645 entirely. */
3646 if (global_options_set.x_rs6000_alignment_flags
3647 && rs6000_alignment_flags == MASK_ALIGN_POWER
3648 && DEFAULT_ABI == ABI_DARWIN
3649 && TARGET_64BIT)
3650 warning (0, "%qs is not supported for 64-bit Darwin;"
3651 " it is incompatible with the installed C and C++ libraries",
3652 "-malign-power");
3653
3654 /* Numerous experiments show that IRA-based loop pressure calculation
3655 works better for RTL loop-invariant motion on targets with enough
3656 (>= 32) registers. It is an expensive optimization, so it is enabled
3657 only when optimizing for peak performance. */
3658 if (optimize >= 3 && global_init_p
3659 && !global_options_set.x_flag_ira_loop_pressure)
3660 flag_ira_loop_pressure = 1;
3661
3662 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3663 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3664 options were already specified. */
3665 if (flag_sanitize & SANITIZE_USER_ADDRESS
3666 && !global_options_set.x_flag_asynchronous_unwind_tables)
3667 flag_asynchronous_unwind_tables = 1;
3668
3669 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3670 loop unroller is active. It is only checked during unrolling, so
3671 we can just set it on by default. */
3672 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3673 flag_variable_expansion_in_unroller = 1;
3674
3675 /* Set the pointer size. */
3676 if (TARGET_64BIT)
3677 {
3678 rs6000_pmode = DImode;
3679 rs6000_pointer_size = 64;
3680 }
3681 else
3682 {
3683 rs6000_pmode = SImode;
3684 rs6000_pointer_size = 32;
3685 }
3686
3687 /* Some OSs don't support saving the high part of 64-bit registers on context
3688 switch. Other OSs don't support saving Altivec registers. On those OSs,
3689 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3690 if the user wants either, the user must explicitly specify them and we
3691 won't interfere with the user's specification. */
3692
3693 set_masks = POWERPC_MASKS;
3694 #ifdef OS_MISSING_POWERPC64
3695 if (OS_MISSING_POWERPC64)
3696 set_masks &= ~OPTION_MASK_POWERPC64;
3697 #endif
3698 #ifdef OS_MISSING_ALTIVEC
3699 if (OS_MISSING_ALTIVEC)
3700 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3701 | OTHER_VSX_VECTOR_MASKS);
3702 #endif
3703
3704 /* Don't let the processor default override flags that were given explicitly. */
3705 set_masks &= ~rs6000_isa_flags_explicit;
3706
3707 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3708 the cpu in a target attribute or pragma, but did not specify a tuning
3709 option, use the cpu for the tuning option rather than the option specified
3710 with -mtune on the command line. Process a '--with-cpu' configuration
3711 request as an implicit -mcpu. */
3712 if (rs6000_cpu_index >= 0)
3713 cpu_index = rs6000_cpu_index;
3714 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3715 cpu_index = main_target_opt->x_rs6000_cpu_index;
3716 else if (OPTION_TARGET_CPU_DEFAULT)
3717 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3718
3719 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3720 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3721 with those from the cpu, except for options that were explicitly set. If
3722 we don't have a cpu, do not override the target bits set in
3723 TARGET_DEFAULT. */
3724 if (cpu_index >= 0)
3725 {
3726 rs6000_cpu_index = cpu_index;
3727 rs6000_isa_flags &= ~set_masks;
3728 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3729 & set_masks);
3730 }
3731 else
3732 {
3733 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3734 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3735 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Now that we have
3736 switched to using rs6000_isa_flags, we need to do the initialization here.
3737
3738 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3739 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3740 HOST_WIDE_INT flags;
3741 if (TARGET_DEFAULT)
3742 flags = TARGET_DEFAULT;
3743 else
3744 {
3745 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3746 const char *default_cpu = (!TARGET_POWERPC64
3747 ? "powerpc"
3748 : (BYTES_BIG_ENDIAN
3749 ? "powerpc64"
3750 : "powerpc64le"));
3751 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3752 flags = processor_target_table[default_cpu_index].target_enable;
3753 }
3754 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3755 }
3756
3757 if (rs6000_tune_index >= 0)
3758 tune_index = rs6000_tune_index;
3759 else if (cpu_index >= 0)
3760 rs6000_tune_index = tune_index = cpu_index;
3761 else
3762 {
3763 size_t i;
3764 enum processor_type tune_proc
3765 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3766
3767 tune_index = -1;
3768 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3769 if (processor_target_table[i].processor == tune_proc)
3770 {
3771 tune_index = i;
3772 break;
3773 }
3774 }
3775
3776 if (cpu_index >= 0)
3777 rs6000_cpu = processor_target_table[cpu_index].processor;
3778 else
3779 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3780
3781 gcc_assert (tune_index >= 0);
3782 rs6000_tune = processor_target_table[tune_index].processor;
3783
3784 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3785 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3786 || rs6000_cpu == PROCESSOR_PPCE5500)
3787 {
3788 if (TARGET_ALTIVEC)
3789 error ("AltiVec not supported in this target");
3790 }
3791
3792 /* If we are optimizing big endian systems for space, use the load/store
3793 multiple instructions. */
3794 if (BYTES_BIG_ENDIAN && optimize_size)
3795 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3796
3797 /* Don't allow -mmultiple on little-endian systems unless the cpu is a
3798 750, because the hardware doesn't support the instructions used in
3799 little-endian mode and they cause an alignment trap. The 750 does not
3800 trap (except when the access itself is unaligned). */
3801
3802 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3803 {
3804 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3805 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3806 warning (0, "%qs is not supported on little endian systems",
3807 "-mmultiple");
3808 }
3809
3810 /* If little-endian, default to -mstrict-align on older processors.
3811 Testing for direct_move matches power8 and later. */
3812 if (!BYTES_BIG_ENDIAN
3813 && !(processor_target_table[tune_index].target_enable
3814 & OPTION_MASK_DIRECT_MOVE))
3815 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3816
3817 if (!rs6000_fold_gimple)
3818 fprintf (stderr,
3819 "gimple folding of rs6000 builtins has been disabled.\n");
3820
3821 /* Add some warnings for VSX. */
3822 if (TARGET_VSX)
3823 {
3824 const char *msg = NULL;
3825 if (!TARGET_HARD_FLOAT)
3826 {
3827 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3828 msg = N_("%<-mvsx%> requires hardware floating point");
3829 else
3830 {
3831 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3832 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3833 }
3834 }
3835 else if (TARGET_AVOID_XFORM > 0)
3836 msg = N_("%<-mvsx%> needs indexed addressing");
3837 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3838 & OPTION_MASK_ALTIVEC))
3839 {
3840 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3841 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3842 else
3843 msg = N_("%<-mno-altivec%> disables vsx");
3844 }
3845
3846 if (msg)
3847 {
3848 warning (0, msg);
3849 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3850 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3851 }
3852 }
3853
3854 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3855 the -mcpu setting to enable options that conflict. */
3856 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3857 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3858 | OPTION_MASK_ALTIVEC
3859 | OPTION_MASK_VSX)) != 0)
3860 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3861 | OPTION_MASK_DIRECT_MOVE)
3862 & ~rs6000_isa_flags_explicit);
3863
3864 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3865 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3866
3867 #ifdef XCOFF_DEBUGGING_INFO
3868 /* For AIX default to 64-bit DWARF. */
3869 if (!global_options_set.x_dwarf_offset_size)
3870 dwarf_offset_size = POINTER_SIZE_UNITS;
3871 #endif
3872
3873 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3874 off all of the options that depend on those flags. */
3875 ignore_masks = rs6000_disable_incompatible_switches ();
3876
3877 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3878 unless the user explicitly used the -mno-<option> to disable the code. */
3879 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3880 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3881 else if (TARGET_P9_MINMAX)
3882 {
3883 if (cpu_index >= 0)
3884 {
3885 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3886 {
3887 /* legacy behavior: allow -mcpu=power9 with certain
3888 capabilities explicitly disabled. */
3889 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3890 }
3891 else
3892 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3893 "for <xxx> less than power9", "-mcpu");
3894 }
3895 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3896 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3897 & rs6000_isa_flags_explicit))
3898 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3899 were explicitly cleared. */
3900 error ("%qs incompatible with explicitly disabled options",
3901 "-mpower9-minmax");
3902 else
3903 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3904 }
3905 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3906 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3907 else if (TARGET_VSX)
3908 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3909 else if (TARGET_POPCNTD)
3910 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3911 else if (TARGET_DFP)
3912 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3913 else if (TARGET_CMPB)
3914 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3915 else if (TARGET_FPRND)
3916 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3917 else if (TARGET_POPCNTB)
3918 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3919 else if (TARGET_ALTIVEC)
3920 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3921
3922 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3923 {
3924 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3925 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3926 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3927 }
3928
3929 if (!TARGET_FPRND && TARGET_VSX)
3930 {
3931 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3932 /* TARGET_VSX = 1 implies Power7 and newer. */
3933 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3934 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3935 }
3936
3937 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3938 {
3939 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3940 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3941 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3942 }
3943
3944 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3945 {
3946 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3947 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3948 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3949 }
3950
3951 if (TARGET_P8_VECTOR && !TARGET_VSX)
3952 {
3953 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3954 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3955 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3956 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3957 {
3958 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3959 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3960 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3961 }
3962 else
3963 {
3964 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3965 not explicit. */
3966 rs6000_isa_flags |= OPTION_MASK_VSX;
3967 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3968 }
3969 }
3970
3971 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3972 {
3973 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3974 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3975 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3976 }
3977
3978 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3979 silently turn off quad memory mode. */
3980 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3981 {
3982 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3983 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3984
3985 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3986 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3987
3988 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3989 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3990 }
3991
3992 /* Non-atomic quad memory load/store are disabled for little endian, since
3993 the words are reversed, but atomic operations can still be done by
3994 swapping the words. */
3995 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3996 {
3997 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3998 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3999 "mode"));
4000
4001 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4002 }
4003
4004 /* Assume that if the user asked for normal quad memory instructions, they
4005 want the atomic versions as well, unless they explicitly told us not to
4006 use quad word atomic instructions. */
4007 if (TARGET_QUAD_MEMORY
4008 && !TARGET_QUAD_MEMORY_ATOMIC
4009 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4010 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4011
4012 /* If we can shrink-wrap the TOC register save separately, then use
4013 -msave-toc-indirect unless explicitly disabled. */
4014 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4015 && flag_shrink_wrap_separate
4016 && optimize_function_for_speed_p (cfun))
4017 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4018
4019 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4020 generating power8 instructions. Power9 does not optimize power8 fusion
4021 cases. */
4022 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4023 {
4024 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4025 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4026 else
4027 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4028 }
4029
4030 /* Setting additional fusion flags turns on base fusion. */
4031 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4032 {
4033 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4034 {
4035 if (TARGET_P8_FUSION_SIGN)
4036 error ("%qs requires %qs", "-mpower8-fusion-sign",
4037 "-mpower8-fusion");
4038
4039 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4040 }
4041 else
4042 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4043 }
4044
4045 /* Power8 does not fuse sign extended loads with the addis. If we are
4046 optimizing at high levels for speed, convert a sign extended load into a
4047 zero extending load, and an explicit sign extension. */
4048 if (TARGET_P8_FUSION
4049 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4050 && optimize_function_for_speed_p (cfun)
4051 && optimize >= 3)
4052 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4053
4054 /* ISA 3.0 vector instructions include ISA 2.07. */
4055 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4056 {
4057 /* We prefer to not mention undocumented options in
4058 error messages. However, if users have managed to select
4059 power9-vector without selecting power8-vector, they
4060 already know about undocumented flags. */
4061 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4062 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4063 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4064 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4065 {
4066 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4067 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4068 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4069 }
4070 else
4071 {
4072 /* OPTION_MASK_P9_VECTOR is explicit and
4073 OPTION_MASK_P8_VECTOR is not explicit. */
4074 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4075 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4076 }
4077 }
4078
4079 /* Set -mallow-movmisalign explicitly to on if we have full ISA 2.07
4080 support. If we only have ISA 2.06 support, and the user did not specify
4081 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4082 but we don't enable the full vectorization support. */
4083 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4084 TARGET_ALLOW_MOVMISALIGN = 1;
4085
4086 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4087 {
4088 if (TARGET_ALLOW_MOVMISALIGN > 0
4089 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4090 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4091
4092 TARGET_ALLOW_MOVMISALIGN = 0;
4093 }
4094
4095 /* Determine when unaligned vector accesses are permitted, and when
4096 they are preferred over masked Altivec loads. Note that if
4097 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4098 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4099 not true. */
4100 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4101 {
4102 if (!TARGET_VSX)
4103 {
4104 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4105 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4106
4107 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4108 }
4109
4110 else if (!TARGET_ALLOW_MOVMISALIGN)
4111 {
4112 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4113 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4114 "-mallow-movmisalign");
4115
4116 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4117 }
4118 }
4119
4120 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4121 {
4122 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4123 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4124 else
4125 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4126 }
4127
4128 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
4129 {
4130 if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
4131 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4132 else
4133 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4134 }
4135
4136 /* Use long double size to select the appropriate long double. We use
4137 TYPE_PRECISION to differentiate the 3 different long double types. We map
4138 128 into the precision used for TFmode. */
4139 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4140 ? 64
4141 : FLOAT_PRECISION_TFmode);
4142
4143 /* Set long double size before the IEEE 128-bit tests. */
4144 if (!global_options_set.x_rs6000_long_double_type_size)
4145 {
4146 if (main_target_opt != NULL
4147 && (main_target_opt->x_rs6000_long_double_type_size
4148 != default_long_double_size))
4149 error ("target attribute or pragma changes %<long double%> size");
4150 else
4151 rs6000_long_double_type_size = default_long_double_size;
4152 }
4153 else if (rs6000_long_double_type_size == 128)
4154 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4155 else if (global_options_set.x_rs6000_ieeequad)
4156 {
4157 if (global_options.x_rs6000_ieeequad)
4158 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4159 else
4160 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4161 }
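/* Editorial example: "-mlong-double-128 -mabi=ieeelongdouble" selects the
IEEE 128-bit layout for long double, and "-mlong-double-128
-mabi=ibmlongdouble" selects the IBM double-double layout, while either
-mabi= form without 128-bit long double is rejected by the errors above. */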
4162
4163 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4164 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4165 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4166 those systems will not pick up this default. Warn if the user changes the
4167 default unless -Wno-psabi. */
4168 if (!global_options_set.x_rs6000_ieeequad)
4169 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4170
4171 else
4172 {
4173 if (global_options.x_rs6000_ieeequad
4174 && (!TARGET_POPCNTD || !TARGET_VSX))
4175 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4176
4177 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4178 {
4179 /* Determine if the user can change the default long double type at
4180 compilation time. Only C and C++ support this, and you need GLIBC
4181 2.32 or newer. Only issue one warning. */
4182 static bool warned_change_long_double;
4183
4184 if (!warned_change_long_double
4185 && (!glibc_supports_ieee_128bit ()
4186 || (!lang_GNU_C () && !lang_GNU_CXX ())))
4187 {
4188 warned_change_long_double = true;
4189 if (TARGET_IEEEQUAD)
4190 warning (OPT_Wpsabi, "Using IEEE extended precision "
4191 "%<long double%>");
4192 else
4193 warning (OPT_Wpsabi, "Using IBM extended precision "
4194 "%<long double%>");
4195 }
4196 }
4197 }
4198
4199 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4200 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4201 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4202 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4203 the keyword as well as the type. */
4204 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4205
4206 /* IEEE 128-bit floating point requires VSX support. */
4207 if (TARGET_FLOAT128_KEYWORD)
4208 {
4209 if (!TARGET_VSX)
4210 {
4211 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4212 error ("%qs requires VSX support", "-mfloat128");
4213
4214 TARGET_FLOAT128_TYPE = 0;
4215 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4216 | OPTION_MASK_FLOAT128_HW);
4217 }
4218 else if (!TARGET_FLOAT128_TYPE)
4219 {
4220 TARGET_FLOAT128_TYPE = 1;
4221 warning (0, "The %<-mfloat128%> option may not be fully supported");
4222 }
4223 }
4224
4225 /* Enable the __float128 keyword under Linux by default. */
4226 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4227 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4228 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4229
4230 /* If we are supporting the float128 type and have full ISA 3.0 support,
4231 enable -mfloat128-hardware by default. However, don't enable the
4232 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4233 because sometimes the compiler wants to put things in an integer
4234 container, and if we don't have __int128 support, it is impossible. */
4235 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4236 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4237 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4238 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4239
4240 if (TARGET_FLOAT128_HW
4241 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4242 {
4243 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4244 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4245
4246 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4247 }
4248
4249 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4250 {
4251 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4252 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4253
4254 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4255 }
4256
4257 /* Enable -mprefixed by default on power10 systems. */
4258 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4259 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4260
4261 /* -mprefixed requires -mcpu=power10 (or later). */
4262 else if (TARGET_PREFIXED && !TARGET_POWER10)
4263 {
4264 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4265 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4266
4267 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4268 }
4269
4270 /* -mpcrel requires prefixed load/store addressing. */
4271 if (TARGET_PCREL && !TARGET_PREFIXED)
4272 {
4273 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4274 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4275
4276 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4277 }
4278
4279 /* Print the options after updating the defaults. */
4280 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4281 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4282
4283 /* E500mc does "better" if we inline more aggressively. Respect the
4284 user's opinion, though. */
4285 if (rs6000_block_move_inline_limit == 0
4286 && (rs6000_tune == PROCESSOR_PPCE500MC
4287 || rs6000_tune == PROCESSOR_PPCE500MC64
4288 || rs6000_tune == PROCESSOR_PPCE5500
4289 || rs6000_tune == PROCESSOR_PPCE6500))
4290 rs6000_block_move_inline_limit = 128;
4291
4292 /* store_one_arg depends on expand_block_move to handle at least the
4293 size of reg_parm_stack_space. */
4294 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4295 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4296
4297 if (global_init_p)
4298 {
4299 /* If the appropriate debug option is enabled, replace the target hooks
4300 with debug versions that call the real version and then prints
4301 debugging information. */
4302 if (TARGET_DEBUG_COST)
4303 {
4304 targetm.rtx_costs = rs6000_debug_rtx_costs;
4305 targetm.address_cost = rs6000_debug_address_cost;
4306 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4307 }
4308
4309 if (TARGET_DEBUG_ADDR)
4310 {
4311 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4312 targetm.legitimize_address = rs6000_debug_legitimize_address;
4313 rs6000_secondary_reload_class_ptr
4314 = rs6000_debug_secondary_reload_class;
4315 targetm.secondary_memory_needed
4316 = rs6000_debug_secondary_memory_needed;
4317 targetm.can_change_mode_class
4318 = rs6000_debug_can_change_mode_class;
4319 rs6000_preferred_reload_class_ptr
4320 = rs6000_debug_preferred_reload_class;
4321 rs6000_mode_dependent_address_ptr
4322 = rs6000_debug_mode_dependent_address;
4323 }
4324
4325 if (rs6000_veclibabi_name)
4326 {
4327 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4328 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4329 else
4330 {
4331 error ("unknown vectorization library ABI type (%qs) for "
4332 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4333 ret = false;
4334 }
4335 }
4336 }
4337
4338 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4339 target attribute or pragma which automatically enables both options,
4340 unless the altivec ABI was set. This is set by default for 64-bit, but
4341 not for 32-bit. */
4342 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4343 {
4344 TARGET_FLOAT128_TYPE = 0;
4345 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4346 | OPTION_MASK_FLOAT128_KEYWORD)
4347 & ~rs6000_isa_flags_explicit);
4348 }
4349
4350 /* Enable Altivec ABI for AIX -maltivec. */
4351 if (TARGET_XCOFF
4352 && (TARGET_ALTIVEC || TARGET_VSX)
4353 && !global_options_set.x_rs6000_altivec_abi)
4354 {
4355 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4356 error ("target attribute or pragma changes AltiVec ABI");
4357 else
4358 rs6000_altivec_abi = 1;
4359 }
4360
4361 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4362 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4363 be explicitly overridden in either case. */
4364 if (TARGET_ELF)
4365 {
4366 if (!global_options_set.x_rs6000_altivec_abi
4367 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4368 {
4369 if (main_target_opt != NULL &&
4370 !main_target_opt->x_rs6000_altivec_abi)
4371 error ("target attribute or pragma changes AltiVec ABI");
4372 else
4373 rs6000_altivec_abi = 1;
4374 }
4375 }
4376
4377 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4378 So far, the only darwin64 targets are also MACH-O. */
4379 if (TARGET_MACHO
4380 && DEFAULT_ABI == ABI_DARWIN
4381 && TARGET_64BIT)
4382 {
4383 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4384 error ("target attribute or pragma changes darwin64 ABI");
4385 else
4386 {
4387 rs6000_darwin64_abi = 1;
4388 /* Default to natural alignment, for better performance. */
4389 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4390 }
4391 }
4392
4393 /* Place FP constants in the constant pool instead of TOC
4394 if section anchors enabled. */
4395 if (flag_section_anchors
4396 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4397 TARGET_NO_FP_IN_TOC = 1;
4398
4399 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4400 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4401
4402 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4403 SUBTARGET_OVERRIDE_OPTIONS;
4404 #endif
4405 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4406 SUBSUBTARGET_OVERRIDE_OPTIONS;
4407 #endif
4408 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4409 SUB3TARGET_OVERRIDE_OPTIONS;
4410 #endif
4411
4412 /* If the ABI has support for PC-relative relocations, enable it by default.
4413 This test depends on the sub-target tests above setting the code model to
4414 medium for ELF v2 systems. */
4415 if (PCREL_SUPPORTED_BY_OS
4416 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4417 rs6000_isa_flags |= OPTION_MASK_PCREL;
4418
4419 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4420 after the subtarget override options are done. */
4421 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4422 {
4423 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4424 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4425
4426 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4427 }
4428
4429 /* Enable -mmma by default on power10 systems. */
4430 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4431 rs6000_isa_flags |= OPTION_MASK_MMA;
4432
4433 /* Turn off vector pair/mma options on non-power10 systems. */
4434 else if (!TARGET_POWER10 && TARGET_MMA)
4435 {
4436 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4437 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4438
4439 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4440 }
4441
4442 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4443 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4444
4445 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4446 && rs6000_tune != PROCESSOR_POWER5
4447 && rs6000_tune != PROCESSOR_POWER6
4448 && rs6000_tune != PROCESSOR_POWER7
4449 && rs6000_tune != PROCESSOR_POWER8
4450 && rs6000_tune != PROCESSOR_POWER9
4451 && rs6000_tune != PROCESSOR_POWER10
4452 && rs6000_tune != PROCESSOR_PPCA2
4453 && rs6000_tune != PROCESSOR_CELL
4454 && rs6000_tune != PROCESSOR_PPC476);
4455 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4456 || rs6000_tune == PROCESSOR_POWER5
4457 || rs6000_tune == PROCESSOR_POWER7
4458 || rs6000_tune == PROCESSOR_POWER8);
4459 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4460 || rs6000_tune == PROCESSOR_POWER5
4461 || rs6000_tune == PROCESSOR_POWER6
4462 || rs6000_tune == PROCESSOR_POWER7
4463 || rs6000_tune == PROCESSOR_POWER8
4464 || rs6000_tune == PROCESSOR_POWER9
4465 || rs6000_tune == PROCESSOR_POWER10
4466 || rs6000_tune == PROCESSOR_PPCE500MC
4467 || rs6000_tune == PROCESSOR_PPCE500MC64
4468 || rs6000_tune == PROCESSOR_PPCE5500
4469 || rs6000_tune == PROCESSOR_PPCE6500);
4470
4471 /* Allow debug switches to override the above settings. These are set to -1
4472 in rs6000.opt to indicate the user hasn't directly set the switch. */
4473 if (TARGET_ALWAYS_HINT >= 0)
4474 rs6000_always_hint = TARGET_ALWAYS_HINT;
4475
4476 if (TARGET_SCHED_GROUPS >= 0)
4477 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4478
4479 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4480 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4481
4482 rs6000_sched_restricted_insns_priority
4483 = (rs6000_sched_groups ? 1 : 0);
4484
4485 /* Handle -msched-costly-dep option. */
4486 rs6000_sched_costly_dep
4487 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4488
4489 if (rs6000_sched_costly_dep_str)
4490 {
4491 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4492 rs6000_sched_costly_dep = no_dep_costly;
4493 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4494 rs6000_sched_costly_dep = all_deps_costly;
4495 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4496 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4497 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4498 rs6000_sched_costly_dep = store_to_load_dep_costly;
4499 else
4500 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4501 atoi (rs6000_sched_costly_dep_str));
4502 }
4503
4504 /* Handle -minsert-sched-nops option. */
4505 rs6000_sched_insert_nops
4506 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4507
4508 if (rs6000_sched_insert_nops_str)
4509 {
4510 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4511 rs6000_sched_insert_nops = sched_finish_none;
4512 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4513 rs6000_sched_insert_nops = sched_finish_pad_groups;
4514 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4515 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4516 else
4517 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4518 atoi (rs6000_sched_insert_nops_str));
4519 }
4520
4521 /* Handle the stack protector options. */
4522 if (!global_options_set.x_rs6000_stack_protector_guard)
4523 #ifdef TARGET_THREAD_SSP_OFFSET
4524 rs6000_stack_protector_guard = SSP_TLS;
4525 #else
4526 rs6000_stack_protector_guard = SSP_GLOBAL;
4527 #endif
4528
4529 #ifdef TARGET_THREAD_SSP_OFFSET
4530 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4531 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4532 #endif
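/* Editorial note: r13 is the TLS thread pointer on 64-bit PowerPC and r2 on
32-bit, so the TLS-based guard defaults to an offset from the thread
pointer. */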
4533
4534 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4535 {
4536 char *endp;
4537 const char *str = rs6000_stack_protector_guard_offset_str;
4538
4539 errno = 0;
4540 long offset = strtol (str, &endp, 0);
4541 if (!*str || *endp || errno)
4542 error ("%qs is not a valid number in %qs", str,
4543 "-mstack-protector-guard-offset=");
4544
4545 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4546 || (TARGET_64BIT && (offset & 3)))
4547 error ("%qs is not a valid offset in %qs", str,
4548 "-mstack-protector-guard-offset=");
4549
4550 rs6000_stack_protector_guard_offset = offset;
4551 }
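/* Editorial example: a hypothetical invocation such as

gcc -mstack-protector-guard=tls -mstack-protector-guard-reg=r13
-mstack-protector-guard-offset=0x28 foo.c

has its offset parsed by the strtol call above; values outside
[-0x8000, 0x7fff], or not a multiple of 4 under -m64, are rejected. */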
4552
4553 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4554 {
4555 const char *str = rs6000_stack_protector_guard_reg_str;
4556 int reg = decode_reg_name (str);
4557
4558 if (!IN_RANGE (reg, 1, 31))
4559 error ("%qs is not a valid base register in %qs", str,
4560 "-mstack-protector-guard-reg=");
4561
4562 rs6000_stack_protector_guard_reg = reg;
4563 }
4564
4565 if (rs6000_stack_protector_guard == SSP_TLS
4566 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4567 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4568
4569 if (global_init_p)
4570 {
4571 #ifdef TARGET_REGNAMES
4572 /* If the user desires alternate register names, copy in the
4573 alternate names now. */
4574 if (TARGET_REGNAMES)
4575 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4576 #endif
4577
4578 /* Set aix_struct_return last, after the ABI is determined.
4579 If -maix-struct-return or -msvr4-struct-return was explicitly
4580 used, don't override with the ABI default. */
4581 if (!global_options_set.x_aix_struct_return)
4582 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4583
4584 #if 0
4585 /* IBM XL compiler defaults to unsigned bitfields. */
4586 if (TARGET_XL_COMPAT)
4587 flag_signed_bitfields = 0;
4588 #endif
4589
4590 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4591 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4592
4593 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4594
4595 /* We can only guarantee the availability of DI pseudo-ops when
4596 assembling for 64-bit targets. */
4597 if (!TARGET_64BIT)
4598 {
4599 targetm.asm_out.aligned_op.di = NULL;
4600 targetm.asm_out.unaligned_op.di = NULL;
4601 }
4602
4603
4604 /* Set branch target alignment, if not optimizing for size. */
4605 if (!optimize_size)
4606 {
4607 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4608 aligned 8-byte to avoid misprediction by the branch predictor. */
4609 if (rs6000_tune == PROCESSOR_TITAN
4610 || rs6000_tune == PROCESSOR_CELL)
4611 {
4612 if (flag_align_functions && !str_align_functions)
4613 str_align_functions = "8";
4614 if (flag_align_jumps && !str_align_jumps)
4615 str_align_jumps = "8";
4616 if (flag_align_loops && !str_align_loops)
4617 str_align_loops = "8";
4618 }
4619 if (rs6000_align_branch_targets)
4620 {
4621 if (flag_align_functions && !str_align_functions)
4622 str_align_functions = "16";
4623 if (flag_align_jumps && !str_align_jumps)
4624 str_align_jumps = "16";
4625 if (flag_align_loops && !str_align_loops)
4626 {
4627 can_override_loop_align = 1;
4628 str_align_loops = "16";
4629 }
4630 }
4631 }
4632
4633 /* Arrange to save and restore machine status around nested functions. */
4634 init_machine_status = rs6000_init_machine_status;
4635
4636 /* We should always be splitting complex arguments, but we can't break
4637 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4638 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4639 targetm.calls.split_complex_arg = NULL;
4640
4641 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4642 if (DEFAULT_ABI == ABI_AIX)
4643 targetm.calls.custom_function_descriptors = 0;
4644 }
4645
4646 /* Initialize rs6000_cost with the appropriate target costs. */
4647 if (optimize_size)
4648 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4649 else
4650 switch (rs6000_tune)
4651 {
4652 case PROCESSOR_RS64A:
4653 rs6000_cost = &rs64a_cost;
4654 break;
4655
4656 case PROCESSOR_MPCCORE:
4657 rs6000_cost = &mpccore_cost;
4658 break;
4659
4660 case PROCESSOR_PPC403:
4661 rs6000_cost = &ppc403_cost;
4662 break;
4663
4664 case PROCESSOR_PPC405:
4665 rs6000_cost = &ppc405_cost;
4666 break;
4667
4668 case PROCESSOR_PPC440:
4669 rs6000_cost = &ppc440_cost;
4670 break;
4671
4672 case PROCESSOR_PPC476:
4673 rs6000_cost = &ppc476_cost;
4674 break;
4675
4676 case PROCESSOR_PPC601:
4677 rs6000_cost = &ppc601_cost;
4678 break;
4679
4680 case PROCESSOR_PPC603:
4681 rs6000_cost = &ppc603_cost;
4682 break;
4683
4684 case PROCESSOR_PPC604:
4685 rs6000_cost = &ppc604_cost;
4686 break;
4687
4688 case PROCESSOR_PPC604e:
4689 rs6000_cost = &ppc604e_cost;
4690 break;
4691
4692 case PROCESSOR_PPC620:
4693 rs6000_cost = &ppc620_cost;
4694 break;
4695
4696 case PROCESSOR_PPC630:
4697 rs6000_cost = &ppc630_cost;
4698 break;
4699
4700 case PROCESSOR_CELL:
4701 rs6000_cost = &ppccell_cost;
4702 break;
4703
4704 case PROCESSOR_PPC750:
4705 case PROCESSOR_PPC7400:
4706 rs6000_cost = &ppc750_cost;
4707 break;
4708
4709 case PROCESSOR_PPC7450:
4710 rs6000_cost = &ppc7450_cost;
4711 break;
4712
4713 case PROCESSOR_PPC8540:
4714 case PROCESSOR_PPC8548:
4715 rs6000_cost = &ppc8540_cost;
4716 break;
4717
4718 case PROCESSOR_PPCE300C2:
4719 case PROCESSOR_PPCE300C3:
4720 rs6000_cost = &ppce300c2c3_cost;
4721 break;
4722
4723 case PROCESSOR_PPCE500MC:
4724 rs6000_cost = &ppce500mc_cost;
4725 break;
4726
4727 case PROCESSOR_PPCE500MC64:
4728 rs6000_cost = &ppce500mc64_cost;
4729 break;
4730
4731 case PROCESSOR_PPCE5500:
4732 rs6000_cost = &ppce5500_cost;
4733 break;
4734
4735 case PROCESSOR_PPCE6500:
4736 rs6000_cost = &ppce6500_cost;
4737 break;
4738
4739 case PROCESSOR_TITAN:
4740 rs6000_cost = &titan_cost;
4741 break;
4742
4743 case PROCESSOR_POWER4:
4744 case PROCESSOR_POWER5:
4745 rs6000_cost = &power4_cost;
4746 break;
4747
4748 case PROCESSOR_POWER6:
4749 rs6000_cost = &power6_cost;
4750 break;
4751
4752 case PROCESSOR_POWER7:
4753 rs6000_cost = &power7_cost;
4754 break;
4755
4756 case PROCESSOR_POWER8:
4757 rs6000_cost = &power8_cost;
4758 break;
4759
4760 case PROCESSOR_POWER9:
4761 case PROCESSOR_POWER10:
4762 rs6000_cost = &power9_cost;
4763 break;
4764
4765 case PROCESSOR_PPCA2:
4766 rs6000_cost = &ppca2_cost;
4767 break;
4768
4769 default:
4770 gcc_unreachable ();
4771 }
4772
4773 if (global_init_p)
4774 {
4775 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4776 param_simultaneous_prefetches,
4777 rs6000_cost->simultaneous_prefetches);
4778 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4779 param_l1_cache_size,
4780 rs6000_cost->l1_cache_size);
4781 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4782 param_l1_cache_line_size,
4783 rs6000_cost->cache_line_size);
4784 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4785 param_l2_cache_size,
4786 rs6000_cost->l2_cache_size);
4787
4788 /* Increase loop peeling limits based on performance analysis. */
4789 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4790 param_max_peeled_insns, 400);
4791 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4792 param_max_completely_peeled_insns, 400);
4793
4794 /* The lxvl/stxvl instructions don't perform well before Power10. */
4795 if (TARGET_POWER10)
4796 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4797 param_vect_partial_vector_usage, 1);
4798 else
4799 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4800 param_vect_partial_vector_usage, 0);
4801
4802 /* Use the 'model' -fsched-pressure algorithm by default. */
4803 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4804 param_sched_pressure_algorithm,
4805 SCHED_PRESSURE_MODEL);
4806
4807 /* If using typedef char *va_list, signal that
4808 __builtin_va_start (&ap, 0) can be optimized to
4809 ap = __builtin_next_arg (0). */
4810 if (DEFAULT_ABI != ABI_V4)
4811 targetm.expand_builtin_va_start = NULL;
4812 }
4813
4814 rs6000_override_options_after_change ();
4815
4816 /* If not explicitly specified via option, decide whether to generate indexed
4817 load/store instructions. A value of -1 indicates that the
4818 initial value of this variable has not been overwritten. During
4819 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4820 if (TARGET_AVOID_XFORM == -1)
4821 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4822 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4823 need indexed accesses and the type used is the scalar type of the element
4824 being loaded or stored. */
4825 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4826 && !TARGET_ALTIVEC);
4827
4828 /* Set the -mrecip options. */
4829 if (rs6000_recip_name)
4830 {
4831 char *p = ASTRDUP (rs6000_recip_name);
4832 char *q;
4833 unsigned int mask, i;
4834 bool invert;
4835
4836 while ((q = strtok (p, ",")) != NULL)
4837 {
4838 p = NULL;
4839 if (*q == '!')
4840 {
4841 invert = true;
4842 q++;
4843 }
4844 else
4845 invert = false;
4846
4847 if (!strcmp (q, "default"))
4848 mask = ((TARGET_RECIP_PRECISION)
4849 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4850 else
4851 {
4852 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4853 if (!strcmp (q, recip_options[i].string))
4854 {
4855 mask = recip_options[i].mask;
4856 break;
4857 }
4858
4859 if (i == ARRAY_SIZE (recip_options))
4860 {
4861 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4862 invert = false;
4863 mask = 0;
4864 ret = false;
4865 }
4866 }
4867
4868 if (invert)
4869 rs6000_recip_control &= ~mask;
4870 else
4871 rs6000_recip_control |= mask;
4872 }
4873 }
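  /* Worked example (editorial, mirroring the parsing loop above): the option
       -mrecip=all,!rsqrtd
     first ORs in the mask for "all", then the '!' on the second token clears
     the bit for double-precision reciprocal square root estimates, leaving
     every other estimate enabled in rs6000_recip_control.  */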
4874
4875 /* Set the builtin mask of the various options used that could affect which
4876 builtins were used. In the past we used target_flags, but we've run out
4877 of bits, and some options are no longer in target_flags. */
4878 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4879 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4880 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4881 rs6000_builtin_mask);
4882
4883 /* Initialize all of the registers. */
4884 rs6000_init_hard_regno_mode_ok (global_init_p);
4885
4886   /* Save the initial options in case the user uses function-specific options.  */
4887 if (global_init_p)
4888 target_option_default_node = target_option_current_node
4889 = build_target_option_node (&global_options, &global_options_set);
4890
4891 /* If not explicitly specified via option, decide whether to generate the
4892      extra blr's required to preserve the link stack on some cpus (e.g., 476).  */
4893 if (TARGET_LINK_STACK == -1)
4894 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4895
4896 /* Deprecate use of -mno-speculate-indirect-jumps. */
4897 if (!rs6000_speculate_indirect_jumps)
4898 warning (0, "%qs is deprecated and not recommended in any circumstances",
4899 "-mno-speculate-indirect-jumps");
4900
4901 return ret;
4902 }
4903
4904 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4905 define the target cpu type. */
4906
4907 static void
4908 rs6000_option_override (void)
4909 {
4910 (void) rs6000_option_override_internal (true);
4911 }
4912
4913 \f
4914 /* Implement targetm.vectorize.builtin_mask_for_load. */
4915 static tree
4916 rs6000_builtin_mask_for_load (void)
4917 {
4918 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4919 if ((TARGET_ALTIVEC && !TARGET_VSX)
4920 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4921 return altivec_builtin_mask_for_load;
4922 else
4923 return 0;
4924 }
4925
4926 /* Implement LOOP_ALIGN. */
4927 align_flags
4928 rs6000_loop_align (rtx label)
4929 {
4930 basic_block bb;
4931 int ninsns;
4932
4933 /* Don't override loop alignment if -falign-loops was specified. */
4934 if (!can_override_loop_align)
4935 return align_loops;
4936
4937 bb = BLOCK_FOR_INSN (label);
4938   ninsns = num_loop_insns (bb->loop_father);
4939
4940 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4941 if (ninsns > 4 && ninsns <= 8
4942 && (rs6000_tune == PROCESSOR_POWER4
4943 || rs6000_tune == PROCESSOR_POWER5
4944 || rs6000_tune == PROCESSOR_POWER6
4945 || rs6000_tune == PROCESSOR_POWER7
4946 || rs6000_tune == PROCESSOR_POWER8))
4947 return align_flags (5);
4948 else
4949 return align_loops;
4950 }
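/* Worked example (editorial): align_flags (5) requests 2**5 = 32-byte
   alignment.  A loop of 5 to 8 four-byte instructions is 20 to 32 bytes
   long, so after this alignment it fits entirely within a single 32-byte
   icache sector on the POWER4..POWER8 cores handled above.  */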
4951
4952 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4953    after applying N iterations.  This routine does not determine how many
4954    iterations are required to reach the desired alignment.  */
4955
4956 static bool
4957 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4958 {
4959 if (is_packed)
4960 return false;
4961
4962 if (TARGET_32BIT)
4963 {
4964 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4965 return true;
4966
4967 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4968 return true;
4969
4970 return false;
4971 }
4972 else
4973 {
4974 if (TARGET_MACHO)
4975 return false;
4976
4977       /* Assume all other types are naturally aligned.  CHECKME!  */
4978 return true;
4979 }
4980 }
4981
4982 /* Return true if the vector misalignment factor is supported by the
4983 target. */
4984 static bool
4985 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4986 const_tree type,
4987 int misalignment,
4988 bool is_packed)
4989 {
4990 if (TARGET_VSX)
4991 {
4992 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4993 return true;
4994
4995       /* Return false if the movmisalign pattern is not supported for this mode.  */
4996 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4997 return false;
4998
4999 if (misalignment == -1)
5000 {
5001 /* Misalignment factor is unknown at compile time but we know
5002 it's word aligned. */
5003 if (rs6000_vector_alignment_reachable (type, is_packed))
5004 {
5005 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5006
5007 if (element_size == 64 || element_size == 32)
5008 return true;
5009 }
5010
5011 return false;
5012 }
5013
5014       /* VSX supports word-aligned vectors.  */
5015 if (misalignment % 4 == 0)
5016 return true;
5017 }
5018 return false;
5019 }
5020
5021 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5022 static int
5023 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5024 tree vectype, int misalign)
5025 {
5026 unsigned elements;
5027 tree elem_type;
5028
5029 switch (type_of_cost)
5030 {
5031 case scalar_stmt:
5032 case scalar_store:
5033 case vector_stmt:
5034 case vector_store:
5035 case vec_to_scalar:
5036 case scalar_to_vec:
5037 case cond_branch_not_taken:
5038 return 1;
5039 case scalar_load:
5040 case vector_load:
5041 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5042 return 2;
5043
5044 case vec_perm:
5045 /* Power7 has only one permute unit, make it a bit expensive. */
5046 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5047 return 3;
5048 else
5049 return 1;
5050
5051 case vec_promote_demote:
5052 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5053 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5054 return 4;
5055 else
5056 return 1;
5057
5058 case cond_branch_taken:
5059 return 3;
5060
5061 case unaligned_load:
5062 case vector_gather_load:
5063 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5064 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5065 return 2;
5066
5067 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5068 {
5069 elements = TYPE_VECTOR_SUBPARTS (vectype);
5070 if (elements == 2)
5071 /* Double word aligned. */
5072 return 4;
5073
5074 if (elements == 4)
5075 {
5076 switch (misalign)
5077 {
5078 case 8:
5079 /* Double word aligned. */
5080 return 4;
5081
5082 case -1:
5083 /* Unknown misalignment. */
5084 case 4:
5085 case 12:
5086 /* Word aligned. */
5087 return 33;
5088
5089 default:
5090 gcc_unreachable ();
5091 }
5092 }
5093 }
5094
5095 if (TARGET_ALTIVEC)
5096 /* Misaligned loads are not supported. */
5097 gcc_unreachable ();
5098
5099 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5100 return 4;
5101
5102 case unaligned_store:
5103 case vector_scatter_store:
5104 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5105 return 1;
5106
5107 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5108 {
5109 elements = TYPE_VECTOR_SUBPARTS (vectype);
5110 if (elements == 2)
5111 /* Double word aligned. */
5112 return 2;
5113
5114 if (elements == 4)
5115 {
5116 switch (misalign)
5117 {
5118 case 8:
5119 /* Double word aligned. */
5120 return 2;
5121
5122 case -1:
5123 /* Unknown misalignment. */
5124 case 4:
5125 case 12:
5126 /* Word aligned. */
5127 return 23;
5128
5129 default:
5130 gcc_unreachable ();
5131 }
5132 }
5133 }
5134
5135 if (TARGET_ALTIVEC)
5136 /* Misaligned stores are not supported. */
5137 gcc_unreachable ();
5138
5139 return 2;
5140
5141 case vec_construct:
5142 /* This is a rough approximation assuming non-constant elements
5143 constructed into a vector via element insertion. FIXME:
5144 vec_construct is not granular enough for uniformly good
5145 decisions. If the initialization is a splat, this is
5146 cheaper than we estimate. Improve this someday. */
5147 elem_type = TREE_TYPE (vectype);
5148 /* 32-bit vectors loaded into registers are stored as double
5149 precision, so we need 2 permutes, 2 converts, and 1 merge
5150 to construct a vector of short floats from them. */
5151 if (SCALAR_FLOAT_TYPE_P (elem_type)
5152 && TYPE_PRECISION (elem_type) == 32)
5153 return 5;
5154 /* On POWER9, integer vector types are built up in GPRs and then
5155 use a direct move (2 cycles). For POWER8 this is even worse,
5156 as we need two direct moves and a merge, and the direct moves
5157 are five cycles. */
5158 else if (INTEGRAL_TYPE_P (elem_type))
5159 {
5160 if (TARGET_P9_VECTOR)
5161 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5162 else
5163 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5164 }
5165 else
5166 /* V2DFmode doesn't need a direct move. */
5167 return 2;
5168
5169 default:
5170 gcc_unreachable ();
5171 }
5172 }
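/* Worked example (editorial): a vec_construct of a V4SImode vector from
   non-constant elements costs
     TYPE_VECTOR_SUBPARTS - 1 + 2 = 4 - 1 + 2 = 5 with -mcpu=power9, but
     TYPE_VECTOR_SUBPARTS - 1 + 5 = 4 - 1 + 5 = 8 otherwise,
   reflecting the cheaper GPR-to-vector direct move on POWER9.  */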
5173
5174 /* Implement targetm.vectorize.preferred_simd_mode. */
5175
5176 static machine_mode
5177 rs6000_preferred_simd_mode (scalar_mode mode)
5178 {
5179 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5180
5181 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5182 return vmode.require ();
5183
5184 return word_mode;
5185 }
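/* E.g. (editorial): for SFmode this requests 16 / 4 = 4 elements, i.e.
   V4SFmode; for DImode it requests 16 / 8 = 2 elements, i.e. V2DImode,
   falling back to word_mode when no vector memory support is enabled.  */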
5186
5187 typedef struct _rs6000_cost_data
5188 {
5189 struct loop *loop_info;
5190 unsigned cost[3];
5191 } rs6000_cost_data;
5192
5193 /* Test for likely overcommitment of vector hardware resources. If a
5194 loop iteration is relatively large, and too large a percentage of
5195 instructions in the loop are vectorized, the cost model may not
5196 adequately reflect delays from unavailable vector resources.
5197 Penalize the loop body cost for this case. */
5198
5199 static void
5200 rs6000_density_test (rs6000_cost_data *data)
5201 {
5202 const int DENSITY_PCT_THRESHOLD = 85;
5203 const int DENSITY_SIZE_THRESHOLD = 70;
5204 const int DENSITY_PENALTY = 10;
5205 struct loop *loop = data->loop_info;
5206 basic_block *bbs = get_loop_body (loop);
5207 int nbbs = loop->num_nodes;
5208 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5209 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5210 int i, density_pct;
5211
5212 for (i = 0; i < nbbs; i++)
5213 {
5214 basic_block bb = bbs[i];
5215 gimple_stmt_iterator gsi;
5216
5217 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5218 {
5219 gimple *stmt = gsi_stmt (gsi);
5220 if (is_gimple_debug (stmt))
5221 continue;
5222
5223 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5224
5225 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5226 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5227 not_vec_cost++;
5228 }
5229 }
5230
5231 free (bbs);
5232 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5233
5234 if (density_pct > DENSITY_PCT_THRESHOLD
5235 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5236 {
5237 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5238 if (dump_enabled_p ())
5239 dump_printf_loc (MSG_NOTE, vect_location,
5240 "density %d%%, cost %d exceeds threshold, penalizing "
5241 "loop body cost by %d%%", density_pct,
5242 vec_cost + not_vec_cost, DENSITY_PENALTY);
5243 }
5244 }
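/* Worked example (editorial): with vec_cost = 90 and not_vec_cost = 10,
   density_pct = 90 * 100 / 100 = 90 > 85 and the loop size 100 > 70, so
   the body cost is raised to 90 * (100 + 10) / 100 = 99.  */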
5245
5246 /* Implement targetm.vectorize.init_cost. */
5247
5248 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5249 instruction is needed by the vectorization. */
5250 static bool rs6000_vect_nonmem;
5251
5252 static void *
5253 rs6000_init_cost (struct loop *loop_info)
5254 {
5255 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5256 data->loop_info = loop_info;
5257 data->cost[vect_prologue] = 0;
5258 data->cost[vect_body] = 0;
5259 data->cost[vect_epilogue] = 0;
5260 rs6000_vect_nonmem = false;
5261 return data;
5262 }
5263
5264 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5265    For some statements, we would like to further fine-tune the cost on top of
5266    the rs6000_builtin_vectorization_cost handling, which doesn't have any
5267    information on statement operation codes etc.  One typical case here is
5268    COND_EXPR; it costs the same as a simple FXU instruction when evaluated
5269    as a scalar, but it should be priced higher since it is transformed into
5270    either compare + branch or compare + isel instructions.  */
5271
5272 static unsigned
5273 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5274 struct _stmt_vec_info *stmt_info)
5275 {
5276 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5277 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5278 {
5279 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5280 if (subcode == COND_EXPR)
5281 return 2;
5282 }
5283
5284 return 0;
5285 }
5286
5287 /* Implement targetm.vectorize.add_stmt_cost. */
5288
5289 static unsigned
5290 rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
5291 enum vect_cost_for_stmt kind,
5292 struct _stmt_vec_info *stmt_info, tree vectype,
5293 int misalign, enum vect_cost_model_location where)
5294 {
5295 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5296 unsigned retval = 0;
5297
5298 if (flag_vect_cost_model)
5299 {
5300 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5301 misalign);
5302 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5303 /* Statements in an inner loop relative to the loop being
5304 vectorized are weighted more heavily. The value here is
5305 arbitrary and could potentially be improved with analysis. */
5306 if (where == vect_body && stmt_info
5307 && stmt_in_inner_loop_p (vinfo, stmt_info))
5308 count *= 50; /* FIXME. */
5309
5310 retval = (unsigned) (count * stmt_cost);
5311 cost_data->cost[where] += retval;
5312
5313 /* Check whether we're doing something other than just a copy loop.
5314 Not all such loops may be profitably vectorized; see
5315 rs6000_finish_cost. */
5316 if ((kind == vec_to_scalar || kind == vec_perm
5317 || kind == vec_promote_demote || kind == vec_construct
5318 || kind == scalar_to_vec)
5319 || (where == vect_body && kind == vector_stmt))
5320 rs6000_vect_nonmem = true;
5321 }
5322
5323 return retval;
5324 }
5325
5326 /* For target-specific vectorization costs which can't be handled per stmt,
5327    check the requisite conditions and adjust the vectorization cost
5328    accordingly if they are satisfied.  One typical example is to model the
5329    shift cost for vectors with length by counting the number of required
5330    lengths under the condition LOOP_VINFO_FULLY_WITH_LENGTH_P.  */
5331
5332 static void
5333 rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
5334 {
5335 struct loop *loop = data->loop_info;
5336 gcc_assert (loop);
5337 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
5338
5339 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5340 {
5341 rgroup_controls *rgc;
5342 unsigned int num_vectors_m1;
5343 unsigned int shift_cnt = 0;
5344 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5345 if (rgc->type)
5346 /* Each length needs one shift to fill into bits 0-7. */
5347 shift_cnt += num_vectors_m1 + 1;
5348
5349 rs6000_add_stmt_cost (loop_vinfo, (void *) data, shift_cnt, scalar_stmt,
5350 NULL, NULL_TREE, 0, vect_body);
5351 }
5352 }
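/* Illustrative example (editorial): in a loop fully predicated by lengths
   with two active rgroups, the rgroup at index 0 (one vector per iteration)
   adds 1 and the rgroup at index 1 (two vectors) adds 2, so shift_cnt = 3
   scalar_stmt costs are charged to the loop body.  */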
5353
5354 /* Implement targetm.vectorize.finish_cost. */
5355
5356 static void
5357 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5358 unsigned *body_cost, unsigned *epilogue_cost)
5359 {
5360 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5361
5362 if (cost_data->loop_info)
5363 {
5364 rs6000_adjust_vect_cost_per_loop (cost_data);
5365 rs6000_density_test (cost_data);
5366 }
5367
5368 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5369 that require versioning for any reason. The vectorization is at
5370 best a wash inside the loop, and the versioning checks make
5371 profitability highly unlikely and potentially quite harmful. */
5372 if (cost_data->loop_info)
5373 {
5374 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5375 if (!rs6000_vect_nonmem
5376 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5377 && LOOP_REQUIRES_VERSIONING (vec_info))
5378 cost_data->cost[vect_body] += 10000;
5379 }
5380
5381 *prologue_cost = cost_data->cost[vect_prologue];
5382 *body_cost = cost_data->cost[vect_body];
5383 *epilogue_cost = cost_data->cost[vect_epilogue];
5384 }
5385
5386 /* Implement targetm.vectorize.destroy_cost_data. */
5387
5388 static void
5389 rs6000_destroy_cost_data (void *data)
5390 {
5391 free (data);
5392 }
5393
5394 /* Implement targetm.loop_unroll_adjust. */
5395
5396 static unsigned
5397 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5398 {
5399 if (unroll_only_small_loops)
5400 {
5401 /* TODO: These are hardcoded values right now. We probably should use
5402 a PARAM here. */
5403 if (loop->ninsns <= 6)
5404 return MIN (4, nunroll);
5405 if (loop->ninsns <= 10)
5406 return MIN (2, nunroll);
5407
5408 return 0;
5409 }
5410
5411 return nunroll;
5412 }
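/* E.g. (editorial): under -munroll-only-small-loops, a 5-insn loop asked to
   unroll 8 times is limited to MIN (4, 8) = 4 copies, a 9-insn loop to 2
   copies, and an 11-insn loop is not unrolled at all.  */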
5413
5414 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5415 library with vectorized intrinsics. */
5416
5417 static tree
5418 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5419 tree type_in)
5420 {
5421 char name[32];
5422 const char *suffix = NULL;
5423 tree fntype, new_fndecl, bdecl = NULL_TREE;
5424 int n_args = 1;
5425 const char *bname;
5426 machine_mode el_mode, in_mode;
5427 int n, in_n;
5428
5429 /* Libmass is suitable for unsafe math only as it does not correctly support
5430 parts of IEEE with the required precision such as denormals. Only support
5431 it if we have VSX to use the simd d2 or f4 functions.
5432 XXX: Add variable length support. */
5433 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5434 return NULL_TREE;
5435
5436 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5437 n = TYPE_VECTOR_SUBPARTS (type_out);
5438 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5439 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5440 if (el_mode != in_mode
5441 || n != in_n)
5442 return NULL_TREE;
5443
5444 switch (fn)
5445 {
5446 CASE_CFN_ATAN2:
5447 CASE_CFN_HYPOT:
5448 CASE_CFN_POW:
5449 n_args = 2;
5450 gcc_fallthrough ();
5451
5452 CASE_CFN_ACOS:
5453 CASE_CFN_ACOSH:
5454 CASE_CFN_ASIN:
5455 CASE_CFN_ASINH:
5456 CASE_CFN_ATAN:
5457 CASE_CFN_ATANH:
5458 CASE_CFN_CBRT:
5459 CASE_CFN_COS:
5460 CASE_CFN_COSH:
5461 CASE_CFN_ERF:
5462 CASE_CFN_ERFC:
5463 CASE_CFN_EXP2:
5464 CASE_CFN_EXP:
5465 CASE_CFN_EXPM1:
5466 CASE_CFN_LGAMMA:
5467 CASE_CFN_LOG10:
5468 CASE_CFN_LOG1P:
5469 CASE_CFN_LOG2:
5470 CASE_CFN_LOG:
5471 CASE_CFN_SIN:
5472 CASE_CFN_SINH:
5473 CASE_CFN_SQRT:
5474 CASE_CFN_TAN:
5475 CASE_CFN_TANH:
5476 if (el_mode == DFmode && n == 2)
5477 {
5478 bdecl = mathfn_built_in (double_type_node, fn);
5479 suffix = "d2"; /* pow -> powd2 */
5480 }
5481 else if (el_mode == SFmode && n == 4)
5482 {
5483 bdecl = mathfn_built_in (float_type_node, fn);
5484 suffix = "4"; /* powf -> powf4 */
5485 }
5486 else
5487 return NULL_TREE;
5488 if (!bdecl)
5489 return NULL_TREE;
5490 break;
5491
5492 default:
5493 return NULL_TREE;
5494 }
5495
5496 gcc_assert (suffix != NULL);
5497 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5498 if (!bname)
5499 return NULL_TREE;
5500
5501 strcpy (name, bname + strlen ("__builtin_"));
5502 strcat (name, suffix);
5503
5504 if (n_args == 1)
5505 fntype = build_function_type_list (type_out, type_in, NULL);
5506 else if (n_args == 2)
5507 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5508 else
5509 gcc_unreachable ();
5510
5511 /* Build a function declaration for the vectorized function. */
5512 new_fndecl = build_decl (BUILTINS_LOCATION,
5513 FUNCTION_DECL, get_identifier (name), fntype);
5514 TREE_PUBLIC (new_fndecl) = 1;
5515 DECL_EXTERNAL (new_fndecl) = 1;
5516 DECL_IS_NOVOPS (new_fndecl) = 1;
5517 TREE_READONLY (new_fndecl) = 1;
5518
5519 return new_fndecl;
5520 }
5521
5522 /* Returns a function decl for a vectorized version of the builtin function
5523 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5524 if it is not available. */
5525
5526 static tree
5527 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5528 tree type_in)
5529 {
5530 machine_mode in_mode, out_mode;
5531 int in_n, out_n;
5532
5533 if (TARGET_DEBUG_BUILTIN)
5534 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5535 combined_fn_name (combined_fn (fn)),
5536 GET_MODE_NAME (TYPE_MODE (type_out)),
5537 GET_MODE_NAME (TYPE_MODE (type_in)));
5538
5539 if (TREE_CODE (type_out) != VECTOR_TYPE
5540 || TREE_CODE (type_in) != VECTOR_TYPE)
5541 return NULL_TREE;
5542
5543 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5544 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5545 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5546 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5547
5548 switch (fn)
5549 {
5550 CASE_CFN_COPYSIGN:
5551 if (VECTOR_UNIT_VSX_P (V2DFmode)
5552 && out_mode == DFmode && out_n == 2
5553 && in_mode == DFmode && in_n == 2)
5554 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5555 if (VECTOR_UNIT_VSX_P (V4SFmode)
5556 && out_mode == SFmode && out_n == 4
5557 && in_mode == SFmode && in_n == 4)
5558 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5559 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5560 && out_mode == SFmode && out_n == 4
5561 && in_mode == SFmode && in_n == 4)
5562 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5563 break;
5564 CASE_CFN_CEIL:
5565 if (VECTOR_UNIT_VSX_P (V2DFmode)
5566 && out_mode == DFmode && out_n == 2
5567 && in_mode == DFmode && in_n == 2)
5568 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5569 if (VECTOR_UNIT_VSX_P (V4SFmode)
5570 && out_mode == SFmode && out_n == 4
5571 && in_mode == SFmode && in_n == 4)
5572 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5573 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5574 && out_mode == SFmode && out_n == 4
5575 && in_mode == SFmode && in_n == 4)
5576 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5577 break;
5578 CASE_CFN_FLOOR:
5579 if (VECTOR_UNIT_VSX_P (V2DFmode)
5580 && out_mode == DFmode && out_n == 2
5581 && in_mode == DFmode && in_n == 2)
5582 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5583 if (VECTOR_UNIT_VSX_P (V4SFmode)
5584 && out_mode == SFmode && out_n == 4
5585 && in_mode == SFmode && in_n == 4)
5586 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5587 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5588 && out_mode == SFmode && out_n == 4
5589 && in_mode == SFmode && in_n == 4)
5590 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5591 break;
5592 CASE_CFN_FMA:
5593 if (VECTOR_UNIT_VSX_P (V2DFmode)
5594 && out_mode == DFmode && out_n == 2
5595 && in_mode == DFmode && in_n == 2)
5596 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5597 if (VECTOR_UNIT_VSX_P (V4SFmode)
5598 && out_mode == SFmode && out_n == 4
5599 && in_mode == SFmode && in_n == 4)
5600 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5601 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5602 && out_mode == SFmode && out_n == 4
5603 && in_mode == SFmode && in_n == 4)
5604 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5605 break;
5606 CASE_CFN_TRUNC:
5607 if (VECTOR_UNIT_VSX_P (V2DFmode)
5608 && out_mode == DFmode && out_n == 2
5609 && in_mode == DFmode && in_n == 2)
5610 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5611 if (VECTOR_UNIT_VSX_P (V4SFmode)
5612 && out_mode == SFmode && out_n == 4
5613 && in_mode == SFmode && in_n == 4)
5614 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5615 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5616 && out_mode == SFmode && out_n == 4
5617 && in_mode == SFmode && in_n == 4)
5618 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5619 break;
5620 CASE_CFN_NEARBYINT:
5621 if (VECTOR_UNIT_VSX_P (V2DFmode)
5622 && flag_unsafe_math_optimizations
5623 && out_mode == DFmode && out_n == 2
5624 && in_mode == DFmode && in_n == 2)
5625 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5626 if (VECTOR_UNIT_VSX_P (V4SFmode)
5627 && flag_unsafe_math_optimizations
5628 && out_mode == SFmode && out_n == 4
5629 && in_mode == SFmode && in_n == 4)
5630 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5631 break;
5632 CASE_CFN_RINT:
5633 if (VECTOR_UNIT_VSX_P (V2DFmode)
5634 && !flag_trapping_math
5635 && out_mode == DFmode && out_n == 2
5636 && in_mode == DFmode && in_n == 2)
5637 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5638 if (VECTOR_UNIT_VSX_P (V4SFmode)
5639 && !flag_trapping_math
5640 && out_mode == SFmode && out_n == 4
5641 && in_mode == SFmode && in_n == 4)
5642 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5643 break;
5644 default:
5645 break;
5646 }
5647
5648 /* Generate calls to libmass if appropriate. */
5649 if (rs6000_veclib_handler)
5650 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5651
5652 return NULL_TREE;
5653 }
5654
5655 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5656
5657 static tree
5658 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5659 tree type_in)
5660 {
5661 machine_mode in_mode, out_mode;
5662 int in_n, out_n;
5663
5664 if (TARGET_DEBUG_BUILTIN)
5665 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5666 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5667 GET_MODE_NAME (TYPE_MODE (type_out)),
5668 GET_MODE_NAME (TYPE_MODE (type_in)));
5669
5670 if (TREE_CODE (type_out) != VECTOR_TYPE
5671 || TREE_CODE (type_in) != VECTOR_TYPE)
5672 return NULL_TREE;
5673
5674 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5675 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5676 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5677 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5678
5679 enum rs6000_builtins fn
5680 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5681 switch (fn)
5682 {
5683 case RS6000_BUILTIN_RSQRTF:
5684 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5685 && out_mode == SFmode && out_n == 4
5686 && in_mode == SFmode && in_n == 4)
5687 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5688 break;
5689 case RS6000_BUILTIN_RSQRT:
5690 if (VECTOR_UNIT_VSX_P (V2DFmode)
5691 && out_mode == DFmode && out_n == 2
5692 && in_mode == DFmode && in_n == 2)
5693 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5694 break;
5695 case RS6000_BUILTIN_RECIPF:
5696 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5697 && out_mode == SFmode && out_n == 4
5698 && in_mode == SFmode && in_n == 4)
5699 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5700 break;
5701 case RS6000_BUILTIN_RECIP:
5702 if (VECTOR_UNIT_VSX_P (V2DFmode)
5703 && out_mode == DFmode && out_n == 2
5704 && in_mode == DFmode && in_n == 2)
5705 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5706 break;
5707 default:
5708 break;
5709 }
5710 return NULL_TREE;
5711 }
5712 \f
5713 /* Default CPU string for rs6000*_file_start functions. */
5714 static const char *rs6000_default_cpu;
5715
5716 #ifdef USING_ELFOS_H
5717 const char *rs6000_machine;
5718
5719 const char *
5720 rs6000_machine_from_flags (void)
5721 {
5722 HOST_WIDE_INT flags = rs6000_isa_flags;
5723
5724 /* Disable the flags that should never influence the .machine selection. */
5725 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5726
5727 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5728 return "power10";
5729 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5730 return "power9";
5731 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5732 return "power8";
5733 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5734 return "power7";
5735 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5736 return "power6";
5737 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5738 return "power5";
5739 if ((flags & ISA_2_1_MASKS) != 0)
5740 return "power4";
5741 if ((flags & OPTION_MASK_POWERPC64) != 0)
5742 return "ppc64";
5743 return "ppc";
5744 }
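/* Illustrative mapping (editorial, not exhaustive): -mcpu=power8 sets ISA
   2.07 server mask bits but none of the 3.0/3.1-only bits, so this returns
   "power8" and the file prologue emits ".machine power8".  */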
5745
5746 void
5747 emit_asm_machine (void)
5748 {
5749 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5750 }
5751 #endif
5752
5753 /* Do anything needed at the start of the asm file. */
5754
5755 static void
5756 rs6000_file_start (void)
5757 {
5758 char buffer[80];
5759 const char *start = buffer;
5760 FILE *file = asm_out_file;
5761
5762 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5763
5764 default_file_start ();
5765
5766 if (flag_verbose_asm)
5767 {
5768 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5769
5770 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5771 {
5772 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5773 start = "";
5774 }
5775
5776 if (global_options_set.x_rs6000_cpu_index)
5777 {
5778 fprintf (file, "%s -mcpu=%s", start,
5779 processor_target_table[rs6000_cpu_index].name);
5780 start = "";
5781 }
5782
5783 if (global_options_set.x_rs6000_tune_index)
5784 {
5785 fprintf (file, "%s -mtune=%s", start,
5786 processor_target_table[rs6000_tune_index].name);
5787 start = "";
5788 }
5789
5790 if (PPC405_ERRATUM77)
5791 {
5792 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5793 start = "";
5794 }
5795
5796 #ifdef USING_ELFOS_H
5797 switch (rs6000_sdata)
5798 {
5799 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5800 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5801 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5802 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5803 }
5804
5805 if (rs6000_sdata && g_switch_value)
5806 {
5807 fprintf (file, "%s -G %d", start,
5808 g_switch_value);
5809 start = "";
5810 }
5811 #endif
5812
5813 if (*start == '\0')
5814 putc ('\n', file);
5815 }
5816
5817 #ifdef USING_ELFOS_H
5818 rs6000_machine = rs6000_machine_from_flags ();
5819 emit_asm_machine ();
5820 #endif
5821
5822 if (DEFAULT_ABI == ABI_ELFv2)
5823 fprintf (file, "\t.abiversion 2\n");
5824 }
5825
5826 \f
5827 /* Return nonzero if this function is known to have a null epilogue. */
5828
5829 int
5830 direct_return (void)
5831 {
5832 if (reload_completed)
5833 {
5834 rs6000_stack_t *info = rs6000_stack_info ();
5835
5836 if (info->first_gp_reg_save == 32
5837 && info->first_fp_reg_save == 64
5838 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5839 && ! info->lr_save_p
5840 && ! info->cr_save_p
5841 && info->vrsave_size == 0
5842 && ! info->push_p)
5843 return 1;
5844 }
5845
5846 return 0;
5847 }
5848
5849 /* Helper for num_insns_constant. Calculate number of instructions to
5850 load VALUE to a single gpr using combinations of addi, addis, ori,
5851 oris, sldi and rldimi instructions. */
5852
5853 static int
5854 num_insns_constant_gpr (HOST_WIDE_INT value)
5855 {
5856 /* signed constant loadable with addi */
5857 if (SIGNED_INTEGER_16BIT_P (value))
5858 return 1;
5859
5860 /* constant loadable with addis */
5861 else if ((value & 0xffff) == 0
5862 && (value >> 31 == -1 || value >> 31 == 0))
5863 return 1;
5864
5865 /* PADDI can support up to 34 bit signed integers. */
5866 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5867 return 1;
5868
5869 else if (TARGET_POWERPC64)
5870 {
5871 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5872 HOST_WIDE_INT high = value >> 31;
5873
5874 if (high == 0 || high == -1)
5875 return 2;
5876
5877 high >>= 1;
5878
5879 if (low == 0 || low == high)
5880 return num_insns_constant_gpr (high) + 1;
5881 else if (high == 0)
5882 return num_insns_constant_gpr (low) + 1;
5883 else
5884 return (num_insns_constant_gpr (high)
5885 + num_insns_constant_gpr (low) + 1);
5886 }
5887
5888 else
5889 return 2;
5890 }
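/* Worked example (editorial): 0x12345678 is neither a signed 16-bit value
   nor a value with a zero low halfword, so on a 64-bit target it splits
   into high = 0 and low = 0x12345678 and costs 2 insns, matching
     lis rN,0x1234 ; ori rN,rN,0x5678
   With -mprefixed it fits in 34 signed bits and a single pli suffices.  */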
5891
5892 /* Helper for num_insns_constant. Allow constants formed by the
5893 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5894 and handle modes that require multiple gprs. */
5895
5896 static int
5897 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5898 {
5899 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5900 int total = 0;
5901 while (nregs-- > 0)
5902 {
5903 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5904 int insns = num_insns_constant_gpr (low);
5905 if (insns > 2
5906 /* We won't get more than 2 from num_insns_constant_gpr
5907 except when TARGET_POWERPC64 and mode is DImode or
5908 wider, so the register mode must be DImode. */
5909 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5910 insns = 2;
5911 total += insns;
5912 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5913 it all at once would be UB. */
5914 value >>= (BITS_PER_WORD - 1);
5915 value >>= 1;
5916 }
5917 return total;
5918 }
5919
5920 /* Return the number of instructions it takes to form a constant in as
5921    many gprs as are needed for MODE.  */
5922
5923 int
5924 num_insns_constant (rtx op, machine_mode mode)
5925 {
5926 HOST_WIDE_INT val;
5927
5928 switch (GET_CODE (op))
5929 {
5930 case CONST_INT:
5931 val = INTVAL (op);
5932 break;
5933
5934 case CONST_WIDE_INT:
5935 {
5936 int insns = 0;
5937 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5938 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5939 DImode);
5940 return insns;
5941 }
5942
5943 case CONST_DOUBLE:
5944 {
5945 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5946
5947 if (mode == SFmode || mode == SDmode)
5948 {
5949 long l;
5950
5951 if (mode == SDmode)
5952 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5953 else
5954 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5955 /* See the first define_split in rs6000.md handling a
5956 const_double_operand. */
5957 val = l;
5958 mode = SImode;
5959 }
5960 else if (mode == DFmode || mode == DDmode)
5961 {
5962 long l[2];
5963
5964 if (mode == DDmode)
5965 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5966 else
5967 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5968
5969 /* See the second (32-bit) and third (64-bit) define_split
5970 in rs6000.md handling a const_double_operand. */
5971 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5972 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5973 mode = DImode;
5974 }
5975 else if (mode == TFmode || mode == TDmode
5976 || mode == KFmode || mode == IFmode)
5977 {
5978 long l[4];
5979 int insns;
5980
5981 if (mode == TDmode)
5982 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5983 else
5984 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5985
5986 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5987 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5988 insns = num_insns_constant_multi (val, DImode);
5989 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5990 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5991 insns += num_insns_constant_multi (val, DImode);
5992 return insns;
5993 }
5994 else
5995 gcc_unreachable ();
5996 }
5997 break;
5998
5999 default:
6000 gcc_unreachable ();
6001 }
6002
6003 return num_insns_constant_multi (val, mode);
6004 }
6005
6006 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6007 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6008 corresponding element of the vector, but for V4SFmode, the
6009 corresponding "float" is interpreted as an SImode integer. */
6010
6011 HOST_WIDE_INT
6012 const_vector_elt_as_int (rtx op, unsigned int elt)
6013 {
6014 rtx tmp;
6015
6016 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6017 gcc_assert (GET_MODE (op) != V2DImode
6018 && GET_MODE (op) != V2DFmode);
6019
6020 tmp = CONST_VECTOR_ELT (op, elt);
6021 if (GET_MODE (op) == V4SFmode)
6022 tmp = gen_lowpart (SImode, tmp);
6023 return INTVAL (tmp);
6024 }
6025
6026 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6027 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6028 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6029 all items are set to the same value and contain COPIES replicas of the
6030 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6031 operand and the others are set to the value of the operand's msb. */
6032
6033 static bool
6034 vspltis_constant (rtx op, unsigned step, unsigned copies)
6035 {
6036 machine_mode mode = GET_MODE (op);
6037 machine_mode inner = GET_MODE_INNER (mode);
6038
6039 unsigned i;
6040 unsigned nunits;
6041 unsigned bitsize;
6042 unsigned mask;
6043
6044 HOST_WIDE_INT val;
6045 HOST_WIDE_INT splat_val;
6046 HOST_WIDE_INT msb_val;
6047
6048 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6049 return false;
6050
6051 nunits = GET_MODE_NUNITS (mode);
6052 bitsize = GET_MODE_BITSIZE (inner);
6053 mask = GET_MODE_MASK (inner);
6054
6055 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6056 splat_val = val;
6057 msb_val = val >= 0 ? 0 : -1;
6058
6059 /* Construct the value to be splatted, if possible. If not, return 0. */
6060 for (i = 2; i <= copies; i *= 2)
6061 {
6062 HOST_WIDE_INT small_val;
6063 bitsize /= 2;
6064 small_val = splat_val >> bitsize;
6065 mask >>= bitsize;
6066 if (splat_val != ((HOST_WIDE_INT)
6067 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6068 | (small_val & mask)))
6069 return false;
6070 splat_val = small_val;
6071 }
6072
6073 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6074 if (EASY_VECTOR_15 (splat_val))
6075 ;
6076
6077 /* Also check if we can splat, and then add the result to itself. Do so if
6078      the value is positive, or if the splat instruction is using OP's mode;
6079 for splat_val < 0, the splat and the add should use the same mode. */
6080 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6081 && (splat_val >= 0 || (step == 1 && copies == 1)))
6082 ;
6083
6084   /* Also check if we are loading up the most significant bit, which can be
6085      done by loading up -1 and shifting the value left by -1.  */
6086 else if (EASY_VECTOR_MSB (splat_val, inner))
6087 ;
6088
6089 else
6090 return false;
6091
6092 /* Check if VAL is present in every STEP-th element, and the
6093 other elements are filled with its most significant bit. */
6094 for (i = 1; i < nunits; ++i)
6095 {
6096 HOST_WIDE_INT desired_val;
6097 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6098 if ((i & (step - 1)) == 0)
6099 desired_val = val;
6100 else
6101 desired_val = msb_val;
6102
6103 if (desired_val != const_vector_elt_as_int (op, elt))
6104 return false;
6105 }
6106
6107 return true;
6108 }
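/* Illustrative examples (editorial): a V4SImode constant with all elements
   5 matches with STEP = 1, COPIES = 1 (a plain vspltisw 5).  A V8HImode
   constant with all elements 0x0303 matches with STEP = 1, COPIES = 2,
   since vspltisb 3 replicates the byte 3 into both halves of each
   halfword.  */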
6109
6110 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6111 instruction, filling in the bottom elements with 0 or -1.
6112
6113 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6114 for the number of zeroes to shift in, or negative for the number of 0xff
6115 bytes to shift in.
6116
6117 OP is a CONST_VECTOR. */
6118
6119 int
6120 vspltis_shifted (rtx op)
6121 {
6122 machine_mode mode = GET_MODE (op);
6123 machine_mode inner = GET_MODE_INNER (mode);
6124
6125 unsigned i, j;
6126 unsigned nunits;
6127 unsigned mask;
6128
6129 HOST_WIDE_INT val;
6130
6131 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6132 return false;
6133
6134 /* We need to create pseudo registers to do the shift, so don't recognize
6135 shift vector constants after reload. */
6136 if (!can_create_pseudo_p ())
6137 return false;
6138
6139 nunits = GET_MODE_NUNITS (mode);
6140 mask = GET_MODE_MASK (inner);
6141
6142 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6143
6144 /* Check if the value can really be the operand of a vspltis[bhw]. */
6145 if (EASY_VECTOR_15 (val))
6146 ;
6147
6148 /* Also check if we are loading up the most significant bit which can be done
6149 by loading up -1 and shifting the value left by -1. */
6150 else if (EASY_VECTOR_MSB (val, inner))
6151 ;
6152
6153 else
6154 return 0;
6155
6156 /* Check if VAL is present in every STEP-th element until we find elements
6157 that are 0 or all 1 bits. */
6158 for (i = 1; i < nunits; ++i)
6159 {
6160 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6161 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6162
6163 /* If the value isn't the splat value, check for the remaining elements
6164 being 0/-1. */
6165 if (val != elt_val)
6166 {
6167 if (elt_val == 0)
6168 {
6169 for (j = i+1; j < nunits; ++j)
6170 {
6171 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6172 if (const_vector_elt_as_int (op, elt2) != 0)
6173 return 0;
6174 }
6175
6176 return (nunits - i) * GET_MODE_SIZE (inner);
6177 }
6178
6179 else if ((elt_val & mask) == mask)
6180 {
6181 for (j = i+1; j < nunits; ++j)
6182 {
6183 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6184 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6185 return 0;
6186 }
6187
6188 return -((nunits - i) * GET_MODE_SIZE (inner));
6189 }
6190
6191 else
6192 return 0;
6193 }
6194 }
6195
6196   /* If all elements are equal, we don't need to do VSLDOI.  */
6197 return 0;
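/* Illustrative example (editorial): a V16QImode constant whose first 12
   bytes in element order are 5 and whose last 4 bytes are 0 returns
   (16 - 12) * 1 = 4, i.e. splat 5 with vspltisb and shift in 4 zero bytes
   with vsldoi; were the trailing bytes 0xff instead, it would return -4.  */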
6198 }
6199
6200
6201 /* Return true if OP is of the given MODE and can be synthesized
6202 with a vspltisb, vspltish or vspltisw. */
6203
6204 bool
6205 easy_altivec_constant (rtx op, machine_mode mode)
6206 {
6207 unsigned step, copies;
6208
6209 if (mode == VOIDmode)
6210 mode = GET_MODE (op);
6211 else if (mode != GET_MODE (op))
6212 return false;
6213
6214 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6215 constants. */
6216 if (mode == V2DFmode)
6217 return zero_constant (op, mode);
6218
6219 else if (mode == V2DImode)
6220 {
6221 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6222 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6223 return false;
6224
6225 if (zero_constant (op, mode))
6226 return true;
6227
6228 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6229 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6230 return true;
6231
6232 return false;
6233 }
6234
6235 /* V1TImode is a special container for TImode. Ignore for now. */
6236 else if (mode == V1TImode)
6237 return false;
6238
6239 /* Start with a vspltisw. */
6240 step = GET_MODE_NUNITS (mode) / 4;
6241 copies = 1;
6242
6243 if (vspltis_constant (op, step, copies))
6244 return true;
6245
6246 /* Then try with a vspltish. */
6247 if (step == 1)
6248 copies <<= 1;
6249 else
6250 step >>= 1;
6251
6252 if (vspltis_constant (op, step, copies))
6253 return true;
6254
6255 /* And finally a vspltisb. */
6256 if (step == 1)
6257 copies <<= 1;
6258 else
6259 step >>= 1;
6260
6261 if (vspltis_constant (op, step, copies))
6262 return true;
6263
6264 if (vspltis_shifted (op) != 0)
6265 return true;
6266
6267 return false;
6268 }
6269
6270 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6271 result is OP. Abort if it is not possible. */
6272
6273 rtx
6274 gen_easy_altivec_constant (rtx op)
6275 {
6276 machine_mode mode = GET_MODE (op);
6277 int nunits = GET_MODE_NUNITS (mode);
6278 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6279 unsigned step = nunits / 4;
6280 unsigned copies = 1;
6281
6282 /* Start with a vspltisw. */
6283 if (vspltis_constant (op, step, copies))
6284 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6285
6286 /* Then try with a vspltish. */
6287 if (step == 1)
6288 copies <<= 1;
6289 else
6290 step >>= 1;
6291
6292 if (vspltis_constant (op, step, copies))
6293 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6294
6295 /* And finally a vspltisb. */
6296 if (step == 1)
6297 copies <<= 1;
6298 else
6299 step >>= 1;
6300
6301 if (vspltis_constant (op, step, copies))
6302 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6303
6304 gcc_unreachable ();
6305 }
6306
6307 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6308 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6309
6310    Store the number of instructions needed (1 or 2) at the address pointed
6311    to by NUM_INSNS_PTR.
6312
6313    Store the constant that is being split through CONSTANT_PTR.  */
6314
6315 bool
6316 xxspltib_constant_p (rtx op,
6317 machine_mode mode,
6318 int *num_insns_ptr,
6319 int *constant_ptr)
6320 {
6321 size_t nunits = GET_MODE_NUNITS (mode);
6322 size_t i;
6323 HOST_WIDE_INT value;
6324 rtx element;
6325
6326 /* Set the returned values to out of bound values. */
6327 *num_insns_ptr = -1;
6328 *constant_ptr = 256;
6329
6330 if (!TARGET_P9_VECTOR)
6331 return false;
6332
6333 if (mode == VOIDmode)
6334 mode = GET_MODE (op);
6335
6336 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6337 return false;
6338
6339 /* Handle (vec_duplicate <constant>). */
6340 if (GET_CODE (op) == VEC_DUPLICATE)
6341 {
6342 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6343 && mode != V2DImode)
6344 return false;
6345
6346 element = XEXP (op, 0);
6347 if (!CONST_INT_P (element))
6348 return false;
6349
6350 value = INTVAL (element);
6351 if (!IN_RANGE (value, -128, 127))
6352 return false;
6353 }
6354
6355 /* Handle (const_vector [...]). */
6356 else if (GET_CODE (op) == CONST_VECTOR)
6357 {
6358 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6359 && mode != V2DImode)
6360 return false;
6361
6362 element = CONST_VECTOR_ELT (op, 0);
6363 if (!CONST_INT_P (element))
6364 return false;
6365
6366 value = INTVAL (element);
6367 if (!IN_RANGE (value, -128, 127))
6368 return false;
6369
6370 for (i = 1; i < nunits; i++)
6371 {
6372 element = CONST_VECTOR_ELT (op, i);
6373 if (!CONST_INT_P (element))
6374 return false;
6375
6376 if (value != INTVAL (element))
6377 return false;
6378 }
6379 }
6380
6381 /* Handle integer constants being loaded into the upper part of the VSX
6382 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6383      can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6384 else if (CONST_INT_P (op))
6385 {
6386 if (!SCALAR_INT_MODE_P (mode))
6387 return false;
6388
6389 value = INTVAL (op);
6390 if (!IN_RANGE (value, -128, 127))
6391 return false;
6392
6393 if (!IN_RANGE (value, -1, 0))
6394 {
6395 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6396 return false;
6397
6398 if (EASY_VECTOR_15 (value))
6399 return false;
6400 }
6401 }
6402
6403 else
6404 return false;
6405
6406 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6407 sign extend. Special case 0/-1 to allow getting any VSX register instead
6408 of an Altivec register. */
6409 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6410 && EASY_VECTOR_15 (value))
6411 return false;
6412
6413 /* Return # of instructions and the constant byte for XXSPLTIB. */
6414 if (mode == V16QImode)
6415 *num_insns_ptr = 1;
6416
6417 else if (IN_RANGE (value, -1, 0))
6418 *num_insns_ptr = 1;
6419
6420 else
6421 *num_insns_ptr = 2;
6422
6423 *constant_ptr = (int) value;
6424 return true;
6425 }
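/* Worked examples (editorial): splatting 5 into V4SImode returns false,
   since a single vspltisw 5 is preferable.  Splatting 100 into V4SImode
   succeeds with *num_insns_ptr = 2 (xxspltib plus a sign extension), and
   splatting 100 into V16QImode succeeds with *num_insns_ptr = 1.  */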
6426
6427 const char *
6428 output_vec_const_move (rtx *operands)
6429 {
6430 int shift;
6431 machine_mode mode;
6432 rtx dest, vec;
6433
6434 dest = operands[0];
6435 vec = operands[1];
6436 mode = GET_MODE (dest);
6437
6438 if (TARGET_VSX)
6439 {
6440 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6441 int xxspltib_value = 256;
6442 int num_insns = -1;
6443
6444 if (zero_constant (vec, mode))
6445 {
6446 if (TARGET_P9_VECTOR)
6447 return "xxspltib %x0,0";
6448
6449 else if (dest_vmx_p)
6450 return "vspltisw %0,0";
6451
6452 else
6453 return "xxlxor %x0,%x0,%x0";
6454 }
6455
6456 if (all_ones_constant (vec, mode))
6457 {
6458 if (TARGET_P9_VECTOR)
6459 return "xxspltib %x0,255";
6460
6461 else if (dest_vmx_p)
6462 return "vspltisw %0,-1";
6463
6464 else if (TARGET_P8_VECTOR)
6465 return "xxlorc %x0,%x0,%x0";
6466
6467 else
6468 gcc_unreachable ();
6469 }
6470
6471 if (TARGET_P9_VECTOR
6472 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6473 {
6474 if (num_insns == 1)
6475 {
6476 operands[2] = GEN_INT (xxspltib_value & 0xff);
6477 return "xxspltib %x0,%2";
6478 }
6479
6480 return "#";
6481 }
6482 }
6483
6484 if (TARGET_ALTIVEC)
6485 {
6486 rtx splat_vec;
6487
6488 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6489 if (zero_constant (vec, mode))
6490 return "vspltisw %0,0";
6491
6492 if (all_ones_constant (vec, mode))
6493 return "vspltisw %0,-1";
6494
6495 /* Do we need to construct a value using VSLDOI? */
6496 shift = vspltis_shifted (vec);
6497 if (shift != 0)
6498 return "#";
6499
6500 splat_vec = gen_easy_altivec_constant (vec);
6501 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6502 operands[1] = XEXP (splat_vec, 0);
6503 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6504 return "#";
6505
6506 switch (GET_MODE (splat_vec))
6507 {
6508 case E_V4SImode:
6509 return "vspltisw %0,%1";
6510
6511 case E_V8HImode:
6512 return "vspltish %0,%1";
6513
6514 case E_V16QImode:
6515 return "vspltisb %0,%1";
6516
6517 default:
6518 gcc_unreachable ();
6519 }
6520 }
6521
6522 gcc_unreachable ();
6523 }
6524
6525 /* Initialize vector TARGET to VALS. */
6526
6527 void
6528 rs6000_expand_vector_init (rtx target, rtx vals)
6529 {
6530 machine_mode mode = GET_MODE (target);
6531 machine_mode inner_mode = GET_MODE_INNER (mode);
6532 unsigned int n_elts = GET_MODE_NUNITS (mode);
6533 int n_var = 0, one_var = -1;
6534 bool all_same = true, all_const_zero = true;
6535 rtx x, mem;
6536 unsigned int i;
6537
6538 for (i = 0; i < n_elts; ++i)
6539 {
6540 x = XVECEXP (vals, 0, i);
6541 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6542 ++n_var, one_var = i;
6543 else if (x != CONST0_RTX (inner_mode))
6544 all_const_zero = false;
6545
6546 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6547 all_same = false;
6548 }
6549
6550 if (n_var == 0)
6551 {
6552 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6553 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6554 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6555 {
6556 /* Zero register. */
6557 emit_move_insn (target, CONST0_RTX (mode));
6558 return;
6559 }
6560 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6561 {
6562 /* Splat immediate. */
6563 emit_insn (gen_rtx_SET (target, const_vec));
6564 return;
6565 }
6566 else
6567 {
6568 /* Load from constant pool. */
6569 emit_move_insn (target, const_vec);
6570 return;
6571 }
6572 }
6573
6574 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6575 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6576 {
6577 rtx op[2];
6578 size_t i;
6579 size_t num_elements = all_same ? 1 : 2;
6580 for (i = 0; i < num_elements; i++)
6581 {
6582 op[i] = XVECEXP (vals, 0, i);
6583 /* Just in case there is a SUBREG with a smaller mode, do a
6584 conversion. */
6585 if (GET_MODE (op[i]) != inner_mode)
6586 {
6587 rtx tmp = gen_reg_rtx (inner_mode);
6588 convert_move (tmp, op[i], 0);
6589 op[i] = tmp;
6590 }
6591 /* Allow load with splat double word. */
6592 else if (MEM_P (op[i]))
6593 {
6594 if (!all_same)
6595 op[i] = force_reg (inner_mode, op[i]);
6596 }
6597 else if (!REG_P (op[i]))
6598 op[i] = force_reg (inner_mode, op[i]);
6599 }
6600
6601 if (all_same)
6602 {
6603 if (mode == V2DFmode)
6604 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6605 else
6606 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6607 }
6608 else
6609 {
6610 if (mode == V2DFmode)
6611 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6612 else
6613 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6614 }
6615 return;
6616 }
6617
6618 /* Special case initializing vector int if we are on 64-bit systems with
6619 direct move or we have the ISA 3.0 instructions. */
6620 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6621 && TARGET_DIRECT_MOVE_64BIT)
6622 {
6623 if (all_same)
6624 {
6625 rtx element0 = XVECEXP (vals, 0, 0);
6626 if (MEM_P (element0))
6627 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6628 else
6629 element0 = force_reg (SImode, element0);
6630
6631 if (TARGET_P9_VECTOR)
6632 emit_insn (gen_vsx_splat_v4si (target, element0));
6633 else
6634 {
6635 rtx tmp = gen_reg_rtx (DImode);
6636 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6637 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6638 }
6639 return;
6640 }
6641 else
6642 {
6643 rtx elements[4];
6644 size_t i;
6645
6646 for (i = 0; i < 4; i++)
6647 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6648
6649 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6650 elements[2], elements[3]));
6651 return;
6652 }
6653 }
6654
6655   /* With single-precision floating point on VSX, we know that internally
6656      single precision is actually represented as a double.  Either make 2 V2DF
6657      vectors and convert those vectors to single precision, or do one
6658      conversion and splat the result to the other elements.  */
6659 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6660 {
6661 if (all_same)
6662 {
6663 rtx element0 = XVECEXP (vals, 0, 0);
6664
6665 if (TARGET_P9_VECTOR)
6666 {
6667 if (MEM_P (element0))
6668 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6669
6670 emit_insn (gen_vsx_splat_v4sf (target, element0));
6671 }
6672
6673 else
6674 {
6675 rtx freg = gen_reg_rtx (V4SFmode);
6676 rtx sreg = force_reg (SFmode, element0);
6677 rtx cvt = (TARGET_XSCVDPSPN
6678 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6679 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6680
6681 emit_insn (cvt);
6682 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6683 const0_rtx));
6684 }
6685 }
6686 else
6687 {
6688 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6689 {
6690 rtx tmp_sf[4];
6691 rtx tmp_si[4];
6692 rtx tmp_di[4];
6693 rtx mrg_di[4];
6694 for (i = 0; i < 4; i++)
6695 {
6696 tmp_si[i] = gen_reg_rtx (SImode);
6697 tmp_di[i] = gen_reg_rtx (DImode);
6698 mrg_di[i] = gen_reg_rtx (DImode);
6699 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6700 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6701 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6702 }
6703
6704 if (!BYTES_BIG_ENDIAN)
6705 {
6706 std::swap (tmp_di[0], tmp_di[1]);
6707 std::swap (tmp_di[2], tmp_di[3]);
6708 }
6709
6710 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6711 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6712 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6713 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6714
6715 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6716 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6717 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6718 }
6719 else
6720 {
6721 rtx dbl_even = gen_reg_rtx (V2DFmode);
6722 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6723 rtx flt_even = gen_reg_rtx (V4SFmode);
6724 rtx flt_odd = gen_reg_rtx (V4SFmode);
6725 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6726 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6727 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6728 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6729
6730 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6731 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6732 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6733 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6734 rs6000_expand_extract_even (target, flt_even, flt_odd);
6735 }
6736 }
6737 return;
6738 }
6739
6740 /* Special case initializing vector short/char that are splats if we are on
6741 64-bit systems with direct move. */
6742 if (all_same && TARGET_DIRECT_MOVE_64BIT
6743 && (mode == V16QImode || mode == V8HImode))
6744 {
6745 rtx op0 = XVECEXP (vals, 0, 0);
6746 rtx di_tmp = gen_reg_rtx (DImode);
6747
6748 if (!REG_P (op0))
6749 op0 = force_reg (GET_MODE_INNER (mode), op0);
6750
6751 if (mode == V16QImode)
6752 {
6753 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6754 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6755 return;
6756 }
6757
6758 if (mode == V8HImode)
6759 {
6760 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6761 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6762 return;
6763 }
6764 }
6765
6766 /* Store value to stack temp. Load vector element. Splat. However, splat
6767 of 64-bit items is not supported on Altivec. */
6768 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6769 {
6770 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6771 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6772 XVECEXP (vals, 0, 0));
6773 x = gen_rtx_UNSPEC (VOIDmode,
6774 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6775 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6776 gen_rtvec (2,
6777 gen_rtx_SET (target, mem),
6778 x)));
6779 x = gen_rtx_VEC_SELECT (inner_mode, target,
6780 gen_rtx_PARALLEL (VOIDmode,
6781 gen_rtvec (1, const0_rtx)));
6782 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6783 return;
6784 }
6785
6786 /* One field is non-constant. Load constant then overwrite
6787 varying field. */
6788 if (n_var == 1)
6789 {
6790 rtx copy = copy_rtx (vals);
6791
6792 /* Load constant part of vector, substitute neighboring value for
6793 varying element. */
6794 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6795 rs6000_expand_vector_init (target, copy);
6796
6797 /* Insert variable. */
6798 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6799 GEN_INT (one_var));
6800 return;
6801 }
6802
6803 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6804 {
6805 rtx op[16];
6806 /* Force the values into word_mode registers. */
6807 for (i = 0; i < n_elts; i++)
6808 {
6809 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
6810 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
6811 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
6812 }
6813
6814 /* Take unsigned char on 64-bit big endian as the example for the
6815 construction below; the input values are: A, B, C, D, ..., O, P. */
6816
6817 if (TARGET_DIRECT_MOVE_128)
6818 {
6819 /* Move to VSX register with vec_concat, each has 2 values.
6820 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6821 vr1[1] = { xxxxxxxC, xxxxxxxD };
6822 ...
6823 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6824 rtx vr1[8];
6825 for (i = 0; i < n_elts / 2; i++)
6826 {
6827 vr1[i] = gen_reg_rtx (V2DImode);
6828 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6829 op[i * 2 + 1]));
6830 }
6831
6832 /* Pack vectors with 2 values into vectors with 4 values.
6833 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6834 vr2[1] = { xxxExxxF, xxxGxxxH };
6835 vr2[2] = { xxxIxxxJ, xxxKxxxL };
6836 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6837 rtx vr2[4];
6838 for (i = 0; i < n_elts / 4; i++)
6839 {
6840 vr2[i] = gen_reg_rtx (V4SImode);
6841 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
6842 vr1[i * 2 + 1]));
6843 }
6844
6845 /* Pack vectors with 4 values into vectors with 8 values.
6846 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
6847 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
6848 rtx vr3[2];
6849 for (i = 0; i < n_elts / 8; i++)
6850 {
6851 vr3[i] = gen_reg_rtx (V8HImode);
6852 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
6853 vr2[i * 2 + 1]));
6854 }
6855
6856 /* If the mode is V8HImode, we are done; move the result and return. */
6857 if (mode == V8HImode)
6858 {
6859 emit_insn (gen_rtx_SET (target, vr3[0]));
6860 return;
6861 }
6862
6863 /* Pack vectors with 8 values into 16 values. */
6864 rtx res = gen_reg_rtx (V16QImode);
6865 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
6866 emit_insn (gen_rtx_SET (target, res));
6867 }
6868 else
6869 {
6870 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
6871 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
6872 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
6873 rtx perm_idx;
6874
6875 /* Set up some common gen routines and values. */
6876 if (BYTES_BIG_ENDIAN)
6877 {
6878 if (mode == V16QImode)
6879 {
6880 merge_v16qi = gen_altivec_vmrghb;
6881 merge_v8hi = gen_altivec_vmrglh;
6882 }
6883 else
6884 merge_v8hi = gen_altivec_vmrghh;
6885
6886 merge_v4si = gen_altivec_vmrglw;
6887 perm_idx = GEN_INT (3);
6888 }
6889 else
6890 {
6891 if (mode == V16QImode)
6892 {
6893 merge_v16qi = gen_altivec_vmrglb;
6894 merge_v8hi = gen_altivec_vmrghh;
6895 }
6896 else
6897 merge_v8hi = gen_altivec_vmrglh;
6898
6899 merge_v4si = gen_altivec_vmrghw;
6900 perm_idx = GEN_INT (0);
6901 }
6902
6903 /* Move to VSX register with direct move.
6904 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
6905 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
6906 ...
6907 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
6908 rtx vr_qi[16];
6909 for (i = 0; i < n_elts; i++)
6910 {
6911 vr_qi[i] = gen_reg_rtx (V16QImode);
6912 if (TARGET_POWERPC64)
6913 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
6914 else
6915 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
6916 }
6917
6918 /* Merge/move to vector short.
6919 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
6920 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
6921 ...
6922 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
6923 rtx vr_hi[8];
6924 for (i = 0; i < 8; i++)
6925 {
6926 rtx tmp = vr_qi[i];
6927 if (mode == V16QImode)
6928 {
6929 tmp = gen_reg_rtx (V16QImode);
6930 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
6931 }
6932 vr_hi[i] = gen_reg_rtx (V8HImode);
6933 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
6934 }
6935
6936 /* Merge vector short to vector int.
6937 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
6938 vr_si[1] = { xxxxxxxx, xxxxEFGH };
6939 ...
6940 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
6941 rtx vr_si[4];
6942 for (i = 0; i < 4; i++)
6943 {
6944 rtx tmp = gen_reg_rtx (V8HImode);
6945 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
6946 vr_si[i] = gen_reg_rtx (V4SImode);
6947 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
6948 }
6949
6950 /* Merge vector int to vector long.
6951 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
6952 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
6953 rtx vr_di[2];
6954 for (i = 0; i < 2; i++)
6955 {
6956 rtx tmp = gen_reg_rtx (V4SImode);
6957 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
6958 vr_di[i] = gen_reg_rtx (V2DImode);
6959 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
6960 }
6961
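/* The payload now sits in one doubleword of each V2DI: doubleword 1 on
   big endian (per the diagrams above), doubleword 0 on little endian
   where the mirrored merges were used.  The xxpermdi below selects that
   doubleword from each input, roughly: index 3 picks doubleword 1 of
   both operands, index 0 picks doubleword 0 of both (big-endian
   numbering).  */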
6962 rtx res = gen_reg_rtx (V2DImode);
6963 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
6964 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
6965 }
6966
6967 return;
6968 }
6969
6970 /* Construct the vector in memory one field at a time
6971 and load the whole vector. */
6972 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6973 for (i = 0; i < n_elts; i++)
6974 emit_move_insn (adjust_address_nv (mem, inner_mode,
6975 i * GET_MODE_SIZE (inner_mode)),
6976 XVECEXP (vals, 0, i));
6977 emit_move_insn (target, mem);
6978 }
6979
6980 /* Set field ELT_RTX of TARGET to VAL. */
6981
6982 void
6983 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
6984 {
6985 machine_mode mode = GET_MODE (target);
6986 machine_mode inner_mode = GET_MODE_INNER (mode);
6987 rtx reg = gen_reg_rtx (mode);
6988 rtx mask, mem, x;
6989 int width = GET_MODE_SIZE (inner_mode);
6990 int i;
6991
6992 val = force_reg (GET_MODE (val), val);
6993
6994 if (VECTOR_MEM_VSX_P (mode))
6995 {
6996 rtx insn = NULL_RTX;
6997
6998 if (mode == V2DFmode)
6999 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7000
7001 else if (mode == V2DImode)
7002 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7003
7004 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7005 {
7006 if (mode == V4SImode)
7007 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7008 else if (mode == V8HImode)
7009 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7010 else if (mode == V16QImode)
7011 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7012 else if (mode == V4SFmode)
7013 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7014 }
7015
7016 if (insn)
7017 {
7018 emit_insn (insn);
7019 return;
7020 }
7021 }
7022
7023 gcc_assert (CONST_INT_P (elt_rtx));
7024
7025 /* Simplify setting single-element vectors like V1TImode. */
7026 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7027 && INTVAL (elt_rtx) == 0)
7028 {
7029 emit_move_insn (target, gen_lowpart (mode, val));
7030 return;
7031 }
7032
7033 /* Load single variable value. */
7034 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7035 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7036 x = gen_rtx_UNSPEC (VOIDmode,
7037 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7038 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7039 gen_rtvec (2,
7040 gen_rtx_SET (reg, mem),
7041 x)));
7042
7043 /* Start with the identity (linear) byte-selection sequence 0..15. */
7044 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7045 for (i = 0; i < 16; ++i)
7046 XVECEXP (mask, 0, i) = GEN_INT (i);
7047
7048 /* Set permute mask to insert element into target. */
7049 for (i = 0; i < width; ++i)
7050 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7051 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
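/* Worked example: inserting element 1 of a V4SImode vector (width 4)
   rewrites mask bytes 4..7 to 0x10..0x13, so the big-endian permute
   below takes bytes 4..7 of the result from the first four bytes of
   REG (the freshly loaded scalar) and every other byte from TARGET;
   the little-endian paths compensate by swapping the operands and
   adjusting the selector.  */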
7052
7053 if (BYTES_BIG_ENDIAN)
7054 x = gen_rtx_UNSPEC (mode,
7055 gen_rtvec (3, target, reg,
7056 force_reg (V16QImode, x)),
7057 UNSPEC_VPERM);
7058 else
7059 {
7060 if (TARGET_P9_VECTOR)
7061 x = gen_rtx_UNSPEC (mode,
7062 gen_rtvec (3, reg, target,
7063 force_reg (V16QImode, x)),
7064 UNSPEC_VPERMR);
7065 else
7066 {
7067 /* Invert selector. We prefer to generate VNAND on P8 so
7068 that future fusion opportunities can kick in, but must
7069 generate VNOR elsewhere. */
7070 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7071 rtx iorx = (TARGET_P8_VECTOR
7072 ? gen_rtx_IOR (V16QImode, notx, notx)
7073 : gen_rtx_AND (V16QImode, notx, notx));
7074 rtx tmp = gen_reg_rtx (V16QImode);
7075 emit_insn (gen_rtx_SET (tmp, iorx));
7076
7077 /* Permute with operands reversed and adjusted selector. */
7078 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7079 UNSPEC_VPERM);
7080 }
7081 }
7082
7083 emit_insn (gen_rtx_SET (target, x));
7084 }
7085
7086 /* Extract field ELT from VEC into TARGET. */
7087
7088 void
7089 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7090 {
7091 machine_mode mode = GET_MODE (vec);
7092 machine_mode inner_mode = GET_MODE_INNER (mode);
7093 rtx mem;
7094
7095 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7096 {
7097 switch (mode)
7098 {
7099 default:
7100 break;
7101 case E_V1TImode:
7102 emit_move_insn (target, gen_lowpart (TImode, vec));
7103 return;
7104 case E_V2DFmode:
7105 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7106 return;
7107 case E_V2DImode:
7108 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7109 return;
7110 case E_V4SFmode:
7111 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7112 return;
7113 case E_V16QImode:
7114 if (TARGET_DIRECT_MOVE_64BIT)
7115 {
7116 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7117 return;
7118 }
7119 else
7120 break;
7121 case E_V8HImode:
7122 if (TARGET_DIRECT_MOVE_64BIT)
7123 {
7124 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7125 return;
7126 }
7127 else
7128 break;
7129 case E_V4SImode:
7130 if (TARGET_DIRECT_MOVE_64BIT)
7131 {
7132 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7133 return;
7134 }
7135 break;
7136 }
7137 }
7138 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7139 && TARGET_DIRECT_MOVE_64BIT)
7140 {
7141 if (GET_MODE (elt) != DImode)
7142 {
7143 rtx tmp = gen_reg_rtx (DImode);
7144 convert_move (tmp, elt, 0);
7145 elt = tmp;
7146 }
7147 else if (!REG_P (elt))
7148 elt = force_reg (DImode, elt);
7149
7150 switch (mode)
7151 {
7152 case E_V1TImode:
7153 emit_move_insn (target, gen_lowpart (TImode, vec));
7154 return;
7155
7156 case E_V2DFmode:
7157 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7158 return;
7159
7160 case E_V2DImode:
7161 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7162 return;
7163
7164 case E_V4SFmode:
7165 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7166 return;
7167
7168 case E_V4SImode:
7169 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7170 return;
7171
7172 case E_V8HImode:
7173 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7174 return;
7175
7176 case E_V16QImode:
7177 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7178 return;
7179
7180 default:
7181 gcc_unreachable ();
7182 }
7183 }
7184
7185 /* Allocate mode-sized buffer. */
7186 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7187
7188 emit_move_insn (mem, vec);
7189 if (CONST_INT_P (elt))
7190 {
7191 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7192
7193 /* Add offset to field within buffer matching vector element. */
7194 mem = adjust_address_nv (mem, inner_mode,
7195 modulo_elt * GET_MODE_SIZE (inner_mode));
7196 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7197 }
7198 else
7199 {
7200 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7201 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7202 rtx new_addr = gen_reg_rtx (Pmode);
7203
7204 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7205 if (ele_size > 1)
7206 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7207 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7208 new_addr = change_address (mem, inner_mode, new_addr);
7209 emit_move_insn (target, new_addr);
7210 }
7211 }
7212
7213 /* Return the offset within a memory object (MEM) of a vector type to a given
7214 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7215 the element is constant, we return a constant integer.
7216
7217 Otherwise, we use a base register temporary to calculate the offset after
7218 masking it to fit within the bounds of the vector and scaling it. The
7219 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7220 built-in function. */
7221
7222 static rtx
7223 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7224 {
7225 if (CONST_INT_P (element))
7226 return GEN_INT (INTVAL (element) * scalar_size);
7227
7228 /* All insns should use the 'Q' constraint (address is a single register) if
7229 the element number is not a constant. */
7230 gcc_assert (satisfies_constraint_Q (mem));
7231
7232 /* Mask the element to make sure the element number is between 0 and the
7233 maximum number of elements - 1 so that we don't generate an address
7234 outside the vector. */
7235 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7236 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7237 emit_insn (gen_rtx_SET (base_tmp, and_op));
7238
7239 /* Shift the element to get the byte offset from the element number. */
7240 int shift = exact_log2 (scalar_size);
7241 gcc_assert (shift >= 0);
7242
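/* Worked example: for V4SImode (four 4-byte elements) a variable
   element number E yields ((E & 3) << 2); e.g. E = 6 is masked to 2
   and produces a byte offset of 8.  */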
7243 if (shift > 0)
7244 {
7245 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7246 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7247 }
7248
7249 return base_tmp;
7250 }
7251
7252 /* Helper function to update a PC-relative memory address (ADDR) of a vector
7253 so that it points to a scalar field within the vector at a constant offset
7254 (ELEMENT_OFFSET). If the address is not valid, we can use the base register
7255 temporary (BASE_TMP) to form the address. */
7256
7257 static rtx
7258 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7259 {
7260 rtx new_addr = NULL;
7261
7262 gcc_assert (CONST_INT_P (element_offset));
7263
7264 if (GET_CODE (addr) == CONST)
7265 addr = XEXP (addr, 0);
7266
7267 if (GET_CODE (addr) == PLUS)
7268 {
7269 rtx op0 = XEXP (addr, 0);
7270 rtx op1 = XEXP (addr, 1);
7271
7272 if (CONST_INT_P (op1))
7273 {
7274 HOST_WIDE_INT offset
7275 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7276
7277 if (offset == 0)
7278 new_addr = op0;
7279
7280 else
7281 {
7282 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7283 new_addr = gen_rtx_CONST (Pmode, plus);
7284 }
7285 }
7286
7287 else
7288 {
7289 emit_move_insn (base_tmp, addr);
7290 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7291 }
7292 }
7293
7294 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7295 {
7296 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7297 new_addr = gen_rtx_CONST (Pmode, plus);
7298 }
7299
7300 else
7301 gcc_unreachable ();
7302
7303 return new_addr;
7304 }
7305
7306 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7307 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7308 temporary (BASE_TMP) to fixup the address. Return the new memory address
7309 that is valid for reads or writes to a given register (SCALAR_REG).
7310
7311 This function is expected to be called after reload is completed when we are
7312 splitting insns. The temporary BASE_TMP might be set multiple times with
7313 this code. */
7314
7315 rtx
7316 rs6000_adjust_vec_address (rtx scalar_reg,
7317 rtx mem,
7318 rtx element,
7319 rtx base_tmp,
7320 machine_mode scalar_mode)
7321 {
7322 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7323 rtx addr = XEXP (mem, 0);
7324 rtx new_addr;
7325
7326 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7327 gcc_assert (!reg_mentioned_p (base_tmp, element));
7328
7329 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7330 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7331
7332 /* Calculate what we need to add to the address to get the element
7333 address. */
7334 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7335
7336 /* Create the new address pointing to the element within the vector. If we
7337 are adding 0, we don't have to change the address. */
7338 if (element_offset == const0_rtx)
7339 new_addr = addr;
7340
7341 /* A simple indirect address can be converted into a reg + offset
7342 address. */
7343 else if (REG_P (addr) || SUBREG_P (addr))
7344 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7345
7346 /* For references to local static variables, fold a constant offset into the
7347 address. */
7348 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7349 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7350
7351 /* Optimize a D-FORM address with a constant offset and a constant element
7352 number, to include the element offset in the address directly. */
7353 else if (GET_CODE (addr) == PLUS)
7354 {
7355 rtx op0 = XEXP (addr, 0);
7356 rtx op1 = XEXP (addr, 1);
7357
7358 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7359 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7360 {
7361 /* op0 should never be r0, because r0+offset is not valid. But it
7362 doesn't hurt to make sure it is not r0. */
7363 gcc_assert (reg_or_subregno (op0) != 0);
7364
7365 /* D-FORM address with constant element number. */
7366 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7367 rtx offset_rtx = GEN_INT (offset);
7368 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7369 }
7370 else
7371 {
7372 /* If we don't have a D-FORM address with a constant element number,
7373 add the two elements in the current address. Then add the offset.
7374
7375 Previously, we tried to add the offset to OP1 and change the
7376 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7377 complicated because we had to verify that op1 was not GPR0 and we
7378 had a constant element offset (due to the way ADDI is defined).
7379 By doing the add of OP0 and OP1 first, and then adding in the
7380 offset, it has the benefit that if D-FORM instructions are
7381 allowed, the offset is part of the memory access to the vector
7382 element. */
7383 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7384 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7385 }
7386 }
7387
7388 else
7389 {
7390 emit_move_insn (base_tmp, addr);
7391 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7392 }
7393
7394 /* If the address isn't valid, move the address into the temporary base
7395 register. Some reasons it could not be valid include:
7396
7397 The address offset overflowed the 16- or 34-bit offset size;
7398 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7399 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7400 Only X-FORM loads can be done, and the address is D-FORM. */
7401
7402 enum insn_form iform
7403 = address_to_insn_form (new_addr, scalar_mode,
7404 reg_to_non_prefixed (scalar_reg, scalar_mode));
7405
7406 if (iform == INSN_FORM_BAD)
7407 {
7408 emit_move_insn (base_tmp, new_addr);
7409 new_addr = base_tmp;
7410 }
7411
7412 return change_address (mem, scalar_mode, new_addr);
7413 }
7414
7415 /* Split a variable vec_extract operation into the component instructions. */
7416
7417 void
7418 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7419 rtx tmp_altivec)
7420 {
7421 machine_mode mode = GET_MODE (src);
7422 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7423 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7424 int byte_shift = exact_log2 (scalar_size);
7425
7426 gcc_assert (byte_shift >= 0);
7427
7428 /* If we are given a memory address, optimize to load just the element. We
7429 don't have to adjust the vector element number on little endian
7430 systems. */
7431 if (MEM_P (src))
7432 {
7433 emit_move_insn (dest,
7434 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7435 scalar_mode));
7436 return;
7437 }
7438
7439 else if (REG_P (src) || SUBREG_P (src))
7440 {
7441 int num_elements = GET_MODE_NUNITS (mode);
7442 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7443 int bit_shift = 7 - exact_log2 (num_elements);
7444 rtx element2;
7445 unsigned int dest_regno = reg_or_subregno (dest);
7446 unsigned int src_regno = reg_or_subregno (src);
7447 unsigned int element_regno = reg_or_subregno (element);
7448
7449 gcc_assert (REG_P (tmp_gpr));
7450
7451 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7452 a general purpose register. */
7453 if (TARGET_P9_VECTOR
7454 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7455 && INT_REGNO_P (dest_regno)
7456 && ALTIVEC_REGNO_P (src_regno)
7457 && INT_REGNO_P (element_regno))
7458 {
7459 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7460 rtx element_si = gen_rtx_REG (SImode, element_regno);
7461
7462 if (mode == V16QImode)
7463 emit_insn (BYTES_BIG_ENDIAN
7464 ? gen_vextublx (dest_si, element_si, src)
7465 : gen_vextubrx (dest_si, element_si, src));
7466
7467 else if (mode == V8HImode)
7468 {
7469 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7470 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7471 emit_insn (BYTES_BIG_ENDIAN
7472 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7473 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7474 }
7475
7476
7477 else
7478 {
7479 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7480 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7481 emit_insn (BYTES_BIG_ENDIAN
7482 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7483 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7484 }
7485
7486 return;
7487 }
7488
7489
7490 gcc_assert (REG_P (tmp_altivec));
7491
7492 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7493 an XOR, otherwise we need to subtract. The shift amount is chosen so that
7494 VSLO will shift the element into the upper position (adding 3 converts a
7495 byte shift into a bit shift). */
7496 if (scalar_size == 8)
7497 {
7498 if (!BYTES_BIG_ENDIAN)
7499 {
7500 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7501 element2 = tmp_gpr;
7502 }
7503 else
7504 element2 = element;
7505
7506 /* Generate RLDIC directly to shift left 6 bits and keep a single
7507 bit. */
7508 emit_insn (gen_rtx_SET (tmp_gpr,
7509 gen_rtx_AND (DImode,
7510 gen_rtx_ASHIFT (DImode,
7511 element2,
7512 GEN_INT (6)),
7513 GEN_INT (64))));
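/* Trace for little-endian V2DI: element 1 was flipped to 0 by the XOR
   above, and (0 << 6) & 64 = 0, so VSLO shifts by zero bytes; element
   0 becomes 1, and (1 << 6) & 64 = 64 = 0x40, which VSLO reads as a
   shift of 8 bytes, moving the selected doubleword into the upper
   position.  */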
7514 }
7515 else
7516 {
7517 if (!BYTES_BIG_ENDIAN)
7518 {
7519 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7520
7521 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7522 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7523 element2 = tmp_gpr;
7524 }
7525 else
7526 element2 = element;
7527
7528 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
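/* E.g. for V4SImode, bit_shift = 7 - log2 (4) = 5, so element E
   becomes E << 5; that is the byte offset E * 4 scaled by 8 into the
   bit position where VSLO reads its shift count.  */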
7529 }
7530
7531 /* Get the value into the lower byte of the Altivec register where VSLO
7532 expects it. */
7533 if (TARGET_P9_VECTOR)
7534 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7535 else if (can_create_pseudo_p ())
7536 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7537 else
7538 {
7539 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7540 emit_move_insn (tmp_di, tmp_gpr);
7541 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7542 }
7543
7544 /* Do the VSLO to get the value into the final location. */
7545 switch (mode)
7546 {
7547 case E_V2DFmode:
7548 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7549 return;
7550
7551 case E_V2DImode:
7552 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7553 return;
7554
7555 case E_V4SFmode:
7556 {
7557 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7558 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7559 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7560 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7561 tmp_altivec));
7562
7563 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7564 return;
7565 }
7566
7567 case E_V4SImode:
7568 case E_V8HImode:
7569 case E_V16QImode:
7570 {
7571 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7572 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7573 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7574 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7575 tmp_altivec));
7576 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7577 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7578 GEN_INT (64 - bits_in_element)));
7579 return;
7580 }
7581
7582 default:
7583 gcc_unreachable ();
7584 }
7585
7586 return;
7587 }
7588 else
7589 gcc_unreachable ();
7590 }
7591
7592 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7593 selects whether the alignment is ABI-mandated, optional, or
7594 both ABI-mandated and optional alignment. */
7595
7596 unsigned int
7597 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7598 {
7599 if (how != align_opt)
7600 {
7601 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7602 align = 128;
7603 }
7604
7605 if (how != align_abi)
7606 {
7607 if (TREE_CODE (type) == ARRAY_TYPE
7608 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7609 {
7610 if (align < BITS_PER_WORD)
7611 align = BITS_PER_WORD;
7612 }
7613 }
7614
7615 return align;
7616 }
7617
7618 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7619 instructions simply ignore the low bits; VSX memory instructions
7620 are aligned to 4 or 8 bytes. */
7621
7622 static bool
7623 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7624 {
7625 return (STRICT_ALIGNMENT
7626 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7627 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7628 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
7629 && (int) align < VECTOR_ALIGN (mode)))));
7630 }
7631
7632 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7633
7634 bool
7635 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7636 {
7637 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7638 {
7639 if (computed != 128)
7640 {
7641 static bool warned;
7642 if (!warned && warn_psabi)
7643 {
7644 warned = true;
7645 inform (input_location,
7646 "the layout of aggregates containing vectors with"
7647 " %d-byte alignment has changed in GCC 5",
7648 computed / BITS_PER_UNIT);
7649 }
7650 }
7651 /* In current GCC there is no special case. */
7652 return false;
7653 }
7654
7655 return false;
7656 }
7657
7658 /* AIX increases natural record alignment to doubleword if the first
7659 field is an FP double while the FP fields remain word aligned. */
7660
7661 unsigned int
7662 rs6000_special_round_type_align (tree type, unsigned int computed,
7663 unsigned int specified)
7664 {
7665 unsigned int align = MAX (computed, specified);
7666 tree field = TYPE_FIELDS (type);
7667
7668 /* Skip all non-field decls. */
7669 while (field != NULL
7670 && (TREE_CODE (field) != FIELD_DECL
7671 || DECL_FIELD_ABI_IGNORED (field)))
7672 field = DECL_CHAIN (field);
7673
7674 if (field != NULL && field != type)
7675 {
7676 type = TREE_TYPE (field);
7677 while (TREE_CODE (type) == ARRAY_TYPE)
7678 type = TREE_TYPE (type);
7679
7680 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7681 align = MAX (align, 64);
7682 }
7683
7684 return align;
7685 }
7686
7687 /* Darwin increases record alignment to the natural alignment of
7688 the first field. */
7689
7690 unsigned int
7691 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7692 unsigned int specified)
7693 {
7694 unsigned int align = MAX (computed, specified);
7695
7696 if (TYPE_PACKED (type))
7697 return align;
7698
7699 /* Find the first field, looking down into aggregates. */
7700 do {
7701 tree field = TYPE_FIELDS (type);
7702 /* Skip all non-field decls. */
7703 while (field != NULL
7704 && (TREE_CODE (field) != FIELD_DECL
7705 || DECL_FIELD_ABI_IGNORED (field)))
7706 field = DECL_CHAIN (field);
7707 if (! field)
7708 break;
7709 /* A packed field does not contribute any extra alignment. */
7710 if (DECL_PACKED (field))
7711 return align;
7712 type = TREE_TYPE (field);
7713 while (TREE_CODE (type) == ARRAY_TYPE)
7714 type = TREE_TYPE (type);
7715 } while (AGGREGATE_TYPE_P (type));
7716
7717 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7718 align = MAX (align, TYPE_ALIGN (type));
7719
7720 return align;
7721 }
7722
7723 /* Return 1 for an operand in small memory on V.4/eabi. */
7724
7725 int
7726 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7727 machine_mode mode ATTRIBUTE_UNUSED)
7728 {
7729 #if TARGET_ELF
7730 rtx sym_ref;
7731
7732 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7733 return 0;
7734
7735 if (DEFAULT_ABI != ABI_V4)
7736 return 0;
7737
7738 if (SYMBOL_REF_P (op))
7739 sym_ref = op;
7740
7741 else if (GET_CODE (op) != CONST
7742 || GET_CODE (XEXP (op, 0)) != PLUS
7743 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7744 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7745 return 0;
7746
7747 else
7748 {
7749 rtx sum = XEXP (op, 0);
7750 HOST_WIDE_INT summand;
7751
7752 /* We have to be careful here, because it is the referenced address
7753 that must be 32k from _SDA_BASE_, not just the symbol. */
7754 summand = INTVAL (XEXP (sum, 1));
7755 if (summand < 0 || summand > g_switch_value)
7756 return 0;
7757
7758 sym_ref = XEXP (sum, 0);
7759 }
7760
7761 return SYMBOL_REF_SMALL_P (sym_ref);
7762 #else
7763 return 0;
7764 #endif
7765 }
7766
7767 /* Return true if either operand is a general purpose register. */
7768
7769 bool
7770 gpr_or_gpr_p (rtx op0, rtx op1)
7771 {
7772 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7773 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7774 }
7775
7776 /* Return true if this is a direct move operation between GPR registers and
7777 floating point/VSX registers. */
7778
7779 bool
7780 direct_move_p (rtx op0, rtx op1)
7781 {
7782 if (!REG_P (op0) || !REG_P (op1))
7783 return false;
7784
7785 if (!TARGET_DIRECT_MOVE)
7786 return false;
7787
7788 int regno0 = REGNO (op0);
7789 int regno1 = REGNO (op1);
7790 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7791 return false;
7792
7793 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7794 return true;
7795
7796 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7797 return true;
7798
7799 return false;
7800 }
7801
7802 /* Return true if ADDR is an acceptable address for a quad memory operation
7803 of mode MODE: either LQ/STQ for general purpose registers, or LXV/STXV
7804 for vector registers under ISA 3.0. STRICT selects the strict form of
7805 base register checking, where only hard registers that are valid base
7806 registers are accepted. */
7807
7808 bool
7809 quad_address_p (rtx addr, machine_mode mode, bool strict)
7810 {
7811 rtx op0, op1;
7812
7813 if (GET_MODE_SIZE (mode) < 16)
7814 return false;
7815
7816 if (legitimate_indirect_address_p (addr, strict))
7817 return true;
7818
7819 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7820 return false;
7821
7822 /* Is this a valid prefixed address? If the bottom four bits of the offset
7823 are non-zero, we could use a prefixed instruction (which does not have the
7824 DQ-form constraint that the traditional instruction had) instead of
7825 forcing the unaligned offset to a GPR. */
7826 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7827 return true;
7828
7829 if (GET_CODE (addr) != PLUS)
7830 return false;
7831
7832 op0 = XEXP (addr, 0);
7833 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7834 return false;
7835
7836 op1 = XEXP (addr, 1);
7837 if (!CONST_INT_P (op1))
7838 return false;
7839
7840 return quad_address_offset_p (INTVAL (op1));
7841 }
7842
7843 /* Return true if this is a load or store quad operation. This function does
7844 not handle the atomic quad memory instructions. */
7845
7846 bool
7847 quad_load_store_p (rtx op0, rtx op1)
7848 {
7849 bool ret;
7850
7851 if (!TARGET_QUAD_MEMORY)
7852 ret = false;
7853
7854 else if (REG_P (op0) && MEM_P (op1))
7855 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7856 && quad_memory_operand (op1, GET_MODE (op1))
7857 && !reg_overlap_mentioned_p (op0, op1));
7858
7859 else if (MEM_P (op0) && REG_P (op1))
7860 ret = (quad_memory_operand (op0, GET_MODE (op0))
7861 && quad_int_reg_operand (op1, GET_MODE (op1)));
7862
7863 else
7864 ret = false;
7865
7866 if (TARGET_DEBUG_ADDR)
7867 {
7868 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7869 ret ? "true" : "false");
7870 debug_rtx (gen_rtx_SET (op0, op1));
7871 }
7872
7873 return ret;
7874 }
7875
7876 /* Given an address, return a constant offset term if one exists. */
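/* E.g. for (plus (reg) (const_int 16)) this returns (const_int 16);
   for (lo_sum (reg) (plus (symbol_ref) (const_int 8))) it returns
   (const_int 8); for a plain (reg) it returns NULL_RTX.  */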
7877
7878 static rtx
7879 address_offset (rtx op)
7880 {
7881 if (GET_CODE (op) == PRE_INC
7882 || GET_CODE (op) == PRE_DEC)
7883 op = XEXP (op, 0);
7884 else if (GET_CODE (op) == PRE_MODIFY
7885 || GET_CODE (op) == LO_SUM)
7886 op = XEXP (op, 1);
7887
7888 if (GET_CODE (op) == CONST)
7889 op = XEXP (op, 0);
7890
7891 if (GET_CODE (op) == PLUS)
7892 op = XEXP (op, 1);
7893
7894 if (CONST_INT_P (op))
7895 return op;
7896
7897 return NULL_RTX;
7898 }
7899
7900 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7901 the mode. If we can't find (or don't know) the alignment of the symbol
7902 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7903 should be pessimistic]. Offsets are validated in the same way as for
7904 reg + offset. */
7905 static bool
7906 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7907 {
7908 /* We should not get here with a mode that uses DQ-form addressing. */
7909 gcc_checking_assert (! mode_supports_dq_form (mode));
7910
7911 if (GET_CODE (x) == CONST)
7912 x = XEXP (x, 0);
7913
7914 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7915 x = XVECEXP (x, 0, 0);
7916
7917 rtx sym = NULL_RTX;
7918 unsigned HOST_WIDE_INT offset = 0;
7919
7920 if (GET_CODE (x) == PLUS)
7921 {
7922 sym = XEXP (x, 0);
7923 if (! SYMBOL_REF_P (sym))
7924 return false;
7925 if (!CONST_INT_P (XEXP (x, 1)))
7926 return false;
7927 offset = INTVAL (XEXP (x, 1));
7928 }
7929 else if (SYMBOL_REF_P (x))
7930 sym = x;
7931 else if (CONST_INT_P (x))
7932 offset = INTVAL (x);
7933 else if (GET_CODE (x) == LABEL_REF)
7934 offset = 0; // We assume code labels are Pmode aligned
7935 else
7936 return false; // not sure what we have here.
7937
7938 /* If we don't know the alignment of the thing to which the symbol refers,
7939 we assume optimistically it is "enough".
7940 ??? maybe we should be pessimistic instead. */
7941 unsigned align = 0;
7942
7943 if (sym)
7944 {
7945 tree decl = SYMBOL_REF_DECL (sym);
7946 #if TARGET_MACHO
7947 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7948 /* The decl in an indirection symbol is the original one, which might
7949 be less aligned than the indirection. Our indirections are always
7950 pointer-aligned. */
7951 ;
7952 else
7953 #endif
7954 if (decl && DECL_ALIGN (decl))
7955 align = DECL_ALIGN_UNIT (decl);
7956 }
7957
7958 unsigned int extra = 0;
7959 switch (mode)
7960 {
7961 case E_DFmode:
7962 case E_DDmode:
7963 case E_DImode:
7964 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7965 addressing. */
7966 if (VECTOR_MEM_VSX_P (mode))
7967 return false;
7968
7969 if (!TARGET_POWERPC64)
7970 extra = 4;
7971 else if ((offset & 3) || (align & 3))
7972 return false;
7973 break;
7974
7975 case E_TFmode:
7976 case E_IFmode:
7977 case E_KFmode:
7978 case E_TDmode:
7979 case E_TImode:
7980 case E_PTImode:
7981 extra = 8;
7982 if (!TARGET_POWERPC64)
7983 extra = 12;
7984 else if ((offset & 3) || (align & 3))
7985 return false;
7986 break;
7987
7988 default:
7989 break;
7990 }
7991
7992 /* We only care if the access(es) would cause a change to the high part. */
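/* The expression below sign-extends the low 16 bits of the offset:
   e.g. 0x9ffc becomes (0x9ffc ^ 0x8000) - 0x8000 = 0x1ffc - 0x8000
   = -0x6004, while 0x7ffc is unchanged.  */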
7993 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7994 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7995 }
7996
7997 /* Return true if the MEM operand is a memory operand suitable for use
7998 with a (full width, possibly multiple) gpr load/store. On
7999 powerpc64 this means the offset must be divisible by 4.
8000 Implements 'Y' constraint.
8001
8002 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8003 a constraint function we know the operand has satisfied a suitable
8004 memory predicate.
8005
8006 Offsetting a lo_sum should not be allowed, except where we know by
8007 alignment that a 32k boundary is not crossed. Note that by
8008 "offsetting" here we mean a further offset to access parts of the
8009 MEM. It's fine to have a lo_sum where the inner address is offset
8010 from a sym, since the same sym+offset will appear in the high part
8011 of the address calculation. */
8012
8013 bool
8014 mem_operand_gpr (rtx op, machine_mode mode)
8015 {
8016 unsigned HOST_WIDE_INT offset;
8017 int extra;
8018 rtx addr = XEXP (op, 0);
8019
8020 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8021 if (TARGET_UPDATE
8022 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8023 && mode_supports_pre_incdec_p (mode)
8024 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8025 return true;
8026
8027 /* Allow prefixed instructions if supported. If the bottom two bits of the
8028 offset are non-zero, we could use a prefixed instruction (which does not
8029 have the DS-form constraint that the traditional instruction had) instead
8030 of forcing the unaligned offset to a GPR. */
8031 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8032 return true;
8033
8034 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8035 really OK. Doing this early avoids teaching all the other machinery
8036 about them. */
8037 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8038 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8039
8040 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8041 if (!rs6000_offsettable_memref_p (op, mode, false))
8042 return false;
8043
8044 op = address_offset (addr);
8045 if (op == NULL_RTX)
8046 return true;
8047
8048 offset = INTVAL (op);
8049 if (TARGET_POWERPC64 && (offset & 3) != 0)
8050 return false;
8051
8052 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8053 if (extra < 0)
8054 extra = 0;
8055
8056 if (GET_CODE (addr) == LO_SUM)
8057 /* For lo_sum addresses, we must allow any offset except one that
8058 causes a wrap, so test only the low 16 bits. */
8059 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8060
8061 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8062 }
8063
8064 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8065 enforce an offset divisible by 4 even for 32-bit. */
8066
8067 bool
8068 mem_operand_ds_form (rtx op, machine_mode mode)
8069 {
8070 unsigned HOST_WIDE_INT offset;
8071 int extra;
8072 rtx addr = XEXP (op, 0);
8073
8074 /* Allow prefixed instructions if supported. If the bottom two bits of the
8075 offset are non-zero, we could use a prefixed instruction (which does not
8076 have the DS-form constraint that the traditional instruction had) instead
8077 of forcing the unaligned offset to a GPR. */
8078 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8079 return true;
8080
8081 if (!offsettable_address_p (false, mode, addr))
8082 return false;
8083
8084 op = address_offset (addr);
8085 if (op == NULL_RTX)
8086 return true;
8087
8088 offset = INTVAL (op);
8089 if ((offset & 3) != 0)
8090 return false;
8091
8092 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8093 if (extra < 0)
8094 extra = 0;
8095
8096 if (GET_CODE (addr) == LO_SUM)
8097 /* For lo_sum addresses, we must allow any offset except one that
8098 causes a wrap, so test only the low 16 bits. */
8099 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8100
8101 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8102 }
8103 \f
8104 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8105
8106 static bool
8107 reg_offset_addressing_ok_p (machine_mode mode)
8108 {
8109 switch (mode)
8110 {
8111 case E_V16QImode:
8112 case E_V8HImode:
8113 case E_V4SFmode:
8114 case E_V4SImode:
8115 case E_V2DFmode:
8116 case E_V2DImode:
8117 case E_V1TImode:
8118 case E_TImode:
8119 case E_TFmode:
8120 case E_KFmode:
8121 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8122 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8123 a vector mode, if we want to use the VSX registers to move it around,
8124 we need to restrict ourselves to reg+reg addressing. Similarly for
8125 IEEE 128-bit floating point that is passed in a single vector
8126 register. */
8127 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8128 return mode_supports_dq_form (mode);
8129 break;
8130
8131 /* The vector pair/quad types support offset addressing if the
8132 underlying vectors support offset addressing. */
8133 case E_OOmode:
8134 case E_XOmode:
8135 return TARGET_MMA;
8136
8137 case E_SDmode:
8138 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8139 addressing for the LFIWZX and STFIWX instructions. */
8140 if (TARGET_NO_SDMODE_STACK)
8141 return false;
8142 break;
8143
8144 default:
8145 break;
8146 }
8147
8148 return true;
8149 }
8150
8151 static bool
8152 virtual_stack_registers_memory_p (rtx op)
8153 {
8154 int regnum;
8155
8156 if (REG_P (op))
8157 regnum = REGNO (op);
8158
8159 else if (GET_CODE (op) == PLUS
8160 && REG_P (XEXP (op, 0))
8161 && CONST_INT_P (XEXP (op, 1)))
8162 regnum = REGNO (XEXP (op, 0));
8163
8164 else
8165 return false;
8166
8167 return (regnum >= FIRST_VIRTUAL_REGISTER
8168 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8169 }
8170
8171 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8172 is known to not straddle a 32k boundary. This function is used
8173 to determine whether -mcmodel=medium code can use TOC pointer
8174 relative addressing for OP. This means the alignment of the TOC
8175 pointer must also be taken into account, and unfortunately that is
8176 only 8 bytes. */
8177
8178 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8179 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8180 #endif
8181
8182 static bool
8183 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8184 machine_mode mode)
8185 {
8186 tree decl;
8187 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8188
8189 if (!SYMBOL_REF_P (op))
8190 return false;
8191
8192 /* ISA 3.0 vector d-form addressing is restricted; don't allow
8193 SYMBOL_REF. */
8194 if (mode_supports_dq_form (mode))
8195 return false;
8196
8197 dsize = GET_MODE_SIZE (mode);
8198 decl = SYMBOL_REF_DECL (op);
8199 if (!decl)
8200 {
8201 if (dsize == 0)
8202 return false;
8203
8204 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8205 replacing memory addresses with an anchor plus offset. We
8206 could find the decl by rummaging around in the block->objects
8207 VEC for the given offset but that seems like too much work. */
8208 dalign = BITS_PER_UNIT;
8209 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8210 && SYMBOL_REF_ANCHOR_P (op)
8211 && SYMBOL_REF_BLOCK (op) != NULL)
8212 {
8213 struct object_block *block = SYMBOL_REF_BLOCK (op);
8214
8215 dalign = block->alignment;
8216 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8217 }
8218 else if (CONSTANT_POOL_ADDRESS_P (op))
8219 {
8220 /* It would be nice to have get_pool_align().. */
8221 machine_mode cmode = get_pool_mode (op);
8222
8223 dalign = GET_MODE_ALIGNMENT (cmode);
8224 }
8225 }
8226 else if (DECL_P (decl))
8227 {
8228 dalign = DECL_ALIGN (decl);
8229
8230 if (dsize == 0)
8231 {
8232 /* Allow BLKmode when the entire object is known to not
8233 cross a 32k boundary. */
8234 if (!DECL_SIZE_UNIT (decl))
8235 return false;
8236
8237 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8238 return false;
8239
8240 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8241 if (dsize > 32768)
8242 return false;
8243
8244 dalign /= BITS_PER_UNIT;
8245 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8246 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8247 return dalign >= dsize;
8248 }
8249 }
8250 else
8251 gcc_unreachable ();
8252
8253 /* Find how many bits of the alignment we know for this access. */
8254 dalign /= BITS_PER_UNIT;
8255 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8256 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8257 mask = dalign - 1;
8258 lsb = offset & -offset;
8259 mask &= lsb - 1;
8260 dalign = mask + 1;
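/* Worked example: with dalign = 8 and offset = 4, lsb = 4 & -4 = 4,
   so mask = 7 & 3 = 3 and the alignment known for this access drops
   to 4 bytes; only accesses with dsize <= 4 are then accepted.  */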
8261
8262 return dalign >= dsize;
8263 }
8264
8265 static bool
8266 constant_pool_expr_p (rtx op)
8267 {
8268 rtx base, offset;
8269
8270 split_const (op, &base, &offset);
8271 return (SYMBOL_REF_P (base)
8272 && CONSTANT_POOL_ADDRESS_P (base)
8273 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8274 }
8275
8276 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8277 use that as the register to put the HIGH value into if register allocation
8278 is already done. */
8279
8280 rtx
8281 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8282 {
8283 rtx tocrel, tocreg, hi;
8284
8285 gcc_assert (TARGET_TOC);
8286
8287 if (TARGET_DEBUG_ADDR)
8288 {
8289 if (SYMBOL_REF_P (symbol))
8290 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8291 XSTR (symbol, 0));
8292 else
8293 {
8294 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8295 GET_RTX_NAME (GET_CODE (symbol)));
8296 debug_rtx (symbol);
8297 }
8298 }
8299
8300 if (!can_create_pseudo_p ())
8301 df_set_regs_ever_live (TOC_REGISTER, true);
8302
8303 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8304 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8305 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8306 return tocrel;
8307
8308 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8309 if (largetoc_reg != NULL)
8310 {
8311 emit_move_insn (largetoc_reg, hi);
8312 hi = largetoc_reg;
8313 }
8314 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8315 }
8316
8317 /* These are only used to pass through from print_operand/print_operand_address
8318 to rs6000_output_addr_const_extra over the intervening function
8319 output_addr_const which is not target code. */
8320 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8321
8322 /* Return true if OP is a toc pointer relative address (the output
8323 of create_TOC_reference). If STRICT, do not match non-split
8324 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8325 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8326 TOCREL_OFFSET_RET respectively. */
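/* The accepted shape is roughly
   (unspec [(symbol_ref) (reg TOC_REGISTER)] UNSPEC_TOCREL),
   possibly wrapped in (plus ... (const_int)) for an offset and, for
   -mcmodel=medium/large, in (lo_sum (reg) ...) once split.  */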
8327
8328 bool
8329 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8330 const_rtx *tocrel_offset_ret)
8331 {
8332 if (!TARGET_TOC)
8333 return false;
8334
8335 if (TARGET_CMODEL != CMODEL_SMALL)
8336 {
8337 /* When strict, ensure we have everything tidy. */
8338 if (strict
8339 && !(GET_CODE (op) == LO_SUM
8340 && REG_P (XEXP (op, 0))
8341 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8342 return false;
8343
8344 /* When not strict, allow non-split TOC addresses and also allow
8345 (lo_sum (high ..)) TOC addresses created during reload. */
8346 if (GET_CODE (op) == LO_SUM)
8347 op = XEXP (op, 1);
8348 }
8349
8350 const_rtx tocrel_base = op;
8351 const_rtx tocrel_offset = const0_rtx;
8352
8353 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8354 {
8355 tocrel_base = XEXP (op, 0);
8356 tocrel_offset = XEXP (op, 1);
8357 }
8358
8359 if (tocrel_base_ret)
8360 *tocrel_base_ret = tocrel_base;
8361 if (tocrel_offset_ret)
8362 *tocrel_offset_ret = tocrel_offset;
8363
8364 return (GET_CODE (tocrel_base) == UNSPEC
8365 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8366 && REG_P (XVECEXP (tocrel_base, 0, 1))
8367 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8368 }
8369
8370 /* Return true if X is a constant pool address, and also for cmodel=medium
8371 if X is a toc-relative address known to be offsettable within MODE. */
8372
8373 bool
8374 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8375 bool strict)
8376 {
8377 const_rtx tocrel_base, tocrel_offset;
8378 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8379 && (TARGET_CMODEL != CMODEL_MEDIUM
8380 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8381 || mode == QImode
8382 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8383 INTVAL (tocrel_offset), mode)));
8384 }
8385
8386 static bool
8387 legitimate_small_data_p (machine_mode mode, rtx x)
8388 {
8389 return (DEFAULT_ABI == ABI_V4
8390 && !flag_pic && !TARGET_TOC
8391 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8392 && small_data_operand (x, mode));
8393 }
8394
8395 bool
8396 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8397 bool strict, bool worst_case)
8398 {
8399 unsigned HOST_WIDE_INT offset;
8400 unsigned int extra;
8401
8402 if (GET_CODE (x) != PLUS)
8403 return false;
8404 if (!REG_P (XEXP (x, 0)))
8405 return false;
8406 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8407 return false;
8408 if (mode_supports_dq_form (mode))
8409 return quad_address_p (x, mode, strict);
8410 if (!reg_offset_addressing_ok_p (mode))
8411 return virtual_stack_registers_memory_p (x);
8412 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8413 return true;
8414 if (!CONST_INT_P (XEXP (x, 1)))
8415 return false;
8416
8417 offset = INTVAL (XEXP (x, 1));
8418 extra = 0;
8419 switch (mode)
8420 {
8421 case E_DFmode:
8422 case E_DDmode:
8423 case E_DImode:
8424 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8425 addressing. */
8426 if (VECTOR_MEM_VSX_P (mode))
8427 return false;
8428
8429 if (!worst_case)
8430 break;
8431 if (!TARGET_POWERPC64)
8432 extra = 4;
8433 else if (offset & 3)
8434 return false;
8435 break;
8436
8437 case E_TFmode:
8438 case E_IFmode:
8439 case E_KFmode:
8440 case E_TDmode:
8441 case E_TImode:
8442 case E_PTImode:
8443 extra = 8;
8444 if (!worst_case)
8445 break;
8446 if (!TARGET_POWERPC64)
8447 extra = 12;
8448 else if (offset & 3)
8449 return false;
8450 break;
8451
8452 default:
8453 break;
8454 }
8455
8456 if (TARGET_PREFIXED)
8457 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8458 else
8459 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8460 }
8461
8462 bool
8463 legitimate_indexed_address_p (rtx x, int strict)
8464 {
8465 rtx op0, op1;
8466
8467 if (GET_CODE (x) != PLUS)
8468 return false;
8469
8470 op0 = XEXP (x, 0);
8471 op1 = XEXP (x, 1);
8472
8473 return (REG_P (op0) && REG_P (op1)
8474 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8475 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8476 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8477 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8478 }
8479
8480 bool
8481 avoiding_indexed_address_p (machine_mode mode)
8482 {
8483 unsigned int msize = GET_MODE_SIZE (mode);
8484
8485 /* Avoid indexed addressing for modes that have non-indexed load/store
8486 instruction forms. On power10, vector pairs have an indexed
8487 form, but vector quads don't. */
8488 if (msize > 16)
8489 return msize != 32;
8490
8491 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8492 }
8493
8494 bool
8495 legitimate_indirect_address_p (rtx x, int strict)
8496 {
8497 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8498 }
8499
8500 bool
8501 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8502 {
8503 if (!TARGET_MACHO || !flag_pic
8504 || mode != SImode || !MEM_P (x))
8505 return false;
8506 x = XEXP (x, 0);
8507
8508 if (GET_CODE (x) != LO_SUM)
8509 return false;
8510 if (!REG_P (XEXP (x, 0)))
8511 return false;
8512 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8513 return false;
8514 x = XEXP (x, 1);
8515
8516 return CONSTANT_P (x);
8517 }
8518
8519 static bool
8520 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8521 {
8522 if (GET_CODE (x) != LO_SUM)
8523 return false;
8524 if (!REG_P (XEXP (x, 0)))
8525 return false;
8526 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8527 return false;
8528 /* Quad-word addresses are restricted, and we can't use LO_SUM. */
8529 if (mode_supports_dq_form (mode))
8530 return false;
8531 x = XEXP (x, 1);
8532
8533 if (TARGET_ELF || TARGET_MACHO)
8534 {
8535 bool large_toc_ok;
8536
8537 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8538 return false;
8539 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8540 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8541 recognizes some LO_SUM addresses as valid although this
8542 function says the opposite. In most cases, LRA through different
8543 transformations can generate correct code for address reloads.
8544 It cannot manage only some LO_SUM cases. So we need to add
8545 code here saying that some addresses are still valid. */
8546 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8547 && small_toc_ref (x, VOIDmode));
8548 if (TARGET_TOC && ! large_toc_ok)
8549 return false;
8550 if (GET_MODE_NUNITS (mode) != 1)
8551 return false;
8552 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8553 && !(/* ??? Assume floating point reg based on mode? */
8554 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8555 return false;
8556
8557 return CONSTANT_P (x) || large_toc_ok;
8558 }
8559
8560 return false;
8561 }
8562
8563
8564 /* Try machine-dependent ways of modifying an illegitimate address
8565 to be legitimate. If we find one, return the new, valid address.
8566 This is used from only one place: `memory_address' in explow.c.
8567
8568 OLDX is the address as it was before break_out_memory_refs was
8569 called. In some cases it is useful to look at this to decide what
8570 needs to be done.
8571
8572 It is always safe for this function to do nothing. It exists to
8573 recognize opportunities to optimize the output.
8574
8575 On RS/6000, first check for the sum of a register with a constant
8576 integer that is out of range. If so, generate code to add the
8577 constant with the low-order 16 bits masked to the register and force
8578 this result into another register (this can be done with `cau').
8579 Then generate an address of REG+(CONST&0xffff), allowing for the
8580 possibility of bit 16 being a one.
8581
8582 Then check for the sum of a register and something not constant, try to
8583 load the other things into a register and return the sum. */
8584
8585 static rtx
8586 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8587 machine_mode mode)
8588 {
8589 unsigned int extra;
8590
8591 if (!reg_offset_addressing_ok_p (mode)
8592 || mode_supports_dq_form (mode))
8593 {
8594 if (virtual_stack_registers_memory_p (x))
8595 return x;
8596
8597 /* In theory we should not be seeing addresses of the form reg+0,
8598 but just in case it is generated, optimize it away. */
8599 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8600 return force_reg (Pmode, XEXP (x, 0));
8601
8602 /* For TImode with load/store quad, restrict addresses to just a single
8603 pointer, so it works with both GPRs and VSX registers. */
8604 /* Make sure both operands are registers. */
8605 else if (GET_CODE (x) == PLUS
8606 && (mode != TImode || !TARGET_VSX))
8607 return gen_rtx_PLUS (Pmode,
8608 force_reg (Pmode, XEXP (x, 0)),
8609 force_reg (Pmode, XEXP (x, 1)));
8610 else
8611 return force_reg (Pmode, x);
8612 }
8613 if (SYMBOL_REF_P (x))
8614 {
8615 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8616 if (model != 0)
8617 return rs6000_legitimize_tls_address (x, model);
8618 }
8619
8620 extra = 0;
8621 switch (mode)
8622 {
8623 case E_TFmode:
8624 case E_TDmode:
8625 case E_TImode:
8626 case E_PTImode:
8627 case E_IFmode:
8628 case E_KFmode:
8629 /* As in legitimate_offset_address_p we do not assume
8630 worst-case. The mode here is just a hint as to the registers
8631 used. A TImode is usually in gprs, but may actually be in
8632 fprs. Leave worst-case scenario for reload to handle via
8633 insn constraints. PTImode is only GPRs. */
8634 extra = 8;
8635 break;
8636 default:
8637 break;
8638 }
8639
8640 if (GET_CODE (x) == PLUS
8641 && REG_P (XEXP (x, 0))
8642 && CONST_INT_P (XEXP (x, 1))
8643 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8644 >= 0x10000 - extra))
8645 {
8646 HOST_WIDE_INT high_int, low_int;
8647 rtx sum;
8648 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8649 if (low_int >= 0x8000 - extra)
8650 low_int = 0;
8651 high_int = INTVAL (XEXP (x, 1)) - low_int;
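/* E.g. an offset of 0x12345 (with extra = 0) splits into
   low_int = 0x2345 and high_int = 0x10000; the force_operand below
   typically becomes an ADDIS of the high part, and the low part is
   folded back in as a 16-bit displacement.  */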
8652 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8653 gen_int_mode (high_int, Pmode)), 0);
8654 return plus_constant (Pmode, sum, low_int);
8655 }
8656 else if (GET_CODE (x) == PLUS
8657 && REG_P (XEXP (x, 0))
8658 && !CONST_INT_P (XEXP (x, 1))
8659 && GET_MODE_NUNITS (mode) == 1
8660 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8661 || (/* ??? Assume floating point reg based on mode? */
8662 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8663 && !avoiding_indexed_address_p (mode))
8664 {
8665 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8666 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8667 }
8668 else if ((TARGET_ELF
8669 #if TARGET_MACHO
8670 || !MACHO_DYNAMIC_NO_PIC_P
8671 #endif
8672 )
8673 && TARGET_32BIT
8674 && TARGET_NO_TOC_OR_PCREL
8675 && !flag_pic
8676 && !CONST_INT_P (x)
8677 && !CONST_WIDE_INT_P (x)
8678 && !CONST_DOUBLE_P (x)
8679 && CONSTANT_P (x)
8680 && GET_MODE_NUNITS (mode) == 1
8681 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8682 || (/* ??? Assume floating point reg based on mode? */
8683 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8684 {
8685 rtx reg = gen_reg_rtx (Pmode);
8686 if (TARGET_ELF)
8687 emit_insn (gen_elf_high (reg, x));
8688 else
8689 emit_insn (gen_macho_high (Pmode, reg, x));
8690 return gen_rtx_LO_SUM (Pmode, reg, x);
8691 }
8692 else if (TARGET_TOC
8693 && SYMBOL_REF_P (x)
8694 && constant_pool_expr_p (x)
8695 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8696 return create_TOC_reference (x, NULL_RTX);
8697 else
8698 return x;
8699 }
8700
8701 /* Debug version of rs6000_legitimize_address. */
8702 static rtx
8703 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8704 {
8705 rtx ret;
8706 rtx_insn *insns;
8707
8708 start_sequence ();
8709 ret = rs6000_legitimize_address (x, oldx, mode);
8710 insns = get_insns ();
8711 end_sequence ();
8712
8713 if (ret != x)
8714 {
8715 fprintf (stderr,
8716 "\nrs6000_legitimize_address: mode %s, old code %s, "
8717 "new code %s, modified\n",
8718 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8719 GET_RTX_NAME (GET_CODE (ret)));
8720
8721 fprintf (stderr, "Original address:\n");
8722 debug_rtx (x);
8723
8724 fprintf (stderr, "oldx:\n");
8725 debug_rtx (oldx);
8726
8727 fprintf (stderr, "New address:\n");
8728 debug_rtx (ret);
8729
8730 if (insns)
8731 {
8732 fprintf (stderr, "Insns added:\n");
8733 debug_rtx_list (insns, 20);
8734 }
8735 }
8736 else
8737 {
8738 fprintf (stderr,
8739 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8740 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8741
8742 debug_rtx (x);
8743 }
8744
8745 if (insns)
8746 emit_insn (insns);
8747
8748 return ret;
8749 }
8750
8751 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8752 We need to emit DTP-relative relocations. */
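/* Illustrative output (assuming an ELF target, a 4-byte entry and a
   symbol "x"):
       .long   x@dtprel+0x8000
   The 8-byte case uses DOUBLE_INT_ASM_OP instead of ".long".  */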
8753
8754 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8755 static void
8756 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8757 {
8758 switch (size)
8759 {
8760 case 4:
8761 fputs ("\t.long\t", file);
8762 break;
8763 case 8:
8764 fputs (DOUBLE_INT_ASM_OP, file);
8765 break;
8766 default:
8767 gcc_unreachable ();
8768 }
8769 output_addr_const (file, x);
8770 if (TARGET_ELF)
8771 fputs ("@dtprel+0x8000", file);
8772 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8773 {
8774 switch (SYMBOL_REF_TLS_MODEL (x))
8775 {
8776 case 0:
8777 break;
8778 case TLS_MODEL_LOCAL_EXEC:
8779 fputs ("@le", file);
8780 break;
8781 case TLS_MODEL_INITIAL_EXEC:
8782 fputs ("@ie", file);
8783 break;
8784 case TLS_MODEL_GLOBAL_DYNAMIC:
8785 case TLS_MODEL_LOCAL_DYNAMIC:
8786 fputs ("@m", file);
8787 break;
8788 default:
8789 gcc_unreachable ();
8790 }
8791 }
8792 }
8793
8794 /* Return true if X is a symbol that refers to real (rather than emulated)
8795 TLS. */
8796
8797 static bool
8798 rs6000_real_tls_symbol_ref_p (rtx x)
8799 {
8800 return (SYMBOL_REF_P (x)
8801 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8802 }
8803
8804 /* In the name of slightly smaller debug output, and to cater to
8805 general assembler lossage, recognize various UNSPEC sequences
8806 and turn them back into a direct symbol reference. */
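/* For instance (illustrative): a TOC reference such as
   (plus (reg:DI 2) (const (unspec [(symbol_ref "x")] UNSPEC_TOCREL)))
   is folded back to plain (symbol_ref "x") for debug purposes.  */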
8807
8808 static rtx
8809 rs6000_delegitimize_address (rtx orig_x)
8810 {
8811 rtx x, y, offset;
8812
8813 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8814 orig_x = XVECEXP (orig_x, 0, 0);
8815
8816 orig_x = delegitimize_mem_from_attrs (orig_x);
8817
8818 x = orig_x;
8819 if (MEM_P (x))
8820 x = XEXP (x, 0);
8821
8822 y = x;
8823 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8824 y = XEXP (y, 1);
8825
8826 offset = NULL_RTX;
8827 if (GET_CODE (y) == PLUS
8828 && GET_MODE (y) == Pmode
8829 && CONST_INT_P (XEXP (y, 1)))
8830 {
8831 offset = XEXP (y, 1);
8832 y = XEXP (y, 0);
8833 }
8834
8835 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8836 {
8837 y = XVECEXP (y, 0, 0);
8838
8839 #ifdef HAVE_AS_TLS
8840 /* Do not associate thread-local symbols with the original
8841 constant pool symbol. */
8842 if (TARGET_XCOFF
8843 && SYMBOL_REF_P (y)
8844 && CONSTANT_POOL_ADDRESS_P (y)
8845 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8846 return orig_x;
8847 #endif
8848
8849 if (offset != NULL_RTX)
8850 y = gen_rtx_PLUS (Pmode, y, offset);
8851 if (!MEM_P (orig_x))
8852 return y;
8853 else
8854 return replace_equiv_address_nv (orig_x, y);
8855 }
8856
8857 if (TARGET_MACHO
8858 && GET_CODE (orig_x) == LO_SUM
8859 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8860 {
8861 y = XEXP (XEXP (orig_x, 1), 0);
8862 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8863 return XVECEXP (y, 0, 0);
8864 }
8865
8866 return orig_x;
8867 }
8868
8869 /* Return true if X shouldn't be emitted into the debug info.
8870 The linker doesn't like .toc section references from
8871 .debug_* sections, so reject .toc section symbols. */
8872
8873 static bool
8874 rs6000_const_not_ok_for_debug_p (rtx x)
8875 {
8876 if (GET_CODE (x) == UNSPEC)
8877 return true;
8878 if (SYMBOL_REF_P (x)
8879 && CONSTANT_POOL_ADDRESS_P (x))
8880 {
8881 rtx c = get_pool_constant (x);
8882 machine_mode cmode = get_pool_mode (x);
8883 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8884 return true;
8885 }
8886
8887 return false;
8888 }
8889
8890 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8891
8892 static bool
8893 rs6000_legitimate_combined_insn (rtx_insn *insn)
8894 {
8895 int icode = INSN_CODE (insn);
8896
8897 /* Reject creating doloop insns. Combine should not be allowed
8898 to create these for a number of reasons:
8899 1) In a nested loop, if combine creates one of these in an
8900 outer loop and the register allocator happens to allocate ctr
8901 to the outer loop insn, then the inner loop can't use ctr.
8902 Inner loops ought to be more highly optimized.
8903 2) Combine often wants to create one of these from what was
8904 originally a three insn sequence, first combining the three
8905 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8906 allocated ctr, the splitter takes use back to the three insn
8907 sequence. It's better to stop combine at the two insn
8908 sequence.
8909 3) Faced with not being able to allocate ctr for ctrsi/crtdi
8910 insns, the register allocator sometimes uses floating point
8911 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8912 jump insn and output reloads are not implemented for jumps,
8913 the ctrsi/ctrdi splitters need to handle all possible cases.
8914 That's a pain, and it gets to be seriously difficult when a
8915 splitter that runs after reload needs memory to transfer from
8916 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8917 for the difficult case. It's better to not create problems
8918 in the first place. */
8919 if (icode != CODE_FOR_nothing
8920 && (icode == CODE_FOR_bdz_si
8921 || icode == CODE_FOR_bdz_di
8922 || icode == CODE_FOR_bdnz_si
8923 || icode == CODE_FOR_bdnz_di
8924 || icode == CODE_FOR_bdztf_si
8925 || icode == CODE_FOR_bdztf_di
8926 || icode == CODE_FOR_bdnztf_si
8927 || icode == CODE_FOR_bdnztf_di))
8928 return false;
8929
8930 return true;
8931 }
8932
8933 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8934
8935 static GTY(()) rtx rs6000_tls_symbol;
8936 static rtx
8937 rs6000_tls_get_addr (void)
8938 {
8939 if (!rs6000_tls_symbol)
8940 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8941
8942 return rs6000_tls_symbol;
8943 }
8944
8945 /* Construct the SYMBOL_REF for TLS GOT references. */
8946
8947 static GTY(()) rtx rs6000_got_symbol;
8948 rtx
8949 rs6000_got_sym (void)
8950 {
8951 if (!rs6000_got_symbol)
8952 {
8953 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8954 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8955 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8956 }
8957
8958 return rs6000_got_symbol;
8959 }
8960
8961 /* AIX Thread-Local Address support. */
8962
8963 static rtx
8964 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8965 {
8966 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8967 const char *name;
8968 char *tlsname;
8969
8970 name = XSTR (addr, 0);
8971 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8972 or the symbol will be in TLS private data section. */
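/* e.g. (illustrative) "foo" becomes "foo[TL]", or "foo[UL]" when the
   symbol is a BSS-style initializer, matching the code below.  */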
8973 if (name[strlen (name) - 1] != ']'
8974 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8975 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8976 {
8977 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8978 strcpy (tlsname, name);
8979 strcat (tlsname,
8980 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8981 tlsaddr = copy_rtx (addr);
8982 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8983 }
8984 else
8985 tlsaddr = addr;
8986
8987 /* Place addr into TOC constant pool. */
8988 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8989
8990 /* Output the TOC entry and create the MEM referencing the value. */
8991 if (constant_pool_expr_p (XEXP (sym, 0))
8992 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8993 {
8994 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8995 mem = gen_const_mem (Pmode, tocref);
8996 set_mem_alias_set (mem, get_TOC_alias_set ());
8997 }
8998 else
8999 return sym;
9000
9001 /* Use global-dynamic for local-dynamic. */
9002 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9003 || model == TLS_MODEL_LOCAL_DYNAMIC)
9004 {
9005 /* Create new TOC reference for @m symbol. */
9006 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9007 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9008 strcpy (tlsname, "*LCM");
9009 strcat (tlsname, name + 3);
9010 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9011 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9012 tocref = create_TOC_reference (modaddr, NULL_RTX);
9013 rtx modmem = gen_const_mem (Pmode, tocref);
9014 set_mem_alias_set (modmem, get_TOC_alias_set ());
9015
9016 rtx modreg = gen_reg_rtx (Pmode);
9017 emit_insn (gen_rtx_SET (modreg, modmem));
9018
9019 tmpreg = gen_reg_rtx (Pmode);
9020 emit_insn (gen_rtx_SET (tmpreg, mem));
9021
9022 dest = gen_reg_rtx (Pmode);
9023 if (TARGET_32BIT)
9024 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9025 else
9026 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9027 return dest;
9028 }
9029 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9030 else if (TARGET_32BIT)
9031 {
9032 tlsreg = gen_reg_rtx (SImode);
9033 emit_insn (gen_tls_get_tpointer (tlsreg));
9034 }
9035 else
9036 tlsreg = gen_rtx_REG (DImode, 13);
9037
9038 /* Load the TOC value into temporary register. */
9039 tmpreg = gen_reg_rtx (Pmode);
9040 emit_insn (gen_rtx_SET (tmpreg, mem));
9041 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9042 gen_rtx_MINUS (Pmode, addr, tlsreg));
9043
9044 /* Add TOC symbol value to TLS pointer. */
9045 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9046
9047 return dest;
9048 }
9049
9050 /* Passes the TLS arg value from the global dynamic and local dynamic
9051 emit_library_call_value calls in rs6000_legitimize_tls_address to
9052 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9053 marker relocs put on __tls_get_addr calls. */
9054 static rtx global_tlsarg;
9055
9056 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9057 this (thread-local) address. */
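/* Illustrative sequence (not from the original source): for 64-bit
   global dynamic TLS the code below typically ends up emitting roughly
       addi 3,2,x@got@tlsgd
       bl __tls_get_addr(x@tlsgd)
       nop
   (an @ha/@l pair replaces the addi for -mcmodel=medium), with the
   result returned in r3.  */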
9058
9059 static rtx
9060 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9061 {
9062 rtx dest, insn;
9063
9064 if (TARGET_XCOFF)
9065 return rs6000_legitimize_tls_address_aix (addr, model);
9066
9067 dest = gen_reg_rtx (Pmode);
9068 if (model == TLS_MODEL_LOCAL_EXEC
9069 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9070 {
9071 rtx tlsreg;
9072
9073 if (TARGET_64BIT)
9074 {
9075 tlsreg = gen_rtx_REG (Pmode, 13);
9076 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9077 }
9078 else
9079 {
9080 tlsreg = gen_rtx_REG (Pmode, 2);
9081 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9082 }
9083 emit_insn (insn);
9084 }
9085 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9086 {
9087 rtx tlsreg, tmp;
9088
9089 tmp = gen_reg_rtx (Pmode);
9090 if (TARGET_64BIT)
9091 {
9092 tlsreg = gen_rtx_REG (Pmode, 13);
9093 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9094 }
9095 else
9096 {
9097 tlsreg = gen_rtx_REG (Pmode, 2);
9098 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9099 }
9100 emit_insn (insn);
9101 if (TARGET_64BIT)
9102 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9103 else
9104 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9105 emit_insn (insn);
9106 }
9107 else
9108 {
9109 rtx got, tga, tmp1, tmp2;
9110
9111 /* We currently use relocations like @got@tlsgd for tls, which
9112 means the linker will handle allocation of tls entries, placing
9113 them in the .got section. So use a pointer to the .got section,
9114 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9115 or to secondary GOT sections used by 32-bit -fPIC. */
9116 if (rs6000_pcrel_p ())
9117 got = const0_rtx;
9118 else if (TARGET_64BIT)
9119 got = gen_rtx_REG (Pmode, 2);
9120 else
9121 {
9122 if (flag_pic == 1)
9123 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9124 else
9125 {
9126 rtx gsym = rs6000_got_sym ();
9127 got = gen_reg_rtx (Pmode);
9128 if (flag_pic == 0)
9129 rs6000_emit_move (got, gsym, Pmode);
9130 else
9131 {
9132 rtx mem, lab;
9133
9134 tmp1 = gen_reg_rtx (Pmode);
9135 tmp2 = gen_reg_rtx (Pmode);
9136 mem = gen_const_mem (Pmode, tmp1);
9137 lab = gen_label_rtx ();
9138 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9139 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9140 if (TARGET_LINK_STACK)
9141 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9142 emit_move_insn (tmp2, mem);
9143 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9144 set_unique_reg_note (last, REG_EQUAL, gsym);
9145 }
9146 }
9147 }
9148
9149 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9150 {
9151 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9152 UNSPEC_TLSGD);
9153 tga = rs6000_tls_get_addr ();
9154 rtx argreg = gen_rtx_REG (Pmode, 3);
9155 emit_insn (gen_rtx_SET (argreg, arg));
9156 global_tlsarg = arg;
9157 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9158 global_tlsarg = NULL_RTX;
9159
9160 /* Make a note so that the result of this call can be CSEd. */
9161 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9162 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9163 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9164 }
9165 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9166 {
9167 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9168 tga = rs6000_tls_get_addr ();
9169 tmp1 = gen_reg_rtx (Pmode);
9170 rtx argreg = gen_rtx_REG (Pmode, 3);
9171 emit_insn (gen_rtx_SET (argreg, arg));
9172 global_tlsarg = arg;
9173 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9174 global_tlsarg = NULL_RTX;
9175
9176 /* Make a note so that the result of this call can be CSEd. */
9177 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9178 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9179 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9180
9181 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9182 {
9183 if (TARGET_64BIT)
9184 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9185 else
9186 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9187 }
9188 else if (rs6000_tls_size == 32)
9189 {
9190 tmp2 = gen_reg_rtx (Pmode);
9191 if (TARGET_64BIT)
9192 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9193 else
9194 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9195 emit_insn (insn);
9196 if (TARGET_64BIT)
9197 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9198 else
9199 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9200 }
9201 else
9202 {
9203 tmp2 = gen_reg_rtx (Pmode);
9204 if (TARGET_64BIT)
9205 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9206 else
9207 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9208 emit_insn (insn);
9209 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9210 }
9211 emit_insn (insn);
9212 }
9213 else
9214 {
9215 /* IE, or 64-bit offset LE. */
9216 tmp2 = gen_reg_rtx (Pmode);
9217 if (TARGET_64BIT)
9218 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9219 else
9220 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9221 emit_insn (insn);
9222 if (rs6000_pcrel_p ())
9223 {
9224 if (TARGET_64BIT)
9225 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9226 else
9227 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9228 }
9229 else if (TARGET_64BIT)
9230 insn = gen_tls_tls_64 (dest, tmp2, addr);
9231 else
9232 insn = gen_tls_tls_32 (dest, tmp2, addr);
9233 emit_insn (insn);
9234 }
9235 }
9236
9237 return dest;
9238 }
9239
9240 /* Only create the global variable for the stack protect guard if we are using
9241 the global flavor of that guard. */
9242 static tree
9243 rs6000_init_stack_protect_guard (void)
9244 {
9245 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9246 return default_stack_protect_guard ();
9247
9248 return NULL_TREE;
9249 }
9250
9251 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9252
9253 static bool
9254 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9255 {
9256 if (GET_CODE (x) == HIGH
9257 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9258 return true;
9259
9260 /* A TLS symbol in the TOC cannot contain a sum. */
9261 if (GET_CODE (x) == CONST
9262 && GET_CODE (XEXP (x, 0)) == PLUS
9263 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9264 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9265 return true;
9266
9267 /* Do not place an ELF TLS symbol in the constant pool. */
9268 return TARGET_ELF && tls_referenced_p (x);
9269 }
9270
9271 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9272 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9273 can be addressed relative to the toc pointer. */
9274
9275 static bool
9276 use_toc_relative_ref (rtx sym, machine_mode mode)
9277 {
9278 return ((constant_pool_expr_p (sym)
9279 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9280 get_pool_mode (sym)))
9281 || (TARGET_CMODEL == CMODEL_MEDIUM
9282 && SYMBOL_REF_LOCAL_P (sym)
9283 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9284 }
9285
9286 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9287 that is a valid memory address for an instruction.
9288 The MODE argument is the machine mode for the MEM expression
9289 that wants to use this address.
9290
9291 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9292 refers to a constant pool entry of an address (or the sum of it
9293 plus a constant), a short (16-bit signed) constant plus a register,
9294 the sum of two registers, or a register indirect, possibly with an
9295 auto-increment. For DFmode, DDmode and DImode with a constant plus
9296 register, we must ensure that both words are addressable, or on
9297 PowerPC64 that the offset is word aligned.
9298
9299 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9300 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9301 because adjacent memory cells are accessed by adding word-sized offsets
9302 during assembly output. */
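/* Representative examples (illustrative only): (reg),
   (plus (reg) (const_int 16)), (plus (reg) (reg)) and, with prefixed
   addressing, (plus (reg) (const_int 0x12345)) are among the forms
   accepted below.  */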
9303 static bool
9304 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9305 {
9306 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9307 bool quad_offset_p = mode_supports_dq_form (mode);
9308
9309 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9310 return 0;
9311
9312 /* Handle unaligned altivec lvx/stvx type addresses. */
9313 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9314 && GET_CODE (x) == AND
9315 && CONST_INT_P (XEXP (x, 1))
9316 && INTVAL (XEXP (x, 1)) == -16)
9317 {
9318 x = XEXP (x, 0);
9319 return (legitimate_indirect_address_p (x, reg_ok_strict)
9320 || legitimate_indexed_address_p (x, reg_ok_strict)
9321 || virtual_stack_registers_memory_p (x));
9322 }
9323
9324 if (legitimate_indirect_address_p (x, reg_ok_strict))
9325 return 1;
9326 if (TARGET_UPDATE
9327 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9328 && mode_supports_pre_incdec_p (mode)
9329 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9330 return 1;
9331
9332 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9333 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9334 return 1;
9335
9336 /* Handle restricted vector d-form offsets in ISA 3.0. */
9337 if (quad_offset_p)
9338 {
9339 if (quad_address_p (x, mode, reg_ok_strict))
9340 return 1;
9341 }
9342 else if (virtual_stack_registers_memory_p (x))
9343 return 1;
9344
9345 else if (reg_offset_p)
9346 {
9347 if (legitimate_small_data_p (mode, x))
9348 return 1;
9349 if (legitimate_constant_pool_address_p (x, mode,
9350 reg_ok_strict || lra_in_progress))
9351 return 1;
9352 }
9353
9354 /* For TImode, if we have TImode in VSX registers, only allow register
9355 indirect addresses. This will allow the values to go in either GPRs
9356 or VSX registers without reloading. The vector types would tend to
9357 go into VSX registers, so we allow REG+REG, while TImode seems
9358 somewhat split, in that some uses are GPR based, and some VSX based. */
9359 /* FIXME: We could loosen this by changing the following to
9360 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9361 but currently we cannot allow REG+REG addressing for TImode. See
9362 PR72827 for complete details on how this ends up hoodwinking DSE. */
9363 if (mode == TImode && TARGET_VSX)
9364 return 0;
9365 /* If not REG_OK_STRICT (before reload), let any stack offset pass. */
9366 if (! reg_ok_strict
9367 && reg_offset_p
9368 && GET_CODE (x) == PLUS
9369 && REG_P (XEXP (x, 0))
9370 && (XEXP (x, 0) == virtual_stack_vars_rtx
9371 || XEXP (x, 0) == arg_pointer_rtx)
9372 && CONST_INT_P (XEXP (x, 1)))
9373 return 1;
9374 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9375 return 1;
9376 if (!FLOAT128_2REG_P (mode)
9377 && (TARGET_HARD_FLOAT
9378 || TARGET_POWERPC64
9379 || (mode != DFmode && mode != DDmode))
9380 && (TARGET_POWERPC64 || mode != DImode)
9381 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9382 && mode != PTImode
9383 && !avoiding_indexed_address_p (mode)
9384 && legitimate_indexed_address_p (x, reg_ok_strict))
9385 return 1;
9386 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9387 && mode_supports_pre_modify_p (mode)
9388 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9389 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9390 reg_ok_strict, false)
9391 || (!avoiding_indexed_address_p (mode)
9392 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9393 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9394 {
9395 /* There is no prefixed version of the load/store with update. */
9396 rtx addr = XEXP (x, 1);
9397 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9398 }
9399 if (reg_offset_p && !quad_offset_p
9400 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9401 return 1;
9402 return 0;
9403 }
9404
9405 /* Debug version of rs6000_legitimate_address_p. */
9406 static bool
9407 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9408 bool reg_ok_strict)
9409 {
9410 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9411 fprintf (stderr,
9412 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9413 "strict = %d, reload = %s, code = %s\n",
9414 ret ? "true" : "false",
9415 GET_MODE_NAME (mode),
9416 reg_ok_strict,
9417 (reload_completed ? "after" : "before"),
9418 GET_RTX_NAME (GET_CODE (x)));
9419 debug_rtx (x);
9420
9421 return ret;
9422 }
9423
9424 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9425
9426 static bool
9427 rs6000_mode_dependent_address_p (const_rtx addr,
9428 addr_space_t as ATTRIBUTE_UNUSED)
9429 {
9430 return rs6000_mode_dependent_address_ptr (addr);
9431 }
9432
9433 /* Return true if ADDR (a legitimate address expression)
9434 has an effect that depends on the machine mode it is used for.
9435
9436 On the RS/6000 this is true of all integral offsets (since AltiVec
9437 and VSX modes don't allow them) and of pre-increment and decrement addresses.
9438
9439 ??? Except that due to conceptual problems in offsettable_address_p
9440 we can't really report the problems of integral offsets. So leave
9441 this assuming that the adjustable offset must be valid for the
9442 sub-words of a TFmode operand, which is what we had before. */
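/* Worked example (illustrative numbers): on 64-bit, EXTRA below is 8,
   so an offset of 32759 still passes SIGNED_16BIT_OFFSET_EXTRA_P while
   32760 does not, because touching the last sub-word could require a
   displacement beyond 32767; such an address is mode dependent.  */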
9443
9444 static bool
9445 rs6000_mode_dependent_address (const_rtx addr)
9446 {
9447 switch (GET_CODE (addr))
9448 {
9449 case PLUS:
9450 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9451 is considered a legitimate address before reload, so there
9452 are no offset restrictions in that case. Note that this
9453 condition is safe in strict mode because any address involving
9454 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9455 been rejected as illegitimate. */
9456 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9457 && XEXP (addr, 0) != arg_pointer_rtx
9458 && CONST_INT_P (XEXP (addr, 1)))
9459 {
9460 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9461 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9462 if (TARGET_PREFIXED)
9463 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9464 else
9465 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9466 }
9467 break;
9468
9469 case LO_SUM:
9470 /* Anything in the constant pool is sufficiently aligned that
9471 all bytes have the same high part address. */
9472 return !legitimate_constant_pool_address_p (addr, QImode, false);
9473
9474 /* Auto-increment cases are now treated generically in recog.c. */
9475 case PRE_MODIFY:
9476 return TARGET_UPDATE;
9477
9478 /* AND is only allowed in Altivec loads. */
9479 case AND:
9480 return true;
9481
9482 default:
9483 break;
9484 }
9485
9486 return false;
9487 }
9488
9489 /* Debug version of rs6000_mode_dependent_address. */
9490 static bool
9491 rs6000_debug_mode_dependent_address (const_rtx addr)
9492 {
9493 bool ret = rs6000_mode_dependent_address (addr);
9494
9495 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9496 ret ? "true" : "false");
9497 debug_rtx (addr);
9498
9499 return ret;
9500 }
9501
9502 /* Implement FIND_BASE_TERM. */
9503
9504 rtx
9505 rs6000_find_base_term (rtx op)
9506 {
9507 rtx base;
9508
9509 base = op;
9510 if (GET_CODE (base) == CONST)
9511 base = XEXP (base, 0);
9512 if (GET_CODE (base) == PLUS)
9513 base = XEXP (base, 0);
9514 if (GET_CODE (base) == UNSPEC)
9515 switch (XINT (base, 1))
9516 {
9517 case UNSPEC_TOCREL:
9518 case UNSPEC_MACHOPIC_OFFSET:
9519 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9520 for aliasing purposes. */
9521 return XVECEXP (base, 0, 0);
9522 }
9523
9524 return op;
9525 }
9526
9527 /* More elaborate version of recog's offsettable_memref_p predicate
9528 that works around the ??? note of rs6000_mode_dependent_address.
9529 In particular it accepts
9530
9531 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9532
9533 in 32-bit mode, that the recog predicate rejects. */
9534
9535 static bool
9536 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9537 {
9538 bool worst_case;
9539
9540 if (!MEM_P (op))
9541 return false;
9542
9543 /* First mimic offsettable_memref_p. */
9544 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9545 return true;
9546
9547 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9548 the latter predicate knows nothing about the mode of the memory
9549 reference and, therefore, assumes that it is the largest supported
9550 mode (TFmode). As a consequence, legitimate offsettable memory
9551 references are rejected. rs6000_legitimate_offset_address_p contains
9552 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9553 at least with a little bit of help here given that we know the
9554 actual registers used. */
9555 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9556 || GET_MODE_SIZE (reg_mode) == 4);
9557 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9558 strict, worst_case);
9559 }
9560
9561 /* Determine the reassociation width to be used in reassociate_bb.
9562 This takes into account how many parallel operations we
9563 can actually do of a given type, and also the latency.
9564 P8:
9565 int add/sub 6/cycle
9566 mul 2/cycle
9567 vect add/sub/mul 2/cycle
9568 fp add/sub/mul 2/cycle
9569 dfp 1/cycle
9570 */
9571
9572 static int
9573 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9574 machine_mode mode)
9575 {
9576 switch (rs6000_tune)
9577 {
9578 case PROCESSOR_POWER8:
9579 case PROCESSOR_POWER9:
9580 case PROCESSOR_POWER10:
9581 if (DECIMAL_FLOAT_MODE_P (mode))
9582 return 1;
9583 if (VECTOR_MODE_P (mode))
9584 return 4;
9585 if (INTEGRAL_MODE_P (mode))
9586 return 1;
9587 if (FLOAT_MODE_P (mode))
9588 return 4;
9589 break;
9590 default:
9591 break;
9592 }
9593 return 1;
9594 }
9595
9596 /* Change register usage conditional on target flags. */
9597 static void
9598 rs6000_conditional_register_usage (void)
9599 {
9600 int i;
9601
9602 if (TARGET_DEBUG_TARGET)
9603 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9604
9605 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9606 if (TARGET_64BIT)
9607 fixed_regs[13] = call_used_regs[13] = 1;
9608
9609 /* Conditionally disable FPRs. */
9610 if (TARGET_SOFT_FLOAT)
9611 for (i = 32; i < 64; i++)
9612 fixed_regs[i] = call_used_regs[i] = 1;
9613
9614 /* The TOC register is not killed across calls in a way that is
9615 visible to the compiler. */
9616 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9617 call_used_regs[2] = 0;
9618
9619 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9620 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9621
9622 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9623 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9624 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9625
9626 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9627 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9628 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9629
9630 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9631 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9632
9633 if (!TARGET_ALTIVEC && !TARGET_VSX)
9634 {
9635 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9636 fixed_regs[i] = call_used_regs[i] = 1;
9637 call_used_regs[VRSAVE_REGNO] = 1;
9638 }
9639
9640 if (TARGET_ALTIVEC || TARGET_VSX)
9641 global_regs[VSCR_REGNO] = 1;
9642
9643 if (TARGET_ALTIVEC_ABI)
9644 {
9645 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9646 call_used_regs[i] = 1;
9647
9648 /* AIX reserves VR20:31 in non-extended ABI mode. */
9649 if (TARGET_XCOFF)
9650 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9651 fixed_regs[i] = call_used_regs[i] = 1;
9652 }
9653 }
9654
9655 \f
9656 /* Output insns to set DEST equal to the constant SOURCE as a series of
9657 lis, ori and shl instructions and return TRUE. */
9658
9659 bool
9660 rs6000_emit_set_const (rtx dest, rtx source)
9661 {
9662 machine_mode mode = GET_MODE (dest);
9663 rtx temp, set;
9664 rtx_insn *insn;
9665 HOST_WIDE_INT c;
9666
9667 gcc_checking_assert (CONST_INT_P (source));
9668 c = INTVAL (source);
9669 switch (mode)
9670 {
9671 case E_QImode:
9672 case E_HImode:
9673 emit_insn (gen_rtx_SET (dest, source));
9674 return true;
9675
9676 case E_SImode:
9677 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9678
9679 emit_insn (gen_rtx_SET (copy_rtx (temp),
9680 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9681 emit_insn (gen_rtx_SET (dest,
9682 gen_rtx_IOR (SImode, copy_rtx (temp),
9683 GEN_INT (c & 0xffff))));
9684 break;
9685
9686 case E_DImode:
9687 if (!TARGET_POWERPC64)
9688 {
9689 rtx hi, lo;
9690
9691 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9692 DImode);
9693 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9694 DImode);
9695 emit_move_insn (hi, GEN_INT (c >> 32));
9696 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9697 emit_move_insn (lo, GEN_INT (c));
9698 }
9699 else
9700 rs6000_emit_set_long_const (dest, c);
9701 break;
9702
9703 default:
9704 gcc_unreachable ();
9705 }
9706
9707 insn = get_last_insn ();
9708 set = single_set (insn);
9709 if (! CONSTANT_P (SET_SRC (set)))
9710 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9711
9712 return true;
9713 }
9714
9715 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9716 Output insns to set DEST equal to the constant C as a series of
9717 lis, ori and shl instructions. */
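/* Worked example (illustrative): C = 0x123456789abcdef0 splits into
   ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc, ud1 = 0xdef0, and the
   general case below emits roughly
       lis  tmp,0x1234
       ori  tmp,tmp,0x5678
       sldi tmp,tmp,32
       oris tmp,tmp,0x9abc
       ori  dest,tmp,0xdef0  */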
9718
9719 static void
9720 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9721 {
9722 rtx temp;
9723 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9724
9725 ud1 = c & 0xffff;
9726 c = c >> 16;
9727 ud2 = c & 0xffff;
9728 c = c >> 16;
9729 ud3 = c & 0xffff;
9730 c = c >> 16;
9731 ud4 = c & 0xffff;
9732
9733 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9734 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9735 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9736
9737 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9738 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9739 {
9740 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9741
9742 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9743 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9744 if (ud1 != 0)
9745 emit_move_insn (dest,
9746 gen_rtx_IOR (DImode, copy_rtx (temp),
9747 GEN_INT (ud1)));
9748 }
9749 else if (ud3 == 0 && ud4 == 0)
9750 {
9751 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9752
9753 gcc_assert (ud2 & 0x8000);
9754 emit_move_insn (copy_rtx (temp),
9755 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9756 if (ud1 != 0)
9757 emit_move_insn (copy_rtx (temp),
9758 gen_rtx_IOR (DImode, copy_rtx (temp),
9759 GEN_INT (ud1)));
9760 emit_move_insn (dest,
9761 gen_rtx_ZERO_EXTEND (DImode,
9762 gen_lowpart (SImode,
9763 copy_rtx (temp))));
9764 }
9765 else if (ud1 == ud3 && ud2 == ud4)
9766 {
9767 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9768 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9769 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9770 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9771 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9772 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9773 }
9774 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9775 || (ud4 == 0 && ! (ud3 & 0x8000)))
9776 {
9777 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9778
9779 emit_move_insn (copy_rtx (temp),
9780 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9781 if (ud2 != 0)
9782 emit_move_insn (copy_rtx (temp),
9783 gen_rtx_IOR (DImode, copy_rtx (temp),
9784 GEN_INT (ud2)));
9785 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9786 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9787 GEN_INT (16)));
9788 if (ud1 != 0)
9789 emit_move_insn (dest,
9790 gen_rtx_IOR (DImode, copy_rtx (temp),
9791 GEN_INT (ud1)));
9792 }
9793 else
9794 {
9795 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9796
9797 emit_move_insn (copy_rtx (temp),
9798 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9799 if (ud3 != 0)
9800 emit_move_insn (copy_rtx (temp),
9801 gen_rtx_IOR (DImode, copy_rtx (temp),
9802 GEN_INT (ud3)));
9803
9804 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9805 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9806 GEN_INT (32)));
9807 if (ud2 != 0)
9808 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9809 gen_rtx_IOR (DImode, copy_rtx (temp),
9810 GEN_INT (ud2 << 16)));
9811 if (ud1 != 0)
9812 emit_move_insn (dest,
9813 gen_rtx_IOR (DImode, copy_rtx (temp),
9814 GEN_INT (ud1)));
9815 }
9816 }
9817
9818 /* Helper for the following. Get rid of [r+r] memory refs
9819 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
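/* e.g. (illustrative) a TImode (mem (plus r9 r10)) becomes
   (mem (reg tmp)) after the r9+r10 sum is computed into a new
   pseudo by copy_addr_to_reg.  */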
9820
9821 static void
9822 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9823 {
9824 if (MEM_P (operands[0])
9825 && !REG_P (XEXP (operands[0], 0))
9826 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9827 GET_MODE (operands[0]), false))
9828 operands[0]
9829 = replace_equiv_address (operands[0],
9830 copy_addr_to_reg (XEXP (operands[0], 0)));
9831
9832 if (MEM_P (operands[1])
9833 && !REG_P (XEXP (operands[1], 0))
9834 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9835 GET_MODE (operands[1]), false))
9836 operands[1]
9837 = replace_equiv_address (operands[1],
9838 copy_addr_to_reg (XEXP (operands[1], 0)));
9839 }
9840
9841 /* Generate a vector of constants to permute MODE for a little-endian
9842 storage operation by swapping the two halves of a vector. */
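/* For V4SImode, for example, this yields the selector { 2, 3, 0, 1 },
   i.e. the two doubleword halves swapped.  */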
9843 static rtvec
9844 rs6000_const_vec (machine_mode mode)
9845 {
9846 int i, subparts;
9847 rtvec v;
9848
9849 switch (mode)
9850 {
9851 case E_V1TImode:
9852 subparts = 1;
9853 break;
9854 case E_V2DFmode:
9855 case E_V2DImode:
9856 subparts = 2;
9857 break;
9858 case E_V4SFmode:
9859 case E_V4SImode:
9860 subparts = 4;
9861 break;
9862 case E_V8HImode:
9863 subparts = 8;
9864 break;
9865 case E_V16QImode:
9866 subparts = 16;
9867 break;
9868 default:
9869 gcc_unreachable ();
9870 }
9871
9872 v = rtvec_alloc (subparts);
9873
9874 for (i = 0; i < subparts / 2; ++i)
9875 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9876 for (i = subparts / 2; i < subparts; ++i)
9877 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9878
9879 return v;
9880 }
9881
9882 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9883 store operation. */
9884 void
9885 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9886 {
9887 /* Scalar permutations are easier to express in integer modes rather than
9888 floating-point modes, so cast them here. We use V1TImode instead
9889 of TImode to ensure that the values don't go through GPRs. */
9890 if (FLOAT128_VECTOR_P (mode))
9891 {
9892 dest = gen_lowpart (V1TImode, dest);
9893 source = gen_lowpart (V1TImode, source);
9894 mode = V1TImode;
9895 }
9896
9897 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9898 scalar. */
9899 if (mode == TImode || mode == V1TImode)
9900 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9901 GEN_INT (64))));
9902 else
9903 {
9904 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9905 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9906 }
9907 }
9908
9909 /* Emit a little-endian load from vector memory location SOURCE to VSX
9910 register DEST in mode MODE. The load is done with two permuting
9911 insns that represent an lxvd2x and an xxpermdi. */
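/* Illustrative effect (little endian): lxvd2x loads the two
   doublewords swapped relative to array order, and the following
   xxpermdi swaps them back, so DEST ends up in element order.  */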
9912 void
9913 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9914 {
9915 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9916 V1TImode). */
9917 if (mode == TImode || mode == V1TImode)
9918 {
9919 mode = V2DImode;
9920 dest = gen_lowpart (V2DImode, dest);
9921 source = adjust_address (source, V2DImode, 0);
9922 }
9923
9924 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9925 rs6000_emit_le_vsx_permute (tmp, source, mode);
9926 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9927 }
9928
9929 /* Emit a little-endian store to vector memory location DEST from VSX
9930 register SOURCE in mode MODE. The store is done with two permuting
9931 insns that represent an xxpermdi and an stxvd2x. */
9932 void
9933 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9934 {
9935 /* This should never be called during or after LRA, because it does
9936 not re-permute the source register. It is intended only for use
9937 during expand. */
9938 gcc_assert (!lra_in_progress && !reload_completed);
9939
9940 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9941 V1TImode). */
9942 if (mode == TImode || mode == V1TImode)
9943 {
9944 mode = V2DImode;
9945 dest = adjust_address (dest, V2DImode, 0);
9946 source = gen_lowpart (V2DImode, source);
9947 }
9948
9949 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9950 rs6000_emit_le_vsx_permute (tmp, source, mode);
9951 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9952 }
9953
9954 /* Emit a sequence representing a little-endian VSX load or store,
9955 moving data from SOURCE to DEST in mode MODE. This is done
9956 separately from rs6000_emit_move to ensure it is called only
9957 during expand. LE VSX loads and stores introduced later are
9958 handled with a split. The expand-time RTL generation allows
9959 us to optimize away redundant pairs of register-permutes. */
9960 void
9961 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9962 {
9963 gcc_assert (!BYTES_BIG_ENDIAN
9964 && VECTOR_MEM_VSX_P (mode)
9965 && !TARGET_P9_VECTOR
9966 && !gpr_or_gpr_p (dest, source)
9967 && (MEM_P (source) ^ MEM_P (dest)));
9968
9969 if (MEM_P (source))
9970 {
9971 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9972 rs6000_emit_le_vsx_load (dest, source, mode);
9973 }
9974 else
9975 {
9976 if (!REG_P (source))
9977 source = force_reg (mode, source);
9978 rs6000_emit_le_vsx_store (dest, source, mode);
9979 }
9980 }
9981
9982 /* Return whether a SFmode or SImode move can be done without converting one
9983 mode to another. This arises when we have:
9984
9985 (SUBREG:SF (REG:SI ...))
9986 (SUBREG:SI (REG:SF ...))
9987
9988 and one of the values is in a floating point/vector register, where SFmode
9989 scalars are stored in DFmode format. */
9990
9991 bool
9992 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9993 {
9994 if (TARGET_ALLOW_SF_SUBREG)
9995 return true;
9996
9997 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9998 return true;
9999
10000 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10001 return true;
10002
10003 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10004 if (SUBREG_P (dest))
10005 {
10006 rtx dest_subreg = SUBREG_REG (dest);
10007 rtx src_subreg = SUBREG_REG (src);
10008 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10009 }
10010
10011 return false;
10012 }
10013
10014
10015 /* Helper function to change moves with:
10016
10017 (SUBREG:SF (REG:SI)) and
10018 (SUBREG:SI (REG:SF))
10019
10020 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10021 values are stored as DFmode values in the VSX registers. We need to convert
10022 the bits before we can use a direct move or operate on the bits in the
10023 vector register as an integer type.
10024
10025 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10026
10027 static bool
10028 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10029 {
10030 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10031 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10032 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10033 {
10034 rtx inner_source = SUBREG_REG (source);
10035 machine_mode inner_mode = GET_MODE (inner_source);
10036
10037 if (mode == SImode && inner_mode == SFmode)
10038 {
10039 emit_insn (gen_movsi_from_sf (dest, inner_source));
10040 return true;
10041 }
10042
10043 if (mode == SFmode && inner_mode == SImode)
10044 {
10045 emit_insn (gen_movsf_from_si (dest, inner_source));
10046 return true;
10047 }
10048 }
10049
10050 return false;
10051 }
10052
10053 /* Emit a move from SOURCE to DEST in mode MODE. */
10054 void
10055 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10056 {
10057 rtx operands[2];
10058 operands[0] = dest;
10059 operands[1] = source;
10060
10061 if (TARGET_DEBUG_ADDR)
10062 {
10063 fprintf (stderr,
10064 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10065 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10066 GET_MODE_NAME (mode),
10067 lra_in_progress,
10068 reload_completed,
10069 can_create_pseudo_p ());
10070 debug_rtx (dest);
10071 fprintf (stderr, "source:\n");
10072 debug_rtx (source);
10073 }
10074
10075 /* Check that we get CONST_WIDE_INT only when we should. */
10076 if (CONST_WIDE_INT_P (operands[1])
10077 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10078 gcc_unreachable ();
10079
10080 #ifdef HAVE_AS_GNU_ATTRIBUTE
10081 /* If we use a long double type, set the flags in .gnu_attribute that say
10082 what the long double type is. This is to allow the linker's warning
10083 message for the wrong long double to be useful, even if the function does
10084 not do a call (for example, doing a 128-bit add on power9 if the long
10085 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10086 are used when they aren't the default long double type. */
10087 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10088 {
10089 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10090 rs6000_passes_float = rs6000_passes_long_double = true;
10091
10092 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10093 rs6000_passes_float = rs6000_passes_long_double = true;
10094 }
10095 #endif
10096
10097 /* See if we need to special case SImode/SFmode SUBREG moves. */
10098 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10099 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10100 return;
10101
10102 /* Check if GCC is setting up a block move that will end up using FP
10103 registers as temporaries. We must make sure this is acceptable. */
10104 if (MEM_P (operands[0])
10105 && MEM_P (operands[1])
10106 && mode == DImode
10107 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10108 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10109 && ! (rs6000_slow_unaligned_access (SImode,
10110 (MEM_ALIGN (operands[0]) > 32
10111 ? 32 : MEM_ALIGN (operands[0])))
10112 || rs6000_slow_unaligned_access (SImode,
10113 (MEM_ALIGN (operands[1]) > 32
10114 ? 32 : MEM_ALIGN (operands[1]))))
10115 && ! MEM_VOLATILE_P (operands [0])
10116 && ! MEM_VOLATILE_P (operands [1]))
10117 {
10118 emit_move_insn (adjust_address (operands[0], SImode, 0),
10119 adjust_address (operands[1], SImode, 0));
10120 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10121 adjust_address (copy_rtx (operands[1]), SImode, 4));
10122 return;
10123 }
10124
10125 if (can_create_pseudo_p () && MEM_P (operands[0])
10126 && !gpc_reg_operand (operands[1], mode))
10127 operands[1] = force_reg (mode, operands[1]);
10128
10129 /* Recognize the case where operand[1] is a reference to thread-local
10130 data and load its address to a register. */
10131 if (tls_referenced_p (operands[1]))
10132 {
10133 enum tls_model model;
10134 rtx tmp = operands[1];
10135 rtx addend = NULL;
10136
10137 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10138 {
10139 addend = XEXP (XEXP (tmp, 0), 1);
10140 tmp = XEXP (XEXP (tmp, 0), 0);
10141 }
10142
10143 gcc_assert (SYMBOL_REF_P (tmp));
10144 model = SYMBOL_REF_TLS_MODEL (tmp);
10145 gcc_assert (model != 0);
10146
10147 tmp = rs6000_legitimize_tls_address (tmp, model);
10148 if (addend)
10149 {
10150 tmp = gen_rtx_PLUS (mode, tmp, addend);
10151 tmp = force_operand (tmp, operands[0]);
10152 }
10153 operands[1] = tmp;
10154 }
10155
10156 /* 128-bit constant floating-point values on Darwin should really be loaded
10157 as two parts. However, this premature splitting is a problem when DFmode
10158 values can go into Altivec registers. */
10159 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10160 && !reg_addr[DFmode].scalar_in_vmx_p)
10161 {
10162 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10163 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10164 DFmode);
10165 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10166 GET_MODE_SIZE (DFmode)),
10167 simplify_gen_subreg (DFmode, operands[1], mode,
10168 GET_MODE_SIZE (DFmode)),
10169 DFmode);
10170 return;
10171 }
10172
10173 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10174 p1:SD) if p1 is not of floating point class and p0 is spilled, as
10175 we have no analogous movsd_store for this. */
10176 if (lra_in_progress && mode == DDmode
10177 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10178 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10179 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10180 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10181 {
10182 enum reg_class cl;
10183 int regno = REGNO (SUBREG_REG (operands[1]));
10184
10185 if (!HARD_REGISTER_NUM_P (regno))
10186 {
10187 cl = reg_preferred_class (regno);
10188 regno = reg_renumber[regno];
10189 if (regno < 0)
10190 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10191 }
10192 if (regno >= 0 && ! FP_REGNO_P (regno))
10193 {
10194 mode = SDmode;
10195 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10196 operands[1] = SUBREG_REG (operands[1]);
10197 }
10198 }
10199 if (lra_in_progress
10200 && mode == SDmode
10201 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10202 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10203 && (REG_P (operands[1])
10204 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10205 {
10206 int regno = reg_or_subregno (operands[1]);
10207 enum reg_class cl;
10208
10209 if (!HARD_REGISTER_NUM_P (regno))
10210 {
10211 cl = reg_preferred_class (regno);
10212 gcc_assert (cl != NO_REGS);
10213 regno = reg_renumber[regno];
10214 if (regno < 0)
10215 regno = ira_class_hard_regs[cl][0];
10216 }
10217 if (FP_REGNO_P (regno))
10218 {
10219 if (GET_MODE (operands[0]) != DDmode)
10220 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10221 emit_insn (gen_movsd_store (operands[0], operands[1]));
10222 }
10223 else if (INT_REGNO_P (regno))
10224 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10225 else
10226 gcc_unreachable ();
10227 return;
10228 }
10229 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10230 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
10231 we have no analogous movsd_load for this. */
10232 if (lra_in_progress && mode == DDmode
10233 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10234 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10235 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10236 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10237 {
10238 enum reg_class cl;
10239 int regno = REGNO (SUBREG_REG (operands[0]));
10240
10241 if (!HARD_REGISTER_NUM_P (regno))
10242 {
10243 cl = reg_preferred_class (regno);
10244 regno = reg_renumber[regno];
10245 if (regno < 0)
10246 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10247 }
10248 if (regno >= 0 && ! FP_REGNO_P (regno))
10249 {
10250 mode = SDmode;
10251 operands[0] = SUBREG_REG (operands[0]);
10252 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10253 }
10254 }
10255 if (lra_in_progress
10256 && mode == SDmode
10257 && (REG_P (operands[0])
10258 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10259 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10260 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10261 {
10262 int regno = reg_or_subregno (operands[0]);
10263 enum reg_class cl;
10264
10265 if (!HARD_REGISTER_NUM_P (regno))
10266 {
10267 cl = reg_preferred_class (regno);
10268 gcc_assert (cl != NO_REGS);
10269 regno = reg_renumber[regno];
10270 if (regno < 0)
10271 regno = ira_class_hard_regs[cl][0];
10272 }
10273 if (FP_REGNO_P (regno))
10274 {
10275 if (GET_MODE (operands[1]) != DDmode)
10276 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10277 emit_insn (gen_movsd_load (operands[0], operands[1]));
10278 }
10279 else if (INT_REGNO_P (regno))
10280 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10281 else
10282 gcc_unreachable ();
10283 return;
10284 }
10285
10286 /* FIXME: In the long term, this switch statement should go away
10287 and be replaced by a sequence of tests based on things like
10288 mode == Pmode. */
10289 switch (mode)
10290 {
10291 case E_HImode:
10292 case E_QImode:
10293 if (CONSTANT_P (operands[1])
10294 && !CONST_INT_P (operands[1]))
10295 operands[1] = force_const_mem (mode, operands[1]);
10296 break;
10297
10298 case E_TFmode:
10299 case E_TDmode:
10300 case E_IFmode:
10301 case E_KFmode:
10302 if (FLOAT128_2REG_P (mode))
10303 rs6000_eliminate_indexed_memrefs (operands);
10304 /* fall through */
10305
10306 case E_DFmode:
10307 case E_DDmode:
10308 case E_SFmode:
10309 case E_SDmode:
10310 if (CONSTANT_P (operands[1])
10311 && ! easy_fp_constant (operands[1], mode))
10312 operands[1] = force_const_mem (mode, operands[1]);
10313 break;
10314
10315 case E_V16QImode:
10316 case E_V8HImode:
10317 case E_V4SFmode:
10318 case E_V4SImode:
10319 case E_V2DFmode:
10320 case E_V2DImode:
10321 case E_V1TImode:
10322 if (CONSTANT_P (operands[1])
10323 && !easy_vector_constant (operands[1], mode))
10324 operands[1] = force_const_mem (mode, operands[1]);
10325 break;
10326
10327 case E_OOmode:
10328 case E_XOmode:
10329 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10330 error ("%qs is an opaque type, and you cannot set it to other values",
10331 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10332 break;
10333
10334 case E_SImode:
10335 case E_DImode:
10336 /* Use the default pattern for addresses of ELF small data. */
10337 if (TARGET_ELF
10338 && mode == Pmode
10339 && DEFAULT_ABI == ABI_V4
10340 && (SYMBOL_REF_P (operands[1])
10341 || GET_CODE (operands[1]) == CONST)
10342 && small_data_operand (operands[1], mode))
10343 {
10344 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10345 return;
10346 }
10347
10348 /* Use the default pattern for loading up PC-relative addresses. */
10349 if (TARGET_PCREL && mode == Pmode
10350 && pcrel_local_or_external_address (operands[1], Pmode))
10351 {
10352 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10353 return;
10354 }
10355
10356 if (DEFAULT_ABI == ABI_V4
10357 && mode == Pmode && mode == SImode
10358 && flag_pic == 1 && got_operand (operands[1], mode))
10359 {
10360 emit_insn (gen_movsi_got (operands[0], operands[1]));
10361 return;
10362 }
10363
10364 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10365 && TARGET_NO_TOC_OR_PCREL
10366 && ! flag_pic
10367 && mode == Pmode
10368 && CONSTANT_P (operands[1])
10369 && GET_CODE (operands[1]) != HIGH
10370 && !CONST_INT_P (operands[1]))
10371 {
10372 rtx target = (!can_create_pseudo_p ()
10373 ? operands[0]
10374 : gen_reg_rtx (mode));
10375
10376 /* If this is a function address on -mcall-aixdesc,
10377 convert it to the address of the descriptor. */
10378 if (DEFAULT_ABI == ABI_AIX
10379 && SYMBOL_REF_P (operands[1])
10380 && XSTR (operands[1], 0)[0] == '.')
10381 {
10382 const char *name = XSTR (operands[1], 0);
10383 rtx new_ref;
10384 while (*name == '.')
10385 name++;
10386 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10387 CONSTANT_POOL_ADDRESS_P (new_ref)
10388 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10389 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10390 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10391 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10392 operands[1] = new_ref;
10393 }
10394
10395 if (DEFAULT_ABI == ABI_DARWIN)
10396 {
10397 #if TARGET_MACHO
10398 /* This is not PIC code, but could require the subset of
10399 indirections used by mdynamic-no-pic. */
10400 if (MACHO_DYNAMIC_NO_PIC_P)
10401 {
10402 /* Take care of any required data indirection. */
10403 operands[1] = rs6000_machopic_legitimize_pic_address (
10404 operands[1], mode, operands[0]);
10405 if (operands[0] != operands[1])
10406 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10407 return;
10408 }
10409 #endif
10410 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10411 emit_insn (gen_macho_low (Pmode, operands[0],
10412 target, operands[1]));
10413 return;
10414 }
10415
10416 emit_insn (gen_elf_high (target, operands[1]));
10417 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10418 return;
10419 }
10420
10421 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10422 and we have put it in the TOC, we just need to make a TOC-relative
10423 reference to it. */
10424 if (TARGET_TOC
10425 && SYMBOL_REF_P (operands[1])
10426 && use_toc_relative_ref (operands[1], mode))
10427 operands[1] = create_TOC_reference (operands[1], operands[0]);
10428 else if (mode == Pmode
10429 && CONSTANT_P (operands[1])
10430 && GET_CODE (operands[1]) != HIGH
10431 && ((REG_P (operands[0])
10432 && FP_REGNO_P (REGNO (operands[0])))
10433 || !CONST_INT_P (operands[1])
10434 || (num_insns_constant (operands[1], mode)
10435 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10436 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10437 && (TARGET_CMODEL == CMODEL_SMALL
10438 || can_create_pseudo_p ()
10439 || (REG_P (operands[0])
10440 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10441 {
10442
10443 #if TARGET_MACHO
10444 /* Darwin uses a special PIC legitimizer. */
10445 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10446 {
10447 operands[1] =
10448 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10449 operands[0]);
10450 if (operands[0] != operands[1])
10451 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10452 return;
10453 }
10454 #endif
10455
10456 /* If we are to limit the number of things we put in the TOC and
10457 this is a symbol plus a constant we can add in one insn,
10458 just put the symbol in the TOC and add the constant. */
10459 if (GET_CODE (operands[1]) == CONST
10460 && TARGET_NO_SUM_IN_TOC
10461 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10462 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10463 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10464 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10465 && ! side_effects_p (operands[0]))
10466 {
10467 rtx sym =
10468 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10469 rtx other = XEXP (XEXP (operands[1], 0), 1);
10470
10471 sym = force_reg (mode, sym);
10472 emit_insn (gen_add3_insn (operands[0], sym, other));
10473 return;
10474 }
10475
10476 operands[1] = force_const_mem (mode, operands[1]);
10477
10478 if (TARGET_TOC
10479 && SYMBOL_REF_P (XEXP (operands[1], 0))
10480 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10481 {
10482 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10483 operands[0]);
10484 operands[1] = gen_const_mem (mode, tocref);
10485 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10486 }
10487 }
10488 break;
10489
10490 case E_TImode:
10491 if (!VECTOR_MEM_VSX_P (TImode))
10492 rs6000_eliminate_indexed_memrefs (operands);
10493 break;
10494
10495 case E_PTImode:
10496 rs6000_eliminate_indexed_memrefs (operands);
10497 break;
10498
10499 default:
10500 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10501 }
10502
10503 /* Above, we may have called force_const_mem which may have returned
10504 an invalid address. If we can, fix this up; otherwise, reload will
10505 have to deal with it. */
10506 if (MEM_P (operands[1]))
10507 operands[1] = validize_mem (operands[1]);
10508
10509 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10510 }
10511 \f
10512
10513 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10514 static void
10515 init_float128_ibm (machine_mode mode)
10516 {
10517 if (!TARGET_XL_COMPAT)
10518 {
10519 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10520 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10521 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10522 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10523
10524 if (!TARGET_HARD_FLOAT)
10525 {
10526 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10527 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10528 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10529 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10530 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10531 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10532 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10533 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10534
10535 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10536 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10537 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10538 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10539 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10540 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10541 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10542 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10543 }
10544 }
10545 else
10546 {
10547 set_optab_libfunc (add_optab, mode, "_xlqadd");
10548 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10549 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10550 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10551 }
10552
10553 /* Add various conversions for IFmode to use the traditional TFmode
10554 names. */
10555 if (mode == IFmode)
10556 {
10557 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10558 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10559 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10560 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10561 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10562 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10563
10564 if (TARGET_POWERPC64)
10565 {
10566 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10567 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10568 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10569 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10570 }
10571 }
10572 }
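
/* Illustrative note (added; not in the original source): with IBM extended
   double and !TARGET_XL_COMPAT, a plain long double addition such as

       long double f (long double a, long double b) { return a + b; }

   is lowered through add_optab to a call to the libgcc routine __gcc_qadd
   registered above; under -mxl-compat the _xlqadd name is used instead.  */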
10573
10574 /* Create a decl for either complex long double multiply or complex long double
10575 divide when long double is IEEE 128-bit floating point. We can't use
10576 __multc3 and __divtc3 because the original long double using IBM extended
10577 double used those names. The complex multiply/divide functions are encoded
10578 as builtin functions with a complex result and 4 scalar inputs. */
10579
10580 static void
10581 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10582 {
10583 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10584 name, NULL_TREE);
10585
10586 set_builtin_decl (fncode, fndecl, true);
10587
10588 if (TARGET_DEBUG_BUILTIN)
10589 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10590
10591 return;
10592 }
10593
10594 /* Set up IEEE 128-bit floating point routines. Use different names if the
10595 arguments can be passed in a vector register. The historical PowerPC
10596 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10597 continue to use that if we aren't using vector registers to pass IEEE
10598 128-bit floating point. */
10599
10600 static void
10601 init_float128_ieee (machine_mode mode)
10602 {
10603 if (FLOAT128_VECTOR_P (mode))
10604 {
10605 static bool complex_muldiv_init_p = false;
10606
10607 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10608 we have clone or target attributes, this will be called a second
10609 time. We want to create the built-in function only once. */
10610 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10611 {
10612 complex_muldiv_init_p = true;
10613 built_in_function fncode_mul =
10614 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10615 - MIN_MODE_COMPLEX_FLOAT);
10616 built_in_function fncode_div =
10617 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10618 - MIN_MODE_COMPLEX_FLOAT);
10619
10620 tree fntype = build_function_type_list (complex_long_double_type_node,
10621 long_double_type_node,
10622 long_double_type_node,
10623 long_double_type_node,
10624 long_double_type_node,
10625 NULL_TREE);
10626
10627 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10628 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10629 }
10630
10631 set_optab_libfunc (add_optab, mode, "__addkf3");
10632 set_optab_libfunc (sub_optab, mode, "__subkf3");
10633 set_optab_libfunc (neg_optab, mode, "__negkf2");
10634 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10635 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10636 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10637 set_optab_libfunc (abs_optab, mode, "__abskf2");
10638 set_optab_libfunc (powi_optab, mode, "__powikf2");
10639
10640 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10641 set_optab_libfunc (ne_optab, mode, "__nekf2");
10642 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10643 set_optab_libfunc (ge_optab, mode, "__gekf2");
10644 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10645 set_optab_libfunc (le_optab, mode, "__lekf2");
10646 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10647
10648 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10649 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10650 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10651 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10652
10653 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10654 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10655 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10656
10657 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10658 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10659 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10660
10661 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10662 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10663 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10664 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10665 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10666 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10667
10668 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10669 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10670 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10671 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10672
10673 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10674 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10675 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10676 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10677
10678 if (TARGET_POWERPC64)
10679 {
10680 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10681 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10682 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10683 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10684 }
10685 }
10686
10687 else
10688 {
10689 set_optab_libfunc (add_optab, mode, "_q_add");
10690 set_optab_libfunc (sub_optab, mode, "_q_sub");
10691 set_optab_libfunc (neg_optab, mode, "_q_neg");
10692 set_optab_libfunc (smul_optab, mode, "_q_mul");
10693 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10694 if (TARGET_PPC_GPOPT)
10695 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10696
10697 set_optab_libfunc (eq_optab, mode, "_q_feq");
10698 set_optab_libfunc (ne_optab, mode, "_q_fne");
10699 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10700 set_optab_libfunc (ge_optab, mode, "_q_fge");
10701 set_optab_libfunc (lt_optab, mode, "_q_flt");
10702 set_optab_libfunc (le_optab, mode, "_q_fle");
10703
10704 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10705 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10706 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10707 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10708 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10709 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10710 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10711 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10712 }
10713 }
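
/* Illustrative note (added; not in the original source): when IEEE 128-bit
   values are passed in vector registers (FLOAT128_VECTOR_P), a KFmode (or
   -mabi=ieeelongdouble TFmode) addition lowers to a call to __addkf3 and an
   equality test to __eqkf2; on the older targets handled by the else branch
   the same operations use the historical _q_add and _q_feq names.  */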
10714
10715 static void
10716 rs6000_init_libfuncs (void)
10717 {
10718 /* __float128 support. */
10719 if (TARGET_FLOAT128_TYPE)
10720 {
10721 init_float128_ibm (IFmode);
10722 init_float128_ieee (KFmode);
10723 }
10724
10725 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10726 if (TARGET_LONG_DOUBLE_128)
10727 {
10728 if (!TARGET_IEEEQUAD)
10729 init_float128_ibm (TFmode);
10730
10731 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10732 else
10733 init_float128_ieee (TFmode);
10734 }
10735 }
10736
10737 /* Emit a potentially record-form instruction, setting DST from SRC.
10738 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10739 signed comparison of DST with zero. If DOT is 1, the generated RTL
10740 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10741 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10742 a separate COMPARE. */
10743
10744 void
10745 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10746 {
10747 if (dot == 0)
10748 {
10749 emit_move_insn (dst, src);
10750 return;
10751 }
10752
10753 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10754 {
10755 emit_move_insn (dst, src);
10756 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10757 return;
10758 }
10759
10760 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10761 if (dot == 1)
10762 {
10763 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10764 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10765 }
10766 else
10767 {
10768 rtx set = gen_rtx_SET (dst, src);
10769 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10770 }
10771 }
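
/* Illustrative example (added; not in the original source): for DOT == 1
   with CCREG being CR0, the RTL emitted above has the shape

       (parallel [(set CCREG (compare:CC SRC (const_int 0)))
                  (clobber DST)])

   while DOT == 2 replaces the clobber with (set DST SRC).  Both forms are
   meant to match the record-form (dot) patterns in rs6000.md.  */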
10772
10773 \f
10774 /* A validation routine: say whether CODE, a condition code, and MODE
10775 match. The other alternatives either don't make sense or should
10776 never be generated. */
10777
10778 void
10779 validate_condition_mode (enum rtx_code code, machine_mode mode)
10780 {
10781 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10782 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10783 && GET_MODE_CLASS (mode) == MODE_CC);
10784
10785 /* These don't make sense. */
10786 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10787 || mode != CCUNSmode);
10788
10789 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10790 || mode == CCUNSmode);
10791
10792 gcc_assert (mode == CCFPmode
10793 || (code != ORDERED && code != UNORDERED
10794 && code != UNEQ && code != LTGT
10795 && code != UNGT && code != UNLT
10796 && code != UNGE && code != UNLE));
10797
10798 /* These are invalid; the information is not there. */
10799 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10800 }
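
/* Illustrative example (added; not in the original source): unsigned
   comparisons must carry CCUNSmode, so (gtu:CCUNS ...) passes the checks
   above, while a combination such as GTU with plain CCmode, or LTGT with
   anything but CCFPmode, would trip the assertions.  */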
10801
10802 \f
10803 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10804 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10805 not zero, store there the bit offset (counted from the right) where
10806 the single stretch of 1 bits begins; and similarly for B, the bit
10807 offset where it ends. */
10808
10809 bool
10810 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10811 {
10812 unsigned HOST_WIDE_INT val = INTVAL (mask);
10813 unsigned HOST_WIDE_INT bit;
10814 int nb, ne;
10815 int n = GET_MODE_PRECISION (mode);
10816
10817 if (mode != DImode && mode != SImode)
10818 return false;
10819
10820 if (INTVAL (mask) >= 0)
10821 {
10822 bit = val & -val;
10823 ne = exact_log2 (bit);
10824 nb = exact_log2 (val + bit);
10825 }
10826 else if (val + 1 == 0)
10827 {
10828 nb = n;
10829 ne = 0;
10830 }
10831 else if (val & 1)
10832 {
10833 val = ~val;
10834 bit = val & -val;
10835 nb = exact_log2 (bit);
10836 ne = exact_log2 (val + bit);
10837 }
10838 else
10839 {
10840 bit = val & -val;
10841 ne = exact_log2 (bit);
10842 if (val + bit == 0)
10843 nb = n;
10844 else
10845 nb = 0;
10846 }
10847
10848 nb--;
10849
10850 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10851 return false;
10852
10853 if (b)
10854 *b = nb;
10855 if (e)
10856 *e = ne;
10857
10858 return true;
10859 }
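
/* Worked examples for rs6000_is_valid_mask (added; illustrative only):

     0x000ff000 in SImode: a single stretch of ones in bits 12..19
     (counting from the right), so *E = 12, *B = 19, and we return true.

     0xff0000ff in SImode: a wrap-around mask; the ones run from bit 24
     around through bit 7, giving *E = 24 and *B = 7.

     0x0f0f0f0f: several separate stretches of ones, so val + bit is not
     a power of two, exact_log2 fails, and we return false.  */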
10860
10861 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10862 or rldicr instruction, to implement an AND with it in mode MODE. */
10863
10864 bool
10865 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10866 {
10867 int nb, ne;
10868
10869 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10870 return false;
10871
10872 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10873 does not wrap. */
10874 if (mode == DImode)
10875 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10876
10877 /* For SImode, rlwinm can do everything. */
10878 if (mode == SImode)
10879 return (nb < 32 && ne < 32);
10880
10881 return false;
10882 }
10883
10884 /* Return the instruction template for an AND with mask in mode MODE, with
10885 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10886
10887 const char *
10888 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10889 {
10890 int nb, ne;
10891
10892 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10893 gcc_unreachable ();
10894
10895 if (mode == DImode && ne == 0)
10896 {
10897 operands[3] = GEN_INT (63 - nb);
10898 if (dot)
10899 return "rldicl. %0,%1,0,%3";
10900 return "rldicl %0,%1,0,%3";
10901 }
10902
10903 if (mode == DImode && nb == 63)
10904 {
10905 operands[3] = GEN_INT (63 - ne);
10906 if (dot)
10907 return "rldicr. %0,%1,0,%3";
10908 return "rldicr %0,%1,0,%3";
10909 }
10910
10911 if (nb < 32 && ne < 32)
10912 {
10913 operands[3] = GEN_INT (31 - nb);
10914 operands[4] = GEN_INT (31 - ne);
10915 if (dot)
10916 return "rlwinm. %0,%1,0,%3,%4";
10917 return "rlwinm %0,%1,0,%3,%4";
10918 }
10919
10920 gcc_unreachable ();
10921 }
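
/* Examples of the templates selected above (added; illustrative only):

     DImode AND with 0xff (ne == 0, nb == 7) gives
       "rldicl %0,%1,0,56"    -- keep only the low 8 bits.

     DImode AND with ~0xff (nb == 63, ne == 8) gives
       "rldicr %0,%1,0,55"    -- keep only the high 56 bits.

     SImode AND with 0x000ff000 (nb == 19, ne == 12) gives
       "rlwinm %0,%1,0,12,19" -- MB/ME use big-endian bit numbering.  */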
10922
10923 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10924 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10925 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10926
10927 bool
10928 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10929 {
10930 int nb, ne;
10931
10932 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10933 return false;
10934
10935 int n = GET_MODE_PRECISION (mode);
10936 int sh = -1;
10937
10938 if (CONST_INT_P (XEXP (shift, 1)))
10939 {
10940 sh = INTVAL (XEXP (shift, 1));
10941 if (sh < 0 || sh >= n)
10942 return false;
10943 }
10944
10945 rtx_code code = GET_CODE (shift);
10946
10947 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10948 if (sh == 0)
10949 code = ROTATE;
10950
10951 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10952 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10953 code = ASHIFT;
10954 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10955 {
10956 code = LSHIFTRT;
10957 sh = n - sh;
10958 }
10959
10960 /* DImode rotates need rld*. */
10961 if (mode == DImode && code == ROTATE)
10962 return (nb == 63 || ne == 0 || ne == sh);
10963
10964 /* SImode rotates need rlw*. */
10965 if (mode == SImode && code == ROTATE)
10966 return (nb < 32 && ne < 32 && sh < 32);
10967
10968 /* Wrap-around masks are only okay for rotates. */
10969 if (ne > nb)
10970 return false;
10971
10972 /* Variable shifts are only okay for rotates. */
10973 if (sh < 0)
10974 return false;
10975
10976 /* Don't allow ASHIFT if the mask is wrong for that. */
10977 if (code == ASHIFT && ne < sh)
10978 return false;
10979
10980 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10981 if the mask is wrong for that. */
10982 if (nb < 32 && ne < 32 && sh < 32
10983 && !(code == LSHIFTRT && nb >= 32 - sh))
10984 return true;
10985
10986 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10987 if the mask is wrong for that. */
10988 if (code == LSHIFTRT)
10989 sh = 64 - sh;
10990 if (nb == 63 || ne == 0 || ne == sh)
10991 return !(code == LSHIFTRT && nb >= sh);
10992
10993 return false;
10994 }
10995
10996 /* Return the instruction template for a shift with mask in mode MODE, with
10997 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10998
10999 const char *
11000 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11001 {
11002 int nb, ne;
11003
11004 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11005 gcc_unreachable ();
11006
11007 if (mode == DImode && ne == 0)
11008 {
11009 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11010 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11011 operands[3] = GEN_INT (63 - nb);
11012 if (dot)
11013 return "rld%I2cl. %0,%1,%2,%3";
11014 return "rld%I2cl %0,%1,%2,%3";
11015 }
11016
11017 if (mode == DImode && nb == 63)
11018 {
11019 operands[3] = GEN_INT (63 - ne);
11020 if (dot)
11021 return "rld%I2cr. %0,%1,%2,%3";
11022 return "rld%I2cr %0,%1,%2,%3";
11023 }
11024
11025 if (mode == DImode
11026 && GET_CODE (operands[4]) != LSHIFTRT
11027 && CONST_INT_P (operands[2])
11028 && ne == INTVAL (operands[2]))
11029 {
11030 operands[3] = GEN_INT (63 - nb);
11031 if (dot)
11032 return "rld%I2c. %0,%1,%2,%3";
11033 return "rld%I2c %0,%1,%2,%3";
11034 }
11035
11036 if (nb < 32 && ne < 32)
11037 {
11038 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11039 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11040 operands[3] = GEN_INT (31 - nb);
11041 operands[4] = GEN_INT (31 - ne);
11042 /* This insn can also be a 64-bit rotate with mask that really makes
11043 it just a shift right (with mask); the %h below are to adjust for
11044 that situation (shift count is >= 32 in that case). */
11045 if (dot)
11046 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11047 return "rlw%I2nm %0,%1,%h2,%3,%4";
11048 }
11049
11050 gcc_unreachable ();
11051 }
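
/* Example (added; illustrative only): an (ashift:DI %1 8) under mask
   0xffffffffffffff00 has nb == 63 and ne == 8, so the second case above
   yields "rldicr %0,%1,8,55": rotate left 8 bits and clear the 8
   rightmost bits, which is exactly the shift-and-mask.  */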
11052
11053 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11054 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11055 ASHIFT, or LSHIFTRT) in mode MODE. */
11056
11057 bool
11058 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11059 {
11060 int nb, ne;
11061
11062 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11063 return false;
11064
11065 int n = GET_MODE_PRECISION (mode);
11066
11067 int sh = INTVAL (XEXP (shift, 1));
11068 if (sh < 0 || sh >= n)
11069 return false;
11070
11071 rtx_code code = GET_CODE (shift);
11072
11073 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11074 if (sh == 0)
11075 code = ROTATE;
11076
11077 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11078 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11079 code = ASHIFT;
11080 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11081 {
11082 code = LSHIFTRT;
11083 sh = n - sh;
11084 }
11085
11086 /* DImode rotates need rldimi. */
11087 if (mode == DImode && code == ROTATE)
11088 return (ne == sh);
11089
11090 /* SImode rotates need rlwimi. */
11091 if (mode == SImode && code == ROTATE)
11092 return (nb < 32 && ne < 32 && sh < 32);
11093
11094 /* Wrap-around masks are only okay for rotates. */
11095 if (ne > nb)
11096 return false;
11097
11098 /* Don't allow ASHIFT if the mask is wrong for that. */
11099 if (code == ASHIFT && ne < sh)
11100 return false;
11101
11102 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11103 if the mask is wrong for that. */
11104 if (nb < 32 && ne < 32 && sh < 32
11105 && !(code == LSHIFTRT && nb >= 32 - sh))
11106 return true;
11107
11108 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11109 if the mask is wrong for that. */
11110 if (code == LSHIFTRT)
11111 sh = 64 - sh;
11112 if (ne == sh)
11113 return !(code == LSHIFTRT && nb >= sh);
11114
11115 return false;
11116 }
11117
11118 /* Return the instruction template for an insert with mask in mode MODE, with
11119 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11120
11121 const char *
11122 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11123 {
11124 int nb, ne;
11125
11126 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11127 gcc_unreachable ();
11128
11129 /* Prefer rldimi because rlwimi is cracked. */
11130 if (TARGET_POWERPC64
11131 && (!dot || mode == DImode)
11132 && GET_CODE (operands[4]) != LSHIFTRT
11133 && ne == INTVAL (operands[2]))
11134 {
11135 operands[3] = GEN_INT (63 - nb);
11136 if (dot)
11137 return "rldimi. %0,%1,%2,%3";
11138 return "rldimi %0,%1,%2,%3";
11139 }
11140
11141 if (nb < 32 && ne < 32)
11142 {
11143 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11144 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11145 operands[3] = GEN_INT (31 - nb);
11146 operands[4] = GEN_INT (31 - ne);
11147 if (dot)
11148 return "rlwimi. %0,%1,%2,%3,%4";
11149 return "rlwimi %0,%1,%2,%3,%4";
11150 }
11151
11152 gcc_unreachable ();
11153 }
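
/* Example (added; illustrative only): inserting the low halfword of %1
   into the high halfword of %0, i.e. shift 16 with mask 0xffff0000 in
   SImode, has nb == 31 and ne == 16 and yields "rlwimi %0,%1,16,0,15".  */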
11154
11155 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11156 using two machine instructions. */
11157
11158 bool
11159 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11160 {
11161 /* There are two kinds of AND we can handle with two insns:
11162 1) those we can do with two rl* insns;
11163 2) ori[s];xori[s].
11164
11165 We do not handle that last case yet. */
11166
11167 /* If there is just one stretch of ones, we can do it. */
11168 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11169 return true;
11170
11171 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11172 one insn, we can do the whole thing with two. */
11173 unsigned HOST_WIDE_INT val = INTVAL (c);
11174 unsigned HOST_WIDE_INT bit1 = val & -val;
11175 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11176 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11177 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11178 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11179 }
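
/* Example (added; illustrative only): 0xff00ff00 is not a single stretch
   of ones, but filling its lowest hole gives 0xffffff00, which is a valid
   rlwinm mask; since 0xff00ff00 == 0xff00ffff & 0xffffff00 and both masks
   are rlwinm-able, the AND can be done in two insns and we return true.  */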
11180
11181 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11182 If EXPAND is true, split rotate-and-mask instructions we generate to
11183 their constituent parts as well (this is used during expand); if DOT
11184 is 1, make the last insn a record-form instruction clobbering the
11185 destination GPR and setting the CC reg (from operands[3]); if 2, set
11186 that GPR as well as the CC reg. */
11187
11188 void
11189 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11190 {
11191 gcc_assert (!(expand && dot));
11192
11193 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11194
11195 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11196 shift right. This generates better code than doing the masks without
11197 shifts, or shifting first right and then left. */
11198 int nb, ne;
11199 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11200 {
11201 gcc_assert (mode == DImode);
11202
11203 int shift = 63 - nb;
11204 if (expand)
11205 {
11206 rtx tmp1 = gen_reg_rtx (DImode);
11207 rtx tmp2 = gen_reg_rtx (DImode);
11208 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11209 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11210 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11211 }
11212 else
11213 {
11214 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11215 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11216 emit_move_insn (operands[0], tmp);
11217 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11218 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11219 }
11220 return;
11221 }
11222
11223 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11224 that does the rest. */
11225 unsigned HOST_WIDE_INT bit1 = val & -val;
11226 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11227 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11228 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11229
11230 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11231 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11232
11233 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11234
11235 /* Two "no-rotate"-and-mask instructions, for SImode. */
11236 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11237 {
11238 gcc_assert (mode == SImode);
11239
11240 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11241 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11242 emit_move_insn (reg, tmp);
11243 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11244 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11245 return;
11246 }
11247
11248 gcc_assert (mode == DImode);
11249
11250 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11251 insns; we have to do the first in SImode, because it wraps. */
11252 if (mask2 <= 0xffffffff
11253 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11254 {
11255 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11256 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11257 GEN_INT (mask1));
11258 rtx reg_low = gen_lowpart (SImode, reg);
11259 emit_move_insn (reg_low, tmp);
11260 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11261 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11262 return;
11263 }
11264
11265 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11266 at the top end), rotate back and clear the other hole. */
11267 int right = exact_log2 (bit3);
11268 int left = 64 - right;
11269
11270 /* Rotate the mask too. */
11271 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11272
11273 if (expand)
11274 {
11275 rtx tmp1 = gen_reg_rtx (DImode);
11276 rtx tmp2 = gen_reg_rtx (DImode);
11277 rtx tmp3 = gen_reg_rtx (DImode);
11278 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11279 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11280 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11281 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11282 }
11283 else
11284 {
11285 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11286 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11287 emit_move_insn (operands[0], tmp);
11288 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11289 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11290 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11291 }
11292 }
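
/* Example (added; illustrative only): a DImode AND with 0x0000ffff00000000
   is a single stretch (nb == 47, ne == 32) that no one rl* insn can do, so
   the first case above emits: shift left 16, AND with 0xffff000000000000
   (a valid rldicr mask), then shift right 16.  */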
11293 \f
11294 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
11295 for lfq and stfq insns iff the registers are hard registers. */
11296
11297 int
11298 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11299 {
11300 /* We might have been passed a SUBREG. */
11301 if (!REG_P (reg1) || !REG_P (reg2))
11302 return 0;
11303
11304 /* We might have been passed non-floating-point registers. */
11305 if (!FP_REGNO_P (REGNO (reg1))
11306 || !FP_REGNO_P (REGNO (reg2)))
11307 return 0;
11308
11309 return (REGNO (reg1) == REGNO (reg2) - 1);
11310 }
11311
11312 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insns.
11313 addr1 and addr2 must be in consecutive memory locations
11314 (addr2 == addr1 + 8). */
11315
11316 int
11317 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11318 {
11319 rtx addr1, addr2;
11320 unsigned int reg1, reg2;
11321 int offset1, offset2;
11322
11323 /* The mems cannot be volatile. */
11324 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11325 return 0;
11326
11327 addr1 = XEXP (mem1, 0);
11328 addr2 = XEXP (mem2, 0);
11329
11330 /* Extract an offset (if used) from the first addr. */
11331 if (GET_CODE (addr1) == PLUS)
11332 {
11333 /* If not a REG, return zero. */
11334 if (!REG_P (XEXP (addr1, 0)))
11335 return 0;
11336 else
11337 {
11338 reg1 = REGNO (XEXP (addr1, 0));
11339 /* The offset must be constant! */
11340 if (!CONST_INT_P (XEXP (addr1, 1)))
11341 return 0;
11342 offset1 = INTVAL (XEXP (addr1, 1));
11343 }
11344 }
11345 else if (!REG_P (addr1))
11346 return 0;
11347 else
11348 {
11349 reg1 = REGNO (addr1);
11350 /* This was a simple (mem (reg)) expression. Offset is 0. */
11351 offset1 = 0;
11352 }
11353
11354 /* And now for the second addr. */
11355 if (GET_CODE (addr2) == PLUS)
11356 {
11357 /* If not a REG, return zero. */
11358 if (!REG_P (XEXP (addr2, 0)))
11359 return 0;
11360 else
11361 {
11362 reg2 = REGNO (XEXP (addr2, 0));
11363 /* The offset must be constant. */
11364 if (!CONST_INT_P (XEXP (addr2, 1)))
11365 return 0;
11366 offset2 = INTVAL (XEXP (addr2, 1));
11367 }
11368 }
11369 else if (!REG_P (addr2))
11370 return 0;
11371 else
11372 {
11373 reg2 = REGNO (addr2);
11374 /* This was a simple (mem (reg)) expression. Offset is 0. */
11375 offset2 = 0;
11376 }
11377
11378 /* Both of these must have the same base register. */
11379 if (reg1 != reg2)
11380 return 0;
11381
11382 /* The offset for the second addr must be 8 more than the first addr. */
11383 if (offset2 != offset1 + 8)
11384 return 0;
11385
11386 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11387 instructions. */
11388 return 1;
11389 }
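
/* Example (added; illustrative only): (mem (plus (reg 9) (const_int 16)))
   followed by (mem (plus (reg 9) (const_int 24))) uses one base register
   and offsets differing by 8, so the pair can become a single lfq/stfq.  */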
11390 \f
11391 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
11392 need to use DDmode; in all other cases we can use the same mode. */
11393 static machine_mode
11394 rs6000_secondary_memory_needed_mode (machine_mode mode)
11395 {
11396 if (lra_in_progress && mode == SDmode)
11397 return DDmode;
11398 return mode;
11399 }
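
/* Note (added; rationale sketch, not in the original source): this matters
   because SDmode values are kept in FPRs in DDmode format, so a stack slot
   used by LRA to move an SDmode value between register files must be a
   full 8-byte DDmode slot.  */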
11400
11401 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11402 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11403 only work on the traditional altivec registers, note if an altivec register
11404 was chosen. */
11405
11406 static enum rs6000_reg_type
11407 register_to_reg_type (rtx reg, bool *is_altivec)
11408 {
11409 HOST_WIDE_INT regno;
11410 enum reg_class rclass;
11411
11412 if (SUBREG_P (reg))
11413 reg = SUBREG_REG (reg);
11414
11415 if (!REG_P (reg))
11416 return NO_REG_TYPE;
11417
11418 regno = REGNO (reg);
11419 if (!HARD_REGISTER_NUM_P (regno))
11420 {
11421 if (!lra_in_progress && !reload_completed)
11422 return PSEUDO_REG_TYPE;
11423
11424 regno = true_regnum (reg);
11425 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11426 return PSEUDO_REG_TYPE;
11427 }
11428
11429 gcc_assert (regno >= 0);
11430
11431 if (is_altivec && ALTIVEC_REGNO_P (regno))
11432 *is_altivec = true;
11433
11434 rclass = rs6000_regno_regclass[regno];
11435 return reg_class_to_reg_type[(int)rclass];
11436 }
11437
11438 /* Helper function to return the cost of adding a TOC entry address. */
11439
11440 static inline int
11441 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
11442 {
11443 int ret;
11444
11445 if (TARGET_CMODEL != CMODEL_SMALL)
11446 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
11447
11448 else
11449 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
11450
11451 return ret;
11452 }
11453
11454 /* Helper function for rs6000_secondary_reload to determine whether the memory
11455 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11456 needs reloading. Return negative if the memory is not handled by the memory
11457 helper functions and to try a different reload method, 0 if no additional
11458 instructions are need, and positive to give the extra cost for the
11459 memory. */
11460
11461 static int
11462 rs6000_secondary_reload_memory (rtx addr,
11463 enum reg_class rclass,
11464 machine_mode mode)
11465 {
11466 int extra_cost = 0;
11467 rtx reg, and_arg, plus_arg0, plus_arg1;
11468 addr_mask_type addr_mask;
11469 const char *type = NULL;
11470 const char *fail_msg = NULL;
11471
11472 if (GPR_REG_CLASS_P (rclass))
11473 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11474
11475 else if (rclass == FLOAT_REGS)
11476 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11477
11478 else if (rclass == ALTIVEC_REGS)
11479 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11480
11481 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11482 else if (rclass == VSX_REGS)
11483 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11484 & ~RELOAD_REG_AND_M16);
11485
11486 /* If the register allocator hasn't made up its mind yet on the register
11487 class to use, settle on defaults to use. */
11488 else if (rclass == NO_REGS)
11489 {
11490 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11491 & ~RELOAD_REG_AND_M16);
11492
11493 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11494 addr_mask &= ~(RELOAD_REG_INDEXED
11495 | RELOAD_REG_PRE_INCDEC
11496 | RELOAD_REG_PRE_MODIFY);
11497 }
11498
11499 else
11500 addr_mask = 0;
11501
11502 /* If the register isn't valid in this register class, just return now. */
11503 if ((addr_mask & RELOAD_REG_VALID) == 0)
11504 {
11505 if (TARGET_DEBUG_ADDR)
11506 {
11507 fprintf (stderr,
11508 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11509 "not valid in class\n",
11510 GET_MODE_NAME (mode), reg_class_names[rclass]);
11511 debug_rtx (addr);
11512 }
11513
11514 return -1;
11515 }
11516
11517 switch (GET_CODE (addr))
11518 {
11519 /* Does the register class support auto update forms for this mode? We
11520 don't need a scratch register, since the powerpc only supports
11521 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11522 case PRE_INC:
11523 case PRE_DEC:
11524 reg = XEXP (addr, 0);
11525 if (!base_reg_operand (reg, GET_MODE (reg)))
11526 {
11527 fail_msg = "no base register #1";
11528 extra_cost = -1;
11529 }
11530
11531 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11532 {
11533 extra_cost = 1;
11534 type = "update";
11535 }
11536 break;
11537
11538 case PRE_MODIFY:
11539 reg = XEXP (addr, 0);
11540 plus_arg1 = XEXP (addr, 1);
11541 if (!base_reg_operand (reg, GET_MODE (reg))
11542 || GET_CODE (plus_arg1) != PLUS
11543 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11544 {
11545 fail_msg = "bad PRE_MODIFY";
11546 extra_cost = -1;
11547 }
11548
11549 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11550 {
11551 extra_cost = 1;
11552 type = "update";
11553 }
11554 break;
11555
11556 /* Do we need to simulate AND -16 to clear the bottom address bits used
11557 in VMX load/stores? Only allow the AND for vector sizes. */
11558 case AND:
11559 and_arg = XEXP (addr, 0);
11560 if (GET_MODE_SIZE (mode) != 16
11561 || !CONST_INT_P (XEXP (addr, 1))
11562 || INTVAL (XEXP (addr, 1)) != -16)
11563 {
11564 fail_msg = "bad Altivec AND #1";
11565 extra_cost = -1;
11566 }
11567
11568 if (rclass != ALTIVEC_REGS)
11569 {
11570 if (legitimate_indirect_address_p (and_arg, false))
11571 extra_cost = 1;
11572
11573 else if (legitimate_indexed_address_p (and_arg, false))
11574 extra_cost = 2;
11575
11576 else
11577 {
11578 fail_msg = "bad Altivec AND #2";
11579 extra_cost = -1;
11580 }
11581
11582 type = "and";
11583 }
11584 break;
11585
11586 /* If this is an indirect address, make sure it is a base register. */
11587 case REG:
11588 case SUBREG:
11589 if (!legitimate_indirect_address_p (addr, false))
11590 {
11591 extra_cost = 1;
11592 type = "move";
11593 }
11594 break;
11595
11596 /* If this is an indexed address, make sure the register class can handle
11597 indexed addresses for this mode. */
11598 case PLUS:
11599 plus_arg0 = XEXP (addr, 0);
11600 plus_arg1 = XEXP (addr, 1);
11601
11602 /* (plus (plus (reg) (constant)) (constant)) is generated during
11603 push_reload processing, so handle it now. */
11604 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11605 {
11606 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11607 {
11608 extra_cost = 1;
11609 type = "offset";
11610 }
11611 }
11612
11613 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11614 push_reload processing, so handle it now. */
11615 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11616 {
11617 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11618 {
11619 extra_cost = 1;
11620 type = "indexed #2";
11621 }
11622 }
11623
11624 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11625 {
11626 fail_msg = "no base register #2";
11627 extra_cost = -1;
11628 }
11629
11630 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11631 {
11632 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11633 || !legitimate_indexed_address_p (addr, false))
11634 {
11635 extra_cost = 1;
11636 type = "indexed";
11637 }
11638 }
11639
11640 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11641 && CONST_INT_P (plus_arg1))
11642 {
11643 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11644 {
11645 extra_cost = 1;
11646 type = "vector d-form offset";
11647 }
11648 }
11649
11650 /* Make sure the register class can handle offset addresses. */
11651 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11652 {
11653 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11654 {
11655 extra_cost = 1;
11656 type = "offset #2";
11657 }
11658 }
11659
11660 else
11661 {
11662 fail_msg = "bad PLUS";
11663 extra_cost = -1;
11664 }
11665
11666 break;
11667
11668 case LO_SUM:
11669 /* Quad offsets are restricted and can't handle normal addresses. */
11670 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11671 {
11672 extra_cost = -1;
11673 type = "vector d-form lo_sum";
11674 }
11675
11676 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11677 {
11678 fail_msg = "bad LO_SUM";
11679 extra_cost = -1;
11680 }
11681
11682 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11683 {
11684 extra_cost = 1;
11685 type = "lo_sum";
11686 }
11687 break;
11688
11689 /* Static addresses need to create a TOC entry. */
11690 case CONST:
11691 case SYMBOL_REF:
11692 case LABEL_REF:
11693 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11694 {
11695 extra_cost = -1;
11696 type = "vector d-form lo_sum #2";
11697 }
11698
11699 else
11700 {
11701 type = "address";
11702 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11703 }
11704 break;
11705
11706 /* TOC references look like offsettable memory. */
11707 case UNSPEC:
11708 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11709 {
11710 fail_msg = "bad UNSPEC";
11711 extra_cost = -1;
11712 }
11713
11714 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11715 {
11716 extra_cost = -1;
11717 type = "vector d-form lo_sum #3";
11718 }
11719
11720 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11721 {
11722 extra_cost = 1;
11723 type = "toc reference";
11724 }
11725 break;
11726
11727 default:
11728 {
11729 fail_msg = "bad address";
11730 extra_cost = -1;
11731 }
11732 }
11733
11734 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11735 {
11736 if (extra_cost < 0)
11737 fprintf (stderr,
11738 "rs6000_secondary_reload_memory error: mode = %s, "
11739 "class = %s, addr_mask = '%s', %s\n",
11740 GET_MODE_NAME (mode),
11741 reg_class_names[rclass],
11742 rs6000_debug_addr_mask (addr_mask, false),
11743 (fail_msg != NULL) ? fail_msg : "<bad address>");
11744
11745 else
11746 fprintf (stderr,
11747 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11748 "addr_mask = '%s', extra cost = %d, %s\n",
11749 GET_MODE_NAME (mode),
11750 reg_class_names[rclass],
11751 rs6000_debug_addr_mask (addr_mask, false),
11752 extra_cost,
11753 (type) ? type : "<none>");
11754
11755 debug_rtx (addr);
11756 }
11757
11758 return extra_cost;
11759 }
11760
11761 /* Helper function for rs6000_secondary_reload to return true if a move to a
11762 different register class is really a simple move. */
11763
11764 static bool
11765 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11766 enum rs6000_reg_type from_type,
11767 machine_mode mode)
11768 {
11769 int size = GET_MODE_SIZE (mode);
11770
11771 /* Add support for various direct moves available. In this function, we only
11772 look at cases where we don't need any extra registers, and one or more
11773 simple move insns are issued. Originally small integers are not allowed
11774 in FPR/VSX registers. Single precision binary floating is not a simple
11775 move because we need to convert to the single precision memory layout.
11776 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11777 need special direct move handling, which we do not support yet. */
11778 if (TARGET_DIRECT_MOVE
11779 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11780 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11781 {
11782 if (TARGET_POWERPC64)
11783 {
11784 /* ISA 2.07: MTVSRD or MFVSRD. */
11785 if (size == 8)
11786 return true;
11787
11788 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11789 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11790 return true;
11791 }
11792
11793 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11794 if (TARGET_P8_VECTOR)
11795 {
11796 if (mode == SImode)
11797 return true;
11798
11799 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11800 return true;
11801 }
11802
11803 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11804 if (mode == SDmode)
11805 return true;
11806 }
11807
11808 /* Move to/from SPR. */
11809 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11810 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11811 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11812 return true;
11813
11814 return false;
11815 }
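
/* Example (added; illustrative only): on a 64-bit ISA 2.07 target, a
   DImode move from a GPR to a VSX register is a single mtvsrd, so this
   returns true; an SFmode move is rejected here because it also needs the
   format conversion handled in rs6000_secondary_reload_direct_move.  */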
11816
11817 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
11818 special direct moves that involve allocating an extra register. Return
11819 true if there is a helper insn for the move, recording its insn code and
11820 extra cost in SRI; return false if not. */
11821
11822 static bool
11823 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11824 enum rs6000_reg_type from_type,
11825 machine_mode mode,
11826 secondary_reload_info *sri,
11827 bool altivec_p)
11828 {
11829 bool ret = false;
11830 enum insn_code icode = CODE_FOR_nothing;
11831 int cost = 0;
11832 int size = GET_MODE_SIZE (mode);
11833
11834 if (TARGET_POWERPC64 && size == 16)
11835 {
11836 /* Handle moving 128-bit values from GPRs to VSX registers on
11837 ISA 2.07 (power8, power9) when running in 64-bit mode using
11838 XXPERMDI to glue the two 64-bit values back together. */
11839 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11840 {
11841 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11842 icode = reg_addr[mode].reload_vsx_gpr;
11843 }
11844
11845 /* Handle moving 128-bit values from VSX registers to GPRs on
11846 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11847 bottom 64-bit value. */
11848 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11849 {
11850 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11851 icode = reg_addr[mode].reload_gpr_vsx;
11852 }
11853 }
11854
11855 else if (TARGET_POWERPC64 && mode == SFmode)
11856 {
11857 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11858 {
11859 cost = 3; /* xscvdpspn, mfvsrd, and. */
11860 icode = reg_addr[mode].reload_gpr_vsx;
11861 }
11862
11863 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11864 {
11865 cost = 2; /* mtvsrz, xscvspdpn. */
11866 icode = reg_addr[mode].reload_vsx_gpr;
11867 }
11868 }
11869
11870 else if (!TARGET_POWERPC64 && size == 8)
11871 {
11872 /* Handle moving 64-bit values from GPRs to floating point registers on
11873 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11874 32-bit values back together. Altivec register classes must be handled
11875 specially since a different instruction is used, and the secondary
11876 reload support requires a single instruction class in the scratch
11877 register constraint. However, right now TFmode is not allowed in
11878 Altivec registers, so the pattern will never match. */
11879 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11880 {
11881 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11882 icode = reg_addr[mode].reload_fpr_gpr;
11883 }
11884 }
11885
11886 if (icode != CODE_FOR_nothing)
11887 {
11888 ret = true;
11889 if (sri)
11890 {
11891 sri->icode = icode;
11892 sri->extra_cost = cost;
11893 }
11894 }
11895
11896 return ret;
11897 }
11898
11899 /* Return whether a move between two register classes can be done either
11900 directly (simple move) or via a pattern that uses a single extra temporary
11901 (using ISA 2.07's direct move in this case). */
11902
11903 static bool
11904 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11905 enum rs6000_reg_type from_type,
11906 machine_mode mode,
11907 secondary_reload_info *sri,
11908 bool altivec_p)
11909 {
11910 /* Fall back to load/store reloads if either type is not a register. */
11911 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11912 return false;
11913
11914 /* If we haven't allocated registers yet, assume the move can be done for the
11915 standard register types. */
11916 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11917 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11918 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11919 return true;
11920
11921 /* A move within the same set of registers is a simple move for non-specialized
11922 registers. */
11923 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11924 return true;
11925
11926 /* Check whether a simple move can be done directly. */
11927 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11928 {
11929 if (sri)
11930 {
11931 sri->icode = CODE_FOR_nothing;
11932 sri->extra_cost = 0;
11933 }
11934 return true;
11935 }
11936
11937 /* Now check if we can do it in a few steps. */
11938 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11939 altivec_p);
11940 }
11941
11942 /* Inform reload about cases where moving X with a mode MODE to a register in
11943 RCLASS requires an extra scratch or immediate register. Return the class
11944 needed for the immediate register.
11945
11946 For VSX and Altivec, we may need a register to convert sp+offset into
11947 reg+sp.
11948
11949 For misaligned 64-bit gpr loads and stores we need a register to
11950 convert an offset address to indirect. */
11951
11952 static reg_class_t
11953 rs6000_secondary_reload (bool in_p,
11954 rtx x,
11955 reg_class_t rclass_i,
11956 machine_mode mode,
11957 secondary_reload_info *sri)
11958 {
11959 enum reg_class rclass = (enum reg_class) rclass_i;
11960 reg_class_t ret = ALL_REGS;
11961 enum insn_code icode;
11962 bool default_p = false;
11963 bool done_p = false;
11964
11965 /* Allow subreg of memory before/during reload. */
11966 bool memory_p = (MEM_P (x)
11967 || (!reload_completed && SUBREG_P (x)
11968 && MEM_P (SUBREG_REG (x))));
11969
11970 sri->icode = CODE_FOR_nothing;
11971 sri->t_icode = CODE_FOR_nothing;
11972 sri->extra_cost = 0;
11973 icode = ((in_p)
11974 ? reg_addr[mode].reload_load
11975 : reg_addr[mode].reload_store);
11976
11977 if (REG_P (x) || register_operand (x, mode))
11978 {
11979 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11980 bool altivec_p = (rclass == ALTIVEC_REGS);
11981 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11982
11983 if (!in_p)
11984 std::swap (to_type, from_type);
11985
11986 /* Can we do a direct move of some sort? */
11987 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11988 altivec_p))
11989 {
11990 icode = (enum insn_code)sri->icode;
11991 default_p = false;
11992 done_p = true;
11993 ret = NO_REGS;
11994 }
11995 }
11996
11997 /* Make sure 0.0 is not reloaded or forced into memory. */
11998 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11999 {
12000 ret = NO_REGS;
12001 default_p = false;
12002 done_p = true;
12003 }
12004
12005 /* If this is a scalar floating point value and we want to load it into the
12006 traditional Altivec registers, move it through a traditional floating
12007 point register, unless we have D-form addressing. Also make sure that
12008 non-zero constants use a FPR. */
12009 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12010 && !mode_supports_vmx_dform (mode)
12011 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12012 && (memory_p || CONST_DOUBLE_P (x)))
12013 {
12014 ret = FLOAT_REGS;
12015 default_p = false;
12016 done_p = true;
12017 }
12018
12019 /* Handle reload of load/stores if we have reload helper functions. */
12020 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12021 {
12022 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12023 mode);
12024
12025 if (extra_cost >= 0)
12026 {
12027 done_p = true;
12028 ret = NO_REGS;
12029 if (extra_cost > 0)
12030 {
12031 sri->extra_cost = extra_cost;
12032 sri->icode = icode;
12033 }
12034 }
12035 }
12036
12037 /* Handle unaligned loads and stores of integer registers. */
12038 if (!done_p && TARGET_POWERPC64
12039 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12040 && memory_p
12041 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12042 {
12043 rtx addr = XEXP (x, 0);
12044 rtx off = address_offset (addr);
12045
12046 if (off != NULL_RTX)
12047 {
12048 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12049 unsigned HOST_WIDE_INT offset = INTVAL (off);
12050
12051 /* We need a secondary reload when our legitimate_address_p
12052 says the address is good (as otherwise the entire address
12053 will be reloaded), and the offset is not a multiple of
12054 four or we have an address wrap. Address wrap will only
12055 occur for LO_SUMs since legitimate_offset_address_p
12056 rejects addresses for 16-byte mems that will wrap. */
12057 if (GET_CODE (addr) == LO_SUM
12058 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12059 && ((offset & 3) != 0
12060 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12061 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12062 && (offset & 3) != 0))
12063 {
12064 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12065 if (in_p)
12066 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12067 : CODE_FOR_reload_di_load);
12068 else
12069 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12070 : CODE_FOR_reload_di_store);
12071 sri->extra_cost = 2;
12072 ret = NO_REGS;
12073 done_p = true;
12074 }
12075 else
12076 default_p = true;
12077 }
12078 else
12079 default_p = true;
12080 }
12081
12082 if (!done_p && !TARGET_POWERPC64
12083 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12084 && memory_p
12085 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12086 {
12087 rtx addr = XEXP (x, 0);
12088 rtx off = address_offset (addr);
12089
12090 if (off != NULL_RTX)
12091 {
12092 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12093 unsigned HOST_WIDE_INT offset = INTVAL (off);
12094
12095 /* We need a secondary reload when our legitimate_address_p
12096 says the address is good (as otherwise the entire address
12097 will be reloaded), and we have a wrap.
12098
12099 legitimate_lo_sum_address_p allows LO_SUM addresses to
12100 have any offset so test for wrap in the low 16 bits.
12101
12102 legitimate_offset_address_p checks for the range
12103 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12104 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12105 [0x7ff4,0x7fff] respectively, so test for the
12106 intersection of these ranges, [0x7ffc,0x7fff] and
12107 [0x7ff4,0x7ff7] respectively.
12108
12109 Note that the address we see here may have been
12110 manipulated by legitimize_reload_address. */
12111 if (GET_CODE (addr) == LO_SUM
12112 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12113 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12114 {
12115 if (in_p)
12116 sri->icode = CODE_FOR_reload_si_load;
12117 else
12118 sri->icode = CODE_FOR_reload_si_store;
12119 sri->extra_cost = 2;
12120 ret = NO_REGS;
12121 done_p = true;
12122 }
12123 else
12124 default_p = true;
12125 }
12126 else
12127 default_p = true;
12128 }
12129
12130 if (!done_p)
12131 default_p = true;
12132
12133 if (default_p)
12134 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12135
12136 gcc_assert (ret != ALL_REGS);
12137
12138 if (TARGET_DEBUG_ADDR)
12139 {
12140 fprintf (stderr,
12141 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12142 "mode = %s",
12143 reg_class_names[ret],
12144 in_p ? "true" : "false",
12145 reg_class_names[rclass],
12146 GET_MODE_NAME (mode));
12147
12148 if (reload_completed)
12149 fputs (", after reload", stderr);
12150
12151 if (!done_p)
12152 fputs (", done_p not set", stderr);
12153
12154 if (default_p)
12155 fputs (", default secondary reload", stderr);
12156
12157 if (sri->icode != CODE_FOR_nothing)
12158 fprintf (stderr, ", reload func = %s, extra cost = %d",
12159 insn_data[sri->icode].name, sri->extra_cost);
12160
12161 else if (sri->extra_cost > 0)
12162 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12163
12164 fputs ("\n", stderr);
12165 debug_rtx (x);
12166 }
12167
12168 return ret;
12169 }
12170
12171 /* Better tracing for rs6000_secondary_reload_inner. */
12172
12173 static void
12174 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12175 bool store_p)
12176 {
12177 rtx set, clobber;
12178
12179 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12180
12181 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12182 store_p ? "store" : "load");
12183
12184 if (store_p)
12185 set = gen_rtx_SET (mem, reg);
12186 else
12187 set = gen_rtx_SET (reg, mem);
12188
12189 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12190 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12191 }
12192
12193 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12194 ATTRIBUTE_NORETURN;
12195
12196 static void
12197 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12198 bool store_p)
12199 {
12200 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12201 gcc_unreachable ();
12202 }
12203
12204 /* Fix up reload addresses for values in GPR, FPR, and VMX registers that have
12205 reload helper functions. These were identified in
12206 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12207 reload, it calls the insns:
12208 reload_<RELOAD:mode>_<P:mptrsize>_store
12209 reload_<RELOAD:mode>_<P:mptrsize>_load
12210
12211 which in turn calls this function, to do whatever is necessary to create
12212 valid addresses. */
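/* For example (insn name formed from the naming scheme above, shown for
   illustration): reloading a V2DFmode value through a DImode pointer uses
   reload_v2df_di_load, which hands the target register, the memory, and a
   scratch base register to this function. */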
12213
12214 void
12215 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12216 {
12217 int regno = true_regnum (reg);
12218 machine_mode mode = GET_MODE (reg);
12219 addr_mask_type addr_mask;
12220 rtx addr;
12221 rtx new_addr;
12222 rtx op_reg, op0, op1;
12223 rtx and_op;
12224 rtx cc_clobber;
12225 rtvec rv;
12226
12227 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12228 || !base_reg_operand (scratch, GET_MODE (scratch)))
12229 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12230
12231 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12232 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12233
12234 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12235 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12236
12237 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12238 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12239
12240 else
12241 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12242
12243 /* Make sure the mode is valid in this register class. */
12244 if ((addr_mask & RELOAD_REG_VALID) == 0)
12245 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12246
12247 if (TARGET_DEBUG_ADDR)
12248 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12249
12250 new_addr = addr = XEXP (mem, 0);
12251 switch (GET_CODE (addr))
12252 {
12253 /* Does the register class support auto update forms for this mode? If
12254 not, do the update now. We don't need a scratch register, since the
12255 PowerPC only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
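/* Illustration of the fallback: a PRE_DEC address for a 16-byte mode whose
   reload register class lacks update forms becomes an explicit add of -16
   to the base register (gen_add2_insn below), after which the plain base
   register is used as the address. */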
12256 case PRE_INC:
12257 case PRE_DEC:
12258 op_reg = XEXP (addr, 0);
12259 if (!base_reg_operand (op_reg, Pmode))
12260 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12261
12262 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12263 {
12264 int delta = GET_MODE_SIZE (mode);
12265 if (GET_CODE (addr) == PRE_DEC)
12266 delta = -delta;
12267 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12268 new_addr = op_reg;
12269 }
12270 break;
12271
12272 case PRE_MODIFY:
12273 op0 = XEXP (addr, 0);
12274 op1 = XEXP (addr, 1);
12275 if (!base_reg_operand (op0, Pmode)
12276 || GET_CODE (op1) != PLUS
12277 || !rtx_equal_p (op0, XEXP (op1, 0)))
12278 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12279
12280 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12281 {
12282 emit_insn (gen_rtx_SET (op0, op1));
12283 new_addr = reg;
12284 }
12285 break;
12286
12287 /* Do we need to simulate AND -16 to clear the bottom address bits used
12288 in VMX load/stores? */
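/* Illustration: for a masked address such as (and X (const_int -16)) in a
   class that cannot use the AND form, the register part is computed into
   the scratch if necessary, and the PARALLEL below performs the masking
   AND (clobbering a CC register); the scratch then serves as the
   address. */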
12289 case AND:
12290 op0 = XEXP (addr, 0);
12291 op1 = XEXP (addr, 1);
12292 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12293 {
12294 if (REG_P (op0) || SUBREG_P (op0))
12295 op_reg = op0;
12296
12297 else if (GET_CODE (op1) == PLUS)
12298 {
12299 emit_insn (gen_rtx_SET (scratch, op1));
12300 op_reg = scratch;
12301 }
12302
12303 else
12304 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12305
12306 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12307 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12308 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12309 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12310 new_addr = scratch;
12311 }
12312 break;
12313
12314 /* If this is an indirect address, make sure it is a base register. */
12315 case REG:
12316 case SUBREG:
12317 if (!base_reg_operand (addr, GET_MODE (addr)))
12318 {
12319 emit_insn (gen_rtx_SET (scratch, addr));
12320 new_addr = scratch;
12321 }
12322 break;
12323
12324 /* If this is an indexed address, make sure the register class can handle
12325 indexed addresses for this mode. */
12326 case PLUS:
12327 op0 = XEXP (addr, 0);
12328 op1 = XEXP (addr, 1);
12329 if (!base_reg_operand (op0, Pmode))
12330 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12331
12332 else if (int_reg_operand (op1, Pmode))
12333 {
12334 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12335 {
12336 emit_insn (gen_rtx_SET (scratch, addr));
12337 new_addr = scratch;
12338 }
12339 }
12340
12341 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12342 {
12343 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12344 || !quad_address_p (addr, mode, false))
12345 {
12346 emit_insn (gen_rtx_SET (scratch, addr));
12347 new_addr = scratch;
12348 }
12349 }
12350
12351 /* Make sure the register class can handle offset addresses. */
12352 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12353 {
12354 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12355 {
12356 emit_insn (gen_rtx_SET (scratch, addr));
12357 new_addr = scratch;
12358 }
12359 }
12360
12361 else
12362 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12363
12364 break;
12365
12366 case LO_SUM:
12367 op0 = XEXP (addr, 0);
12368 op1 = XEXP (addr, 1);
12369 if (!base_reg_operand (op0, Pmode))
12370 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12371
12372 else if (int_reg_operand (op1, Pmode))
12373 {
12374 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12375 {
12376 emit_insn (gen_rtx_SET (scratch, addr));
12377 new_addr = scratch;
12378 }
12379 }
12380
12381 /* Quad offsets are restricted and can't handle normal addresses. */
12382 else if (mode_supports_dq_form (mode))
12383 {
12384 emit_insn (gen_rtx_SET (scratch, addr));
12385 new_addr = scratch;
12386 }
12387
12388 /* Make sure the register class can handle offset addresses. */
12389 else if (legitimate_lo_sum_address_p (mode, addr, false))
12390 {
12391 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12392 {
12393 emit_insn (gen_rtx_SET (scratch, addr));
12394 new_addr = scratch;
12395 }
12396 }
12397
12398 else
12399 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12400
12401 break;
12402
12403 case SYMBOL_REF:
12404 case CONST:
12405 case LABEL_REF:
12406 rs6000_emit_move (scratch, addr, Pmode);
12407 new_addr = scratch;
12408 break;
12409
12410 default:
12411 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12412 }
12413
12414 /* Adjust the address if it changed. */
12415 if (addr != new_addr)
12416 {
12417 mem = replace_equiv_address_nv (mem, new_addr);
12418 if (TARGET_DEBUG_ADDR)
12419 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12420 }
12421
12422 /* Now create the move. */
12423 if (store_p)
12424 emit_insn (gen_rtx_SET (mem, reg));
12425 else
12426 emit_insn (gen_rtx_SET (reg, mem));
12427
12428 return;
12429 }
12430
12431 /* Convert reloads involving 64-bit gprs and misaligned offset
12432 addressing, or multiple 32-bit gprs and offsets that are too large,
12433 to use indirect addressing. */
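/* For instance (illustrative RTL): a reload of
   (mem:DI (plus rb (const_int 0x9000))) for a 32-bit GPR pair cannot use
   two in-range word offsets, so the address is copied into the scratch
   register and the access is rewritten as (mem:DI (reg scratch)). */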
12434
12435 void
12436 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
12437 {
12438 int regno = true_regnum (reg);
12439 enum reg_class rclass;
12440 rtx addr;
12441 rtx scratch_or_premodify = scratch;
12442
12443 if (TARGET_DEBUG_ADDR)
12444 {
12445 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
12446 store_p ? "store" : "load");
12447 fprintf (stderr, "reg:\n");
12448 debug_rtx (reg);
12449 fprintf (stderr, "mem:\n");
12450 debug_rtx (mem);
12451 fprintf (stderr, "scratch:\n");
12452 debug_rtx (scratch);
12453 }
12454
12455 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
12456 gcc_assert (MEM_P (mem));
12457 rclass = REGNO_REG_CLASS (regno);
12458 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
12459 addr = XEXP (mem, 0);
12460
12461 if (GET_CODE (addr) == PRE_MODIFY)
12462 {
12463 gcc_assert (REG_P (XEXP (addr, 0))
12464 && GET_CODE (XEXP (addr, 1)) == PLUS
12465 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
12466 scratch_or_premodify = XEXP (addr, 0);
12467 addr = XEXP (addr, 1);
12468 }
12469 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
12470
12471 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
12472
12473 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
12474
12475 /* Now create the move. */
12476 if (store_p)
12477 emit_insn (gen_rtx_SET (mem, reg));
12478 else
12479 emit_insn (gen_rtx_SET (reg, mem));
12480
12481 return;
12482 }
12483
12484 /* Given an rtx X being reloaded into a reg required to be
12485 in class CLASS, return the class of reg to actually use.
12486 In general this is just CLASS; but on some machines
12487 in some cases it is preferable to use a more restrictive class.
12488
12489 On the RS/6000, we have to return NO_REGS when we want to reload a
12490 floating-point CONST_DOUBLE to force it to be copied to memory.
12491
12492 We also don't want to reload integer values into floating-point
12493 registers if we can at all help it. In fact, this can
12494 cause reload to die, if it tries to generate a reload of CTR
12495 into a FP register and discovers it doesn't have the memory location
12496 required.
12497
12498 ??? Would it be a good idea to have reload do the converse, that is,
12499 try to reload floating modes into FP registers if possible?
12500 */
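/* Two sample outcomes of the logic below: a CONST_VECTOR that
   easy_vector_constant can synthesize prefers ALTIVEC_REGS, while
   (const_int -1) may stay in a VSX class on ISA 2.07, since XXLORC can
   generate all-ones in any VSX register. */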
12501
12502 static enum reg_class
12503 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12504 {
12505 machine_mode mode = GET_MODE (x);
12506 bool is_constant = CONSTANT_P (x);
12507
12508 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12509 reload class for it. */
12510 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12511 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12512 return NO_REGS;
12513
12514 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12515 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12516 return NO_REGS;
12517
12518 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12519 the reloading of address expressions using PLUS into floating point
12520 registers. */
12521 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12522 {
12523 if (is_constant)
12524 {
12525 /* Zero is always allowed in all VSX registers. */
12526 if (x == CONST0_RTX (mode))
12527 return rclass;
12528
12529 /* If this is a vector constant that can be formed with a few Altivec
12530 instructions, we want altivec registers. */
12531 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12532 return ALTIVEC_REGS;
12533
12534 /* If this is an integer constant that can easily be loaded into
12535 vector registers, allow it. */
12536 if (CONST_INT_P (x))
12537 {
12538 HOST_WIDE_INT value = INTVAL (x);
12539
12540 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12541 2.06 can generate it in the Altivec registers with
12542 VSPLTI<x>. */
12543 if (value == -1)
12544 {
12545 if (TARGET_P8_VECTOR)
12546 return rclass;
12547 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12548 return ALTIVEC_REGS;
12549 else
12550 return NO_REGS;
12551 }
12552
12553 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12554 a sign extend in the Altivec registers. */
12555 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12556 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12557 return ALTIVEC_REGS;
12558 }
12559
12560 /* Force constant to memory. */
12561 return NO_REGS;
12562 }
12563
12564 /* D-form addressing can easily reload the value. */
12565 if (mode_supports_vmx_dform (mode)
12566 || mode_supports_dq_form (mode))
12567 return rclass;
12568
12569 /* If this is a scalar floating point value and we don't have D-form
12570 addressing, prefer the traditional floating point registers so that we
12571 can use D-form (register+offset) addressing. */
12572 if (rclass == VSX_REGS
12573 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12574 return FLOAT_REGS;
12575
12576 /* Prefer the Altivec registers if Altivec is handling the vector
12577 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12578 loads. */
12579 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12580 || mode == V1TImode)
12581 return ALTIVEC_REGS;
12582
12583 return rclass;
12584 }
12585
12586 if (is_constant || GET_CODE (x) == PLUS)
12587 {
12588 if (reg_class_subset_p (GENERAL_REGS, rclass))
12589 return GENERAL_REGS;
12590 if (reg_class_subset_p (BASE_REGS, rclass))
12591 return BASE_REGS;
12592 return NO_REGS;
12593 }
12594
12595 /* For the vector pair and vector quad modes, prefer their natural register
12596 (VSX or FPR) rather than GPR registers. For other integer types, prefer
12597 the GPR registers. */
12598 if (rclass == GEN_OR_FLOAT_REGS)
12599 {
12600 if (mode == OOmode)
12601 return VSX_REGS;
12602
12603 if (mode == XOmode)
12604 return FLOAT_REGS;
12605
12606 if (GET_MODE_CLASS (mode) == MODE_INT)
12607 return GENERAL_REGS;
12608 }
12609
12610 return rclass;
12611 }
12612
12613 /* Debug version of rs6000_preferred_reload_class. */
12614 static enum reg_class
12615 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12616 {
12617 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12618
12619 fprintf (stderr,
12620 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12621 "mode = %s, x:\n",
12622 reg_class_names[ret], reg_class_names[rclass],
12623 GET_MODE_NAME (GET_MODE (x)));
12624 debug_rtx (x);
12625
12626 return ret;
12627 }
12628
12629 /* If we are copying between FP or AltiVec registers and anything else, we need
12630 a memory location. The exception is when we are targeting ppc64 and the
12631 direct move instructions between FPRs and GPRs are available. Also, under VSX, you
12632 can copy vector registers from the FP register set to the Altivec register
12633 set and vice versa. */
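/* For example, per the above: a V4SImode copy between FLOAT_REGS and
   ALTIVEC_REGS is a plain VSX register move and needs no memory, whereas a
   DFmode copy between GPRs and FPRs without direct moves must bounce
   through a stack slot. */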
12634
12635 static bool
12636 rs6000_secondary_memory_needed (machine_mode mode,
12637 reg_class_t from_class,
12638 reg_class_t to_class)
12639 {
12640 enum rs6000_reg_type from_type, to_type;
12641 bool altivec_p = ((from_class == ALTIVEC_REGS)
12642 || (to_class == ALTIVEC_REGS));
12643
12644 /* If a simple/direct move is available, we don't need secondary memory. */
12645 from_type = reg_class_to_reg_type[(int)from_class];
12646 to_type = reg_class_to_reg_type[(int)to_class];
12647
12648 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12649 (secondary_reload_info *)0, altivec_p))
12650 return false;
12651
12652 /* If we have a floating point or vector register class, we need to use
12653 memory to transfer the data. */
12654 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12655 return true;
12656
12657 return false;
12658 }
12659
12660 /* Debug version of rs6000_secondary_memory_needed. */
12661 static bool
12662 rs6000_debug_secondary_memory_needed (machine_mode mode,
12663 reg_class_t from_class,
12664 reg_class_t to_class)
12665 {
12666 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12667
12668 fprintf (stderr,
12669 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12670 "to_class = %s, mode = %s\n",
12671 ret ? "true" : "false",
12672 reg_class_names[from_class],
12673 reg_class_names[to_class],
12674 GET_MODE_NAME (mode));
12675
12676 return ret;
12677 }
12678
12679 /* Return the register class of a scratch register needed to copy IN into
12680 or out of a register in RCLASS in MODE. If it can be done directly,
12681 NO_REGS is returned. */
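/* Example outcomes of the checks below: copying a SYMBOL_REF into
   FLOAT_REGS on an ELF target yields BASE_REGS, because the symbolic
   address must first be formed in a base register; a copy whose source is
   already a GPR yields NO_REGS. */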
12682
12683 static enum reg_class
12684 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12685 rtx in)
12686 {
12687 int regno;
12688
12689 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12690 #if TARGET_MACHO
12691 && MACHOPIC_INDIRECT
12692 #endif
12693 ))
12694 {
12695 /* We cannot copy a symbolic operand directly into anything
12696 other than BASE_REGS for TARGET_ELF. So indicate that a
12697 register from BASE_REGS is needed as an intermediate
12698 register.
12699
12700 On Darwin, pic addresses require a load from memory, which
12701 needs a base register. */
12702 if (rclass != BASE_REGS
12703 && (SYMBOL_REF_P (in)
12704 || GET_CODE (in) == HIGH
12705 || GET_CODE (in) == LABEL_REF
12706 || GET_CODE (in) == CONST))
12707 return BASE_REGS;
12708 }
12709
12710 if (REG_P (in))
12711 {
12712 regno = REGNO (in);
12713 if (!HARD_REGISTER_NUM_P (regno))
12714 {
12715 regno = true_regnum (in);
12716 if (!HARD_REGISTER_NUM_P (regno))
12717 regno = -1;
12718 }
12719 }
12720 else if (SUBREG_P (in))
12721 {
12722 regno = true_regnum (in);
12723 if (!HARD_REGISTER_NUM_P (regno))
12724 regno = -1;
12725 }
12726 else
12727 regno = -1;
12728
12729 /* If we have VSX register moves, prefer moving scalar values between
12730 Altivec registers and GPR by going via an FPR (and then via memory)
12731 instead of reloading the secondary memory address for Altivec moves. */
12732 if (TARGET_VSX
12733 && GET_MODE_SIZE (mode) < 16
12734 && !mode_supports_vmx_dform (mode)
12735 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12736 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12737 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12738 && (regno >= 0 && INT_REGNO_P (regno)))))
12739 return FLOAT_REGS;
12740
12741 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12742 into anything. */
12743 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12744 || (regno >= 0 && INT_REGNO_P (regno)))
12745 return NO_REGS;
12746
12747 /* Constants, memory, and VSX registers can go into VSX registers (both the
12748 traditional floating point and the altivec registers). */
12749 if (rclass == VSX_REGS
12750 && (regno == -1 || VSX_REGNO_P (regno)))
12751 return NO_REGS;
12752
12753 /* Constants, memory, and FP registers can go into FP registers. */
12754 if ((regno == -1 || FP_REGNO_P (regno))
12755 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12756 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12757
12758 /* Memory and AltiVec registers can go into AltiVec registers. */
12759 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12760 && rclass == ALTIVEC_REGS)
12761 return NO_REGS;
12762
12763 /* We can copy among the CR registers. */
12764 if ((rclass == CR_REGS || rclass == CR0_REGS)
12765 && regno >= 0 && CR_REGNO_P (regno))
12766 return NO_REGS;
12767
12768 /* Otherwise, we need GENERAL_REGS. */
12769 return GENERAL_REGS;
12770 }
12771
12772 /* Debug version of rs6000_secondary_reload_class. */
12773 static enum reg_class
12774 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12775 machine_mode mode, rtx in)
12776 {
12777 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12778 fprintf (stderr,
12779 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12780 "mode = %s, input rtx:\n",
12781 reg_class_names[ret], reg_class_names[rclass],
12782 GET_MODE_NAME (mode));
12783 debug_rtx (in);
12784
12785 return ret;
12786 }
12787
12788 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12789
12790 static bool
12791 rs6000_can_change_mode_class (machine_mode from,
12792 machine_mode to,
12793 reg_class_t rclass)
12794 {
12795 unsigned from_size = GET_MODE_SIZE (from);
12796 unsigned to_size = GET_MODE_SIZE (to);
12797
12798 if (from_size != to_size)
12799 {
12800 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12801
12802 if (reg_classes_intersect_p (xclass, rclass))
12803 {
12804 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12805 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12806 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12807 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12808
12809 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12810 single register under VSX because the scalar part of the register
12811 is in the upper 64-bits, and not the lower 64-bits. Types like
12812 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12813 IEEE floating point can't overlap, and neither can small
12814 values. */
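/* E.g. KFmode, an IEEE 128-bit scalar held in a single VSX register,
   cannot be accessed as a DImode subreg because the scalar sits in the
   upper 64 bits; IBM long double TFmode, which occupies two registers,
   is not rejected by this pair of tests. */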
12815
12816 if (to_float128_vector_p && from_float128_vector_p)
12817 return true;
12818
12819 else if (to_float128_vector_p || from_float128_vector_p)
12820 return false;
12821
12822 /* TDmode in floating-mode registers must always go into a register
12823 pair with the most significant word in the even-numbered register
12824 to match ISA requirements. In little-endian mode, this does not
12825 match subreg numbering, so we cannot allow subregs. */
12826 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12827 return false;
12828
12829 /* Allow SD<->DD changes, since SDmode values are stored in
12830 the low half of the DDmode, just like target-independent
12831 code expects. We need to allow at least SD->DD since
12832 rs6000_secondary_memory_needed_mode asks for that change
12833 to be made for SD reloads. */
12834 if ((to == DDmode && from == SDmode)
12835 || (to == SDmode && from == DDmode))
12836 return true;
12837
12838 if (from_size < 8 || to_size < 8)
12839 return false;
12840
12841 if (from_size == 8 && (8 * to_nregs) != to_size)
12842 return false;
12843
12844 if (to_size == 8 && (8 * from_nregs) != from_size)
12845 return false;
12846
12847 return true;
12848 }
12849 else
12850 return true;
12851 }
12852
12853 /* Since the VSX register set includes traditional floating point registers
12854 and altivec registers, just check for the size being different instead of
12855 trying to check whether the modes are vector modes. Otherwise it won't
12856 allow say DF and DI to change classes. For types like TFmode and TDmode
12857 that take 2 64-bit registers, rather than a single 128-bit register, don't
12858 allow subregs of those types to other 128-bit types. */
12859 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12860 {
12861 unsigned num_regs = (from_size + 15) / 16;
12862 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12863 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12864 return false;
12865
12866 return (from_size == 8 || from_size == 16);
12867 }
12868
12869 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12870 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12871 return false;
12872
12873 return true;
12874 }
12875
12876 /* Debug version of rs6000_can_change_mode_class. */
12877 static bool
12878 rs6000_debug_can_change_mode_class (machine_mode from,
12879 machine_mode to,
12880 reg_class_t rclass)
12881 {
12882 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12883
12884 fprintf (stderr,
12885 "rs6000_can_change_mode_class, return %s, from = %s, "
12886 "to = %s, rclass = %s\n",
12887 ret ? "true" : "false",
12888 GET_MODE_NAME (from), GET_MODE_NAME (to),
12889 reg_class_names[rclass]);
12890
12891 return ret;
12892 }
12893 \f
12894 /* Return a string to do a move operation of 128 bits of data. */
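/* For illustration: a VSX-to-VSX copy returns "xxlor %x0,%x1,%x1", a
   GPR-to-GPR copy returns "#" so the post-reload splitter expands it into
   word-sized moves, and a 16-byte GPR load can return "lq %0,%1" when
   TARGET_QUAD_MEMORY permits. */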
12895
12896 const char *
12897 rs6000_output_move_128bit (rtx operands[])
12898 {
12899 rtx dest = operands[0];
12900 rtx src = operands[1];
12901 machine_mode mode = GET_MODE (dest);
12902 int dest_regno;
12903 int src_regno;
12904 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12905 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12906
12907 if (REG_P (dest))
12908 {
12909 dest_regno = REGNO (dest);
12910 dest_gpr_p = INT_REGNO_P (dest_regno);
12911 dest_fp_p = FP_REGNO_P (dest_regno);
12912 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12913 dest_vsx_p = dest_fp_p | dest_vmx_p;
12914 }
12915 else
12916 {
12917 dest_regno = -1;
12918 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12919 }
12920
12921 if (REG_P (src))
12922 {
12923 src_regno = REGNO (src);
12924 src_gpr_p = INT_REGNO_P (src_regno);
12925 src_fp_p = FP_REGNO_P (src_regno);
12926 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12927 src_vsx_p = src_fp_p | src_vmx_p;
12928 }
12929 else
12930 {
12931 src_regno = -1;
12932 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12933 }
12934
12935 /* Register moves. */
12936 if (dest_regno >= 0 && src_regno >= 0)
12937 {
12938 if (dest_gpr_p)
12939 {
12940 if (src_gpr_p)
12941 return "#";
12942
12943 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12944 return (WORDS_BIG_ENDIAN
12945 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12946 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12947
12948 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12949 return "#";
12950 }
12951
12952 else if (TARGET_VSX && dest_vsx_p)
12953 {
12954 if (src_vsx_p)
12955 return "xxlor %x0,%x1,%x1";
12956
12957 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12958 return (WORDS_BIG_ENDIAN
12959 ? "mtvsrdd %x0,%1,%L1"
12960 : "mtvsrdd %x0,%L1,%1");
12961
12962 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12963 return "#";
12964 }
12965
12966 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12967 return "vor %0,%1,%1";
12968
12969 else if (dest_fp_p && src_fp_p)
12970 return "#";
12971 }
12972
12973 /* Loads. */
12974 else if (dest_regno >= 0 && MEM_P (src))
12975 {
12976 if (dest_gpr_p)
12977 {
12978 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12979 return "lq %0,%1";
12980 else
12981 return "#";
12982 }
12983
12984 else if (TARGET_ALTIVEC && dest_vmx_p
12985 && altivec_indexed_or_indirect_operand (src, mode))
12986 return "lvx %0,%y1";
12987
12988 else if (TARGET_VSX && dest_vsx_p)
12989 {
12990 if (mode_supports_dq_form (mode)
12991 && quad_address_p (XEXP (src, 0), mode, true))
12992 return "lxv %x0,%1";
12993
12994 else if (TARGET_P9_VECTOR)
12995 return "lxvx %x0,%y1";
12996
12997 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12998 return "lxvw4x %x0,%y1";
12999
13000 else
13001 return "lxvd2x %x0,%y1";
13002 }
13003
13004 else if (TARGET_ALTIVEC && dest_vmx_p)
13005 return "lvx %0,%y1";
13006
13007 else if (dest_fp_p)
13008 return "#";
13009 }
13010
13011 /* Stores. */
13012 else if (src_regno >= 0 && MEM_P (dest))
13013 {
13014 if (src_gpr_p)
13015 {
13016 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13017 return "stq %1,%0";
13018 else
13019 return "#";
13020 }
13021
13022 else if (TARGET_ALTIVEC && src_vmx_p
13023 && altivec_indexed_or_indirect_operand (dest, mode))
13024 return "stvx %1,%y0";
13025
13026 else if (TARGET_VSX && src_vsx_p)
13027 {
13028 if (mode_supports_dq_form (mode)
13029 && quad_address_p (XEXP (dest, 0), mode, true))
13030 return "stxv %x1,%0";
13031
13032 else if (TARGET_P9_VECTOR)
13033 return "stxvx %x1,%y0";
13034
13035 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13036 return "stxvw4x %x1,%y0";
13037
13038 else
13039 return "stxvd2x %x1,%y0";
13040 }
13041
13042 else if (TARGET_ALTIVEC && src_vmx_p)
13043 return "stvx %1,%y0";
13044
13045 else if (src_fp_p)
13046 return "#";
13047 }
13048
13049 /* Constants. */
13050 else if (dest_regno >= 0
13051 && (CONST_INT_P (src)
13052 || CONST_WIDE_INT_P (src)
13053 || CONST_DOUBLE_P (src)
13054 || GET_CODE (src) == CONST_VECTOR))
13055 {
13056 if (dest_gpr_p)
13057 return "#";
13058
13059 else if ((dest_vmx_p && TARGET_ALTIVEC)
13060 || (dest_vsx_p && TARGET_VSX))
13061 return output_vec_const_move (operands);
13062 }
13063
13064 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13065 }
13066
13067 /* Validate a 128-bit move. */
13068 bool
13069 rs6000_move_128bit_ok_p (rtx operands[])
13070 {
13071 machine_mode mode = GET_MODE (operands[0]);
13072 return (gpc_reg_operand (operands[0], mode)
13073 || gpc_reg_operand (operands[1], mode));
13074 }
13075
13076 /* Return true if a 128-bit move needs to be split. */
13077 bool
13078 rs6000_split_128bit_ok_p (rtx operands[])
13079 {
13080 if (!reload_completed)
13081 return false;
13082
13083 if (!gpr_or_gpr_p (operands[0], operands[1]))
13084 return false;
13085
13086 if (quad_load_store_p (operands[0], operands[1]))
13087 return false;
13088
13089 return true;
13090 }
13091
13092 \f
13093 /* Given a comparison operation, return the bit number in CCR to test. We
13094 know this is a valid comparison.
13095
13096 SCC_P is 1 if this is for an scc. That means that %D will have been
13097 used instead of %C, so the bits will be in different places.
13098
13099 Return -1 if OP isn't a valid comparison for some reason. */
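/* Worked example: for a GT comparison against CR2, base_bit is
   4 * (CR2 - CR0) = 8, so the GT case below returns bit 9 (base_bit + 1);
   with SCC_P set, GE instead tests bit 11, the unordered position filled
   by a preceding cror. */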
13100
13101 int
13102 ccr_bit (rtx op, int scc_p)
13103 {
13104 enum rtx_code code = GET_CODE (op);
13105 machine_mode cc_mode;
13106 int cc_regnum;
13107 int base_bit;
13108 rtx reg;
13109
13110 if (!COMPARISON_P (op))
13111 return -1;
13112
13113 reg = XEXP (op, 0);
13114
13115 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13116 return -1;
13117
13118 cc_mode = GET_MODE (reg);
13119 cc_regnum = REGNO (reg);
13120 base_bit = 4 * (cc_regnum - CR0_REGNO);
13121
13122 validate_condition_mode (code, cc_mode);
13123
13124 /* When generating a sCOND operation, only positive conditions are
13125 allowed. */
13126 if (scc_p)
13127 switch (code)
13128 {
13129 case EQ:
13130 case GT:
13131 case LT:
13132 case UNORDERED:
13133 case GTU:
13134 case LTU:
13135 break;
13136 default:
13137 return -1;
13138 }
13139
13140 switch (code)
13141 {
13142 case NE:
13143 return scc_p ? base_bit + 3 : base_bit + 2;
13144 case EQ:
13145 return base_bit + 2;
13146 case GT: case GTU: case UNLE:
13147 return base_bit + 1;
13148 case LT: case LTU: case UNGE:
13149 return base_bit;
13150 case ORDERED: case UNORDERED:
13151 return base_bit + 3;
13152
13153 case GE: case GEU:
13154 /* If scc, we will have done a cror to put the bit in the
13155 unordered position. So test that bit. For integer, this is ! LT
13156 unless this is an scc insn. */
13157 return scc_p ? base_bit + 3 : base_bit;
13158
13159 case LE: case LEU:
13160 return scc_p ? base_bit + 3 : base_bit + 1;
13161
13162 default:
13163 return -1;
13164 }
13165 }
13166 \f
13167 /* Return the GOT register. */
13168
13169 rtx
13170 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13171 {
13172 /* The second flow pass currently (June 1999) can't update
13173 regs_ever_live without disturbing other parts of the compiler, so
13174 update it here to make the prolog/epilogue code happy. */
13175 if (!can_create_pseudo_p ()
13176 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13177 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13178
13179 crtl->uses_pic_offset_table = 1;
13180
13181 return pic_offset_table_rtx;
13182 }
13183 \f
13184 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13185
13186 /* Write out a function code label. */
13187
13188 void
13189 rs6000_output_function_entry (FILE *file, const char *fname)
13190 {
13191 if (fname[0] != '.')
13192 {
13193 switch (DEFAULT_ABI)
13194 {
13195 default:
13196 gcc_unreachable ();
13197
13198 case ABI_AIX:
13199 if (DOT_SYMBOLS)
13200 putc ('.', file);
13201 else
13202 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13203 break;
13204
13205 case ABI_ELFv2:
13206 case ABI_V4:
13207 case ABI_DARWIN:
13208 break;
13209 }
13210 }
13211
13212 RS6000_OUTPUT_BASENAME (file, fname);
13213 }
13214
13215 /* Print an operand. Recognize special options, documented below. */
13216
13217 #if TARGET_ELF
13218 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13219 only introduced by the linker, when applying the sda21
13220 relocation. */
13221 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13222 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13223 #else
13224 #define SMALL_DATA_RELOC "sda21"
13225 #define SMALL_DATA_REG 0
13226 #endif
13227
13228 void
13229 print_operand (FILE *file, rtx x, int code)
13230 {
13231 int i;
13232 unsigned HOST_WIDE_INT uval;
13233
13234 switch (code)
13235 {
13236 /* %a is output_address. */
13237
13238 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13239 output_operand. */
13240
13241 case 'A':
13242 /* Write the MMA accumulator number associated with VSX register X. */
13243 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13244 output_operand_lossage ("invalid %%A value");
13245 else
13246 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13247 return;
13248
13249 case 'D':
13250 /* Like 'J' but get to the GT bit only. */
13251 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13252 {
13253 output_operand_lossage ("invalid %%D value");
13254 return;
13255 }
13256
13257 /* Bit 1 is GT bit. */
13258 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13259
13260 /* Add one for shift count in rlinm for scc. */
13261 fprintf (file, "%d", i + 1);
13262 return;
13263
13264 case 'e':
13265 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13266 if (! INT_P (x))
13267 {
13268 output_operand_lossage ("invalid %%e value");
13269 return;
13270 }
13271
13272 uval = INTVAL (x);
13273 if ((uval & 0xffff) == 0 && uval != 0)
13274 putc ('s', file);
13275 return;
13276
13277 case 'E':
13278 /* X is a CR register. Print the number of the EQ bit of the CR. */
13279 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13280 output_operand_lossage ("invalid %%E value");
13281 else
13282 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13283 return;
13284
13285 case 'f':
13286 /* X is a CR register. Print the shift count needed to move it
13287 to the high-order four bits. */
13288 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13289 output_operand_lossage ("invalid %%f value");
13290 else
13291 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13292 return;
13293
13294 case 'F':
13295 /* Similar, but print the count for the rotate in the opposite
13296 direction. */
13297 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13298 output_operand_lossage ("invalid %%F value");
13299 else
13300 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13301 return;
13302
13303 case 'G':
13304 /* X is a constant integer. If it is negative, print "m",
13305 otherwise print "z". This is to make an aze or ame insn. */
13306 if (!CONST_INT_P (x))
13307 output_operand_lossage ("invalid %%G value");
13308 else if (INTVAL (x) >= 0)
13309 putc ('z', file);
13310 else
13311 putc ('m', file);
13312 return;
13313
13314 case 'h':
13315 /* If constant, output low-order five bits. Otherwise, write
13316 normally. */
13317 if (INT_P (x))
13318 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13319 else
13320 print_operand (file, x, 0);
13321 return;
13322
13323 case 'H':
13324 /* If constant, output low-order six bits. Otherwise, write
13325 normally. */
13326 if (INT_P (x))
13327 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13328 else
13329 print_operand (file, x, 0);
13330 return;
13331
13332 case 'I':
13333 /* Print `i' if this is a constant, else nothing. */
13334 if (INT_P (x))
13335 putc ('i', file);
13336 return;
13337
13338 case 'j':
13339 /* Write the bit number in CCR for jump. */
13340 i = ccr_bit (x, 0);
13341 if (i == -1)
13342 output_operand_lossage ("invalid %%j code");
13343 else
13344 fprintf (file, "%d", i);
13345 return;
13346
13347 case 'J':
13348 /* Similar, but add one for shift count in rlinm for scc and pass
13349 scc flag to `ccr_bit'. */
13350 i = ccr_bit (x, 1);
13351 if (i == -1)
13352 output_operand_lossage ("invalid %%J code");
13353 else
13354 /* If we want bit 31, write a shift count of zero, not 32. */
13355 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13356 return;
13357
13358 case 'k':
13359 /* X must be a constant. Write the 1's complement of the
13360 constant. */
13361 if (! INT_P (x))
13362 output_operand_lossage ("invalid %%k value");
13363 else
13364 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13365 return;
13366
13367 case 'K':
13368 /* X must be a symbolic constant on ELF. Write an
13369 expression suitable for an 'addi' that adds in the low 16
13370 bits of the MEM. */
13371 if (GET_CODE (x) == CONST)
13372 {
13373 if (GET_CODE (XEXP (x, 0)) != PLUS
13374 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13375 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13376 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13377 output_operand_lossage ("invalid %%K value");
13378 }
13379 print_operand_address (file, x);
13380 fputs ("@l", file);
13381 return;
13382
13383 /* %l is output_asm_label. */
13384
13385 case 'L':
13386 /* Write second word of DImode or DFmode reference. Works on register
13387 or non-indexed memory only. */
13388 if (REG_P (x))
13389 fputs (reg_names[REGNO (x) + 1], file);
13390 else if (MEM_P (x))
13391 {
13392 machine_mode mode = GET_MODE (x);
13393 /* Handle possible auto-increment. Since it is pre-increment and
13394 we have already done it, we can just use an offset of word. */
13395 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13396 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13397 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13398 UNITS_PER_WORD));
13399 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13400 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13401 UNITS_PER_WORD));
13402 else
13403 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13404 UNITS_PER_WORD),
13405 0));
13406
13407 if (small_data_operand (x, GET_MODE (x)))
13408 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13409 reg_names[SMALL_DATA_REG]);
13410 }
13411 return;
13412
13413 case 'N': /* Unused */
13414 /* Write the number of elements in the vector times 4. */
13415 if (GET_CODE (x) != PARALLEL)
13416 output_operand_lossage ("invalid %%N value");
13417 else
13418 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13419 return;
13420
13421 case 'O': /* Unused */
13422 /* Similar, but subtract 1 first. */
13423 if (GET_CODE (x) != PARALLEL)
13424 output_operand_lossage ("invalid %%O value");
13425 else
13426 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13427 return;
13428
13429 case 'p':
13430 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13431 if (! INT_P (x)
13432 || INTVAL (x) < 0
13433 || (i = exact_log2 (INTVAL (x))) < 0)
13434 output_operand_lossage ("invalid %%p value");
13435 else
13436 fprintf (file, "%d", i);
13437 return;
13438
13439 case 'P':
13440 /* The operand must be an indirect memory reference. The result
13441 is the register name. */
13442 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13443 || REGNO (XEXP (x, 0)) >= 32)
13444 output_operand_lossage ("invalid %%P value");
13445 else
13446 fputs (reg_names[REGNO (XEXP (x, 0))], file);
13447 return;
13448
13449 case 'q':
13450 /* This outputs the logical code corresponding to a boolean
13451 expression. The expression may have one or both operands
13452 negated (if one, only the first one). For condition register
13453 logical operations, it will also treat the negated
13454 CR codes as NOTs, but not handle NOTs of them. */
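/* Examples: (and (not x) y) prints "andc", (ior (not x) (not y)) prints
   "nand", and an unnegated (xor x y) prints "xor". */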
13455 {
13456 const char *const *t = 0;
13457 const char *s;
13458 enum rtx_code code = GET_CODE (x);
13459 static const char * const tbl[3][3] = {
13460 { "and", "andc", "nor" },
13461 { "or", "orc", "nand" },
13462 { "xor", "eqv", "xor" } };
13463
13464 if (code == AND)
13465 t = tbl[0];
13466 else if (code == IOR)
13467 t = tbl[1];
13468 else if (code == XOR)
13469 t = tbl[2];
13470 else
13471 output_operand_lossage ("invalid %%q value");
13472
13473 if (GET_CODE (XEXP (x, 0)) != NOT)
13474 s = t[0];
13475 else
13476 {
13477 if (GET_CODE (XEXP (x, 1)) == NOT)
13478 s = t[2];
13479 else
13480 s = t[1];
13481 }
13482
13483 fputs (s, file);
13484 }
13485 return;
13486
13487 case 'Q':
13488 if (! TARGET_MFCRF)
13489 return;
13490 fputc (',', file);
13491 /* FALLTHRU */
13492
13493 case 'R':
13494 /* X is a CR register. Print the mask for `mtcrf'. */
13495 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13496 output_operand_lossage ("invalid %%R value");
13497 else
13498 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13499 return;
13500
13501 case 's':
13502 /* Low 5 bits of 32 - value */
13503 if (! INT_P (x))
13504 output_operand_lossage ("invalid %%s value");
13505 else
13506 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13507 return;
13508
13509 case 't':
13510 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13511 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13512 {
13513 output_operand_lossage ("invalid %%t value");
13514 return;
13515 }
13516
13517 /* Bit 3 is OV bit. */
13518 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13519
13520 /* If we want bit 31, write a shift count of zero, not 32. */
13521 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13522 return;
13523
13524 case 'T':
13525 /* Print the symbolic name of a branch target register. */
13526 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13527 x = XVECEXP (x, 0, 0);
13528 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13529 && REGNO (x) != CTR_REGNO))
13530 output_operand_lossage ("invalid %%T value");
13531 else if (REGNO (x) == LR_REGNO)
13532 fputs ("lr", file);
13533 else
13534 fputs ("ctr", file);
13535 return;
13536
13537 case 'u':
13538 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13539 for use in unsigned operand. */
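/* Example: for 0x12340000 the low half is zero, so the value is shifted
   down and 0x1234 is printed; for 0x5678 the low half is printed
   as-is. */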
13540 if (! INT_P (x))
13541 {
13542 output_operand_lossage ("invalid %%u value");
13543 return;
13544 }
13545
13546 uval = INTVAL (x);
13547 if ((uval & 0xffff) == 0)
13548 uval >>= 16;
13549
13550 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13551 return;
13552
13553 case 'v':
13554 /* High-order 16 bits of constant for use in signed operand. */
13555 if (! INT_P (x))
13556 output_operand_lossage ("invalid %%v value");
13557 else
13558 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13559 (INTVAL (x) >> 16) & 0xffff);
13560 return;
13561
13562 case 'U':
13563 /* Print `u' if this has an auto-increment or auto-decrement. */
13564 if (MEM_P (x)
13565 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13566 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13567 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13568 putc ('u', file);
13569 return;
13570
13571 case 'V':
13572 /* Print the trap code for this operand. */
13573 switch (GET_CODE (x))
13574 {
13575 case EQ:
13576 fputs ("eq", file); /* 4 */
13577 break;
13578 case NE:
13579 fputs ("ne", file); /* 24 */
13580 break;
13581 case LT:
13582 fputs ("lt", file); /* 16 */
13583 break;
13584 case LE:
13585 fputs ("le", file); /* 20 */
13586 break;
13587 case GT:
13588 fputs ("gt", file); /* 8 */
13589 break;
13590 case GE:
13591 fputs ("ge", file); /* 12 */
13592 break;
13593 case LTU:
13594 fputs ("llt", file); /* 2 */
13595 break;
13596 case LEU:
13597 fputs ("lle", file); /* 6 */
13598 break;
13599 case GTU:
13600 fputs ("lgt", file); /* 1 */
13601 break;
13602 case GEU:
13603 fputs ("lge", file); /* 5 */
13604 break;
13605 default:
13606 output_operand_lossage ("invalid %%V value");
13607 }
13608 break;
13609
13610 case 'w':
13611 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13612 normally. */
13613 if (INT_P (x))
13614 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13615 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13616 else
13617 print_operand (file, x, 0);
13618 return;
13619
13620 case 'x':
13621 /* X is a FPR or Altivec register used in a VSX context. */
13622 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13623 output_operand_lossage ("invalid %%x value");
13624 else
13625 {
13626 int reg = REGNO (x);
13627 int vsx_reg = (FP_REGNO_P (reg)
13628 ? reg - 32
13629 : reg - FIRST_ALTIVEC_REGNO + 32);
13630
13631 #ifdef TARGET_REGNAMES
13632 if (TARGET_REGNAMES)
13633 fprintf (file, "%%vs%d", vsx_reg);
13634 else
13635 #endif
13636 fprintf (file, "%d", vsx_reg);
13637 }
13638 return;
13639
13640 case 'X':
13641 if (MEM_P (x)
13642 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13643 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13644 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13645 putc ('x', file);
13646 return;
13647
13648 case 'Y':
13649 /* Like 'L', for third word of TImode/PTImode. */
13650 if (REG_P (x))
13651 fputs (reg_names[REGNO (x) + 2], file);
13652 else if (MEM_P (x))
13653 {
13654 machine_mode mode = GET_MODE (x);
13655 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13656 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13657 output_address (mode, plus_constant (Pmode,
13658 XEXP (XEXP (x, 0), 0), 8));
13659 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13660 output_address (mode, plus_constant (Pmode,
13661 XEXP (XEXP (x, 0), 0), 8));
13662 else
13663 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13664 if (small_data_operand (x, GET_MODE (x)))
13665 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13666 reg_names[SMALL_DATA_REG]);
13667 }
13668 return;
13669
13670 case 'z':
13671 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13672 x = XVECEXP (x, 0, 1);
13673 /* X is a SYMBOL_REF. Write out the name preceded by a
13674 period and without any trailing data in brackets. Used for function
13675 names. If we are configured for System V (or the embedded ABI) on
13676 the PowerPC, do not emit the period, since those systems do not use
13677 TOCs and the like. */
13678 if (!SYMBOL_REF_P (x))
13679 {
13680 output_operand_lossage ("invalid %%z value");
13681 return;
13682 }
13683
13684 /* For macho, check to see if we need a stub. */
13685 if (TARGET_MACHO)
13686 {
13687 const char *name = XSTR (x, 0);
13688 #if TARGET_MACHO
13689 if (darwin_symbol_stubs
13690 && MACHOPIC_INDIRECT
13691 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13692 name = machopic_indirection_name (x, /*stub_p=*/true);
13693 #endif
13694 assemble_name (file, name);
13695 }
13696 else if (!DOT_SYMBOLS)
13697 assemble_name (file, XSTR (x, 0));
13698 else
13699 rs6000_output_function_entry (file, XSTR (x, 0));
13700 return;
13701
13702 case 'Z':
13703 /* Like 'L', for last word of TImode/PTImode. */
13704 if (REG_P (x))
13705 fputs (reg_names[REGNO (x) + 3], file);
13706 else if (MEM_P (x))
13707 {
13708 machine_mode mode = GET_MODE (x);
13709 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13710 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13711 output_address (mode, plus_constant (Pmode,
13712 XEXP (XEXP (x, 0), 0), 12));
13713 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13714 output_address (mode, plus_constant (Pmode,
13715 XEXP (XEXP (x, 0), 0), 12));
13716 else
13717 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13718 if (small_data_operand (x, GET_MODE (x)))
13719 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13720 reg_names[SMALL_DATA_REG]);
13721 }
13722 return;
13723
13724 /* Print AltiVec memory operand. */
13725 case 'y':
13726 {
13727 rtx tmp;
13728
13729 gcc_assert (MEM_P (x));
13730
13731 tmp = XEXP (x, 0);
13732
13733 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13734 && GET_CODE (tmp) == AND
13735 && CONST_INT_P (XEXP (tmp, 1))
13736 && INTVAL (XEXP (tmp, 1)) == -16)
13737 tmp = XEXP (tmp, 0);
13738 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13739 && GET_CODE (tmp) == PRE_MODIFY)
13740 tmp = XEXP (tmp, 1);
13741 if (REG_P (tmp))
13742 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13743 else
13744 {
13745 if (GET_CODE (tmp) != PLUS
13746 || !REG_P (XEXP (tmp, 0))
13747 || !REG_P (XEXP (tmp, 1)))
13748 {
13749 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13750 break;
13751 }
13752
13753 if (REGNO (XEXP (tmp, 0)) == 0)
13754 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13755 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13756 else
13757 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13758 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13759 }
13760 break;
13761 }
13762
13763 case 0:
13764 if (REG_P (x))
13765 fprintf (file, "%s", reg_names[REGNO (x)]);
13766 else if (MEM_P (x))
13767 {
13768 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13769 know the width from the mode. */
13770 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13771 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13772 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13773 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13774 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13775 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13776 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13777 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13778 else
13779 output_address (GET_MODE (x), XEXP (x, 0));
13780 }
13781 else if (toc_relative_expr_p (x, false,
13782 &tocrel_base_oac, &tocrel_offset_oac))
13783 /* This hack along with a corresponding hack in
13784 rs6000_output_addr_const_extra arranges to output addends
13785 where the assembler expects to find them. E.g.
13786 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13787 without this hack would be output as "x@toc+4". We
13788 want "x+4@toc". */
13789 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13790 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13791 output_addr_const (file, XVECEXP (x, 0, 0));
13792 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13793 output_addr_const (file, XVECEXP (x, 0, 1));
13794 else
13795 output_addr_const (file, x);
13796 return;
13797
13798 case '&':
13799 if (const char *name = get_some_local_dynamic_name ())
13800 assemble_name (file, name);
13801 else
13802 output_operand_lossage ("'%%&' used without any "
13803 "local dynamic TLS references");
13804 return;
13805
13806 default:
13807 output_operand_lossage ("invalid %%xn code");
13808 }
13809 }
13810 \f
13811 /* Print the address of an operand. */
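/* E.g. a bare register prints as register-indirect with a zero offset,
   "0(9)" with the default register names; reg+reg prints base,index with
   r0 ordered second if present (r0 as a base operand means literal zero);
   a PC-relative symbol prints "sym@pcrel", or "sym@got@pcrel" when the
   symbol is not local. */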
13812
13813 void
13814 print_operand_address (FILE *file, rtx x)
13815 {
13816 if (REG_P (x))
13817 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13818
13819 /* Is it a PC-relative address? */
13820 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13821 {
13822 HOST_WIDE_INT offset;
13823
13824 if (GET_CODE (x) == CONST)
13825 x = XEXP (x, 0);
13826
13827 if (GET_CODE (x) == PLUS)
13828 {
13829 offset = INTVAL (XEXP (x, 1));
13830 x = XEXP (x, 0);
13831 }
13832 else
13833 offset = 0;
13834
13835 output_addr_const (file, x);
13836
13837 if (offset)
13838 fprintf (file, "%+" PRId64, offset);
13839
13840 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13841 fprintf (file, "@got");
13842
13843 fprintf (file, "@pcrel");
13844 }
13845 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13846 || GET_CODE (x) == LABEL_REF)
13847 {
13848 output_addr_const (file, x);
13849 if (small_data_operand (x, GET_MODE (x)))
13850 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13851 reg_names[SMALL_DATA_REG]);
13852 else
13853 gcc_assert (!TARGET_TOC);
13854 }
13855 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13856 && REG_P (XEXP (x, 1)))
13857 {
13858 if (REGNO (XEXP (x, 0)) == 0)
13859 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13860 reg_names[ REGNO (XEXP (x, 0)) ]);
13861 else
13862 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13863 reg_names[ REGNO (XEXP (x, 1)) ]);
13864 }
13865 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13866 && CONST_INT_P (XEXP (x, 1)))
13867 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13868 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13869 #if TARGET_MACHO
13870 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13871 && CONSTANT_P (XEXP (x, 1)))
13872 {
13873 fprintf (file, "lo16(");
13874 output_addr_const (file, XEXP (x, 1));
13875 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13876 }
13877 #endif
13878 #if TARGET_ELF
13879 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13880 && CONSTANT_P (XEXP (x, 1)))
13881 {
13882 output_addr_const (file, XEXP (x, 1));
13883 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13884 }
13885 #endif
13886 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13887 {
13888 /* This hack along with a corresponding hack in
13889 rs6000_output_addr_const_extra arranges to output addends
13890 where the assembler expects to find them. E.g.
13891 (lo_sum (reg 9)
13892 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13893 without this hack would be output as "x@toc+8@l(9)". We
13894 want "x+8@toc@l(9)". */
13895 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13896 if (GET_CODE (x) == LO_SUM)
13897 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13898 else
13899 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13900 }
13901 else
13902 output_addr_const (file, x);
13903 }
13904 \f
13905 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13906
13907 bool
13908 rs6000_output_addr_const_extra (FILE *file, rtx x)
13909 {
13910 if (GET_CODE (x) == UNSPEC)
13911 switch (XINT (x, 1))
13912 {
13913 case UNSPEC_TOCREL:
13914 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13915 && REG_P (XVECEXP (x, 0, 1))
13916 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13917 output_addr_const (file, XVECEXP (x, 0, 0));
13918 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13919 {
13920 if (INTVAL (tocrel_offset_oac) >= 0)
13921 fprintf (file, "+");
13922 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13923 }
13924 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13925 {
13926 putc ('-', file);
13927 assemble_name (file, toc_label_name);
13928 need_toc_init = 1;
13929 }
13930 else if (TARGET_ELF)
13931 fputs ("@toc", file);
13932 return true;
13933
13934 #if TARGET_MACHO
13935 case UNSPEC_MACHOPIC_OFFSET:
13936 output_addr_const (file, XVECEXP (x, 0, 0));
13937 putc ('-', file);
13938 machopic_output_function_base_name (file);
13939 return true;
13940 #endif
13941 }
13942 return false;
13943 }
13944 \f
13945 /* Target hook for assembling integer objects. The PowerPC version has
13946 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13947 is defined. It also needs to handle DI-mode objects on 64-bit
13948 targets. */
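/* With -mrelocatable the path below emits, for a constant address X
   (illustrative label number):
	.LCP0:
		.long (X)@fixup
		.section ".fixup","aw"
		.align 2
		.long .LCP0
		.previous  */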
13949
13950 static bool
13951 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13952 {
13953 #ifdef RELOCATABLE_NEEDS_FIXUP
13954 /* Special handling for SI values. */
13955 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13956 {
13957 static int recurse = 0;
13958
13959 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13960 the .fixup section. Since the TOC section is already relocated, we
13961 don't need to mark it here. We used to skip the text section, but it
13962 should never be valid for relocated addresses to be placed in the text
13963 section. */
13964 if (DEFAULT_ABI == ABI_V4
13965 && (TARGET_RELOCATABLE || flag_pic > 1)
13966 && in_section != toc_section
13967 && !recurse
13968 && !CONST_SCALAR_INT_P (x)
13969 && CONSTANT_P (x))
13970 {
13971 char buf[256];
13972
13973 recurse = 1;
13974 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13975 fixuplabelno++;
13976 ASM_OUTPUT_LABEL (asm_out_file, buf);
13977 fprintf (asm_out_file, "\t.long\t(");
13978 output_addr_const (asm_out_file, x);
13979 fprintf (asm_out_file, ")@fixup\n");
13980 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13981 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13982 fprintf (asm_out_file, "\t.long\t");
13983 assemble_name (asm_out_file, buf);
13984 fprintf (asm_out_file, "\n\t.previous\n");
13985 recurse = 0;
13986 return true;
13987 }
13988 /* Remove initial .'s to turn a -mcall-aixdesc function
13989 address into the address of the descriptor, not the function
13990 itself. */
13991 else if (SYMBOL_REF_P (x)
13992 && XSTR (x, 0)[0] == '.'
13993 && DEFAULT_ABI == ABI_AIX)
13994 {
13995 const char *name = XSTR (x, 0);
13996 while (*name == '.')
13997 name++;
13998
13999 fprintf (asm_out_file, "\t.long\t%s\n", name);
14000 return true;
14001 }
14002 }
14003 #endif /* RELOCATABLE_NEEDS_FIXUP */
14004 return default_assemble_integer (x, size, aligned_p);
14005 }
14006
14007 /* Return a template string for assembly to emit when making an
14008 external call. FUNOP is the call mem argument operand number. */
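/* Sample expansions from the cases below (for funop == 0): PC-relative
   gives "bl %z0@notoc"; AIX/ELFv2 gives "bl %z0" followed by a nop for
   the TOC restore; V4 with PIC gives "bl %z0@plt". */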
14009
14010 static const char *
14011 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14012 {
14013 /* -Wformat-overflow workaround, without which gcc thinks that %u
14014 might produce 10 digits. */
14015 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14016
14017 char arg[12];
14018 arg[0] = 0;
14019 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14020 {
14021 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14022 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14023 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14024 sprintf (arg, "(%%&@tlsld)");
14025 }
14026
14027 /* The magic 32768 offset here corresponds to the offset of
14028 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14029 char z[11];
14030 sprintf (z, "%%z%u%s", funop,
14031 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14032 ? "+32768" : ""));
14033
14034 static char str[32]; /* 1 spare */
14035 if (rs6000_pcrel_p ())
14036 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14037 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14038 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14039 sibcall ? "" : "\n\tnop");
14040 else if (DEFAULT_ABI == ABI_V4)
14041 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14042 flag_pic ? "@plt" : "");
14043 #if TARGET_MACHO
 14044   /* If/when we remove the -mlongcall option, we can share the AIX/ELFv2 case.  */
14045 else if (DEFAULT_ABI == ABI_DARWIN)
14046 {
14047 /* The cookie is in operand func+2. */
14048 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14049 int cookie = INTVAL (operands[funop + 2]);
14050 if (cookie & CALL_LONG)
14051 {
14052 tree funname = get_identifier (XSTR (operands[funop], 0));
14053 tree labelname = get_prev_label (funname);
14054 gcc_checking_assert (labelname && !sibcall);
14055
14056 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14057 instruction will reach 'foo', otherwise link as 'bl L42'".
14058 "L42" should be a 'branch island', that will do a far jump to
14059 'foo'. Branch islands are generated in
14060 macho_branch_islands(). */
14061 sprintf (str, "jbsr %%z%u,%.10s", funop,
14062 IDENTIFIER_POINTER (labelname));
14063 }
14064 else
14065 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14066 after the call. */
14067 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14068 }
14069 #endif
14070 else
14071 gcc_unreachable ();
14072 return str;
14073 }
14074
14075 const char *
14076 rs6000_call_template (rtx *operands, unsigned int funop)
14077 {
14078 return rs6000_call_template_1 (operands, funop, false);
14079 }
14080
14081 const char *
14082 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14083 {
14084 return rs6000_call_template_1 (operands, funop, true);
14085 }
14086
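/* For illustration (assumed callee symbol "foo", with %z resolving to
   it), the templates above come out roughly as:

	pcrel:			bl foo@notoc
	AIX/ELFv2:		bl foo
				nop
	V4, -fPIC:		bl foo@plt
	V4 secure PLT, -fPIC:	bl foo+32768@plt

   where the nop after the AIX/ELFv2 call is the slot the linker may
   patch into a TOC restore.  */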
14087 /* As above, for indirect calls. */
14088
14089 static const char *
14090 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14091 bool sibcall)
14092 {
14093 /* -Wformat-overflow workaround, without which gcc thinks that %u
14094 might produce 10 digits. Note that -Wformat-overflow will not
14095 currently warn here for str[], so do not rely on a warning to
14096 ensure str[] is correctly sized. */
14097 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14098
14099 /* Currently, funop is either 0 or 1. The maximum string is always
14100 a !speculate 64-bit __tls_get_addr call.
14101
14102 ABI_ELFv2, pcrel:
14103 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14104 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14105 . 9 crset 2\n\t
14106 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14107 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14108 . 8 beq%T1l-
14109 .---
14110 .142
14111
14112 ABI_AIX:
14113 . 9 ld 2,%3\n\t
14114 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14115 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14116 . 9 crset 2\n\t
14117 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14118 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14119 . 10 beq%T1l-\n\t
14120 . 10 ld 2,%4(1)
14121 .---
14122 .151
14123
14124 ABI_ELFv2:
14125 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14126 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14127 . 9 crset 2\n\t
14128 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14129 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14130 . 10 beq%T1l-\n\t
14131 . 10 ld 2,%3(1)
14132 .---
14133 .142
14134
14135 ABI_V4:
14136 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14137 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14138 . 9 crset 2\n\t
14139 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14140 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14141 . 8 beq%T1l-
14142 .---
14143 .141 */
14144 static char str[160]; /* 8 spare */
14145 char *s = str;
14146 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14147
14148 if (DEFAULT_ABI == ABI_AIX)
14149 s += sprintf (s,
14150 "l%s 2,%%%u\n\t",
14151 ptrload, funop + 3);
14152
14153 /* We don't need the extra code to stop indirect call speculation if
14154 calling via LR. */
14155 bool speculate = (TARGET_MACHO
14156 || rs6000_speculate_indirect_jumps
14157 || (REG_P (operands[funop])
14158 && REGNO (operands[funop]) == LR_REGNO));
14159
14160 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14161 {
14162 const char *rel64 = TARGET_64BIT ? "64" : "";
14163 char tls[29];
14164 tls[0] = 0;
14165 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14166 {
14167 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14168 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14169 rel64, funop + 1);
14170 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14171 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14172 rel64);
14173 }
14174
14175 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14176 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14177 && flag_pic == 2 ? "+32768" : "");
14178 if (!speculate)
14179 {
14180 s += sprintf (s,
14181 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14182 tls, rel64, notoc, funop, addend);
14183 s += sprintf (s, "crset 2\n\t");
14184 }
14185 s += sprintf (s,
14186 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14187 tls, rel64, notoc, funop, addend);
14188 }
14189 else if (!speculate)
14190 s += sprintf (s, "crset 2\n\t");
14191
14192 if (rs6000_pcrel_p ())
14193 {
14194 if (speculate)
14195 sprintf (s, "b%%T%ul", funop);
14196 else
14197 sprintf (s, "beq%%T%ul-", funop);
14198 }
14199 else if (DEFAULT_ABI == ABI_AIX)
14200 {
14201 if (speculate)
14202 sprintf (s,
14203 "b%%T%ul\n\t"
14204 "l%s 2,%%%u(1)",
14205 funop, ptrload, funop + 4);
14206 else
14207 sprintf (s,
14208 "beq%%T%ul-\n\t"
14209 "l%s 2,%%%u(1)",
14210 funop, ptrload, funop + 4);
14211 }
14212 else if (DEFAULT_ABI == ABI_ELFv2)
14213 {
14214 if (speculate)
14215 sprintf (s,
14216 "b%%T%ul\n\t"
14217 "l%s 2,%%%u(1)",
14218 funop, ptrload, funop + 3);
14219 else
14220 sprintf (s,
14221 "beq%%T%ul-\n\t"
14222 "l%s 2,%%%u(1)",
14223 funop, ptrload, funop + 3);
14224 }
14225 else
14226 {
14227 if (speculate)
14228 sprintf (s,
14229 "b%%T%u%s",
14230 funop, sibcall ? "" : "l");
14231 else
14232 sprintf (s,
14233 "beq%%T%u%s-%s",
14234 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14235 }
14236 return str;
14237 }
14238
14239 const char *
14240 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14241 {
14242 return rs6000_indirect_call_template_1 (operands, funop, false);
14243 }
14244
14245 const char *
14246 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14247 {
14248 return rs6000_indirect_call_template_1 (operands, funop, true);
14249 }
14250
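/* Illustration (assumed operand values; ELFv2, call via CTR): with
   -mno-speculate-indirect-jumps the template above produces roughly

	crset 2
	beqctrl-
	ld 2,24(1)

   a conditional branch on a CR bit forced to 1 and hinted not taken,
   discouraging the processor from speculating to a predicted indirect
   target, followed by the TOC restore from the ELFv2 save slot.  */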
14251 #if HAVE_AS_PLTSEQ
14252 /* Output indirect call insns. WHICH identifies the type of sequence. */
14253 const char *
14254 rs6000_pltseq_template (rtx *operands, int which)
14255 {
14256 const char *rel64 = TARGET_64BIT ? "64" : "";
14257 char tls[30];
14258 tls[0] = 0;
14259 if (GET_CODE (operands[3]) == UNSPEC)
14260 {
14261 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14262 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14263 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14264 off, rel64);
14265 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14266 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14267 off, rel64);
14268 }
14269
14270 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14271 static char str[96]; /* 10 spare */
14272 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14273 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14274 && flag_pic == 2 ? "+32768" : "");
14275 switch (which)
14276 {
14277 case RS6000_PLTSEQ_TOCSAVE:
14278 sprintf (str,
14279 "st%s\n\t"
14280 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14281 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14282 tls, rel64);
14283 break;
14284 case RS6000_PLTSEQ_PLT16_HA:
14285 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14286 sprintf (str,
14287 "lis %%0,0\n\t"
14288 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14289 tls, off, rel64);
14290 else
14291 sprintf (str,
14292 "addis %%0,%%1,0\n\t"
14293 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14294 tls, off, rel64, addend);
14295 break;
14296 case RS6000_PLTSEQ_PLT16_LO:
14297 sprintf (str,
14298 "l%s %%0,0(%%1)\n\t"
14299 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14300 TARGET_64BIT ? "d" : "wz",
14301 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14302 break;
14303 case RS6000_PLTSEQ_MTCTR:
14304 sprintf (str,
14305 "mtctr %%1\n\t"
14306 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14307 tls, rel64, addend);
14308 break;
14309 case RS6000_PLTSEQ_PLT_PCREL34:
14310 sprintf (str,
14311 "pl%s %%0,0(0),1\n\t"
14312 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14313 TARGET_64BIT ? "d" : "wz",
14314 tls, rel64);
14315 break;
14316 default:
14317 gcc_unreachable ();
14318 }
14319 return str;
14320 }
14321 #endif
14322 \f
14323 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14324 /* Emit an assembler directive to set symbol visibility for DECL to
14325 VISIBILITY_TYPE. */
14326
14327 static void
14328 rs6000_assemble_visibility (tree decl, int vis)
14329 {
14330 if (TARGET_XCOFF)
14331 return;
14332
14333 /* Functions need to have their entry point symbol visibility set as
14334 well as their descriptor symbol visibility. */
14335 if (DEFAULT_ABI == ABI_AIX
14336 && DOT_SYMBOLS
14337 && TREE_CODE (decl) == FUNCTION_DECL)
14338 {
14339 static const char * const visibility_types[] = {
14340 NULL, "protected", "hidden", "internal"
14341 };
14342
14343 const char *name, *type;
14344
14345 name = ((* targetm.strip_name_encoding)
14346 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14347 type = visibility_types[vis];
14348
14349 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14350 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14351 }
14352 else
14353 default_assemble_visibility (decl, vis);
14354 }
14355 #endif
14356 \f
14357 enum rtx_code
14358 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14359 {
 14360   /* Reversal of FP compares requires care -- an ordered compare
14361 becomes an unordered compare and vice versa. */
14362 if (mode == CCFPmode
14363 && (!flag_finite_math_only
14364 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14365 || code == UNEQ || code == LTGT))
14366 return reverse_condition_maybe_unordered (code);
14367 else
14368 return reverse_condition (code);
14369 }
14370
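/* A host-side C sketch (illustrative only) of why FP reversal must
   move between ordered and unordered codes: with a NaN operand,
   a < b is false, so the reverse of LT must be UNGE, i.e. it must
   also accept the unordered case.  */
#include <math.h>
static int
reversed_lt (double a, double b)
{
  /* UNGE: greater-or-equal, or unordered.  */
  return isgreaterequal (a, b) || isunordered (a, b);
}
/* For all a, b: (a < b) == !reversed_lt (a, b), NaNs included.  */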
14371 /* Generate a compare for CODE. Return a brand-new rtx that
14372 represents the result of the compare. */
14373
14374 static rtx
14375 rs6000_generate_compare (rtx cmp, machine_mode mode)
14376 {
14377 machine_mode comp_mode;
14378 rtx compare_result;
14379 enum rtx_code code = GET_CODE (cmp);
14380 rtx op0 = XEXP (cmp, 0);
14381 rtx op1 = XEXP (cmp, 1);
14382
14383 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14384 comp_mode = CCmode;
14385 else if (FLOAT_MODE_P (mode))
14386 comp_mode = CCFPmode;
14387 else if (code == GTU || code == LTU
14388 || code == GEU || code == LEU)
14389 comp_mode = CCUNSmode;
14390 else if ((code == EQ || code == NE)
14391 && unsigned_reg_p (op0)
14392 && (unsigned_reg_p (op1)
14393 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
 14394     /* These are unsigned values; perhaps there will be a later
 14395        ordering compare that can be shared with this one.  */
14396 comp_mode = CCUNSmode;
14397 else
14398 comp_mode = CCmode;
14399
14400 /* If we have an unsigned compare, make sure we don't have a signed value as
14401 an immediate. */
14402 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14403 && INTVAL (op1) < 0)
14404 {
14405 op0 = copy_rtx_if_shared (op0);
14406 op1 = force_reg (GET_MODE (op0), op1);
14407 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14408 }
14409
14410 /* First, the compare. */
14411 compare_result = gen_reg_rtx (comp_mode);
14412
14413 /* IEEE 128-bit support in VSX registers when we do not have hardware
14414 support. */
14415 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14416 {
14417 rtx libfunc = NULL_RTX;
14418 bool check_nan = false;
14419 rtx dest;
14420
14421 switch (code)
14422 {
14423 case EQ:
14424 case NE:
14425 libfunc = optab_libfunc (eq_optab, mode);
14426 break;
14427
14428 case GT:
14429 case GE:
14430 libfunc = optab_libfunc (ge_optab, mode);
14431 break;
14432
14433 case LT:
14434 case LE:
14435 libfunc = optab_libfunc (le_optab, mode);
14436 break;
14437
14438 case UNORDERED:
14439 case ORDERED:
14440 libfunc = optab_libfunc (unord_optab, mode);
14441 code = (code == UNORDERED) ? NE : EQ;
14442 break;
14443
14444 case UNGE:
14445 case UNGT:
14446 check_nan = true;
14447 libfunc = optab_libfunc (ge_optab, mode);
14448 code = (code == UNGE) ? GE : GT;
14449 break;
14450
14451 case UNLE:
14452 case UNLT:
14453 check_nan = true;
14454 libfunc = optab_libfunc (le_optab, mode);
14455 code = (code == UNLE) ? LE : LT;
14456 break;
14457
14458 case UNEQ:
14459 case LTGT:
14460 check_nan = true;
14461 libfunc = optab_libfunc (eq_optab, mode);
 14462 	  code = (code == UNEQ) ? EQ : NE;
14463 break;
14464
14465 default:
14466 gcc_unreachable ();
14467 }
14468
14469 gcc_assert (libfunc);
14470
14471 if (!check_nan)
14472 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14473 SImode, op0, mode, op1, mode);
14474
14475 /* The library signals an exception for signalling NaNs, so we need to
14476 handle isgreater, etc. by first checking isordered. */
14477 else
14478 {
14479 rtx ne_rtx, normal_dest, unord_dest;
14480 rtx unord_func = optab_libfunc (unord_optab, mode);
14481 rtx join_label = gen_label_rtx ();
14482 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14483 rtx unord_cmp = gen_reg_rtx (comp_mode);
14484
14485
14486 /* Test for either value being a NaN. */
14487 gcc_assert (unord_func);
14488 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14489 SImode, op0, mode, op1, mode);
14490
 14491 	  /* Set the result to 1 if either value is a NaN, and jump to the
 14492 	     join label.  */
14493 dest = gen_reg_rtx (SImode);
14494 emit_move_insn (dest, const1_rtx);
14495 emit_insn (gen_rtx_SET (unord_cmp,
14496 gen_rtx_COMPARE (comp_mode, unord_dest,
14497 const0_rtx)));
14498
14499 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14500 emit_jump_insn (gen_rtx_SET (pc_rtx,
14501 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14502 join_ref,
14503 pc_rtx)));
14504
14505 /* Do the normal comparison, knowing that the values are not
14506 NaNs. */
14507 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14508 SImode, op0, mode, op1, mode);
14509
14510 emit_insn (gen_cstoresi4 (dest,
14511 gen_rtx_fmt_ee (code, SImode, normal_dest,
14512 const0_rtx),
14513 normal_dest, const0_rtx));
14514
 14515 	  /* Join the NaN and non-NaN paths.  Compare dest against 0.  */
14516 emit_label (join_label);
14517 code = NE;
14518 }
14519
14520 emit_insn (gen_rtx_SET (compare_result,
14521 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14522 }
14523
14524 else
14525 {
14526 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14527 CLOBBERs to match cmptf_internal2 pattern. */
14528 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14529 && FLOAT128_IBM_P (GET_MODE (op0))
14530 && TARGET_HARD_FLOAT)
14531 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14532 gen_rtvec (10,
14533 gen_rtx_SET (compare_result,
14534 gen_rtx_COMPARE (comp_mode, op0, op1)),
14535 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14536 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14537 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14538 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14539 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14540 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14541 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14542 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14543 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14544 else if (GET_CODE (op1) == UNSPEC
14545 && XINT (op1, 1) == UNSPEC_SP_TEST)
14546 {
14547 rtx op1b = XVECEXP (op1, 0, 0);
14548 comp_mode = CCEQmode;
14549 compare_result = gen_reg_rtx (CCEQmode);
14550 if (TARGET_64BIT)
14551 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14552 else
14553 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14554 }
14555 else
14556 emit_insn (gen_rtx_SET (compare_result,
14557 gen_rtx_COMPARE (comp_mode, op0, op1)));
14558 }
14559
14560 validate_condition_mode (code, GET_MODE (compare_result));
14561
14562 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14563 }
14564
14565 \f
14566 /* Return the diagnostic message string if the binary operation OP is
14567 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14568
14569 static const char*
14570 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14571 const_tree type1,
14572 const_tree type2)
14573 {
14574 machine_mode mode1 = TYPE_MODE (type1);
14575 machine_mode mode2 = TYPE_MODE (type2);
14576
14577 /* For complex modes, use the inner type. */
14578 if (COMPLEX_MODE_P (mode1))
14579 mode1 = GET_MODE_INNER (mode1);
14580
14581 if (COMPLEX_MODE_P (mode2))
14582 mode2 = GET_MODE_INNER (mode2);
14583
14584 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14585 double to intermix unless -mfloat128-convert. */
14586 if (mode1 == mode2)
14587 return NULL;
14588
14589 if (!TARGET_FLOAT128_CVT)
14590 {
14591 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
14592 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
14593 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
14594 "point types");
14595 }
14596
14597 return NULL;
14598 }
14599
14600 \f
14601 /* Expand floating point conversion to/from __float128 and __ibm128. */
14602
14603 void
14604 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14605 {
14606 machine_mode dest_mode = GET_MODE (dest);
14607 machine_mode src_mode = GET_MODE (src);
14608 convert_optab cvt = unknown_optab;
14609 bool do_move = false;
14610 rtx libfunc = NULL_RTX;
14611 rtx dest2;
14612 typedef rtx (*rtx_2func_t) (rtx, rtx);
14613 rtx_2func_t hw_convert = (rtx_2func_t)0;
14614 size_t kf_or_tf;
14615
14616 struct hw_conv_t {
14617 rtx_2func_t from_df;
14618 rtx_2func_t from_sf;
14619 rtx_2func_t from_si_sign;
14620 rtx_2func_t from_si_uns;
14621 rtx_2func_t from_di_sign;
14622 rtx_2func_t from_di_uns;
14623 rtx_2func_t to_df;
14624 rtx_2func_t to_sf;
14625 rtx_2func_t to_si_sign;
14626 rtx_2func_t to_si_uns;
14627 rtx_2func_t to_di_sign;
14628 rtx_2func_t to_di_uns;
14629 } hw_conversions[2] = {
 14630     /* conversions to/from KFmode */
14631 {
14632 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14633 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14634 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14635 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14636 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14637 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14638 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14639 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14640 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14641 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14642 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14643 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14644 },
14645
 14646     /* conversions to/from TFmode */
14647 {
14648 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14649 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14650 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14651 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14652 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14653 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14654 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14655 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14656 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14657 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14658 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14659 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14660 },
14661 };
14662
14663 if (dest_mode == src_mode)
14664 gcc_unreachable ();
14665
14666 /* Eliminate memory operations. */
14667 if (MEM_P (src))
14668 src = force_reg (src_mode, src);
14669
14670 if (MEM_P (dest))
14671 {
14672 rtx tmp = gen_reg_rtx (dest_mode);
14673 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14674 rs6000_emit_move (dest, tmp, dest_mode);
14675 return;
14676 }
14677
14678 /* Convert to IEEE 128-bit floating point. */
14679 if (FLOAT128_IEEE_P (dest_mode))
14680 {
14681 if (dest_mode == KFmode)
14682 kf_or_tf = 0;
14683 else if (dest_mode == TFmode)
14684 kf_or_tf = 1;
14685 else
14686 gcc_unreachable ();
14687
14688 switch (src_mode)
14689 {
14690 case E_DFmode:
14691 cvt = sext_optab;
14692 hw_convert = hw_conversions[kf_or_tf].from_df;
14693 break;
14694
14695 case E_SFmode:
14696 cvt = sext_optab;
14697 hw_convert = hw_conversions[kf_or_tf].from_sf;
14698 break;
14699
14700 case E_KFmode:
14701 case E_IFmode:
14702 case E_TFmode:
14703 if (FLOAT128_IBM_P (src_mode))
14704 cvt = sext_optab;
14705 else
14706 do_move = true;
14707 break;
14708
14709 case E_SImode:
14710 if (unsigned_p)
14711 {
14712 cvt = ufloat_optab;
14713 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14714 }
14715 else
14716 {
14717 cvt = sfloat_optab;
14718 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14719 }
14720 break;
14721
14722 case E_DImode:
14723 if (unsigned_p)
14724 {
14725 cvt = ufloat_optab;
14726 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14727 }
14728 else
14729 {
14730 cvt = sfloat_optab;
14731 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14732 }
14733 break;
14734
14735 default:
14736 gcc_unreachable ();
14737 }
14738 }
14739
14740 /* Convert from IEEE 128-bit floating point. */
14741 else if (FLOAT128_IEEE_P (src_mode))
14742 {
14743 if (src_mode == KFmode)
14744 kf_or_tf = 0;
14745 else if (src_mode == TFmode)
14746 kf_or_tf = 1;
14747 else
14748 gcc_unreachable ();
14749
14750 switch (dest_mode)
14751 {
14752 case E_DFmode:
14753 cvt = trunc_optab;
14754 hw_convert = hw_conversions[kf_or_tf].to_df;
14755 break;
14756
14757 case E_SFmode:
14758 cvt = trunc_optab;
14759 hw_convert = hw_conversions[kf_or_tf].to_sf;
14760 break;
14761
14762 case E_KFmode:
14763 case E_IFmode:
14764 case E_TFmode:
14765 if (FLOAT128_IBM_P (dest_mode))
14766 cvt = trunc_optab;
14767 else
14768 do_move = true;
14769 break;
14770
14771 case E_SImode:
14772 if (unsigned_p)
14773 {
14774 cvt = ufix_optab;
14775 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14776 }
14777 else
14778 {
14779 cvt = sfix_optab;
14780 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14781 }
14782 break;
14783
14784 case E_DImode:
14785 if (unsigned_p)
14786 {
14787 cvt = ufix_optab;
14788 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14789 }
14790 else
14791 {
14792 cvt = sfix_optab;
14793 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14794 }
14795 break;
14796
14797 default:
14798 gcc_unreachable ();
14799 }
14800 }
14801
14802 /* Both IBM format. */
14803 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14804 do_move = true;
14805
14806 else
14807 gcc_unreachable ();
14808
14809 /* Handle conversion between TFmode/KFmode/IFmode. */
14810 if (do_move)
14811 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14812
14813 /* Handle conversion if we have hardware support. */
14814 else if (TARGET_FLOAT128_HW && hw_convert)
14815 emit_insn ((hw_convert) (dest, src));
14816
14817 /* Call an external function to do the conversion. */
14818 else if (cvt != unknown_optab)
14819 {
14820 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14821 gcc_assert (libfunc != NULL_RTX);
14822
14823 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14824 src, src_mode);
14825
14826 gcc_assert (dest2 != NULL_RTX);
14827 if (!rtx_equal_p (dest, dest2))
14828 emit_move_insn (dest, dest2);
14829 }
14830
14831 else
14832 gcc_unreachable ();
14833
14834 return;
14835 }
14836
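/* Illustrative usage only (assumes -mfloat128, with __float128 being
   KFmode): each conversion below funnels through the expander above,
   using ISA 3.0 instructions under -mfloat128-hardware and libgcc
   routines such as __extenddfkf2 otherwise.  */
__float128
f128_from_double (double d)
{
  return d;			/* DFmode -> KFmode.  */
}

long long
ll_from_f128 (__float128 q)
{
  return (long long) q;		/* KFmode -> DImode (signed).  */
}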
14837 \f
14838 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14839 can be used as that dest register. Return the dest register. */
14840
14841 rtx
14842 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14843 {
14844 if (op2 == const0_rtx)
14845 return op1;
14846
14847 if (GET_CODE (scratch) == SCRATCH)
14848 scratch = gen_reg_rtx (mode);
14849
14850 if (logical_operand (op2, mode))
14851 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14852 else
14853 emit_insn (gen_rtx_SET (scratch,
14854 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14855
14856 return scratch;
14857 }
14858
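/* Host-side sketch of the reduction above: equality against OP2 is
   turned into a test against zero of either an XOR (when OP2 is a
   logical-operand immediate) or a subtraction.  */
static unsigned long
eq_via_xor (unsigned long a, unsigned long b)
{
  return (a ^ b) == 0;		/* a == b.  */
}

static unsigned long
eq_via_sub (unsigned long a, unsigned long b)
{
  return (a - b) == 0;		/* a == b, via a plus negated b.  */
}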
14859 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14860 requires this. The result is mode MODE. */
14861 rtx
14862 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14863 {
14864 rtx cond[2];
14865 int n = 0;
14866 if (code == LTGT || code == LE || code == UNLT)
14867 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14868 if (code == LTGT || code == GE || code == UNGT)
14869 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14870 if (code == LE || code == GE || code == UNEQ)
14871 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14872 if (code == UNLT || code == UNGT || code == UNEQ)
14873 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14874
14875 gcc_assert (n == 2);
14876
14877 rtx cc = gen_reg_rtx (CCEQmode);
14878 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14879 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14880
14881 return cc;
14882 }
14883
14884 void
14885 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14886 {
14887 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14888 rtx_code cond_code = GET_CODE (condition_rtx);
14889
14890 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14891 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14892 ;
14893 else if (cond_code == NE
14894 || cond_code == GE || cond_code == LE
14895 || cond_code == GEU || cond_code == LEU
14896 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14897 {
14898 rtx not_result = gen_reg_rtx (CCEQmode);
14899 rtx not_op, rev_cond_rtx;
14900 machine_mode cc_mode;
14901
14902 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14903
14904 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14905 SImode, XEXP (condition_rtx, 0), const0_rtx);
14906 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14907 emit_insn (gen_rtx_SET (not_result, not_op));
14908 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14909 }
14910
14911 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14912 if (op_mode == VOIDmode)
14913 op_mode = GET_MODE (XEXP (operands[1], 1));
14914
14915 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14916 {
14917 PUT_MODE (condition_rtx, DImode);
14918 convert_move (operands[0], condition_rtx, 0);
14919 }
14920 else
14921 {
14922 PUT_MODE (condition_rtx, SImode);
14923 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14924 }
14925 }
14926
 14927 /* Emit a conditional branch to the label in operands[3], testing the
 14928    comparison given by operands[0].  */
14928
14929 void
14930 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14931 {
14932 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14933 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14934 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14935 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14936 }
14937
14938 /* Return the string to output a conditional branch to LABEL, which is
14939 the operand template of the label, or NULL if the branch is really a
14940 conditional return.
14941
14942 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14943 condition code register and its mode specifies what kind of
14944 comparison we made.
14945
14946 REVERSED is nonzero if we should reverse the sense of the comparison.
14947
14948 INSN is the insn. */
14949
14950 char *
14951 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14952 {
14953 static char string[64];
14954 enum rtx_code code = GET_CODE (op);
14955 rtx cc_reg = XEXP (op, 0);
14956 machine_mode mode = GET_MODE (cc_reg);
14957 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14958 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14959 int really_reversed = reversed ^ need_longbranch;
14960 char *s = string;
14961 const char *ccode;
14962 const char *pred;
14963 rtx note;
14964
14965 validate_condition_mode (code, mode);
14966
14967 /* Work out which way this really branches. We could use
14968 reverse_condition_maybe_unordered here always but this
14969 makes the resulting assembler clearer. */
14970 if (really_reversed)
14971 {
 14972       /* Reversal of FP compares requires care -- an ordered compare
14973 becomes an unordered compare and vice versa. */
14974 if (mode == CCFPmode)
14975 code = reverse_condition_maybe_unordered (code);
14976 else
14977 code = reverse_condition (code);
14978 }
14979
14980 switch (code)
14981 {
14982 /* Not all of these are actually distinct opcodes, but
14983 we distinguish them for clarity of the resulting assembler. */
14984 case NE: case LTGT:
14985 ccode = "ne"; break;
14986 case EQ: case UNEQ:
14987 ccode = "eq"; break;
14988 case GE: case GEU:
14989 ccode = "ge"; break;
14990 case GT: case GTU: case UNGT:
14991 ccode = "gt"; break;
14992 case LE: case LEU:
14993 ccode = "le"; break;
14994 case LT: case LTU: case UNLT:
14995 ccode = "lt"; break;
14996 case UNORDERED: ccode = "un"; break;
14997 case ORDERED: ccode = "nu"; break;
14998 case UNGE: ccode = "nl"; break;
14999 case UNLE: ccode = "ng"; break;
15000 default:
15001 gcc_unreachable ();
15002 }
15003
15004 /* Maybe we have a guess as to how likely the branch is. */
15005 pred = "";
15006 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15007 if (note != NULL_RTX)
15008 {
15009 /* PROB is the difference from 50%. */
15010 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15011 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15012
15013 /* Only hint for highly probable/improbable branches on newer cpus when
15014 we have real profile data, as static prediction overrides processor
15015 dynamic prediction. For older cpus we may as well always hint, but
15016 assume not taken for branches that are very close to 50% as a
15017 mispredicted taken branch is more expensive than a
15018 mispredicted not-taken branch. */
15019 if (rs6000_always_hint
15020 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15021 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15022 && br_prob_note_reliable_p (note)))
15023 {
15024 if (abs (prob) > REG_BR_PROB_BASE / 20
15025 && ((prob > 0) ^ need_longbranch))
15026 pred = "+";
15027 else
15028 pred = "-";
15029 }
15030 }
15031
15032 if (label == NULL)
15033 s += sprintf (s, "b%slr%s ", ccode, pred);
15034 else
15035 s += sprintf (s, "b%s%s ", ccode, pred);
15036
15037 /* We need to escape any '%' characters in the reg_names string.
15038 Assume they'd only be the first character.... */
15039 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15040 *s++ = '%';
15041 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15042
15043 if (label != NULL)
15044 {
15045 /* If the branch distance was too far, we may have to use an
15046 unconditional branch to go the distance. */
15047 if (need_longbranch)
15048 s += sprintf (s, ",$+8\n\tb %s", label);
15049 else
15050 s += sprintf (s, ",%s", label);
15051 }
15052
15053 return string;
15054 }
15055
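/* Illustration (assumed CR field 1 and label .L2): a likely-taken
   branch prints as "bgt+ 1,.L2".  When the target is too far for the
   16-bit branch displacement, the sense is reversed and an
   unconditional branch covers the distance:

	ble- 1,$+8
	b .L2
*/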
15056 /* Return insn for VSX or Altivec comparisons. */
15057
15058 static rtx
15059 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15060 {
15061 rtx mask;
15062 machine_mode mode = GET_MODE (op0);
15063
15064 switch (code)
15065 {
15066 default:
15067 break;
15068
15069 case GE:
15070 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15071 return NULL_RTX;
15072 /* FALLTHRU */
15073
15074 case EQ:
15075 case GT:
15076 case GTU:
15077 case ORDERED:
15078 case UNORDERED:
15079 case UNEQ:
15080 case LTGT:
15081 mask = gen_reg_rtx (mode);
15082 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15083 return mask;
15084 }
15085
15086 return NULL_RTX;
15087 }
15088
15089 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15090 DMODE is expected destination mode. This is a recursive function. */
15091
15092 static rtx
15093 rs6000_emit_vector_compare (enum rtx_code rcode,
15094 rtx op0, rtx op1,
15095 machine_mode dmode)
15096 {
15097 rtx mask;
15098 bool swap_operands = false;
15099 bool try_again = false;
15100
15101 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15102 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15103
15104 /* See if the comparison works as is. */
15105 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15106 if (mask)
15107 return mask;
15108
15109 switch (rcode)
15110 {
15111 case LT:
15112 rcode = GT;
15113 swap_operands = true;
15114 try_again = true;
15115 break;
15116 case LTU:
15117 rcode = GTU;
15118 swap_operands = true;
15119 try_again = true;
15120 break;
15121 case NE:
15122 case UNLE:
15123 case UNLT:
15124 case UNGE:
15125 case UNGT:
15126 /* Invert condition and try again.
15127 e.g., A != B becomes ~(A==B). */
15128 {
15129 enum rtx_code rev_code;
15130 enum insn_code nor_code;
15131 rtx mask2;
15132
15133 rev_code = reverse_condition_maybe_unordered (rcode);
15134 if (rev_code == UNKNOWN)
15135 return NULL_RTX;
15136
15137 nor_code = optab_handler (one_cmpl_optab, dmode);
15138 if (nor_code == CODE_FOR_nothing)
15139 return NULL_RTX;
15140
15141 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15142 if (!mask2)
15143 return NULL_RTX;
15144
15145 mask = gen_reg_rtx (dmode);
15146 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15147 return mask;
15148 }
15149 break;
15150 case GE:
15151 case GEU:
15152 case LE:
15153 case LEU:
15154 /* Try GT/GTU/LT/LTU OR EQ */
15155 {
15156 rtx c_rtx, eq_rtx;
15157 enum insn_code ior_code;
15158 enum rtx_code new_code;
15159
15160 switch (rcode)
15161 {
15162 case GE:
15163 new_code = GT;
15164 break;
15165
15166 case GEU:
15167 new_code = GTU;
15168 break;
15169
15170 case LE:
15171 new_code = LT;
15172 break;
15173
15174 case LEU:
15175 new_code = LTU;
15176 break;
15177
15178 default:
15179 gcc_unreachable ();
15180 }
15181
15182 ior_code = optab_handler (ior_optab, dmode);
15183 if (ior_code == CODE_FOR_nothing)
15184 return NULL_RTX;
15185
15186 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15187 if (!c_rtx)
15188 return NULL_RTX;
15189
15190 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15191 if (!eq_rtx)
15192 return NULL_RTX;
15193
15194 mask = gen_reg_rtx (dmode);
15195 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15196 return mask;
15197 }
15198 break;
15199 default:
15200 return NULL_RTX;
15201 }
15202
15203 if (try_again)
15204 {
15205 if (swap_operands)
15206 std::swap (op0, op1);
15207
15208 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15209 if (mask)
15210 return mask;
15211 }
15212
15213 /* You only get two chances. */
15214 return NULL_RTX;
15215 }
15216
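/* A minimal sketch, using GCC's generic vector extension, of the two
   rewrites performed above for integer elements: NE becomes the
   complement of EQ, and GE becomes GT ORed with EQ.  */
typedef int v4si __attribute__ ((vector_size (16)));

static v4si
vec_ne_sketch (v4si a, v4si b)
{
  return ~(a == b);		/* A != B as ~(A == B).  */
}

static v4si
vec_ge_sketch (v4si a, v4si b)
{
  return (a > b) | (a == b);	/* A >= B as (A > B) | (A == B).  */
}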
15217 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15218 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15219 operands for the relation operation COND. */
15220
15221 int
15222 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15223 rtx cond, rtx cc_op0, rtx cc_op1)
15224 {
15225 machine_mode dest_mode = GET_MODE (dest);
15226 machine_mode mask_mode = GET_MODE (cc_op0);
15227 enum rtx_code rcode = GET_CODE (cond);
15228 machine_mode cc_mode = CCmode;
15229 rtx mask;
15230 rtx cond2;
15231 bool invert_move = false;
15232
15233 if (VECTOR_UNIT_NONE_P (dest_mode))
15234 return 0;
15235
15236 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15237 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15238
15239 switch (rcode)
15240 {
15241 /* Swap operands if we can, and fall back to doing the operation as
15242 specified, and doing a NOR to invert the test. */
15243 case NE:
15244 case UNLE:
15245 case UNLT:
15246 case UNGE:
15247 case UNGT:
15248 /* Invert condition and try again.
15249 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15250 invert_move = true;
15251 rcode = reverse_condition_maybe_unordered (rcode);
15252 if (rcode == UNKNOWN)
15253 return 0;
15254 break;
15255
15256 case GE:
15257 case LE:
15258 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15259 {
15260 /* Invert condition to avoid compound test. */
15261 invert_move = true;
15262 rcode = reverse_condition (rcode);
15263 }
15264 break;
15265
15266 case GTU:
15267 case GEU:
15268 case LTU:
15269 case LEU:
15270 /* Mark unsigned tests with CCUNSmode. */
15271 cc_mode = CCUNSmode;
15272
15273 /* Invert condition to avoid compound test if necessary. */
15274 if (rcode == GEU || rcode == LEU)
15275 {
15276 invert_move = true;
15277 rcode = reverse_condition (rcode);
15278 }
15279 break;
15280
15281 default:
15282 break;
15283 }
15284
15285 /* Get the vector mask for the given relational operations. */
15286 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15287
15288 if (!mask)
15289 return 0;
15290
15291 if (invert_move)
15292 std::swap (op_true, op_false);
15293
 15294   /* Optimize vec1 == vec2, knowing that the mask lanes are already -1/0.  */
15295 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15296 && (GET_CODE (op_true) == CONST_VECTOR
15297 || GET_CODE (op_false) == CONST_VECTOR))
15298 {
15299 rtx constant_0 = CONST0_RTX (dest_mode);
15300 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15301
15302 if (op_true == constant_m1 && op_false == constant_0)
15303 {
15304 emit_move_insn (dest, mask);
15305 return 1;
15306 }
15307
15308 else if (op_true == constant_0 && op_false == constant_m1)
15309 {
15310 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15311 return 1;
15312 }
15313
15314 /* If we can't use the vector comparison directly, perhaps we can use
15315 the mask for the true or false fields, instead of loading up a
15316 constant. */
15317 if (op_true == constant_m1)
15318 op_true = mask;
15319
15320 if (op_false == constant_0)
15321 op_false = mask;
15322 }
15323
15324 if (!REG_P (op_true) && !SUBREG_P (op_true))
15325 op_true = force_reg (dest_mode, op_true);
15326
15327 if (!REG_P (op_false) && !SUBREG_P (op_false))
15328 op_false = force_reg (dest_mode, op_false);
15329
15330 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
15331 CONST0_RTX (dest_mode));
15332 emit_insn (gen_rtx_SET (dest,
15333 gen_rtx_IF_THEN_ELSE (dest_mode,
15334 cond2,
15335 op_true,
15336 op_false)));
15337 return 1;
15338 }
15339
 15340 /* Possibly emit the xsmaxcdp and xsmincdp instructions to implement a
 15341    maximum or minimum with "C" semantics.
 15342 
 15343    Unless -ffast-math is in effect, these instructions cannot replace
 15344    forms that implicitly reverse the condition, because the comparison
 15345    might involve a NaN or a signed zero.
15346
15347 I.e. the following can be replaced all of the time
15348 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15349 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15350 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15351 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15352
15353 The following can be replaced only if -ffast-math is used:
15354 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15355 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15356 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15357 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15358
15359 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15360 nonzero/true, FALSE_COND if it is zero/false.
15361
 15362    Return false if we can't generate the appropriate minimum or maximum, and
 15363    true if we did generate the minimum or maximum.  */
15364
15365 static bool
15366 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15367 {
15368 enum rtx_code code = GET_CODE (op);
15369 rtx op0 = XEXP (op, 0);
15370 rtx op1 = XEXP (op, 1);
15371 machine_mode compare_mode = GET_MODE (op0);
15372 machine_mode result_mode = GET_MODE (dest);
15373 bool max_p = false;
15374
15375 if (result_mode != compare_mode)
15376 return false;
15377
15378 if (code == GE || code == GT)
15379 max_p = true;
15380 else if (code == LE || code == LT)
15381 max_p = false;
15382 else
15383 return false;
15384
15385 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15386 ;
15387
 15388   /* Only when NaNs and signed zeros are not honored can smax be
 15389      used for `op0 < op1 ? op1 : op0`, and smin for
 15390      `op0 > op1 ? op1 : op0`.  */
15391 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15392 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15393 max_p = !max_p;
15394
15395 else
15396 return false;
15397
15398 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15399 return true;
15400 }
15401
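/* Sketch of why the swapped forms need -ffast-math: the two sources
   below differ when an operand is a NaN, so only when NaNs are not
   honored may both be implemented with xsmaxcdp.  */
static double
max_c (double a, double b)
{
  return a > b ? a : b;		/* "C" maximum; maps to xsmaxcdp.  */
}

static double
max_swapped (double a, double b)
{
  return a < b ? b : a;		/* Yields a (the NaN) when a is a NaN.  */
}
/* max_c (NaN, 1.0) is 1.0, while max_swapped (NaN, 1.0) is the NaN.  */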
15402 /* Possibly emit a floating point conditional move by generating a compare that
15403 sets a mask instruction and a XXSEL select instruction.
15404
15405 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15406 nonzero/true, FALSE_COND if it is zero/false.
15407
15408 Return false if the operation cannot be generated, and true if we could
15409 generate the instruction. */
15410
15411 static bool
15412 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15413 {
15414 enum rtx_code code = GET_CODE (op);
15415 rtx op0 = XEXP (op, 0);
15416 rtx op1 = XEXP (op, 1);
15417 machine_mode result_mode = GET_MODE (dest);
15418 rtx compare_rtx;
15419 rtx cmove_rtx;
15420 rtx clobber_rtx;
15421
15422 if (!can_create_pseudo_p ())
 15423     return false;
15424
15425 switch (code)
15426 {
15427 case EQ:
15428 case GE:
15429 case GT:
15430 break;
15431
15432 case NE:
15433 case LT:
15434 case LE:
15435 code = swap_condition (code);
15436 std::swap (op0, op1);
15437 break;
15438
15439 default:
15440 return false;
15441 }
15442
15443 /* Generate: [(parallel [(set (dest)
15444 (if_then_else (op (cmp1) (cmp2))
15445 (true)
15446 (false)))
15447 (clobber (scratch))])]. */
15448
15449 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15450 cmove_rtx = gen_rtx_SET (dest,
15451 gen_rtx_IF_THEN_ELSE (result_mode,
15452 compare_rtx,
15453 true_cond,
15454 false_cond));
15455
15456 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15457 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15458 gen_rtvec (2, cmove_rtx, clobber_rtx)));
15459
15460 return true;
15461 }
15462
15463 /* Helper function to return true if the target has instructions to do a
15464 compare and set mask instruction that can be used with XXSEL to implement a
15465 conditional move. It is also assumed that such a target also supports the
15466 "C" minimum and maximum instructions. */
15467
15468 static bool
15469 have_compare_and_set_mask (machine_mode mode)
15470 {
15471 switch (mode)
15472 {
15473 case E_SFmode:
15474 case E_DFmode:
15475 return TARGET_P9_MINMAX;
15476
15477 default:
15478 break;
15479 }
15480
15481 return false;
15482 }
15483
15484 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
15485 operands of the last comparison is nonzero/true, FALSE_COND if it
 15486    is zero/false.  Return false if the hardware has no such operation.  */
15487
15488 bool
15489 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15490 {
15491 enum rtx_code code = GET_CODE (op);
15492 rtx op0 = XEXP (op, 0);
15493 rtx op1 = XEXP (op, 1);
15494 machine_mode compare_mode = GET_MODE (op0);
15495 machine_mode result_mode = GET_MODE (dest);
15496 rtx temp;
15497 bool is_against_zero;
15498
15499 /* These modes should always match. */
15500 if (GET_MODE (op1) != compare_mode
15501 /* In the isel case however, we can use a compare immediate, so
15502 op1 may be a small constant. */
15503 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
15504 return false;
15505 if (GET_MODE (true_cond) != result_mode)
15506 return false;
15507 if (GET_MODE (false_cond) != result_mode)
15508 return false;
15509
15510 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15511 instructions. */
15512 if (have_compare_and_set_mask (compare_mode)
15513 && have_compare_and_set_mask (result_mode))
15514 {
15515 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
15516 return true;
15517
15518 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
15519 return true;
15520 }
15521
15522 /* Don't allow using floating point comparisons for integer results for
15523 now. */
15524 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
15525 return false;
15526
15527 /* First, work out if the hardware can do this at all, or
15528 if it's too slow.... */
15529 if (!FLOAT_MODE_P (compare_mode))
15530 {
15531 if (TARGET_ISEL)
15532 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
15533 return false;
15534 }
15535
15536 is_against_zero = op1 == CONST0_RTX (compare_mode);
15537
15538 /* A floating-point subtract might overflow, underflow, or produce
15539 an inexact result, thus changing the floating-point flags, so it
15540 can't be generated if we care about that. It's safe if one side
15541 of the construct is zero, since then no subtract will be
15542 generated. */
15543 if (SCALAR_FLOAT_MODE_P (compare_mode)
15544 && flag_trapping_math && ! is_against_zero)
15545 return false;
15546
 15547   /* Eliminate half of the comparisons by switching operands; this
 15548      makes the remaining code simpler.  */
15549 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15550 || code == LTGT || code == LT || code == UNLE)
15551 {
15552 code = reverse_condition_maybe_unordered (code);
15553 temp = true_cond;
15554 true_cond = false_cond;
15555 false_cond = temp;
15556 }
15557
 15558   /* UNEQ and LTGT take four instructions for a comparison with zero;
 15559      it'll probably be faster to use a branch here too.  */
15560 if (code == UNEQ && HONOR_NANS (compare_mode))
15561 return false;
15562
15563 /* We're going to try to implement comparisons by performing
15564 a subtract, then comparing against zero. Unfortunately,
15565 Inf - Inf is NaN which is not zero, and so if we don't
15566 know that the operand is finite and the comparison
 15567      would treat EQ differently from UNORDERED, we can't do it.  */
15568 if (HONOR_INFINITIES (compare_mode)
15569 && code != GT && code != UNGE
15570 && (!CONST_DOUBLE_P (op1)
15571 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15572 /* Constructs of the form (a OP b ? a : b) are safe. */
15573 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15574 || (! rtx_equal_p (op0, true_cond)
15575 && ! rtx_equal_p (op1, true_cond))))
15576 return false;
15577
15578 /* At this point we know we can use fsel. */
15579
15580 /* Don't allow compare_mode other than SFmode or DFmode, for others there
15581 is no fsel instruction. */
15582 if (compare_mode != SFmode && compare_mode != DFmode)
15583 return false;
15584
15585 /* Reduce the comparison to a comparison against zero. */
15586 if (! is_against_zero)
15587 {
15588 temp = gen_reg_rtx (compare_mode);
15589 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15590 op0 = temp;
15591 op1 = CONST0_RTX (compare_mode);
15592 }
15593
15594 /* If we don't care about NaNs we can reduce some of the comparisons
15595 down to faster ones. */
15596 if (! HONOR_NANS (compare_mode))
15597 switch (code)
15598 {
15599 case GT:
15600 code = LE;
15601 temp = true_cond;
15602 true_cond = false_cond;
15603 false_cond = temp;
15604 break;
15605 case UNGE:
15606 code = GE;
15607 break;
15608 case UNEQ:
15609 code = EQ;
15610 break;
15611 default:
15612 break;
15613 }
15614
15615 /* Now, reduce everything down to a GE. */
15616 switch (code)
15617 {
15618 case GE:
15619 break;
15620
15621 case LE:
15622 temp = gen_reg_rtx (compare_mode);
15623 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15624 op0 = temp;
15625 break;
15626
15627 case ORDERED:
15628 temp = gen_reg_rtx (compare_mode);
15629 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15630 op0 = temp;
15631 break;
15632
15633 case EQ:
15634 temp = gen_reg_rtx (compare_mode);
15635 emit_insn (gen_rtx_SET (temp,
15636 gen_rtx_NEG (compare_mode,
15637 gen_rtx_ABS (compare_mode, op0))));
15638 op0 = temp;
15639 break;
15640
15641 case UNGE:
15642 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15643 temp = gen_reg_rtx (result_mode);
15644 emit_insn (gen_rtx_SET (temp,
15645 gen_rtx_IF_THEN_ELSE (result_mode,
15646 gen_rtx_GE (VOIDmode,
15647 op0, op1),
15648 true_cond, false_cond)));
15649 false_cond = true_cond;
15650 true_cond = temp;
15651
15652 temp = gen_reg_rtx (compare_mode);
15653 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15654 op0 = temp;
15655 break;
15656
15657 case GT:
15658 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15659 temp = gen_reg_rtx (result_mode);
15660 emit_insn (gen_rtx_SET (temp,
15661 gen_rtx_IF_THEN_ELSE (result_mode,
15662 gen_rtx_GE (VOIDmode,
15663 op0, op1),
15664 true_cond, false_cond)));
15665 true_cond = false_cond;
15666 false_cond = temp;
15667
15668 temp = gen_reg_rtx (compare_mode);
15669 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15670 op0 = temp;
15671 break;
15672
15673 default:
15674 gcc_unreachable ();
15675 }
15676
15677 emit_insn (gen_rtx_SET (dest,
15678 gen_rtx_IF_THEN_ELSE (result_mode,
15679 gen_rtx_GE (VOIDmode,
15680 op0, op1),
15681 true_cond, false_cond)));
15682 return true;
15683 }
15684
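/* Host-side sketch of the GE reduction above (assumes no NaNs and no
   trapping math): fsel implements A >= 0.0 ? B : C, so for example an
   LE test is mapped onto GE by negating the subtraction result.  */
static double
sel_le (double a, double b, double x, double y)
{
  double t = -(a - b);		/* t >= 0.0 iff a <= b (no NaNs).  */
  return t >= 0.0 ? x : y;	/* The shape a single fsel implements.  */
}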
15685 /* Same as above, but for ints (isel). */
15686
15687 bool
15688 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15689 {
15690 rtx condition_rtx, cr;
15691 machine_mode mode = GET_MODE (dest);
15692 enum rtx_code cond_code;
15693 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15694 bool signedp;
15695
15696 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15697 return false;
15698
15699 /* We still have to do the compare, because isel doesn't do a
15700 compare, it just looks at the CRx bits set by a previous compare
15701 instruction. */
15702 condition_rtx = rs6000_generate_compare (op, mode);
15703 cond_code = GET_CODE (condition_rtx);
15704 cr = XEXP (condition_rtx, 0);
15705 signedp = GET_MODE (cr) == CCmode;
15706
15707 isel_func = (mode == SImode
15708 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15709 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15710
15711 switch (cond_code)
15712 {
15713 case LT: case GT: case LTU: case GTU: case EQ:
15714 /* isel handles these directly. */
15715 break;
15716
15717 default:
15718 /* We need to swap the sense of the comparison. */
15719 {
15720 std::swap (false_cond, true_cond);
15721 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15722 }
15723 break;
15724 }
15725
15726 false_cond = force_reg (mode, false_cond);
15727 if (true_cond != const0_rtx)
15728 true_cond = force_reg (mode, true_cond);
15729
15730 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15731
15732 return true;
15733 }
15734
15735 void
15736 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15737 {
15738 machine_mode mode = GET_MODE (op0);
15739 enum rtx_code c;
15740 rtx target;
15741
15742 /* VSX/altivec have direct min/max insns. */
15743 if ((code == SMAX || code == SMIN)
15744 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15745 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15746 {
15747 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15748 return;
15749 }
15750
15751 if (code == SMAX || code == SMIN)
15752 c = GE;
15753 else
15754 c = GEU;
15755
15756 if (code == SMAX || code == UMAX)
15757 target = emit_conditional_move (dest, c, op0, op1, mode,
15758 op0, op1, mode, 0);
15759 else
15760 target = emit_conditional_move (dest, c, op0, op1, mode,
15761 op1, op0, mode, 0);
15762 gcc_assert (target);
15763 if (target != dest)
15764 emit_move_insn (dest, target);
15765 }
15766
15767 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15768 COND is true. Mark the jump as unlikely to be taken. */
15769
15770 static void
15771 emit_unlikely_jump (rtx cond, rtx label)
15772 {
15773 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15774 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15775 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15776 }
15777
15778 /* A subroutine of the atomic operation splitters. Emit a load-locked
 15779    instruction in MODE.  For QI/HImode, possibly use a pattern that includes
15780 the zero_extend operation. */
15781
15782 static void
15783 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15784 {
15785 rtx (*fn) (rtx, rtx) = NULL;
15786
15787 switch (mode)
15788 {
15789 case E_QImode:
15790 fn = gen_load_lockedqi;
15791 break;
15792 case E_HImode:
15793 fn = gen_load_lockedhi;
15794 break;
15795 case E_SImode:
15796 if (GET_MODE (mem) == QImode)
15797 fn = gen_load_lockedqi_si;
15798 else if (GET_MODE (mem) == HImode)
15799 fn = gen_load_lockedhi_si;
15800 else
15801 fn = gen_load_lockedsi;
15802 break;
15803 case E_DImode:
15804 fn = gen_load_lockeddi;
15805 break;
15806 case E_TImode:
15807 fn = gen_load_lockedti;
15808 break;
15809 default:
15810 gcc_unreachable ();
15811 }
15812 emit_insn (fn (reg, mem));
15813 }
15814
15815 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15816 instruction in MODE. */
15817
15818 static void
15819 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15820 {
15821 rtx (*fn) (rtx, rtx, rtx) = NULL;
15822
15823 switch (mode)
15824 {
15825 case E_QImode:
15826 fn = gen_store_conditionalqi;
15827 break;
15828 case E_HImode:
15829 fn = gen_store_conditionalhi;
15830 break;
15831 case E_SImode:
15832 fn = gen_store_conditionalsi;
15833 break;
15834 case E_DImode:
15835 fn = gen_store_conditionaldi;
15836 break;
15837 case E_TImode:
15838 fn = gen_store_conditionalti;
15839 break;
15840 default:
15841 gcc_unreachable ();
15842 }
15843
15844 /* Emit sync before stwcx. to address PPC405 Erratum. */
15845 if (PPC405_ERRATUM77)
15846 emit_insn (gen_hwsync ());
15847
15848 emit_insn (fn (res, mem, val));
15849 }
15850
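/* A minimal sketch (assumed inline-asm spelling; relaxed ordering,
   SImode only) of the load-locked/store-conditional shape the two
   helpers above emit:  */
static inline int
atomic_fetch_add_si (int *p, int v)
{
  int old, tmp;
  __asm__ __volatile__
    ("1:	lwarx %0,0,%3\n\t"
     "add %1,%0,%4\n\t"
     "stwcx. %1,0,%3\n\t"
     "bne- 1b"			/* Retry if the reservation was lost.  */
     : "=&r" (old), "=&r" (tmp), "+m" (*p)
     : "r" (p), "r" (v)
     : "cr0");
  return old;
}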
15851 /* Expand barriers before and after a load_locked/store_cond sequence. */
15852
15853 static rtx
15854 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15855 {
15856 rtx addr = XEXP (mem, 0);
15857
15858 if (!legitimate_indirect_address_p (addr, reload_completed)
15859 && !legitimate_indexed_address_p (addr, reload_completed))
15860 {
15861 addr = force_reg (Pmode, addr);
15862 mem = replace_equiv_address_nv (mem, addr);
15863 }
15864
15865 switch (model)
15866 {
15867 case MEMMODEL_RELAXED:
15868 case MEMMODEL_CONSUME:
15869 case MEMMODEL_ACQUIRE:
15870 break;
15871 case MEMMODEL_RELEASE:
15872 case MEMMODEL_ACQ_REL:
15873 emit_insn (gen_lwsync ());
15874 break;
15875 case MEMMODEL_SEQ_CST:
15876 emit_insn (gen_hwsync ());
15877 break;
15878 default:
15879 gcc_unreachable ();
15880 }
15881 return mem;
15882 }
15883
15884 static void
15885 rs6000_post_atomic_barrier (enum memmodel model)
15886 {
15887 switch (model)
15888 {
15889 case MEMMODEL_RELAXED:
15890 case MEMMODEL_CONSUME:
15891 case MEMMODEL_RELEASE:
15892 break;
15893 case MEMMODEL_ACQUIRE:
15894 case MEMMODEL_ACQ_REL:
15895 case MEMMODEL_SEQ_CST:
15896 emit_insn (gen_isync ());
15897 break;
15898 default:
15899 gcc_unreachable ();
15900 }
15901 }
15902
15903 /* A subroutine of the various atomic expanders. For sub-word operations,
15904 we must adjust things to operate on SImode. Given the original MEM,
15905 return a new aligned memory. Also build and return the quantities by
15906 which to shift and mask. */
15907
15908 static rtx
15909 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15910 {
15911 rtx addr, align, shift, mask, mem;
15912 HOST_WIDE_INT shift_mask;
15913 machine_mode mode = GET_MODE (orig_mem);
15914
15915 /* For smaller modes, we have to implement this via SImode. */
15916 shift_mask = (mode == QImode ? 0x18 : 0x10);
15917
15918 addr = XEXP (orig_mem, 0);
15919 addr = force_reg (GET_MODE (addr), addr);
15920
15921 /* Aligned memory containing subword. Generate a new memory. We
15922 do not want any of the existing MEM_ATTR data, as we're now
15923 accessing memory outside the original object. */
15924 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15925 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15926 mem = gen_rtx_MEM (SImode, align);
15927 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15928 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15929 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15930
15931 /* Shift amount for subword relative to aligned word. */
15932 shift = gen_reg_rtx (SImode);
15933 addr = gen_lowpart (SImode, addr);
15934 rtx tmp = gen_reg_rtx (SImode);
15935 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15936 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15937 if (BYTES_BIG_ENDIAN)
15938 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15939 shift, 1, OPTAB_LIB_WIDEN);
15940 *pshift = shift;
15941
15942 /* Mask for insertion. */
15943 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15944 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15945 *pmask = mask;
15946
15947 return mem;
15948 }
15949
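/* A worked example of the helper above, assuming a little-endian target
   and a HImode access at address 0x1006: the aligned word is at
   0x1006 & -4 = 0x1004, SHIFT is (0x1006 << 3) & 0x10 = 16, and MASK is
   0xffff << 16.  On a big-endian target SHIFT is additionally XORed
   with 0x10, giving 0, because there the halfword at byte offset 2
   occupies the low-order bits of the aligned word.  */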
15950 /* A subroutine of the various atomic expanders. For sub-word operands,
15951    combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
15952
15953 static rtx
15954 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15955 {
15956 rtx x;
15957
15958 x = gen_reg_rtx (SImode);
15959 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15960 gen_rtx_NOT (SImode, mask),
15961 oldval)));
15962
15963 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15964
15965 return x;
15966 }
15967
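/* In other words, the helper above computes (OLDVAL & ~MASK) | NEWVAL;
   callers must already have shifted NEWVAL into position within the
   word.  */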
15968 /* A subroutine of the various atomic expanders. For sub-word operands,
15969 extract WIDE to NARROW via SHIFT. */
15970
15971 static void
15972 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15973 {
15974 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15975 wide, 1, OPTAB_LIB_WIDEN);
15976 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15977 }
15978
15979 /* Expand an atomic compare and swap operation. */
15980
15981 void
15982 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15983 {
15984 rtx boolval, retval, mem, oldval, newval, cond;
15985 rtx label1, label2, x, mask, shift;
15986 machine_mode mode, orig_mode;
15987 enum memmodel mod_s, mod_f;
15988 bool is_weak;
15989
15990 boolval = operands[0];
15991 retval = operands[1];
15992 mem = operands[2];
15993 oldval = operands[3];
15994 newval = operands[4];
15995 is_weak = (INTVAL (operands[5]) != 0);
15996 mod_s = memmodel_base (INTVAL (operands[6]));
15997 mod_f = memmodel_base (INTVAL (operands[7]));
15998 orig_mode = mode = GET_MODE (mem);
15999
16000 mask = shift = NULL_RTX;
16001 if (mode == QImode || mode == HImode)
16002 {
16003       /* Before power8, we didn't have access to lbarx/lharx, so generate a
16004 	 lwarx plus shift/mask operations.  With power8, we need to do the
16005 comparison in SImode, but the store is still done in QI/HImode. */
16006 oldval = convert_modes (SImode, mode, oldval, 1);
16007
16008 if (!TARGET_SYNC_HI_QI)
16009 {
16010 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16011
16012 	  /* Shift and mask OLDVAL into position within the word.  */
16013 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16014 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16015
16016 /* Shift and mask NEWVAL into position within the word. */
16017 newval = convert_modes (SImode, mode, newval, 1);
16018 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16019 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16020 }
16021
16022 /* Prepare to adjust the return value. */
16023 retval = gen_reg_rtx (SImode);
16024 mode = SImode;
16025 }
16026 else if (reg_overlap_mentioned_p (retval, oldval))
16027 oldval = copy_to_reg (oldval);
16028
16029 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16030 oldval = copy_to_mode_reg (mode, oldval);
16031
16032 if (reg_overlap_mentioned_p (retval, newval))
16033 newval = copy_to_reg (newval);
16034
16035 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16036
16037 label1 = NULL_RTX;
16038 if (!is_weak)
16039 {
16040 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16041 emit_label (XEXP (label1, 0));
16042 }
16043 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16044
16045 emit_load_locked (mode, retval, mem);
16046
16047 x = retval;
16048 if (mask)
16049 x = expand_simple_binop (SImode, AND, retval, mask,
16050 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16051
16052 cond = gen_reg_rtx (CCmode);
16053 /* If we have TImode, synthesize a comparison. */
16054 if (mode != TImode)
16055 x = gen_rtx_COMPARE (CCmode, x, oldval);
16056 else
16057 {
16058 rtx xor1_result = gen_reg_rtx (DImode);
16059 rtx xor2_result = gen_reg_rtx (DImode);
16060 rtx or_result = gen_reg_rtx (DImode);
16061 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16062 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16063 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16064 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16065
16066 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16067 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16068 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16069 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16070 }
16071
16072 emit_insn (gen_rtx_SET (cond, x));
16073
16074 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16075 emit_unlikely_jump (x, label2);
16076
16077 x = newval;
16078 if (mask)
16079 x = rs6000_mask_atomic_subword (retval, newval, mask);
16080
16081 emit_store_conditional (orig_mode, cond, mem, x);
16082
16083 if (!is_weak)
16084 {
16085 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16086 emit_unlikely_jump (x, label1);
16087 }
16088
16089 if (!is_mm_relaxed (mod_f))
16090 emit_label (XEXP (label2, 0));
16091
16092 rs6000_post_atomic_barrier (mod_s);
16093
16094 if (is_mm_relaxed (mod_f))
16095 emit_label (XEXP (label2, 0));
16096
16097 if (shift)
16098 rs6000_finish_atomic_subword (operands[1], retval, shift);
16099 else if (mode != GET_MODE (operands[1]))
16100 convert_move (operands[1], retval, 1);
16101
16102 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16103 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16104 emit_insn (gen_rtx_SET (boolval, x));
16105 }
16106
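/* For reference, a minimal sketch of what the expander above emits for a
   word-sized strong compare-and-swap with seq_cst ordering (rRET, rMEM,
   rOLD and rNEW are placeholder names; OLDVAL may instead appear as a
   short immediate):

	     hwsync
	0:   lwarx   rRET,0,rMEM
	     cmpw    rRET,rOLD
	     bne-    1f
	     stwcx.  rNEW,0,rMEM
	     bne-    0b
	1:   isync

   CR0 holds EQ on success and NE on failure, which the final SET of
   BOOLVAL tests.  */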
16107 /* Expand an atomic exchange operation. */
16108
16109 void
16110 rs6000_expand_atomic_exchange (rtx operands[])
16111 {
16112 rtx retval, mem, val, cond;
16113 machine_mode mode;
16114 enum memmodel model;
16115 rtx label, x, mask, shift;
16116
16117 retval = operands[0];
16118 mem = operands[1];
16119 val = operands[2];
16120 model = memmodel_base (INTVAL (operands[3]));
16121 mode = GET_MODE (mem);
16122
16123 mask = shift = NULL_RTX;
16124 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16125 {
16126 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16127
16128       /* Shift and mask VAL into position within the word.  */
16129 val = convert_modes (SImode, mode, val, 1);
16130 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16131 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16132
16133 /* Prepare to adjust the return value. */
16134 retval = gen_reg_rtx (SImode);
16135 mode = SImode;
16136 }
16137
16138 mem = rs6000_pre_atomic_barrier (mem, model);
16139
16140 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16141 emit_label (XEXP (label, 0));
16142
16143 emit_load_locked (mode, retval, mem);
16144
16145 x = val;
16146 if (mask)
16147 x = rs6000_mask_atomic_subword (retval, val, mask);
16148
16149 cond = gen_reg_rtx (CCmode);
16150 emit_store_conditional (mode, cond, mem, x);
16151
16152 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16153 emit_unlikely_jump (x, label);
16154
16155 rs6000_post_atomic_barrier (model);
16156
16157 if (shift)
16158 rs6000_finish_atomic_subword (operands[0], retval, shift);
16159 }
16160
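/* The expander above yields the classic unconditional swap loop,
   sketched here for SImode with placeholder register names:

	0:   lwarx   rRET,0,rMEM
	     stwcx.  rVAL,0,rMEM
	     bne-    0b

   bracketed by whatever barriers the memory model requires.  */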
16161 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16162 to perform. MEM is the memory on which to operate. VAL is the second
16163 operand of the binary operator. BEFORE and AFTER are optional locations to
16164    return the value of MEM either before or after the operation.  MODEL_RTX
16165 is a CONST_INT containing the memory model to use. */
16166
16167 void
16168 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16169 rtx orig_before, rtx orig_after, rtx model_rtx)
16170 {
16171 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16172 machine_mode mode = GET_MODE (mem);
16173 machine_mode store_mode = mode;
16174 rtx label, x, cond, mask, shift;
16175 rtx before = orig_before, after = orig_after;
16176
16177 mask = shift = NULL_RTX;
16178   /* On power8, we want to use SImode for the operation.  On previous systems,
16179      do the operation on the word containing the subword, then shift/mask to
16180      get the proper byte or halfword.  */
16181 if (mode == QImode || mode == HImode)
16182 {
16183 if (TARGET_SYNC_HI_QI)
16184 {
16185 val = convert_modes (SImode, mode, val, 1);
16186
16187 /* Prepare to adjust the return value. */
16188 before = gen_reg_rtx (SImode);
16189 if (after)
16190 after = gen_reg_rtx (SImode);
16191 mode = SImode;
16192 }
16193 else
16194 {
16195 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16196
16197 	  /* Shift and mask VAL into position within the word.  */
16198 val = convert_modes (SImode, mode, val, 1);
16199 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16200 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16201
16202 switch (code)
16203 {
16204 case IOR:
16205 case XOR:
16206 /* We've already zero-extended VAL. That is sufficient to
16207 make certain that it does not affect other bits. */
16208 mask = NULL;
16209 break;
16210
16211 case AND:
16212 /* If we make certain that all of the other bits in VAL are
16213 set, that will be sufficient to not affect other bits. */
16214 x = gen_rtx_NOT (SImode, mask);
16215 x = gen_rtx_IOR (SImode, x, val);
16216 emit_insn (gen_rtx_SET (val, x));
16217 mask = NULL;
16218 break;
16219
16220 case NOT:
16221 case PLUS:
16222 case MINUS:
16223 /* These will all affect bits outside the field and need
16224 adjustment via MASK within the loop. */
16225 break;
16226
16227 default:
16228 gcc_unreachable ();
16229 }
16230
16231 /* Prepare to adjust the return value. */
16232 before = gen_reg_rtx (SImode);
16233 if (after)
16234 after = gen_reg_rtx (SImode);
16235 store_mode = mode = SImode;
16236 }
16237 }
16238
16239 mem = rs6000_pre_atomic_barrier (mem, model);
16240
16241 label = gen_label_rtx ();
16242 emit_label (label);
16243 label = gen_rtx_LABEL_REF (VOIDmode, label);
16244
16245 if (before == NULL_RTX)
16246 before = gen_reg_rtx (mode);
16247
16248 emit_load_locked (mode, before, mem);
16249
16250 if (code == NOT)
16251 {
16252 x = expand_simple_binop (mode, AND, before, val,
16253 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16254 after = expand_simple_unop (mode, NOT, x, after, 1);
16255 }
16256 else
16257 {
16258 after = expand_simple_binop (mode, code, before, val,
16259 after, 1, OPTAB_LIB_WIDEN);
16260 }
16261
16262 x = after;
16263 if (mask)
16264 {
16265 x = expand_simple_binop (SImode, AND, after, mask,
16266 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16267 x = rs6000_mask_atomic_subword (before, x, mask);
16268 }
16269 else if (store_mode != mode)
16270 x = convert_modes (store_mode, mode, x, 1);
16271
16272 cond = gen_reg_rtx (CCmode);
16273 emit_store_conditional (store_mode, cond, mem, x);
16274
16275 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16276 emit_unlikely_jump (x, label);
16277
16278 rs6000_post_atomic_barrier (model);
16279
16280 if (shift)
16281 {
16282 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16283 	 then do the calculations in a SImode register.  */
16284 if (orig_before)
16285 rs6000_finish_atomic_subword (orig_before, before, shift);
16286 if (orig_after)
16287 rs6000_finish_atomic_subword (orig_after, after, shift);
16288 }
16289 else if (store_mode != mode)
16290 {
16291 /* QImode/HImode on machines with lbarx/lharx where we do the native
16292 	 operation and then do the calculations in a SImode register.  */
16293 if (orig_before)
16294 convert_move (orig_before, before, 1);
16295 if (orig_after)
16296 convert_move (orig_after, after, 1);
16297 }
16298 else if (orig_after && after != orig_after)
16299 emit_move_insn (orig_after, after);
16300 }
16301
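/* As an illustration of the expander above, an SImode atomic fetch-and-add
   (CODE == PLUS) becomes the usual read-modify-write loop, shown with
   placeholder register names:

	0:   lwarx   rBEFORE,0,rMEM
	     add     rAFTER,rBEFORE,rVAL
	     stwcx.  rAFTER,0,rMEM
	     bne-    0b

   again bracketed by the model's barriers.  NOT is the one code that is
   synthesized differently, as AND followed by a complement.  */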
16302 /* Emit instructions to move SRC to DST. Called by splitters for
16303 multi-register moves. It will emit at most one instruction for
16304 each register that is accessed; that is, it won't emit li/lis pairs
16305 (or equivalent for 64-bit code). One of SRC or DST must be a hard
16306 register. */
16307
16308 void
16309 rs6000_split_multireg_move (rtx dst, rtx src)
16310 {
16311 /* The register number of the first register being moved. */
16312 int reg;
16313 /* The mode that is to be moved. */
16314 machine_mode mode;
16315 /* The mode that the move is being done in, and its size. */
16316 machine_mode reg_mode;
16317 int reg_mode_size;
16318 /* The number of registers that will be moved. */
16319 int nregs;
16320
16321 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
16322 mode = GET_MODE (dst);
16323 nregs = hard_regno_nregs (reg, mode);
16324
16325 /* If we have a vector quad register for MMA, and this is a load or store,
16326 see if we can use vector paired load/stores. */
16327 if (mode == XOmode && TARGET_MMA
16328 && (MEM_P (dst) || MEM_P (src)))
16329 {
16330 reg_mode = OOmode;
16331 nregs /= 2;
16332 }
16333 /* If we have a vector pair/quad mode, split it into two/four separate
16334 vectors. */
16335 else if (mode == OOmode || mode == XOmode)
16336 reg_mode = V1TImode;
16337 else if (FP_REGNO_P (reg))
16338 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
16339 (TARGET_HARD_FLOAT ? DFmode : SFmode);
16340 else if (ALTIVEC_REGNO_P (reg))
16341 reg_mode = V16QImode;
16342 else
16343 reg_mode = word_mode;
16344 reg_mode_size = GET_MODE_SIZE (reg_mode);
16345
16346 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
16347
16348 /* TDmode residing in FP registers is special, since the ISA requires that
16349 the lower-numbered word of a register pair is always the most significant
16350 word, even in little-endian mode. This does not match the usual subreg
16351      semantics, so we cannot use simplify_gen_subreg in those cases.  Access
16352 the appropriate constituent registers "by hand" in little-endian mode.
16353
16354 Note we do not need to check for destructive overlap here since TDmode
16355 can only reside in even/odd register pairs. */
16356 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
16357 {
16358 rtx p_src, p_dst;
16359 int i;
16360
16361 for (i = 0; i < nregs; i++)
16362 {
16363 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
16364 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
16365 else
16366 p_src = simplify_gen_subreg (reg_mode, src, mode,
16367 i * reg_mode_size);
16368
16369 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
16370 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
16371 else
16372 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
16373 i * reg_mode_size);
16374
16375 emit_insn (gen_rtx_SET (p_dst, p_src));
16376 }
16377
16378 return;
16379 }
16380
16381 /* The __vector_pair and __vector_quad modes are multi-register
16382 modes, so if we have to load or store the registers, we have to be
16383 careful to properly swap them if we're in little endian mode
16384 below. This means the last register gets the first memory
16385 location. We also need to be careful of using the right register
16386 numbers if we are splitting XO to OO. */
16387 if (mode == OOmode || mode == XOmode)
16388 {
16389 nregs = hard_regno_nregs (reg, mode);
16390 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
16391 if (MEM_P (dst))
16392 {
16393 unsigned offset = 0;
16394 unsigned size = GET_MODE_SIZE (reg_mode);
16395
16396 /* If we are reading an accumulator register, we have to
16397 deprime it before we can access it. */
16398 if (TARGET_MMA
16399 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16400 emit_insn (gen_mma_xxmfacc (src, src));
16401
16402 for (int i = 0; i < nregs; i += reg_mode_nregs)
16403 {
16404 unsigned subreg =
16405 (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
16406 rtx dst2 = adjust_address (dst, reg_mode, offset);
16407 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
16408 offset += size;
16409 emit_insn (gen_rtx_SET (dst2, src2));
16410 }
16411
16412 return;
16413 }
16414
16415 if (MEM_P (src))
16416 {
16417 unsigned offset = 0;
16418 unsigned size = GET_MODE_SIZE (reg_mode);
16419
16420 for (int i = 0; i < nregs; i += reg_mode_nregs)
16421 {
16422 unsigned subreg =
16423 (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
16424 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
16425 rtx src2 = adjust_address (src, reg_mode, offset);
16426 offset += size;
16427 emit_insn (gen_rtx_SET (dst2, src2));
16428 }
16429
16430 /* If we are writing an accumulator register, we have to
16431 prime it after we've written it. */
16432 if (TARGET_MMA
16433 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16434 emit_insn (gen_mma_xxmtacc (dst, dst));
16435
16436 return;
16437 }
16438
16439 if (GET_CODE (src) == UNSPEC)
16440 {
16441 gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
16442 gcc_assert (REG_P (dst));
16443 if (GET_MODE (src) == XOmode)
16444 gcc_assert (FP_REGNO_P (REGNO (dst)));
16445 if (GET_MODE (src) == OOmode)
16446 gcc_assert (VSX_REGNO_P (REGNO (dst)));
16447
16448 reg_mode = GET_MODE (XVECEXP (src, 0, 0));
16449 for (int i = 0; i < XVECLEN (src, 0); i++)
16450 {
16451 rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
16452 emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
16453 }
16454
16455 /* We are writing an accumulator register, so we have to
16456 prime it after we've written it. */
16457 if (GET_MODE (src) == XOmode)
16458 emit_insn (gen_mma_xxmtacc (dst, dst));
16459
16460 return;
16461 }
16462
16463 /* Register -> register moves can use common code. */
16464 }
16465
16466 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
16467 {
16468 /* If we are reading an accumulator register, we have to
16469 deprime it before we can access it. */
16470 if (TARGET_MMA
16471 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16472 emit_insn (gen_mma_xxmfacc (src, src));
16473
16474 /* Move register range backwards, if we might have destructive
16475 overlap. */
16476 int i;
16477 /* XO/OO are opaque so cannot use subregs. */
16478 if (mode == OOmode || mode == XOmode )
16479 {
16480 for (i = nregs - 1; i >= 0; i--)
16481 {
16482 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
16483 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
16484 emit_insn (gen_rtx_SET (dst_i, src_i));
16485 }
16486 }
16487 else
16488 {
16489 for (i = nregs - 1; i >= 0; i--)
16490 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
16491 i * reg_mode_size),
16492 simplify_gen_subreg (reg_mode, src, mode,
16493 i * reg_mode_size)));
16494 }
16495
16496 /* If we are writing an accumulator register, we have to
16497 prime it after we've written it. */
16498 if (TARGET_MMA
16499 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16500 emit_insn (gen_mma_xxmtacc (dst, dst));
16501 }
16502 else
16503 {
16504 int i;
16505 int j = -1;
16506 bool used_update = false;
16507 rtx restore_basereg = NULL_RTX;
16508
16509 if (MEM_P (src) && INT_REGNO_P (reg))
16510 {
16511 rtx breg;
16512
16513 if (GET_CODE (XEXP (src, 0)) == PRE_INC
16514 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
16515 {
16516 rtx delta_rtx;
16517 breg = XEXP (XEXP (src, 0), 0);
16518 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
16519 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
16520 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
16521 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
16522 src = replace_equiv_address (src, breg);
16523 }
16524 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
16525 {
16526 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
16527 {
16528 rtx basereg = XEXP (XEXP (src, 0), 0);
16529 if (TARGET_UPDATE)
16530 {
16531 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
16532 emit_insn (gen_rtx_SET (ndst,
16533 gen_rtx_MEM (reg_mode,
16534 XEXP (src, 0))));
16535 used_update = true;
16536 }
16537 else
16538 emit_insn (gen_rtx_SET (basereg,
16539 XEXP (XEXP (src, 0), 1)));
16540 src = replace_equiv_address (src, basereg);
16541 }
16542 else
16543 {
16544 rtx basereg = gen_rtx_REG (Pmode, reg);
16545 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
16546 src = replace_equiv_address (src, basereg);
16547 }
16548 }
16549
16550 breg = XEXP (src, 0);
16551 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
16552 breg = XEXP (breg, 0);
16553
16554 /* If the base register we are using to address memory is
16555 also a destination reg, then change that register last. */
16556 if (REG_P (breg)
16557 && REGNO (breg) >= REGNO (dst)
16558 && REGNO (breg) < REGNO (dst) + nregs)
16559 j = REGNO (breg) - REGNO (dst);
16560 }
16561 else if (MEM_P (dst) && INT_REGNO_P (reg))
16562 {
16563 rtx breg;
16564
16565 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
16566 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
16567 {
16568 rtx delta_rtx;
16569 breg = XEXP (XEXP (dst, 0), 0);
16570 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
16571 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
16572 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
16573
16574 /* We have to update the breg before doing the store.
16575 Use store with update, if available. */
16576
16577 if (TARGET_UPDATE)
16578 {
16579 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
16580 emit_insn (TARGET_32BIT
16581 ? (TARGET_POWERPC64
16582 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
16583 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
16584 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
16585 used_update = true;
16586 }
16587 else
16588 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
16589 dst = replace_equiv_address (dst, breg);
16590 }
16591 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
16592 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
16593 {
16594 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
16595 {
16596 rtx basereg = XEXP (XEXP (dst, 0), 0);
16597 if (TARGET_UPDATE)
16598 {
16599 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
16600 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
16601 XEXP (dst, 0)),
16602 nsrc));
16603 used_update = true;
16604 }
16605 else
16606 emit_insn (gen_rtx_SET (basereg,
16607 XEXP (XEXP (dst, 0), 1)));
16608 dst = replace_equiv_address (dst, basereg);
16609 }
16610 else
16611 {
16612 rtx basereg = XEXP (XEXP (dst, 0), 0);
16613 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
16614 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
16615 && REG_P (basereg)
16616 && REG_P (offsetreg)
16617 && REGNO (basereg) != REGNO (offsetreg));
16618 if (REGNO (basereg) == 0)
16619 {
16620 rtx tmp = offsetreg;
16621 offsetreg = basereg;
16622 basereg = tmp;
16623 }
16624 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
16625 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
16626 dst = replace_equiv_address (dst, basereg);
16627 }
16628 }
16629 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
16630 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
16631 }
16632
16633 /* If we are reading an accumulator register, we have to
16634 deprime it before we can access it. */
16635 if (TARGET_MMA && REG_P (src)
16636 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16637 emit_insn (gen_mma_xxmfacc (src, src));
16638
16639 for (i = 0; i < nregs; i++)
16640 {
16641 /* Calculate index to next subword. */
16642 ++j;
16643 if (j == nregs)
16644 j = 0;
16645
16646 /* If compiler already emitted move of first word by
16647 store with update, no need to do anything. */
16648 if (j == 0 && used_update)
16649 continue;
16650
16651 /* XO/OO are opaque so cannot use subregs. */
16652 if (mode == OOmode || mode == XOmode )
16653 {
16654 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
16655 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
16656 emit_insn (gen_rtx_SET (dst_i, src_i));
16657 }
16658 else
16659 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
16660 j * reg_mode_size),
16661 simplify_gen_subreg (reg_mode, src, mode,
16662 j * reg_mode_size)));
16663 }
16664
16665 /* If we are writing an accumulator register, we have to
16666 prime it after we've written it. */
16667 if (TARGET_MMA && REG_P (dst)
16668 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16669 emit_insn (gen_mma_xxmtacc (dst, dst));
16670
16671 if (restore_basereg != NULL_RTX)
16672 emit_insn (restore_basereg);
16673 }
16674 }
16675
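/* A small example of the splitter above: a TImode copy from the GPR pair
   r8/r9 to the overlapping pair r9/r10 (an illustrative case) satisfies
   REGNO (src) < REGNO (dst), so the backwards loop emits the two DImode
   moves highest register first, r10 = r9 then r9 = r8, and no input is
   clobbered before it is read.  */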
16676 static GTY(()) alias_set_type TOC_alias_set = -1;
16677
16678 alias_set_type
16679 get_TOC_alias_set (void)
16680 {
16681 if (TOC_alias_set == -1)
16682 TOC_alias_set = new_alias_set ();
16683 return TOC_alias_set;
16684 }
16685
16686 /* The mode the ABI uses for a word. This is not the same as word_mode
16687 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16688
16689 static scalar_int_mode
16690 rs6000_abi_word_mode (void)
16691 {
16692 return TARGET_32BIT ? SImode : DImode;
16693 }
16694
16695 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16696 static char *
16697 rs6000_offload_options (void)
16698 {
16699 if (TARGET_64BIT)
16700 return xstrdup ("-foffload-abi=lp64");
16701 else
16702 return xstrdup ("-foffload-abi=ilp32");
16703 }
16704
16705 \f
16706 /* A quick summary of the various types of 'constant-pool tables'
16707 under PowerPC:
16708
16709 Target Flags Name One table per
16710 AIX (none) AIX TOC object file
16711 AIX -mfull-toc AIX TOC object file
16712 AIX -mminimal-toc AIX minimal TOC translation unit
16713 SVR4/EABI (none) SVR4 SDATA object file
16714 SVR4/EABI -fpic SVR4 pic object file
16715 SVR4/EABI -fPIC SVR4 PIC translation unit
16716 SVR4/EABI -mrelocatable EABI TOC function
16717 SVR4/EABI -maix AIX TOC object file
16718 SVR4/EABI -maix -mminimal-toc
16719 AIX minimal TOC translation unit
16720
16721 Name Reg. Set by entries contains:
16722 made by addrs? fp? sum?
16723
16724 AIX TOC 2 crt0 as Y option option
16725 AIX minimal TOC 30 prolog gcc Y Y option
16726 SVR4 SDATA 13 crt0 gcc N Y N
16727 SVR4 pic 30 prolog ld Y not yet N
16728 SVR4 PIC 30 prolog gcc Y option option
16729 EABI TOC 30 prolog gcc Y option option
16730
16731 */
16732
16733 /* Hash functions for the hash table. */
16734
16735 static unsigned
16736 rs6000_hash_constant (rtx k)
16737 {
16738 enum rtx_code code = GET_CODE (k);
16739 machine_mode mode = GET_MODE (k);
16740 unsigned result = (code << 3) ^ mode;
16741 const char *format;
16742 int flen, fidx;
16743
16744 format = GET_RTX_FORMAT (code);
16745 flen = strlen (format);
16746 fidx = 0;
16747
16748 switch (code)
16749 {
16750 case LABEL_REF:
16751 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16752
16753 case CONST_WIDE_INT:
16754 {
16755 int i;
16756 flen = CONST_WIDE_INT_NUNITS (k);
16757 for (i = 0; i < flen; i++)
16758 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16759 return result;
16760 }
16761
16762 case CONST_DOUBLE:
16763 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16764
16765 case CODE_LABEL:
16766 fidx = 3;
16767 break;
16768
16769 default:
16770 break;
16771 }
16772
16773 for (; fidx < flen; fidx++)
16774 switch (format[fidx])
16775 {
16776 case 's':
16777 {
16778 unsigned i, len;
16779 const char *str = XSTR (k, fidx);
16780 len = strlen (str);
16781 result = result * 613 + len;
16782 for (i = 0; i < len; i++)
16783 result = result * 613 + (unsigned) str[i];
16784 break;
16785 }
16786 case 'u':
16787 case 'e':
16788 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16789 break;
16790 case 'i':
16791 case 'n':
16792 result = result * 613 + (unsigned) XINT (k, fidx);
16793 break;
16794 case 'w':
16795 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16796 result = result * 613 + (unsigned) XWINT (k, fidx);
16797 else
16798 {
16799 size_t i;
16800 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16801 result = result * 613 + (unsigned) (XWINT (k, fidx)
16802 >> CHAR_BIT * i);
16803 }
16804 break;
16805 case '0':
16806 break;
16807 default:
16808 gcc_unreachable ();
16809 }
16810
16811 return result;
16812 }
16813
16814 hashval_t
16815 toc_hasher::hash (toc_hash_struct *thc)
16816 {
16817 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16818 }
16819
16820 /* Compare H1 and H2 for equivalence. */
16821
16822 bool
16823 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16824 {
16825 rtx r1 = h1->key;
16826 rtx r2 = h2->key;
16827
16828 if (h1->key_mode != h2->key_mode)
16829 return 0;
16830
16831 return rtx_equal_p (r1, r2);
16832 }
16833
16834 /* These are the names given by the C++ front-end to vtables, and
16835 vtable-like objects. Ideally, this logic should not be here;
16836 instead, there should be some programmatic way of inquiring as
16837 to whether or not an object is a vtable. */
16838
16839 #define VTABLE_NAME_P(NAME)				\
16840   (strncmp ("_vt.", NAME, strlen ("_vt.")) == 0	\
16841   || strncmp ("_ZTV", NAME, strlen ("_ZTV")) == 0	\
16842   || strncmp ("_ZTT", NAME, strlen ("_ZTT")) == 0	\
16843   || strncmp ("_ZTI", NAME, strlen ("_ZTI")) == 0	\
16844   || strncmp ("_ZTC", NAME, strlen ("_ZTC")) == 0)
16845
16846 #ifdef NO_DOLLAR_IN_LABEL
16847 /* Return a GGC-allocated character string translating dollar signs in
16848 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16849
16850 const char *
16851 rs6000_xcoff_strip_dollar (const char *name)
16852 {
16853 char *strip, *p;
16854 const char *q;
16855 size_t len;
16856
16857 q = (const char *) strchr (name, '$');
16858
16859 if (q == 0 || q == name)
16860 return name;
16861
16862 len = strlen (name);
16863 strip = XALLOCAVEC (char, len + 1);
16864 strcpy (strip, name);
16865 p = strip + (q - name);
16866 while (p)
16867 {
16868 *p = '_';
16869 p = strchr (p + 1, '$');
16870 }
16871
16872 return ggc_alloc_string (strip, len);
16873 }
16874 #endif
16875
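/* For instance, rs6000_xcoff_strip_dollar ("foo$bar$baz") returns
   "foo_bar_baz", while a name whose only '$' is the first character,
   or a name with no '$' at all, is returned unchanged by the early
   exit above.  */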
16876 void
16877 rs6000_output_symbol_ref (FILE *file, rtx x)
16878 {
16879 const char *name = XSTR (x, 0);
16880
16881 /* Currently C++ toc references to vtables can be emitted before it
16882 is decided whether the vtable is public or private. If this is
16883 the case, then the linker will eventually complain that there is
16884 a reference to an unknown section. Thus, for vtables only,
16885 we emit the TOC reference to reference the identifier and not the
16886 symbol. */
16887 if (VTABLE_NAME_P (name))
16888 {
16889 RS6000_OUTPUT_BASENAME (file, name);
16890 }
16891 else
16892 assemble_name (file, name);
16893 }
16894
16895 /* Output a TOC entry. We derive the entry name from what is being
16896 written. */
16897
16898 void
16899 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16900 {
16901 char buf[256];
16902 const char *name = buf;
16903 rtx base = x;
16904 HOST_WIDE_INT offset = 0;
16905
16906 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16907
16908 /* When the linker won't eliminate them, don't output duplicate
16909 TOC entries (this happens on AIX if there is any kind of TOC,
16910 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16911 CODE_LABELs. */
16912 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16913 {
16914 struct toc_hash_struct *h;
16915
16916 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16917 time because GGC is not initialized at that point. */
16918 if (toc_hash_table == NULL)
16919 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16920
16921 h = ggc_alloc<toc_hash_struct> ();
16922 h->key = x;
16923 h->key_mode = mode;
16924 h->labelno = labelno;
16925
16926 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16927 if (*found == NULL)
16928 *found = h;
16929 else /* This is indeed a duplicate.
16930 Set this label equal to that label. */
16931 {
16932 fputs ("\t.set ", file);
16933 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16934 fprintf (file, "%d,", labelno);
16935 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16936 fprintf (file, "%d\n", ((*found)->labelno));
16937
16938 #ifdef HAVE_AS_TLS
16939 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16940 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16941 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16942 {
16943 fputs ("\t.set ", file);
16944 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16945 fprintf (file, "%d,", labelno);
16946 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16947 fprintf (file, "%d\n", ((*found)->labelno));
16948 }
16949 #endif
16950 return;
16951 }
16952 }
16953
16954 /* If we're going to put a double constant in the TOC, make sure it's
16955 aligned properly when strict alignment is on. */
16956 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16957 && STRICT_ALIGNMENT
16958 && GET_MODE_BITSIZE (mode) >= 64
16959       && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16960     ASM_OUTPUT_ALIGN (file, 3);
16962
16963 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16964
16965 /* Handle FP constants specially. Note that if we have a minimal
16966 TOC, things we put here aren't actually in the TOC, so we can allow
16967 FP constants. */
16968 if (CONST_DOUBLE_P (x)
16969 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16970 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16971 {
16972 long k[4];
16973
16974 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16975 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16976 else
16977 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16978
16979 if (TARGET_64BIT)
16980 {
16981 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16982 fputs (DOUBLE_INT_ASM_OP, file);
16983 else
16984 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16985 k[0] & 0xffffffff, k[1] & 0xffffffff,
16986 k[2] & 0xffffffff, k[3] & 0xffffffff);
16987 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16988 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16989 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16990 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16991 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16992 return;
16993 }
16994 else
16995 {
16996 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16997 fputs ("\t.long ", file);
16998 else
16999 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17000 k[0] & 0xffffffff, k[1] & 0xffffffff,
17001 k[2] & 0xffffffff, k[3] & 0xffffffff);
17002 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17003 k[0] & 0xffffffff, k[1] & 0xffffffff,
17004 k[2] & 0xffffffff, k[3] & 0xffffffff);
17005 return;
17006 }
17007 }
17008 else if (CONST_DOUBLE_P (x)
17009 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17010 {
17011 long k[2];
17012
17013 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17014 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17015 else
17016 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17017
17018 if (TARGET_64BIT)
17019 {
17020 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17021 fputs (DOUBLE_INT_ASM_OP, file);
17022 else
17023 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17024 k[0] & 0xffffffff, k[1] & 0xffffffff);
17025 fprintf (file, "0x%lx%08lx\n",
17026 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17027 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17028 return;
17029 }
17030 else
17031 {
17032 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17033 fputs ("\t.long ", file);
17034 else
17035 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17036 k[0] & 0xffffffff, k[1] & 0xffffffff);
17037 fprintf (file, "0x%lx,0x%lx\n",
17038 k[0] & 0xffffffff, k[1] & 0xffffffff);
17039 return;
17040 }
17041 }
17042 else if (CONST_DOUBLE_P (x)
17043 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17044 {
17045 long l;
17046
17047 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17048 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17049 else
17050 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17051
17052 if (TARGET_64BIT)
17053 {
17054 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17055 fputs (DOUBLE_INT_ASM_OP, file);
17056 else
17057 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17058 if (WORDS_BIG_ENDIAN)
17059 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17060 else
17061 fprintf (file, "0x%lx\n", l & 0xffffffff);
17062 return;
17063 }
17064 else
17065 {
17066 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17067 fputs ("\t.long ", file);
17068 else
17069 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17070 fprintf (file, "0x%lx\n", l & 0xffffffff);
17071 return;
17072 }
17073 }
17074 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17075 {
17076 unsigned HOST_WIDE_INT low;
17077 HOST_WIDE_INT high;
17078
17079 low = INTVAL (x) & 0xffffffff;
17080 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17081
17082       /* TOC entries are always Pmode-sized, so on a big-endian target
17083 	 smaller integer constants in the TOC need to be padded.
17084 (This is still a win over putting the constants in
17085 a separate constant pool, because then we'd have
17086 to have both a TOC entry _and_ the actual constant.)
17087
17088 For a 32-bit target, CONST_INT values are loaded and shifted
17089 entirely within `low' and can be stored in one TOC entry. */
17090
17091 /* It would be easy to make this work, but it doesn't now. */
17092 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17093
17094 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17095 {
17096 low |= high << 32;
17097 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17098 high = (HOST_WIDE_INT) low >> 32;
17099 low &= 0xffffffff;
17100 }
17101
17102 if (TARGET_64BIT)
17103 {
17104 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17105 fputs (DOUBLE_INT_ASM_OP, file);
17106 else
17107 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17108 (long) high & 0xffffffff, (long) low & 0xffffffff);
17109 fprintf (file, "0x%lx%08lx\n",
17110 (long) high & 0xffffffff, (long) low & 0xffffffff);
17111 return;
17112 }
17113 else
17114 {
17115 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17116 {
17117 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17118 fputs ("\t.long ", file);
17119 else
17120 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17121 (long) high & 0xffffffff, (long) low & 0xffffffff);
17122 fprintf (file, "0x%lx,0x%lx\n",
17123 (long) high & 0xffffffff, (long) low & 0xffffffff);
17124 }
17125 else
17126 {
17127 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17128 fputs ("\t.long ", file);
17129 else
17130 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17131 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17132 }
17133 return;
17134 }
17135 }
17136
17137 if (GET_CODE (x) == CONST)
17138 {
17139 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17140 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17141
17142 base = XEXP (XEXP (x, 0), 0);
17143 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17144 }
17145
17146 switch (GET_CODE (base))
17147 {
17148 case SYMBOL_REF:
17149 name = XSTR (base, 0);
17150 break;
17151
17152 case LABEL_REF:
17153 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17154 CODE_LABEL_NUMBER (XEXP (base, 0)));
17155 break;
17156
17157 case CODE_LABEL:
17158 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17159 break;
17160
17161 default:
17162 gcc_unreachable ();
17163 }
17164
17165 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17166 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17167 else
17168 {
17169 fputs ("\t.tc ", file);
17170 RS6000_OUTPUT_BASENAME (file, name);
17171
17172 if (offset < 0)
17173 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17174 else if (offset)
17175 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17176
17177 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17178 after other TOC symbols, reducing overflow of small TOC access
17179 to [TC] symbols. */
17180 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17181 ? "[TE]," : "[TC],", file);
17182 }
17183
17184 /* Currently C++ toc references to vtables can be emitted before it
17185 is decided whether the vtable is public or private. If this is
17186 the case, then the linker will eventually complain that there is
17187 a TOC reference to an unknown section. Thus, for vtables only,
17188 we emit the TOC reference to reference the symbol and not the
17189 section. */
17190 if (VTABLE_NAME_P (name))
17191 {
17192 RS6000_OUTPUT_BASENAME (file, name);
17193 if (offset < 0)
17194 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17195 else if (offset > 0)
17196 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17197 }
17198 else
17199 output_addr_const (file, x);
17200
17201 #if HAVE_AS_TLS
17202 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17203 {
17204 switch (SYMBOL_REF_TLS_MODEL (base))
17205 {
17206 case 0:
17207 break;
17208 case TLS_MODEL_LOCAL_EXEC:
17209 fputs ("@le", file);
17210 break;
17211 case TLS_MODEL_INITIAL_EXEC:
17212 fputs ("@ie", file);
17213 break;
17214 /* Use global-dynamic for local-dynamic. */
17215 case TLS_MODEL_GLOBAL_DYNAMIC:
17216 case TLS_MODEL_LOCAL_DYNAMIC:
17217 putc ('\n', file);
17218 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17219 fputs ("\t.tc .", file);
17220 RS6000_OUTPUT_BASENAME (file, name);
17221 fputs ("[TC],", file);
17222 output_addr_const (file, x);
17223 fputs ("@m", file);
17224 break;
17225 default:
17226 gcc_unreachable ();
17227 }
17228 }
17229 #endif
17230
17231 putc ('\n', file);
17232 }
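/* Concretely, for a SYMBOL_REF "foo" plus offset 4 on 64-bit AIX with a
   full TOC and the small code model (an illustrative case), the code
   above emits the entry's internal LC label followed by a line of the
   form

	.tc foo.P4[TC],foo+4
   */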
17233 \f
17234 /* Output an assembler pseudo-op to write an ASCII string of N characters
17235 starting at P to FILE.
17236
17237 On the RS/6000, we have to do this using the .byte operation and
17238 write out special characters outside the quoted string.
17239 Also, the assembler is broken; very long strings are truncated,
17240 so we must artificially break them up early. */
17241
17242 void
17243 output_ascii (FILE *file, const char *p, int n)
17244 {
17245 char c;
17246 int i, count_string;
17247 const char *for_string = "\t.byte \"";
17248 const char *for_decimal = "\t.byte ";
17249 const char *to_close = NULL;
17250
17251 count_string = 0;
17252 for (i = 0; i < n; i++)
17253 {
17254 c = *p++;
17255 if (c >= ' ' && c < 0177)
17256 {
17257 if (for_string)
17258 fputs (for_string, file);
17259 putc (c, file);
17260
17261 /* Write two quotes to get one. */
17262 if (c == '"')
17263 {
17264 putc (c, file);
17265 ++count_string;
17266 }
17267
17268 for_string = NULL;
17269 for_decimal = "\"\n\t.byte ";
17270 to_close = "\"\n";
17271 ++count_string;
17272
17273 if (count_string >= 512)
17274 {
17275 fputs (to_close, file);
17276
17277 for_string = "\t.byte \"";
17278 for_decimal = "\t.byte ";
17279 to_close = NULL;
17280 count_string = 0;
17281 }
17282 }
17283 else
17284 {
17285 if (for_decimal)
17286 fputs (for_decimal, file);
17287 fprintf (file, "%d", c);
17288
17289 for_string = "\n\t.byte \"";
17290 for_decimal = ", ";
17291 to_close = "\n";
17292 count_string = 0;
17293 }
17294 }
17295
17296 /* Now close the string if we have written one. Then end the line. */
17297 if (to_close)
17298 fputs (to_close, file);
17299 }
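/* As an example, output_ascii (file, "hi\n", 3) writes

	.byte "hi"
	.byte 10

   keeping printable runs inside one quoted string and emitting
   everything else as decimal byte values.  */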
17300 \f
17301 /* Generate a unique section name for FILENAME for a section type
17302 represented by SECTION_DESC. Output goes into BUF.
17303
17304 SECTION_DESC can be any string, as long as it is different for each
17305 possible section type.
17306
17307 We name the section in the same manner as xlc. The name begins with an
17308 underscore followed by the filename (after stripping any leading directory
17309 names) with the last period replaced by the string SECTION_DESC. If
17310 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17311 the name. */
17312
17313 void
17314 rs6000_gen_section_name (char **buf, const char *filename,
17315 const char *section_desc)
17316 {
17317 const char *q, *after_last_slash, *last_period = 0;
17318 char *p;
17319 int len;
17320
17321 after_last_slash = filename;
17322 for (q = filename; *q; q++)
17323 {
17324 if (*q == '/')
17325 after_last_slash = q + 1;
17326 else if (*q == '.')
17327 last_period = q;
17328 }
17329
17330 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17331 *buf = (char *) xmalloc (len);
17332
17333 p = *buf;
17334 *p++ = '_';
17335
17336 for (q = after_last_slash; *q; q++)
17337 {
17338 if (q == last_period)
17339 {
17340 strcpy (p, section_desc);
17341 p += strlen (section_desc);
17342 break;
17343 }
17344
17345 else if (ISALNUM (*q))
17346 *p++ = *q;
17347 }
17348
17349 if (last_period == 0)
17350 strcpy (p, section_desc);
17351 else
17352 *p = '\0';
17353 }
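/* For example, given FILENAME "src/foo.c" and SECTION_DESC "_bss_",
   the routine above produces "_foo_bss_": the directory part is
   stripped, the '_' prefix is added, and the final '.' is replaced by
   the descriptor.  */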
17354 \f
17355 /* Emit profile function. */
17356
17357 void
17358 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17359 {
17360 /* Non-standard profiling for kernels, which just saves LR then calls
17361 _mcount without worrying about arg saves. The idea is to change
17362 the function prologue as little as possible as it isn't easy to
17363 account for arg save/restore code added just for _mcount. */
17364 if (TARGET_PROFILE_KERNEL)
17365 return;
17366
17367 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17368 {
17369 #ifndef NO_PROFILE_COUNTERS
17370 # define NO_PROFILE_COUNTERS 0
17371 #endif
17372 if (NO_PROFILE_COUNTERS)
17373 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17374 LCT_NORMAL, VOIDmode);
17375 else
17376 {
17377 char buf[30];
17378 const char *label_name;
17379 rtx fun;
17380
17381 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17382 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17383 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17384
17385 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17386 LCT_NORMAL, VOIDmode, fun, Pmode);
17387 }
17388 }
17389 else if (DEFAULT_ABI == ABI_DARWIN)
17390 {
17391 const char *mcount_name = RS6000_MCOUNT;
17392 int caller_addr_regno = LR_REGNO;
17393
17394 /* Be conservative and always set this, at least for now. */
17395 crtl->uses_pic_offset_table = 1;
17396
17397 #if TARGET_MACHO
17398 /* For PIC code, set up a stub and collect the caller's address
17399 from r0, which is where the prologue puts it. */
17400 if (MACHOPIC_INDIRECT
17401 && crtl->uses_pic_offset_table)
17402 caller_addr_regno = 0;
17403 #endif
17404 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17405 LCT_NORMAL, VOIDmode,
17406 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17407 }
17408 }
17409
17410 /* Write function profiler code. */
17411
17412 void
17413 output_function_profiler (FILE *file, int labelno)
17414 {
17415 char buf[100];
17416
17417 switch (DEFAULT_ABI)
17418 {
17419 default:
17420 gcc_unreachable ();
17421
17422 case ABI_V4:
17423 if (!TARGET_32BIT)
17424 {
17425 warning (0, "no profiling of 64-bit code for this ABI");
17426 return;
17427 }
17428 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17429 fprintf (file, "\tmflr %s\n", reg_names[0]);
17430 if (NO_PROFILE_COUNTERS)
17431 {
17432 asm_fprintf (file, "\tstw %s,4(%s)\n",
17433 reg_names[0], reg_names[1]);
17434 }
17435 else if (TARGET_SECURE_PLT && flag_pic)
17436 {
17437 if (TARGET_LINK_STACK)
17438 {
17439 char name[32];
17440 get_ppc476_thunk_name (name);
17441 asm_fprintf (file, "\tbl %s\n", name);
17442 }
17443 else
17444 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17445 asm_fprintf (file, "\tstw %s,4(%s)\n",
17446 reg_names[0], reg_names[1]);
17447 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17448 asm_fprintf (file, "\taddis %s,%s,",
17449 reg_names[12], reg_names[12]);
17450 assemble_name (file, buf);
17451 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17452 assemble_name (file, buf);
17453 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17454 }
17455 else if (flag_pic == 1)
17456 {
17457 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17458 asm_fprintf (file, "\tstw %s,4(%s)\n",
17459 reg_names[0], reg_names[1]);
17460 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17461 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17462 assemble_name (file, buf);
17463 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17464 }
17465 else if (flag_pic > 1)
17466 {
17467 asm_fprintf (file, "\tstw %s,4(%s)\n",
17468 reg_names[0], reg_names[1]);
17469 /* Now, we need to get the address of the label. */
17470 if (TARGET_LINK_STACK)
17471 {
17472 char name[32];
17473 get_ppc476_thunk_name (name);
17474 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17475 assemble_name (file, buf);
17476 fputs ("-.\n1:", file);
17477 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17478 asm_fprintf (file, "\taddi %s,%s,4\n",
17479 reg_names[11], reg_names[11]);
17480 }
17481 else
17482 {
17483 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17484 assemble_name (file, buf);
17485 fputs ("-.\n1:", file);
17486 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17487 }
17488 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17489 reg_names[0], reg_names[11]);
17490 asm_fprintf (file, "\tadd %s,%s,%s\n",
17491 reg_names[0], reg_names[0], reg_names[11]);
17492 }
17493 else
17494 {
17495 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17496 assemble_name (file, buf);
17497 fputs ("@ha\n", file);
17498 asm_fprintf (file, "\tstw %s,4(%s)\n",
17499 reg_names[0], reg_names[1]);
17500 asm_fprintf (file, "\tla %s,", reg_names[0]);
17501 assemble_name (file, buf);
17502 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17503 }
17504
17505 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17506 fprintf (file, "\tbl %s%s\n",
17507 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17508 break;
17509
17510 case ABI_AIX:
17511 case ABI_ELFv2:
17512 case ABI_DARWIN:
17513 /* Don't do anything, done in output_profile_hook (). */
17514 break;
17515 }
17516 }
17517
17518 \f
17519
17520 /* The following variable holds the last issued insn.  */
17521
17522 static rtx_insn *last_scheduled_insn;
17523
17524 /* The following variable helps to balance issuing of load and
17525    store instructions.  */
17526
17527 static int load_store_pendulum;
17528
17529 /* The following variable helps pair divide insns during scheduling. */
17530 static int divide_cnt;
17531 /* The following variable helps pair and alternate vector and vector load
17532 insns during scheduling. */
17533 static int vec_pairing;
17534
17535
17536 /* Power4 load update and store update instructions are cracked into a
17537 load or store and an integer insn which are executed in the same cycle.
17538 Branches have their own dispatch slot which does not count against the
17539 GCC issue rate, but it changes the program flow so there are no other
17540 instructions to issue in this cycle. */
17541
17542 static int
17543 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17544 {
17545 last_scheduled_insn = insn;
17546 if (GET_CODE (PATTERN (insn)) == USE
17547 || GET_CODE (PATTERN (insn)) == CLOBBER)
17548 {
17549 cached_can_issue_more = more;
17550 return cached_can_issue_more;
17551 }
17552
17553 if (insn_terminates_group_p (insn, current_group))
17554 {
17555 cached_can_issue_more = 0;
17556 return cached_can_issue_more;
17557 }
17558
17559   /* If the insn has no reservation but we reach here anyway, use the default.  */
17560 if (recog_memoized (insn) < 0)
17561 return more;
17562
17563 if (rs6000_sched_groups)
17564 {
17565 if (is_microcoded_insn (insn))
17566 cached_can_issue_more = 0;
17567 else if (is_cracked_insn (insn))
17568 cached_can_issue_more = more > 2 ? more - 2 : 0;
17569 else
17570 cached_can_issue_more = more - 1;
17571
17572 return cached_can_issue_more;
17573 }
17574
17575 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17576 return 0;
17577
17578 cached_can_issue_more = more - 1;
17579 return cached_can_issue_more;
17580 }
17581
17582 static int
17583 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17584 {
17585 int r = rs6000_variable_issue_1 (insn, more);
17586 if (verbose)
17587 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17588 return r;
17589 }
17590
17591 /* Adjust the cost of a scheduling dependency. Return the new cost of
17592 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17593
17594 static int
17595 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17596 unsigned int)
17597 {
17598 enum attr_type attr_type;
17599
17600 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17601 return cost;
17602
17603 switch (dep_type)
17604 {
17605 case REG_DEP_TRUE:
17606 {
17607 /* Data dependency; DEP_INSN writes a register that INSN reads
17608 some cycles later. */
17609
17610 /* Separate a load from a narrower, dependent store. */
17611 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17612 || rs6000_tune == PROCESSOR_POWER10)
17613 && GET_CODE (PATTERN (insn)) == SET
17614 && GET_CODE (PATTERN (dep_insn)) == SET
17615 && MEM_P (XEXP (PATTERN (insn), 1))
17616 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17617 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17618 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17619 return cost + 14;
17620
17621 attr_type = get_attr_type (insn);
17622
17623 switch (attr_type)
17624 {
17625 case TYPE_JMPREG:
17626 /* Tell the first scheduling pass about the latency between
17627 a mtctr and bctr (and mtlr and br/blr). The first
17628 scheduling pass will not know about this latency since
17629 the mtctr instruction, which has the latency associated
17630 to it, will be generated by reload. */
17631 return 4;
17632 case TYPE_BRANCH:
17633 /* Leave some extra cycles between a compare and its
17634 dependent branch, to inhibit expensive mispredicts. */
17635 if ((rs6000_tune == PROCESSOR_PPC603
17636 || rs6000_tune == PROCESSOR_PPC604
17637 || rs6000_tune == PROCESSOR_PPC604e
17638 || rs6000_tune == PROCESSOR_PPC620
17639 || rs6000_tune == PROCESSOR_PPC630
17640 || rs6000_tune == PROCESSOR_PPC750
17641 || rs6000_tune == PROCESSOR_PPC7400
17642 || rs6000_tune == PROCESSOR_PPC7450
17643 || rs6000_tune == PROCESSOR_PPCE5500
17644 || rs6000_tune == PROCESSOR_PPCE6500
17645 || rs6000_tune == PROCESSOR_POWER4
17646 || rs6000_tune == PROCESSOR_POWER5
17647 || rs6000_tune == PROCESSOR_POWER7
17648 || rs6000_tune == PROCESSOR_POWER8
17649 || rs6000_tune == PROCESSOR_POWER9
17650 || rs6000_tune == PROCESSOR_POWER10
17651 || rs6000_tune == PROCESSOR_CELL)
17652 && recog_memoized (dep_insn)
17653 && (INSN_CODE (dep_insn) >= 0))
17654
17655 switch (get_attr_type (dep_insn))
17656 {
17657 case TYPE_CMP:
17658 case TYPE_FPCOMPARE:
17659 case TYPE_CR_LOGICAL:
17660 return cost + 2;
17661 case TYPE_EXTS:
17662 case TYPE_MUL:
17663 if (get_attr_dot (dep_insn) == DOT_YES)
17664 return cost + 2;
17665 else
17666 break;
17667 case TYPE_SHIFT:
17668 if (get_attr_dot (dep_insn) == DOT_YES
17669 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17670 return cost + 2;
17671 else
17672 break;
17673 default:
17674 break;
17675 }
17676 break;
17677
17678 case TYPE_STORE:
17679 case TYPE_FPSTORE:
17680 if ((rs6000_tune == PROCESSOR_POWER6)
17681 && recog_memoized (dep_insn)
17682 && (INSN_CODE (dep_insn) >= 0))
17683 {
17684
17685 if (GET_CODE (PATTERN (insn)) != SET)
17686 /* If this happens, we have to extend this to schedule
17687 optimally. Return default for now. */
17688 return cost;
17689
17690 /* Adjust the cost for the case where the value written
17691 by a fixed point operation is used as the address
17692 gen value on a store. */
17693 switch (get_attr_type (dep_insn))
17694 {
17695 case TYPE_LOAD:
17696 case TYPE_CNTLZ:
17697 {
17698 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17699 return get_attr_sign_extend (dep_insn)
17700 == SIGN_EXTEND_YES ? 6 : 4;
17701 break;
17702 }
17703 case TYPE_SHIFT:
17704 {
17705 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17706 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17707 6 : 3;
17708 break;
17709 }
17710 case TYPE_INTEGER:
17711 case TYPE_ADD:
17712 case TYPE_LOGICAL:
17713 case TYPE_EXTS:
17714 case TYPE_INSERT:
17715 {
17716 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17717 return 3;
17718 break;
17719 }
17720 case TYPE_STORE:
17721 case TYPE_FPLOAD:
17722 case TYPE_FPSTORE:
17723 {
17724 if (get_attr_update (dep_insn) == UPDATE_YES
17725 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17726 return 3;
17727 break;
17728 }
17729 case TYPE_MUL:
17730 {
17731 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17732 return 17;
17733 break;
17734 }
17735 case TYPE_DIV:
17736 {
17737 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17738 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17739 break;
17740 }
17741 default:
17742 break;
17743 }
17744 }
17745 break;
17746
17747 case TYPE_LOAD:
17748 if ((rs6000_tune == PROCESSOR_POWER6)
17749 && recog_memoized (dep_insn)
17750 && (INSN_CODE (dep_insn) >= 0))
17751 {
17752
17753 /* Adjust the cost for the case where the value written
17754 by a fixed point instruction is used within the address
17755 gen portion of a subsequent load(u)(x) */
17756 switch (get_attr_type (dep_insn))
17757 {
17758 case TYPE_LOAD:
17759 case TYPE_CNTLZ:
17760 {
17761 if (set_to_load_agen (dep_insn, insn))
17762 return get_attr_sign_extend (dep_insn)
17763 == SIGN_EXTEND_YES ? 6 : 4;
17764 break;
17765 }
17766 case TYPE_SHIFT:
17767 {
17768 if (set_to_load_agen (dep_insn, insn))
17769 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17770 6 : 3;
17771 break;
17772 }
17773 case TYPE_INTEGER:
17774 case TYPE_ADD:
17775 case TYPE_LOGICAL:
17776 case TYPE_EXTS:
17777 case TYPE_INSERT:
17778 {
17779 if (set_to_load_agen (dep_insn, insn))
17780 return 3;
17781 break;
17782 }
17783 case TYPE_STORE:
17784 case TYPE_FPLOAD:
17785 case TYPE_FPSTORE:
17786 {
17787 if (get_attr_update (dep_insn) == UPDATE_YES
17788 && set_to_load_agen (dep_insn, insn))
17789 return 3;
17790 break;
17791 }
17792 case TYPE_MUL:
17793 {
17794 if (set_to_load_agen (dep_insn, insn))
17795 return 17;
17796 break;
17797 }
17798 case TYPE_DIV:
17799 {
17800 if (set_to_load_agen (dep_insn, insn))
17801 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17802 break;
17803 }
17804 default:
17805 break;
17806 }
17807 }
17808 break;
17809
17810 default:
17811 break;
17812 }
17813
17814 /* Fall out to return default cost. */
17815 }
17816 break;
17817
17818 case REG_DEP_OUTPUT:
17819 /* Output dependency; DEP_INSN writes a register that INSN writes some
17820 cycles later. */
17821 if ((rs6000_tune == PROCESSOR_POWER6)
17822 && recog_memoized (dep_insn)
17823 && (INSN_CODE (dep_insn) >= 0))
17824 {
17825 attr_type = get_attr_type (insn);
17826
17827 switch (attr_type)
17828 {
17829 case TYPE_FP:
17830 case TYPE_FPSIMPLE:
17831 if (get_attr_type (dep_insn) == TYPE_FP
17832 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17833 return 1;
17834 break;
17835 default:
17836 break;
17837 }
17838 }
17839 /* Fall through, no cost for output dependency. */
17840 /* FALLTHRU */
17841
17842 case REG_DEP_ANTI:
17843 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17844 cycles later. */
17845 return 0;
17846
17847 default:
17848 gcc_unreachable ();
17849 }
17850
17851 return cost;
17852 }
17853
17854 /* Debug version of rs6000_adjust_cost. */
17855
17856 static int
17857 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17858 int cost, unsigned int dw)
17859 {
17860 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17861
17862 if (ret != cost)
17863 {
17864 const char *dep;
17865
17866 switch (dep_type)
17867 {
17868 	default: dep = "unknown dependency"; break;
17869 	case REG_DEP_TRUE: dep = "data dependency"; break;
17870 	case REG_DEP_OUTPUT: dep = "output dependency"; break;
17871 	case REG_DEP_ANTI: dep = "anti dependency"; break;
17872 }
17873
17874 fprintf (stderr,
17875 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17876 "%s, insn:\n", ret, cost, dep);
17877
17878 debug_rtx (insn);
17879 }
17880
17881 return ret;
17882 }
17883
17884 /* Return true if INSN is microcoded.  Return false otherwise.  */
17886
17887 static bool
17888 is_microcoded_insn (rtx_insn *insn)
17889 {
17890 if (!insn || !NONDEBUG_INSN_P (insn)
17891 || GET_CODE (PATTERN (insn)) == USE
17892 || GET_CODE (PATTERN (insn)) == CLOBBER)
17893 return false;
17894
17895 if (rs6000_tune == PROCESSOR_CELL)
17896 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17897
17898 if (rs6000_sched_groups
17899 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17900 {
17901 enum attr_type type = get_attr_type (insn);
17902 if ((type == TYPE_LOAD
17903 && get_attr_update (insn) == UPDATE_YES
17904 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17905 || ((type == TYPE_LOAD || type == TYPE_STORE)
17906 && get_attr_update (insn) == UPDATE_YES
17907 && get_attr_indexed (insn) == INDEXED_YES)
17908 || type == TYPE_MFCR)
17909 return true;
17910 }
17911
17912 return false;
17913 }
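
/* For illustration: on POWER4/POWER5 the conditions above correspond to
   instructions such as lhau (load with both update and sign extension),
   lwzux/stwux (load/store with both update and indexed forms) and mfcr.
   The mnemonics are given as plausible examples, not an exhaustive
   list.  */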
17914
17915 /* The function returns true if INSN is cracked into 2 instructions
17916 by the processor (and therefore occupies 2 issue slots). */
17917
17918 static bool
17919 is_cracked_insn (rtx_insn *insn)
17920 {
17921 if (!insn || !NONDEBUG_INSN_P (insn)
17922 || GET_CODE (PATTERN (insn)) == USE
17923 || GET_CODE (PATTERN (insn)) == CLOBBER)
17924 return false;
17925
17926 if (rs6000_sched_groups
17927 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17928 {
17929 enum attr_type type = get_attr_type (insn);
17930 if ((type == TYPE_LOAD
17931 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17932 && get_attr_update (insn) == UPDATE_NO)
17933 || (type == TYPE_LOAD
17934 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17935 && get_attr_update (insn) == UPDATE_YES
17936 && get_attr_indexed (insn) == INDEXED_NO)
17937 || (type == TYPE_STORE
17938 && get_attr_update (insn) == UPDATE_YES
17939 && get_attr_indexed (insn) == INDEXED_NO)
17940 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17941 && get_attr_update (insn) == UPDATE_YES)
17942 || (type == TYPE_CR_LOGICAL
17943 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17944 || (type == TYPE_EXTS
17945 && get_attr_dot (insn) == DOT_YES)
17946 || (type == TYPE_SHIFT
17947 && get_attr_dot (insn) == DOT_YES
17948 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17949 || (type == TYPE_MUL
17950 && get_attr_dot (insn) == DOT_YES)
17951 || type == TYPE_DIV
17952 || (type == TYPE_INSERT
17953 && get_attr_size (insn) == SIZE_32))
17954 return true;
17955 }
17956
17957 return false;
17958 }
17959
17960 /* The function returns true if INSN can be issued only from
17961 the branch slot. */
17962
17963 static bool
17964 is_branch_slot_insn (rtx_insn *insn)
17965 {
17966 if (!insn || !NONDEBUG_INSN_P (insn)
17967 || GET_CODE (PATTERN (insn)) == USE
17968 || GET_CODE (PATTERN (insn)) == CLOBBER)
17969 return false;
17970
17971 if (rs6000_sched_groups)
17972 {
17973 enum attr_type type = get_attr_type (insn);
17974 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17975 return true;
17976 return false;
17977 }
17978
17979 return false;
17980 }
17981
17982 /* Return true if OUT_INSN sets a value that is used in the address
17983    generation computation of IN_INSN.  */
17984 static bool
17985 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17986 {
17987 rtx out_set, in_set;
17988
17989 /* For performance reasons, only handle the simple case where
17990 both loads are a single_set. */
17991 out_set = single_set (out_insn);
17992 if (out_set)
17993 {
17994 in_set = single_set (in_insn);
17995 if (in_set)
17996 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17997 }
17998
17999 return false;
18000 }
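
/* For illustration (hypothetical RTL, not taken from a real dump): given
     OUT_INSN: (set (reg:DI 9) (plus:DI (reg:DI 9) (const_int 8)))
     IN_INSN:  (set (reg:DI 3) (mem:DI (reg:DI 9)))
   the function returns true, since the register written by the add is
   mentioned in the load's source.  The test is deliberately coarse: any
   use of the destination anywhere inside SET_SRC counts as address
   generation.  */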
18001
18002 /* Try to determine the base/offset/size parts of the given MEM.
18003    Return true if successful, false if any of the values couldn't
18004    be determined.
18005 
18006    This function only looks for REG or REG+CONST address forms.
18007    A REG+REG address form will return false.  */
18008
18009 static bool
18010 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18011 HOST_WIDE_INT *size)
18012 {
18013 rtx addr_rtx;
18014   if (MEM_SIZE_KNOWN_P (mem))
18015 *size = MEM_SIZE (mem);
18016 else
18017 return false;
18018
18019   addr_rtx = XEXP (mem, 0);
18020 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18021 addr_rtx = XEXP (addr_rtx, 1);
18022
18023 *offset = 0;
18024 while (GET_CODE (addr_rtx) == PLUS
18025 && CONST_INT_P (XEXP (addr_rtx, 1)))
18026 {
18027 *offset += INTVAL (XEXP (addr_rtx, 1));
18028 addr_rtx = XEXP (addr_rtx, 0);
18029 }
18030 if (!REG_P (addr_rtx))
18031 return false;
18032
18033 *base = addr_rtx;
18034 return true;
18035 }
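
/* A minimal usage sketch of the decomposition above (kept under #if 0,
   like the debug code elsewhere in this file; the register number and
   offsets are hypothetical).  */
#if 0
static void
rs6000_memref_parts_example (void)
{
  rtx base;
  HOST_WIDE_INT offset, size;
  /* Build (mem:DI (plus:DI (reg:DI 3) (const_int 24))) with a known
     8-byte size.  */
  rtx mem = gen_rtx_MEM (DImode,
			 plus_constant (Pmode, gen_rtx_REG (Pmode, 3), 24));
  set_mem_size (mem, 8);
  if (get_memref_parts (mem, &base, &offset, &size))
    /* Expect base = (reg:DI 3), offset = 24, size = 8.  */
    gcc_assert (REGNO (base) == 3 && offset == 24 && size == 8);
}
#endif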
18036
18037 /* Return true if the target storage location of MEM1 is adjacent
18038    to the target storage location of MEM2.  */
18040
18041 static bool
18042 adjacent_mem_locations (rtx mem1, rtx mem2)
18043 {
18044 rtx reg1, reg2;
18045 HOST_WIDE_INT off1, size1, off2, size2;
18046
18047 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18048 && get_memref_parts (mem2, &reg2, &off2, &size2))
18049 return ((REGNO (reg1) == REGNO (reg2))
18050 && ((off1 + size1 == off2)
18051 || (off2 + size2 == off1)));
18052
18053 return false;
18054 }
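
/* For illustration: two 8-byte accesses off the same base register at
   offsets 0 and 8 satisfy off1 + size1 == off2 and so are adjacent (in
   either order), while offsets 0 and 16 are not.  */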
18055
18056 /* This function returns true if it can be determined that the two MEM
18057 locations overlap by at least 1 byte based on base reg/offset/size. */
18058
18059 static bool
18060 mem_locations_overlap (rtx mem1, rtx mem2)
18061 {
18062 rtx reg1, reg2;
18063 HOST_WIDE_INT off1, size1, off2, size2;
18064
18065 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18066 && get_memref_parts (mem2, &reg2, &off2, &size2))
18067 return ((REGNO (reg1) == REGNO (reg2))
18068 && (((off1 <= off2) && (off1 + size1 > off2))
18069 || ((off2 <= off1) && (off2 + size2 > off1))));
18070
18071 return false;
18072 }
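
/* A companion sketch for the two predicates above (under #if 0,
   hypothetical values): two 4-byte accesses at 4(r3) and 8(r3) abut, so
   they are adjacent but do not overlap.  */
#if 0
static void
rs6000_adjacency_example (void)
{
  rtx base = gen_rtx_REG (Pmode, 3);
  rtx m1 = gen_rtx_MEM (SImode, plus_constant (Pmode, base, 4));
  rtx m2 = gen_rtx_MEM (SImode, plus_constant (Pmode, base, 8));
  set_mem_size (m1, 4);
  set_mem_size (m2, 4);
  gcc_assert (adjacent_mem_locations (m1, m2));	/* 4 + 4 == 8.  */
  gcc_assert (!mem_locations_overlap (m1, m2));	/* 4 + 4 > 8 is false.  */
}
#endif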
18073
18074 /* A C statement (sans semicolon) to update the integer scheduling
18075 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18076 INSN earlier, reduce the priority to execute INSN later. Do not
18077 define this macro if you do not need to adjust the scheduling
18078 priorities of insns. */
18079
18080 static int
18081 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18082 {
18083 rtx load_mem, str_mem;
18084 /* On machines (like the 750) which have asymmetric integer units,
18085 where one integer unit can do multiply and divides and the other
18086 can't, reduce the priority of multiply/divide so it is scheduled
18087 before other integer operations. */
18088
18089 #if 0
18090 if (! INSN_P (insn))
18091 return priority;
18092
18093 if (GET_CODE (PATTERN (insn)) == USE)
18094 return priority;
18095
18096 switch (rs6000_tune) {
18097 case PROCESSOR_PPC750:
18098 switch (get_attr_type (insn))
18099 {
18100 default:
18101 break;
18102
18103 case TYPE_MUL:
18104 case TYPE_DIV:
18105 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18106 priority, priority);
18107 if (priority >= 0 && priority < 0x01000000)
18108 priority >>= 3;
18109 break;
18110 }
18111 }
18112 #endif
18113
18114 if (insn_must_be_first_in_group (insn)
18115 && reload_completed
18116 && current_sched_info->sched_max_insns_priority
18117 && rs6000_sched_restricted_insns_priority)
18118 {
18119
18120 /* Prioritize insns that can be dispatched only in the first
18121 dispatch slot. */
18122 if (rs6000_sched_restricted_insns_priority == 1)
18123 /* Attach highest priority to insn. This means that in
18124 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
18125 precede 'priority' (critical path) considerations. */
18126 return current_sched_info->sched_max_insns_priority;
18127 else if (rs6000_sched_restricted_insns_priority == 2)
18128 /* Increase priority of insn by a minimal amount. This means that in
18129 haifa-sched.c:ready_sort(), only 'priority' (critical path)
18130 considerations precede dispatch-slot restriction considerations. */
18131 return (priority + 1);
18132 }
18133
18134 if (rs6000_tune == PROCESSOR_POWER6
18135 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18136 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18137 /* Attach highest priority to insn if the scheduler has just issued two
18138 stores and this instruction is a load, or two loads and this instruction
18139 is a store. Power6 wants loads and stores scheduled alternately
18140      when possible.  */
18141 return current_sched_info->sched_max_insns_priority;
18142
18143 return priority;
18144 }
18145
18146 /* Return true if the instruction is nonpipelined on the Cell. */
18147 static bool
18148 is_nonpipeline_insn (rtx_insn *insn)
18149 {
18150 enum attr_type type;
18151 if (!insn || !NONDEBUG_INSN_P (insn)
18152 || GET_CODE (PATTERN (insn)) == USE
18153 || GET_CODE (PATTERN (insn)) == CLOBBER)
18154 return false;
18155
18156 type = get_attr_type (insn);
18157 if (type == TYPE_MUL
18158 || type == TYPE_DIV
18159 || type == TYPE_SDIV
18160 || type == TYPE_DDIV
18161 || type == TYPE_SSQRT
18162 || type == TYPE_DSQRT
18163 || type == TYPE_MFCR
18164 || type == TYPE_MFCRF
18165 || type == TYPE_MFJMPR)
18166 {
18167 return true;
18168 }
18169 return false;
18170 }
18171
18172
18173 /* Return how many instructions the machine can issue per cycle. */
18174
18175 static int
18176 rs6000_issue_rate (void)
18177 {
18178 /* Unless scheduling for register pressure, use issue rate of 1 for
18179 first scheduling pass to decrease degradation. */
18180 if (!reload_completed && !flag_sched_pressure)
18181 return 1;
18182
18183 switch (rs6000_tune) {
18184 case PROCESSOR_RS64A:
18185 case PROCESSOR_PPC601: /* ? */
18186 case PROCESSOR_PPC7450:
18187 return 3;
18188 case PROCESSOR_PPC440:
18189 case PROCESSOR_PPC603:
18190 case PROCESSOR_PPC750:
18191 case PROCESSOR_PPC7400:
18192 case PROCESSOR_PPC8540:
18193 case PROCESSOR_PPC8548:
18194 case PROCESSOR_CELL:
18195 case PROCESSOR_PPCE300C2:
18196 case PROCESSOR_PPCE300C3:
18197 case PROCESSOR_PPCE500MC:
18198 case PROCESSOR_PPCE500MC64:
18199 case PROCESSOR_PPCE5500:
18200 case PROCESSOR_PPCE6500:
18201 case PROCESSOR_TITAN:
18202 return 2;
18203 case PROCESSOR_PPC476:
18204 case PROCESSOR_PPC604:
18205 case PROCESSOR_PPC604e:
18206 case PROCESSOR_PPC620:
18207 case PROCESSOR_PPC630:
18208 return 4;
18209 case PROCESSOR_POWER4:
18210 case PROCESSOR_POWER5:
18211 case PROCESSOR_POWER6:
18212 case PROCESSOR_POWER7:
18213 return 5;
18214 case PROCESSOR_POWER8:
18215 return 7;
18216 case PROCESSOR_POWER9:
18217 case PROCESSOR_POWER10:
18218 return 6;
18219 default:
18220 return 1;
18221 }
18222 }
18223
18224 /* Return how many instructions to look ahead for better insn
18225 scheduling. */
18226
18227 static int
18228 rs6000_use_sched_lookahead (void)
18229 {
18230 switch (rs6000_tune)
18231 {
18232 case PROCESSOR_PPC8540:
18233 case PROCESSOR_PPC8548:
18234 return 4;
18235
18236 case PROCESSOR_CELL:
18237 return (reload_completed ? 8 : 0);
18238
18239 default:
18240 return 0;
18241 }
18242 }
18243
18244 /* We are choosing insn from the ready queue. Return zero if INSN can be
18245 chosen. */
18246 static int
18247 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18248 {
18249 if (ready_index == 0)
18250 return 0;
18251
18252 if (rs6000_tune != PROCESSOR_CELL)
18253 return 0;
18254
18255 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18256
18257 if (!reload_completed
18258 || is_nonpipeline_insn (insn)
18259 || is_microcoded_insn (insn))
18260 return 1;
18261
18262 return 0;
18263 }
18264
18265 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18266 and return true. */
18267
18268 static bool
18269 find_mem_ref (rtx pat, rtx *mem_ref)
18270 {
18271 const char * fmt;
18272 int i, j;
18273
18274 /* stack_tie does not produce any real memory traffic. */
18275 if (tie_operand (pat, VOIDmode))
18276 return false;
18277
18278 if (MEM_P (pat))
18279 {
18280 *mem_ref = pat;
18281 return true;
18282 }
18283
18284 /* Recursively process the pattern. */
18285 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18286
18287 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18288 {
18289 if (fmt[i] == 'e')
18290 {
18291 if (find_mem_ref (XEXP (pat, i), mem_ref))
18292 return true;
18293 }
18294 else if (fmt[i] == 'E')
18295 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18296 {
18297 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18298 return true;
18299 }
18300 }
18301
18302 return false;
18303 }
18304
18305 /* Determine if PAT is a PATTERN of a load insn. */
18306
18307 static bool
18308 is_load_insn1 (rtx pat, rtx *load_mem)
18309 {
18310   if (!pat)
18311 return false;
18312
18313 if (GET_CODE (pat) == SET)
18314 return find_mem_ref (SET_SRC (pat), load_mem);
18315
18316 if (GET_CODE (pat) == PARALLEL)
18317 {
18318 int i;
18319
18320 for (i = 0; i < XVECLEN (pat, 0); i++)
18321 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18322 return true;
18323 }
18324
18325 return false;
18326 }
18327
18328 /* Determine if INSN loads from memory. */
18329
18330 static bool
18331 is_load_insn (rtx insn, rtx *load_mem)
18332 {
18333 if (!insn || !INSN_P (insn))
18334 return false;
18335
18336 if (CALL_P (insn))
18337 return false;
18338
18339 return is_load_insn1 (PATTERN (insn), load_mem);
18340 }
18341
18342 /* Determine if PAT is a PATTERN of a store insn. */
18343
18344 static bool
18345 is_store_insn1 (rtx pat, rtx *str_mem)
18346 {
18347   if (!pat)
18348 return false;
18349
18350 if (GET_CODE (pat) == SET)
18351 return find_mem_ref (SET_DEST (pat), str_mem);
18352
18353 if (GET_CODE (pat) == PARALLEL)
18354 {
18355 int i;
18356
18357 for (i = 0; i < XVECLEN (pat, 0); i++)
18358 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18359 return true;
18360 }
18361
18362 return false;
18363 }
18364
18365 /* Determine if INSN stores to memory. */
18366
18367 static bool
18368 is_store_insn (rtx insn, rtx *str_mem)
18369 {
18370 if (!insn || !INSN_P (insn))
18371 return false;
18372
18373 return is_store_insn1 (PATTERN (insn), str_mem);
18374 }
18375
18376 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18377
18378 static bool
18379 is_power9_pairable_vec_type (enum attr_type type)
18380 {
18381 switch (type)
18382 {
18383 case TYPE_VECSIMPLE:
18384 case TYPE_VECCOMPLEX:
18385 case TYPE_VECDIV:
18386 case TYPE_VECCMP:
18387 case TYPE_VECPERM:
18388 case TYPE_VECFLOAT:
18389 case TYPE_VECFDIV:
18390 case TYPE_VECDOUBLE:
18391 return true;
18392 default:
18393 break;
18394 }
18395 return false;
18396 }
18397
18398 /* Returns whether the dependence between INSN and NEXT is considered
18399 costly by the given target. */
18400
18401 static bool
18402 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18403 {
18404 rtx insn;
18405 rtx next;
18406 rtx load_mem, str_mem;
18407
18408 /* If the flag is not enabled - no dependence is considered costly;
18409 allow all dependent insns in the same group.
18410 This is the most aggressive option. */
18411 if (rs6000_sched_costly_dep == no_dep_costly)
18412 return false;
18413
18414 /* If the flag is set to 1 - a dependence is always considered costly;
18415 do not allow dependent instructions in the same group.
18416 This is the most conservative option. */
18417 if (rs6000_sched_costly_dep == all_deps_costly)
18418 return true;
18419
18420 insn = DEP_PRO (dep);
18421 next = DEP_CON (dep);
18422
18423 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18424 && is_load_insn (next, &load_mem)
18425 && is_store_insn (insn, &str_mem))
18426 /* Prevent load after store in the same group. */
18427 return true;
18428
18429 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18430 && is_load_insn (next, &load_mem)
18431 && is_store_insn (insn, &str_mem)
18432 && DEP_TYPE (dep) == REG_DEP_TRUE
18433 && mem_locations_overlap(str_mem, load_mem))
18434 /* Prevent load after store in the same group if it is a true
18435 dependence. */
18436 return true;
18437
18438 /* The flag is set to X; dependences with latency >= X are considered costly,
18439 and will not be scheduled in the same group. */
18440 if (rs6000_sched_costly_dep <= max_dep_latency
18441 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18442 return true;
18443
18444 return false;
18445 }
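
/* For illustration of the latency-threshold case: under a hypothetical
   -msched-costly-dep=3, a dependence with cost - distance == 4 is costly
   and keeps producer and consumer in separate dispatch groups, while one
   with cost - distance == 2 is not.  */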
18446
18447 /* Return the next insn after INSN that is found before TAIL is reached,
18448 skipping any "non-active" insns - insns that will not actually occupy
18449 an issue slot. Return NULL_RTX if such an insn is not found. */
18450
18451 static rtx_insn *
18452 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18453 {
18454 if (insn == NULL_RTX || insn == tail)
18455 return NULL;
18456
18457 while (1)
18458 {
18459 insn = NEXT_INSN (insn);
18460 if (insn == NULL_RTX || insn == tail)
18461 return NULL;
18462
18463 if (CALL_P (insn)
18464 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18465 || (NONJUMP_INSN_P (insn)
18466 && GET_CODE (PATTERN (insn)) != USE
18467 && GET_CODE (PATTERN (insn)) != CLOBBER
18468 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18469 break;
18470 }
18471 return insn;
18472 }
18473
18474 /* Move instruction at POS to the end of the READY list. */
18475
18476 static void
18477 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18478 {
18479 rtx_insn *tmp;
18480 int i;
18481
18482 tmp = ready[pos];
18483 for (i = pos; i < lastpos; i++)
18484 ready[i] = ready[i + 1];
18485 ready[lastpos] = tmp;
18486 }
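
/* For illustration: with READY = {A, B, C, D} (the scheduler picks from
   the end, so D is next) and POS = 1, the result is {A, C, D, B}, making
   B the next insn to be chosen.  */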
18487
18488 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18489
18490 static int
18491 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18492 {
18493 /* For Power6, we need to handle some special cases to try and keep the
18494 store queue from overflowing and triggering expensive flushes.
18495
18496 This code monitors how load and store instructions are being issued
18497 and skews the ready list one way or the other to increase the likelihood
18498 that a desired instruction is issued at the proper time.
18499
18500 A couple of things are done. First, we maintain a "load_store_pendulum"
18501 to track the current state of load/store issue.
18502
18503 - If the pendulum is at zero, then no loads or stores have been
18504 issued in the current cycle so we do nothing.
18505
18506 - If the pendulum is 1, then a single load has been issued in this
18507 cycle and we attempt to locate another load in the ready list to
18508 issue with it.
18509
18510 - If the pendulum is -2, then two stores have already been
18511 issued in this cycle, so we increase the priority of the first load
18512      in the ready list to increase its likelihood of being chosen first
18513 in the next cycle.
18514
18515 - If the pendulum is -1, then a single store has been issued in this
18516 cycle and we attempt to locate another store in the ready list to
18517 issue with it, preferring a store to an adjacent memory location to
18518 facilitate store pairing in the store queue.
18519
18520 - If the pendulum is 2, then two loads have already been
18521 issued in this cycle, so we increase the priority of the first store
18522      in the ready list to increase its likelihood of being chosen first
18523 in the next cycle.
18524
18525 - If the pendulum < -2 or > 2, then do nothing.
18526
18527      Note: This code covers the most common scenarios.  There exist
18528      non-load/store instructions which make use of the LSU and which
18529      would need to be accounted for to strictly model the behavior
18530 of the machine. Those instructions are currently unaccounted
18531 for to help minimize compile time overhead of this code.
18532 */
18533 int pos;
18534 rtx load_mem, str_mem;
18535
18536 if (is_store_insn (last_scheduled_insn, &str_mem))
18537     /* Issuing a store, swing the load_store_pendulum to the left.  */
18538     load_store_pendulum--;
18539   else if (is_load_insn (last_scheduled_insn, &load_mem))
18540     /* Issuing a load, swing the load_store_pendulum to the right.  */
18541 load_store_pendulum++;
18542 else
18543 return cached_can_issue_more;
18544
18545 /* If the pendulum is balanced, or there is only one instruction on
18546 the ready list, then all is well, so return. */
18547 if ((load_store_pendulum == 0) || (lastpos <= 0))
18548 return cached_can_issue_more;
18549
18550 if (load_store_pendulum == 1)
18551 {
18552 /* A load has been issued in this cycle. Scan the ready list
18553 	 for another load to issue with it.  */
18554 pos = lastpos;
18555
18556 while (pos >= 0)
18557 {
18558 if (is_load_insn (ready[pos], &load_mem))
18559 {
18560 /* Found a load. Move it to the head of the ready list,
18561 		 and adjust its priority so that it is more likely to
18562 stay there */
18563 move_to_end_of_ready (ready, pos, lastpos);
18564
18565 if (!sel_sched_p ()
18566 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18567 INSN_PRIORITY (ready[lastpos])++;
18568 break;
18569 }
18570 pos--;
18571 }
18572 }
18573 else if (load_store_pendulum == -2)
18574 {
18575 /* Two stores have been issued in this cycle. Increase the
18576 priority of the first load in the ready list to favor it for
18577 issuing in the next cycle. */
18578 pos = lastpos;
18579
18580 while (pos >= 0)
18581 {
18582 if (is_load_insn (ready[pos], &load_mem)
18583 && !sel_sched_p ()
18584 && INSN_PRIORITY_KNOWN (ready[pos]))
18585 {
18586 INSN_PRIORITY (ready[pos])++;
18587
18588 /* Adjust the pendulum to account for the fact that a load
18589 was found and increased in priority. This is to prevent
18590 		    increasing the priority of multiple loads.  */
18591 load_store_pendulum--;
18592
18593 break;
18594 }
18595 pos--;
18596 }
18597 }
18598 else if (load_store_pendulum == -1)
18599 {
18600 /* A store has been issued in this cycle. Scan the ready list for
18601 another store to issue with it, preferring a store to an adjacent
18602 	 memory location.  */
18603 int first_store_pos = -1;
18604
18605 pos = lastpos;
18606
18607 while (pos >= 0)
18608 {
18609 if (is_store_insn (ready[pos], &str_mem))
18610 {
18611 rtx str_mem2;
18612 /* Maintain the index of the first store found on the
18613 list */
18614 if (first_store_pos == -1)
18615 first_store_pos = pos;
18616
18617 if (is_store_insn (last_scheduled_insn, &str_mem2)
18618 && adjacent_mem_locations (str_mem, str_mem2))
18619 {
18620 /* Found an adjacent store. Move it to the head of the
18621 		     ready list, and adjust its priority so that it is
18622 more likely to stay there */
18623 move_to_end_of_ready (ready, pos, lastpos);
18624
18625 if (!sel_sched_p ()
18626 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18627 INSN_PRIORITY (ready[lastpos])++;
18628
18629 first_store_pos = -1;
18630
18631 break;
18632 		}
18633 }
18634 pos--;
18635 }
18636
18637 if (first_store_pos >= 0)
18638 {
18639 /* An adjacent store wasn't found, but a non-adjacent store was,
18640 so move the non-adjacent store to the front of the ready
18641 list, and adjust its priority so that it is more likely to
18642 stay there. */
18643 move_to_end_of_ready (ready, first_store_pos, lastpos);
18644 if (!sel_sched_p ()
18645 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18646 INSN_PRIORITY (ready[lastpos])++;
18647 }
18648 }
18649 else if (load_store_pendulum == 2)
18650 {
18651 /* Two loads have been issued in this cycle. Increase the priority
18652 of the first store in the ready list to favor it for issuing in
18653 the next cycle. */
18654 pos = lastpos;
18655
18656 while (pos >= 0)
18657 {
18658 if (is_store_insn (ready[pos], &str_mem)
18659 && !sel_sched_p ()
18660 && INSN_PRIORITY_KNOWN (ready[pos]))
18661 {
18662 INSN_PRIORITY (ready[pos])++;
18663
18664 /* Adjust the pendulum to account for the fact that a store
18665 was found and increased in priority. This is to prevent
18666 		    increasing the priority of multiple stores.  */
18667 load_store_pendulum++;
18668
18669 break;
18670 }
18671 pos--;
18672 }
18673 }
18674
18675 return cached_can_issue_more;
18676 }
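
/* A worked example of the pendulum (hypothetical issue sequence): from 0,
   issuing a load moves it to 1 and the code above tries to pull a second
   load to the end of the ready list; a second load moves it to 2, at
   which point the first store found gets a priority bump so that the next
   cycle can begin with a store.  Stores mirror this through -1 and -2.  */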
18677
18678 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18679
18680 static int
18681 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18682 {
18683 int pos;
18684 enum attr_type type, type2;
18685
18686 type = get_attr_type (last_scheduled_insn);
18687
18688 /* Try to issue fixed point divides back-to-back in pairs so they will be
18689 routed to separate execution units and execute in parallel. */
18690 if (type == TYPE_DIV && divide_cnt == 0)
18691 {
18692 /* First divide has been scheduled. */
18693 divide_cnt = 1;
18694
18695 /* Scan the ready list looking for another divide, if found move it
18696 to the end of the list so it is chosen next. */
18697 pos = lastpos;
18698 while (pos >= 0)
18699 {
18700 if (recog_memoized (ready[pos]) >= 0
18701 && get_attr_type (ready[pos]) == TYPE_DIV)
18702 {
18703 move_to_end_of_ready (ready, pos, lastpos);
18704 break;
18705 }
18706 pos--;
18707 }
18708 }
18709 else
18710 {
18711 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18712 divide_cnt = 0;
18713
18714 /* The best dispatch throughput for vector and vector load insns can be
18715 achieved by interleaving a vector and vector load such that they'll
18716 dispatch to the same superslice. If this pairing cannot be achieved
18717 then it is best to pair vector insns together and vector load insns
18718 together.
18719
18720 To aid in this pairing, vec_pairing maintains the current state with
18721 the following values:
18722
18723 0 : Initial state, no vecload/vector pairing has been started.
18724
18725 1 : A vecload or vector insn has been issued and a candidate for
18726 pairing has been found and moved to the end of the ready
18727 list. */
18728 if (type == TYPE_VECLOAD)
18729 {
18730 /* Issued a vecload. */
18731 if (vec_pairing == 0)
18732 {
18733 int vecload_pos = -1;
18734 /* We issued a single vecload, look for a vector insn to pair it
18735 with. If one isn't found, try to pair another vecload. */
18736 pos = lastpos;
18737 while (pos >= 0)
18738 {
18739 if (recog_memoized (ready[pos]) >= 0)
18740 {
18741 type2 = get_attr_type (ready[pos]);
18742 if (is_power9_pairable_vec_type (type2))
18743 {
18744 /* Found a vector insn to pair with, move it to the
18745 end of the ready list so it is scheduled next. */
18746 move_to_end_of_ready (ready, pos, lastpos);
18747 vec_pairing = 1;
18748 return cached_can_issue_more;
18749 }
18750 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18751 /* Remember position of first vecload seen. */
18752 vecload_pos = pos;
18753 }
18754 pos--;
18755 }
18756 if (vecload_pos >= 0)
18757 {
18758 /* Didn't find a vector to pair with but did find a vecload,
18759 move it to the end of the ready list. */
18760 move_to_end_of_ready (ready, vecload_pos, lastpos);
18761 vec_pairing = 1;
18762 return cached_can_issue_more;
18763 }
18764 }
18765 }
18766 else if (is_power9_pairable_vec_type (type))
18767 {
18768 /* Issued a vector operation. */
18769 if (vec_pairing == 0)
18770 {
18771 int vec_pos = -1;
18772 /* We issued a single vector insn, look for a vecload to pair it
18773 with. If one isn't found, try to pair another vector. */
18774 pos = lastpos;
18775 while (pos >= 0)
18776 {
18777 if (recog_memoized (ready[pos]) >= 0)
18778 {
18779 type2 = get_attr_type (ready[pos]);
18780 if (type2 == TYPE_VECLOAD)
18781 {
18782 /* Found a vecload insn to pair with, move it to the
18783 end of the ready list so it is scheduled next. */
18784 move_to_end_of_ready (ready, pos, lastpos);
18785 vec_pairing = 1;
18786 return cached_can_issue_more;
18787 }
18788 else if (is_power9_pairable_vec_type (type2)
18789 && vec_pos == -1)
18790 /* Remember position of first vector insn seen. */
18791 vec_pos = pos;
18792 }
18793 pos--;
18794 }
18795 if (vec_pos >= 0)
18796 {
18797 /* Didn't find a vecload to pair with but did find a vector
18798 insn, move it to the end of the ready list. */
18799 move_to_end_of_ready (ready, vec_pos, lastpos);
18800 vec_pairing = 1;
18801 return cached_can_issue_more;
18802 }
18803 }
18804 }
18805
18806 /* We've either finished a vec/vecload pair, couldn't find an insn to
18807 	 continue the current pair, or the last insn had nothing to do
18808 	 with pairing.  In any case, reset the state.  */
18809 vec_pairing = 0;
18810 }
18811
18812 return cached_can_issue_more;
18813 }
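
/* A worked example of vec_pairing (hypothetical sequence): after a
   TYPE_VECLOAD issues with vec_pairing == 0, a pairable vector insn found
   in the ready list is moved to the end and vec_pairing becomes 1; after
   the partner (or any insn unrelated to pairing) issues, the state resets
   to 0 and the search starts over for the next pair.  */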
18814
18815 /* We are about to begin issuing insns for this clock cycle. */
18816
18817 static int
18818 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18819 rtx_insn **ready ATTRIBUTE_UNUSED,
18820 int *pn_ready ATTRIBUTE_UNUSED,
18821 int clock_var ATTRIBUTE_UNUSED)
18822 {
18823 int n_ready = *pn_ready;
18824
18825 if (sched_verbose)
18826 fprintf (dump, "// rs6000_sched_reorder :\n");
18827
18828 /* Reorder the ready list, if the second to last ready insn
18829      is a nonpipeline insn.  */
18830 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18831 {
18832 if (is_nonpipeline_insn (ready[n_ready - 1])
18833 && (recog_memoized (ready[n_ready - 2]) > 0))
18834 /* Simply swap first two insns. */
18835 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18836 }
18837
18838 if (rs6000_tune == PROCESSOR_POWER6)
18839 load_store_pendulum = 0;
18840
18841 return rs6000_issue_rate ();
18842 }
18843
18844 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18845
18846 static int
18847 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18848 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18849 {
18850 if (sched_verbose)
18851 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18852
18853 /* Do Power6 dependent reordering if necessary. */
18854 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18855 return power6_sched_reorder2 (ready, *pn_ready - 1);
18856
18857 /* Do Power9 dependent reordering if necessary. */
18858 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18859 && recog_memoized (last_scheduled_insn) >= 0)
18860 return power9_sched_reorder2 (ready, *pn_ready - 1);
18861
18862 return cached_can_issue_more;
18863 }
18864
18865 /* Return whether the presence of INSN causes a dispatch group termination
18866 of group WHICH_GROUP.
18867
18868 If WHICH_GROUP == current_group, this function will return true if INSN
18869    causes the termination of the current group (i.e., the dispatch group to
18870    which INSN belongs).  This means that INSN will be the last insn in the
18871    group it belongs to.
18872 
18873    If WHICH_GROUP == previous_group, this function will return true if INSN
18874    causes the termination of the previous group (i.e., the dispatch group
18875    that precedes the group to which INSN belongs).  This means that INSN
18876    will be the first insn in the group it belongs to.  */
18877
18878 static bool
18879 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18880 {
18881 bool first, last;
18882
18883 if (! insn)
18884 return false;
18885
18886 first = insn_must_be_first_in_group (insn);
18887 last = insn_must_be_last_in_group (insn);
18888
18889 if (first && last)
18890 return true;
18891
18892 if (which_group == current_group)
18893 return last;
18894 else if (which_group == previous_group)
18895 return first;
18896
18897 return false;
18898 }
18899
18900
18901 static bool
18902 insn_must_be_first_in_group (rtx_insn *insn)
18903 {
18904 enum attr_type type;
18905
18906 if (!insn
18907 || NOTE_P (insn)
18908 || DEBUG_INSN_P (insn)
18909 || GET_CODE (PATTERN (insn)) == USE
18910 || GET_CODE (PATTERN (insn)) == CLOBBER)
18911 return false;
18912
18913 switch (rs6000_tune)
18914 {
18915 case PROCESSOR_POWER5:
18916 if (is_cracked_insn (insn))
18917 return true;
18918 /* FALLTHRU */
18919 case PROCESSOR_POWER4:
18920 if (is_microcoded_insn (insn))
18921 return true;
18922
18923 if (!rs6000_sched_groups)
18924 return false;
18925
18926 type = get_attr_type (insn);
18927
18928 switch (type)
18929 {
18930 case TYPE_MFCR:
18931 case TYPE_MFCRF:
18932 case TYPE_MTCR:
18933 case TYPE_CR_LOGICAL:
18934 case TYPE_MTJMPR:
18935 case TYPE_MFJMPR:
18936 case TYPE_DIV:
18937 case TYPE_LOAD_L:
18938 case TYPE_STORE_C:
18939 case TYPE_ISYNC:
18940 case TYPE_SYNC:
18941 return true;
18942 default:
18943 break;
18944 }
18945 break;
18946 case PROCESSOR_POWER6:
18947 type = get_attr_type (insn);
18948
18949 switch (type)
18950 {
18951 case TYPE_EXTS:
18952 case TYPE_CNTLZ:
18953 case TYPE_TRAP:
18954 case TYPE_MUL:
18955 case TYPE_INSERT:
18956 case TYPE_FPCOMPARE:
18957 case TYPE_MFCR:
18958 case TYPE_MTCR:
18959 case TYPE_MFJMPR:
18960 case TYPE_MTJMPR:
18961 case TYPE_ISYNC:
18962 case TYPE_SYNC:
18963 case TYPE_LOAD_L:
18964 case TYPE_STORE_C:
18965 return true;
18966 case TYPE_SHIFT:
18967 if (get_attr_dot (insn) == DOT_NO
18968 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18969 return true;
18970 else
18971 break;
18972 case TYPE_DIV:
18973 if (get_attr_size (insn) == SIZE_32)
18974 return true;
18975 else
18976 break;
18977 case TYPE_LOAD:
18978 case TYPE_STORE:
18979 case TYPE_FPLOAD:
18980 case TYPE_FPSTORE:
18981 if (get_attr_update (insn) == UPDATE_YES)
18982 return true;
18983 else
18984 break;
18985 default:
18986 break;
18987 }
18988 break;
18989 case PROCESSOR_POWER7:
18990 type = get_attr_type (insn);
18991
18992 switch (type)
18993 {
18994 case TYPE_CR_LOGICAL:
18995 case TYPE_MFCR:
18996 case TYPE_MFCRF:
18997 case TYPE_MTCR:
18998 case TYPE_DIV:
18999 case TYPE_ISYNC:
19000 case TYPE_LOAD_L:
19001 case TYPE_STORE_C:
19002 case TYPE_MFJMPR:
19003 case TYPE_MTJMPR:
19004 return true;
19005 case TYPE_MUL:
19006 case TYPE_SHIFT:
19007 case TYPE_EXTS:
19008 if (get_attr_dot (insn) == DOT_YES)
19009 return true;
19010 else
19011 break;
19012 case TYPE_LOAD:
19013 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19014 || get_attr_update (insn) == UPDATE_YES)
19015 return true;
19016 else
19017 break;
19018 case TYPE_STORE:
19019 case TYPE_FPLOAD:
19020 case TYPE_FPSTORE:
19021 if (get_attr_update (insn) == UPDATE_YES)
19022 return true;
19023 else
19024 break;
19025 default:
19026 break;
19027 }
19028 break;
19029 case PROCESSOR_POWER8:
19030 type = get_attr_type (insn);
19031
19032 switch (type)
19033 {
19034 case TYPE_CR_LOGICAL:
19035 case TYPE_MFCR:
19036 case TYPE_MFCRF:
19037 case TYPE_MTCR:
19038 case TYPE_SYNC:
19039 case TYPE_ISYNC:
19040 case TYPE_LOAD_L:
19041 case TYPE_STORE_C:
19042 case TYPE_VECSTORE:
19043 case TYPE_MFJMPR:
19044 case TYPE_MTJMPR:
19045 return true;
19046 case TYPE_SHIFT:
19047 case TYPE_EXTS:
19048 case TYPE_MUL:
19049 if (get_attr_dot (insn) == DOT_YES)
19050 return true;
19051 else
19052 break;
19053 case TYPE_LOAD:
19054 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19055 || get_attr_update (insn) == UPDATE_YES)
19056 return true;
19057 else
19058 break;
19059 case TYPE_STORE:
19060 if (get_attr_update (insn) == UPDATE_YES
19061 && get_attr_indexed (insn) == INDEXED_YES)
19062 return true;
19063 else
19064 break;
19065 default:
19066 break;
19067 }
19068 break;
19069 default:
19070 break;
19071 }
19072
19073 return false;
19074 }
19075
19076 static bool
19077 insn_must_be_last_in_group (rtx_insn *insn)
19078 {
19079 enum attr_type type;
19080
19081 if (!insn
19082 || NOTE_P (insn)
19083 || DEBUG_INSN_P (insn)
19084 || GET_CODE (PATTERN (insn)) == USE
19085 || GET_CODE (PATTERN (insn)) == CLOBBER)
19086 return false;
19087
19088 switch (rs6000_tune) {
19089 case PROCESSOR_POWER4:
19090 case PROCESSOR_POWER5:
19091 if (is_microcoded_insn (insn))
19092 return true;
19093
19094 if (is_branch_slot_insn (insn))
19095 return true;
19096
19097 break;
19098 case PROCESSOR_POWER6:
19099 type = get_attr_type (insn);
19100
19101 switch (type)
19102 {
19103 case TYPE_EXTS:
19104 case TYPE_CNTLZ:
19105 case TYPE_TRAP:
19106 case TYPE_MUL:
19107 case TYPE_FPCOMPARE:
19108 case TYPE_MFCR:
19109 case TYPE_MTCR:
19110 case TYPE_MFJMPR:
19111 case TYPE_MTJMPR:
19112 case TYPE_ISYNC:
19113 case TYPE_SYNC:
19114 case TYPE_LOAD_L:
19115 case TYPE_STORE_C:
19116 return true;
19117 case TYPE_SHIFT:
19118 if (get_attr_dot (insn) == DOT_NO
19119 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19120 return true;
19121 else
19122 break;
19123 case TYPE_DIV:
19124 if (get_attr_size (insn) == SIZE_32)
19125 return true;
19126 else
19127 break;
19128 default:
19129 break;
19130 }
19131 break;
19132 case PROCESSOR_POWER7:
19133 type = get_attr_type (insn);
19134
19135 switch (type)
19136 {
19137 case TYPE_ISYNC:
19138 case TYPE_SYNC:
19139 case TYPE_LOAD_L:
19140 case TYPE_STORE_C:
19141 return true;
19142 case TYPE_LOAD:
19143 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19144 && get_attr_update (insn) == UPDATE_YES)
19145 return true;
19146 else
19147 break;
19148 case TYPE_STORE:
19149 if (get_attr_update (insn) == UPDATE_YES
19150 && get_attr_indexed (insn) == INDEXED_YES)
19151 return true;
19152 else
19153 break;
19154 default:
19155 break;
19156 }
19157 break;
19158 case PROCESSOR_POWER8:
19159 type = get_attr_type (insn);
19160
19161 switch (type)
19162 {
19163 case TYPE_MFCR:
19164 case TYPE_MTCR:
19165 case TYPE_ISYNC:
19166 case TYPE_SYNC:
19167 case TYPE_LOAD_L:
19168 case TYPE_STORE_C:
19169 return true;
19170 case TYPE_LOAD:
19171 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19172 && get_attr_update (insn) == UPDATE_YES)
19173 return true;
19174 else
19175 break;
19176 case TYPE_STORE:
19177 if (get_attr_update (insn) == UPDATE_YES
19178 && get_attr_indexed (insn) == INDEXED_YES)
19179 return true;
19180 else
19181 break;
19182 default:
19183 break;
19184 }
19185 break;
19186 default:
19187 break;
19188 }
19189
19190 return false;
19191 }
19192
19193 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19194 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19195
19196 static bool
19197 is_costly_group (rtx *group_insns, rtx next_insn)
19198 {
19199 int i;
19200 int issue_rate = rs6000_issue_rate ();
19201
19202 for (i = 0; i < issue_rate; i++)
19203 {
19204 sd_iterator_def sd_it;
19205 dep_t dep;
19206 rtx insn = group_insns[i];
19207
19208 if (!insn)
19209 continue;
19210
19211 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19212 {
19213 rtx next = DEP_CON (dep);
19214
19215 if (next == next_insn
19216 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19217 return true;
19218 }
19219 }
19220
19221 return false;
19222 }
19223
19224 /* Utility of the function redefine_groups.
19225 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19226 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19227 to keep it "far" (in a separate group) from GROUP_INSNS, following
19228 one of the following schemes, depending on the value of the flag
19229    -minsert-sched-nops = X:
19230 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19231 in order to force NEXT_INSN into a separate group.
19232 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19233 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19234 insertion (has a group just ended, how many vacant issue slots remain in the
19235 last group, and how many dispatch groups were encountered so far). */
19236
19237 static int
19238 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19239 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19240 int *group_count)
19241 {
19242 rtx nop;
19243 bool force;
19244 int issue_rate = rs6000_issue_rate ();
19245 bool end = *group_end;
19246 int i;
19247
19248 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19249 return can_issue_more;
19250
19251 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19252 return can_issue_more;
19253
19254 force = is_costly_group (group_insns, next_insn);
19255 if (!force)
19256 return can_issue_more;
19257
19258 if (sched_verbose > 6)
19259 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
19260 *group_count ,can_issue_more);
19261
19262 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19263 {
19264 if (*group_end)
19265 can_issue_more = 0;
19266
19267 /* Since only a branch can be issued in the last issue_slot, it is
19268 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19269 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19270 in this case the last nop will start a new group and the branch
19271 will be forced to the new group. */
19272 if (can_issue_more && !is_branch_slot_insn (next_insn))
19273 can_issue_more--;
19274
19275 /* Do we have a special group ending nop? */
19276 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19277 || rs6000_tune == PROCESSOR_POWER8)
19278 {
19279 nop = gen_group_ending_nop ();
19280 emit_insn_before (nop, next_insn);
19281 can_issue_more = 0;
19282 }
19283 else
19284 while (can_issue_more > 0)
19285 {
19286 nop = gen_nop ();
19287 emit_insn_before (nop, next_insn);
19288 can_issue_more--;
19289 }
19290
19291 *group_end = true;
19292 return 0;
19293 }
19294
19295 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19296 {
19297 int n_nops = rs6000_sched_insert_nops;
19298
19299 /* Nops can't be issued from the branch slot, so the effective
19300 issue_rate for nops is 'issue_rate - 1'. */
19301 if (can_issue_more == 0)
19302 can_issue_more = issue_rate;
19303 can_issue_more--;
19304 if (can_issue_more == 0)
19305 {
19306 can_issue_more = issue_rate - 1;
19307 (*group_count)++;
19308 end = true;
19309 for (i = 0; i < issue_rate; i++)
19310 {
19311 group_insns[i] = 0;
19312 }
19313 }
19314
19315 while (n_nops > 0)
19316 {
19317 nop = gen_nop ();
19318 emit_insn_before (nop, next_insn);
19319 if (can_issue_more == issue_rate - 1) /* new group begins */
19320 end = false;
19321 can_issue_more--;
19322 if (can_issue_more == 0)
19323 {
19324 can_issue_more = issue_rate - 1;
19325 (*group_count)++;
19326 end = true;
19327 for (i = 0; i < issue_rate; i++)
19328 {
19329 group_insns[i] = 0;
19330 }
19331 }
19332 n_nops--;
19333 }
19334
19335 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19336 can_issue_more++;
19337
19338 /* Is next_insn going to start a new group? */
19339 *group_end
19340 = (end
19341 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19342 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19343 || (can_issue_more < issue_rate &&
19344 insn_terminates_group_p (next_insn, previous_group)));
19345 if (*group_end && end)
19346 (*group_count)--;
19347
19348 if (sched_verbose > 6)
19349 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19350 *group_count, can_issue_more);
19351 return can_issue_more;
19352 }
19353
19354 return can_issue_more;
19355 }
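
/* For illustration: under a hypothetical -minsert-sched-nops=2, exactly
   two nops are emitted before a NEXT_INSN that has a costly dependence on
   the current group; under -minsert-sched-nops=regroup_exact the group is
   instead padded with however many nops are needed (or one group-ending
   nop on POWER6/7/8) so that NEXT_INSN starts a fresh group.  */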
19356
19357 /* This function tries to synch the dispatch groups that the compiler "sees"
19358 with the dispatch groups that the processor dispatcher is expected to
19359 form in practice. It tries to achieve this synchronization by forcing the
19360 estimated processor grouping on the compiler (as opposed to the function
19361    'pad_groups' which tries to force the scheduler's grouping on the processor).
19362
19363 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19364 examines the (estimated) dispatch groups that will be formed by the processor
19365 dispatcher. It marks these group boundaries to reflect the estimated
19366 processor grouping, overriding the grouping that the scheduler had marked.
19367 Depending on the value of the flag '-minsert-sched-nops' this function can
19368 force certain insns into separate groups or force a certain distance between
19369 them by inserting nops, for example, if there exists a "costly dependence"
19370 between the insns.
19371
19372 The function estimates the group boundaries that the processor will form as
19373 follows: It keeps track of how many vacant issue slots are available after
19374 each insn. A subsequent insn will start a new group if one of the following
19375 4 cases applies:
19376 - no more vacant issue slots remain in the current dispatch group.
19377 - only the last issue slot, which is the branch slot, is vacant, but the next
19378 insn is not a branch.
19379    - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19380      which means that a cracked insn (which occupies two issue slots) can't be
19381      issued in this group.
19382    - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19383 start a new group. */
19384
19385 static int
19386 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19387 rtx_insn *tail)
19388 {
19389 rtx_insn *insn, *next_insn;
19390 int issue_rate;
19391 int can_issue_more;
19392 int slot, i;
19393 bool group_end;
19394 int group_count = 0;
19395 rtx *group_insns;
19396
19397 /* Initialize. */
19398 issue_rate = rs6000_issue_rate ();
19399 group_insns = XALLOCAVEC (rtx, issue_rate);
19400 for (i = 0; i < issue_rate; i++)
19401 {
19402 group_insns[i] = 0;
19403 }
19404 can_issue_more = issue_rate;
19405 slot = 0;
19406 insn = get_next_active_insn (prev_head_insn, tail);
19407 group_end = false;
19408
19409 while (insn != NULL_RTX)
19410 {
19411 slot = (issue_rate - can_issue_more);
19412 group_insns[slot] = insn;
19413 can_issue_more =
19414 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19415 if (insn_terminates_group_p (insn, current_group))
19416 can_issue_more = 0;
19417
19418 next_insn = get_next_active_insn (insn, tail);
19419 if (next_insn == NULL_RTX)
19420 return group_count + 1;
19421
19422 /* Is next_insn going to start a new group? */
19423 group_end
19424 = (can_issue_more == 0
19425 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19426 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19427 || (can_issue_more < issue_rate &&
19428 insn_terminates_group_p (next_insn, previous_group)));
19429
19430 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19431 next_insn, &group_end, can_issue_more,
19432 &group_count);
19433
19434 if (group_end)
19435 {
19436 group_count++;
19437 can_issue_more = 0;
19438 for (i = 0; i < issue_rate; i++)
19439 {
19440 group_insns[i] = 0;
19441 }
19442 }
19443
19444 if (GET_MODE (next_insn) == TImode && can_issue_more)
19445 PUT_MODE (next_insn, VOIDmode);
19446 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19447 PUT_MODE (next_insn, TImode);
19448
19449 insn = next_insn;
19450 if (can_issue_more == 0)
19451 can_issue_more = issue_rate;
19452 } /* while */
19453
19454 return group_count;
19455 }
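
/* For illustration (hypothetical counts): with issue_rate == 5, once four
   insns occupy the current group only the branch slot is vacant, so a
   non-branch next insn is marked with TImode as starting a new group;
   likewise a cracked insn (needing two slots) when only two slots
   remain.  */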
19456
19457 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19458 dispatch group boundaries that the scheduler had marked. Pad with nops
19459 any dispatch groups which have vacant issue slots, in order to force the
19460 scheduler's grouping on the processor dispatcher. The function
19461 returns the number of dispatch groups found. */
19462
19463 static int
19464 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19465 rtx_insn *tail)
19466 {
19467 rtx_insn *insn, *next_insn;
19468 rtx nop;
19469 int issue_rate;
19470 int can_issue_more;
19471 int group_end;
19472 int group_count = 0;
19473
19474 /* Initialize issue_rate. */
19475 issue_rate = rs6000_issue_rate ();
19476 can_issue_more = issue_rate;
19477
19478 insn = get_next_active_insn (prev_head_insn, tail);
19479 next_insn = get_next_active_insn (insn, tail);
19480
19481 while (insn != NULL_RTX)
19482 {
19483 can_issue_more =
19484 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19485
19486 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19487
19488 if (next_insn == NULL_RTX)
19489 break;
19490
19491 if (group_end)
19492 {
19493 /* If the scheduler had marked group termination at this location
19494 (between insn and next_insn), and neither insn nor next_insn will
19495 force group termination, pad the group with nops to force group
19496 termination. */
19497 if (can_issue_more
19498 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19499 && !insn_terminates_group_p (insn, current_group)
19500 && !insn_terminates_group_p (next_insn, previous_group))
19501 {
19502 if (!is_branch_slot_insn (next_insn))
19503 can_issue_more--;
19504
19505 while (can_issue_more)
19506 {
19507 nop = gen_nop ();
19508 emit_insn_before (nop, next_insn);
19509 can_issue_more--;
19510 }
19511 }
19512
19513 can_issue_more = issue_rate;
19514 group_count++;
19515 }
19516
19517 insn = next_insn;
19518 next_insn = get_next_active_insn (insn, tail);
19519 }
19520
19521 return group_count;
19522 }
19523
19524 /* We're beginning a new block. Initialize data structures as necessary. */
19525
19526 static void
19527 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19528 int sched_verbose ATTRIBUTE_UNUSED,
19529 int max_ready ATTRIBUTE_UNUSED)
19530 {
19531 last_scheduled_insn = NULL;
19532 load_store_pendulum = 0;
19533 divide_cnt = 0;
19534 vec_pairing = 0;
19535 }
19536
19537 /* The following function is called at the end of scheduling BB.
19538    After reload, it inserts nops to enforce insn group bundling.  */
19539
19540 static void
19541 rs6000_sched_finish (FILE *dump, int sched_verbose)
19542 {
19543 int n_groups;
19544
19545 if (sched_verbose)
19546 fprintf (dump, "=== Finishing schedule.\n");
19547
19548 if (reload_completed && rs6000_sched_groups)
19549 {
19550 /* Do not run sched_finish hook when selective scheduling enabled. */
19551 if (sel_sched_p ())
19552 return;
19553
19554 if (rs6000_sched_insert_nops == sched_finish_none)
19555 return;
19556
19557 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19558 n_groups = pad_groups (dump, sched_verbose,
19559 current_sched_info->prev_head,
19560 current_sched_info->next_tail);
19561 else
19562 n_groups = redefine_groups (dump, sched_verbose,
19563 current_sched_info->prev_head,
19564 current_sched_info->next_tail);
19565
19566 if (sched_verbose >= 6)
19567 {
19568 fprintf (dump, "ngroups = %d\n", n_groups);
19569 print_rtl (dump, current_sched_info->prev_head);
19570 fprintf (dump, "Done finish_sched\n");
19571 }
19572 }
19573 }
19574
19575 struct rs6000_sched_context
19576 {
19577 short cached_can_issue_more;
19578 rtx_insn *last_scheduled_insn;
19579 int load_store_pendulum;
19580 int divide_cnt;
19581 int vec_pairing;
19582 };
19583
19584 typedef struct rs6000_sched_context rs6000_sched_context_def;
19585 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19586
19587 /* Allocate storage for a new scheduling context.  */
19588 static void *
19589 rs6000_alloc_sched_context (void)
19590 {
19591 return xmalloc (sizeof (rs6000_sched_context_def));
19592 }
19593
19594 /* If CLEAN_P is true, initialize _SC with clean data;
19595    otherwise initialize it from the global context.  */
19596 static void
19597 rs6000_init_sched_context (void *_sc, bool clean_p)
19598 {
19599 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19600
19601 if (clean_p)
19602 {
19603 sc->cached_can_issue_more = 0;
19604 sc->last_scheduled_insn = NULL;
19605 sc->load_store_pendulum = 0;
19606 sc->divide_cnt = 0;
19607 sc->vec_pairing = 0;
19608 }
19609 else
19610 {
19611 sc->cached_can_issue_more = cached_can_issue_more;
19612 sc->last_scheduled_insn = last_scheduled_insn;
19613 sc->load_store_pendulum = load_store_pendulum;
19614 sc->divide_cnt = divide_cnt;
19615 sc->vec_pairing = vec_pairing;
19616 }
19617 }
19618
19619 /* Sets the global scheduling context to the one pointed to by _SC. */
19620 static void
19621 rs6000_set_sched_context (void *_sc)
19622 {
19623 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19624
19625 gcc_assert (sc != NULL);
19626
19627 cached_can_issue_more = sc->cached_can_issue_more;
19628 last_scheduled_insn = sc->last_scheduled_insn;
19629 load_store_pendulum = sc->load_store_pendulum;
19630 divide_cnt = sc->divide_cnt;
19631 vec_pairing = sc->vec_pairing;
19632 }
19633
19634 /* Free _SC. */
19635 static void
19636 rs6000_free_sched_context (void *_sc)
19637 {
19638 gcc_assert (_sc != NULL);
19639
19640 free (_sc);
19641 }
19642
19643 static bool
19644 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19645 {
19646 switch (get_attr_type (insn))
19647 {
19648 case TYPE_DIV:
19649 case TYPE_SDIV:
19650 case TYPE_DDIV:
19651 case TYPE_VECDIV:
19652 case TYPE_SSQRT:
19653 case TYPE_DSQRT:
19654 return false;
19655
19656 default:
19657 return true;
19658 }
19659 }
19660 \f
19661 /* Length in units of the trampoline for entering a nested function. */
19662
19663 int
19664 rs6000_trampoline_size (void)
19665 {
19666 int ret = 0;
19667
19668 switch (DEFAULT_ABI)
19669 {
19670 default:
19671 gcc_unreachable ();
19672
19673 case ABI_AIX:
19674 ret = (TARGET_32BIT) ? 12 : 24;
19675 break;
19676
19677 case ABI_ELFv2:
19678 gcc_assert (!TARGET_32BIT);
19679 ret = 32;
19680 break;
19681
19682 case ABI_DARWIN:
19683 case ABI_V4:
19684 ret = (TARGET_32BIT) ? 40 : 48;
19685 break;
19686 }
19687
19688 return ret;
19689 }
19690
19691 /* Emit RTL insns to initialize the variable parts of a trampoline.
19692 FNADDR is an RTX for the address of the function's pure code.
19693 CXT is an RTX for the static chain value for the function. */
19694
19695 static void
19696 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19697 {
19698 int regsize = (TARGET_32BIT) ? 4 : 8;
19699 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19700 rtx ctx_reg = force_reg (Pmode, cxt);
19701 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19702
19703 switch (DEFAULT_ABI)
19704 {
19705 default:
19706 gcc_unreachable ();
19707
19708     /* Under AIX, just build the 3-word function descriptor.  */
19709 case ABI_AIX:
19710 {
19711 rtx fnmem, fn_reg, toc_reg;
19712
19713 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19714 error ("you cannot take the address of a nested function if you use "
19715 "the %qs option", "-mno-pointers-to-nested-functions");
19716
19717 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19718 fn_reg = gen_reg_rtx (Pmode);
19719 toc_reg = gen_reg_rtx (Pmode);
19720
19721 /* Macro to shorten the code expansions below. */
19722 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19723
19724 m_tramp = replace_equiv_address (m_tramp, addr);
19725
19726 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19727 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19728 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19729 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19730 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19731
19732 # undef MEM_PLUS
19733 }
19734 break;
19735
19736 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19737 case ABI_ELFv2:
19738 case ABI_DARWIN:
19739 case ABI_V4:
19740 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19741 LCT_NORMAL, VOIDmode,
19742 addr, Pmode,
19743 GEN_INT (rs6000_trampoline_size ()), SImode,
19744 fnaddr, Pmode,
19745 ctx_reg, Pmode);
19746 break;
19747 }
19748 }
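/* Layout sketch of the AIX-style trampoline initialized above, for
   reference; the three GPR-sized words written are

       offset 0          : code address  (copied from FNADDR's descriptor)
       offset regsize    : TOC value     (copied from FNADDR's descriptor)
       offset 2*regsize  : static chain  (CXT)

   i.e. the trampoline is itself a function descriptor whose third
   word supplies the static chain.  */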
19749
19750 \f
19751 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19752 identifier as an argument, so the front end shouldn't look it up. */
19753
19754 static bool
19755 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19756 {
19757 return is_attribute_p ("altivec", attr_id);
19758 }
19759
19760 /* Handle the "altivec" attribute. The attribute may have
19761 arguments as follows:
19762
19763 __attribute__((altivec(vector__)))
19764 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19765 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19766
19767 and may appear more than once (e.g., 'vector bool char') in a
19768 given declaration. */
19769
19770 static tree
19771 rs6000_handle_altivec_attribute (tree *node,
19772 tree name ATTRIBUTE_UNUSED,
19773 tree args,
19774 int flags ATTRIBUTE_UNUSED,
19775 bool *no_add_attrs)
19776 {
19777 tree type = *node, result = NULL_TREE;
19778 machine_mode mode;
19779 int unsigned_p;
19780 char altivec_type
19781 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19782 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19783 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19784 : '?');
19785
19786 while (POINTER_TYPE_P (type)
19787 || TREE_CODE (type) == FUNCTION_TYPE
19788 || TREE_CODE (type) == METHOD_TYPE
19789 || TREE_CODE (type) == ARRAY_TYPE)
19790 type = TREE_TYPE (type);
19791
19792 mode = TYPE_MODE (type);
19793
19794 /* Check for invalid AltiVec type qualifiers. */
19795 if (type == long_double_type_node)
19796 error ("use of %<long double%> in AltiVec types is invalid");
19797 else if (type == boolean_type_node)
19798 error ("use of boolean types in AltiVec types is invalid");
19799 else if (TREE_CODE (type) == COMPLEX_TYPE)
19800 error ("use of %<complex%> in AltiVec types is invalid");
19801 else if (DECIMAL_FLOAT_MODE_P (mode))
19802 error ("use of decimal floating point types in AltiVec types is invalid");
19803 else if (!TARGET_VSX)
19804 {
19805 if (type == long_unsigned_type_node || type == long_integer_type_node)
19806 {
19807 if (TARGET_64BIT)
19808 error ("use of %<long%> in AltiVec types is invalid for "
19809 "64-bit code without %qs", "-mvsx");
19810 else if (rs6000_warn_altivec_long)
19811 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19812 "use %<int%>");
19813 }
19814 else if (type == long_long_unsigned_type_node
19815 || type == long_long_integer_type_node)
19816 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19817 "-mvsx");
19818 else if (type == double_type_node)
19819 error ("use of %<double%> in AltiVec types is invalid without %qs",
19820 "-mvsx");
19821 }
19822
19823 switch (altivec_type)
19824 {
19825 case 'v':
19826 unsigned_p = TYPE_UNSIGNED (type);
19827 switch (mode)
19828 {
19829 case E_TImode:
19830 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19831 break;
19832 case E_DImode:
19833 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19834 break;
19835 case E_SImode:
19836 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19837 break;
19838 case E_HImode:
19839 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19840 break;
19841 case E_QImode:
19842 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19843 break;
19844 case E_SFmode: result = V4SF_type_node; break;
19845 case E_DFmode: result = V2DF_type_node; break;
19846 /* If the user says 'vector int bool', we may be handed the 'bool'
19847 attribute _before_ the 'vector' attribute, and so select the
19848 proper type in the 'b' case below. */
19849 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19850 case E_V2DImode: case E_V2DFmode:
19851 result = type;
19852 default: break;
19853 }
19854 break;
19855 case 'b':
19856 switch (mode)
19857 {
19858 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19859 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19860 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19861 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19862 default: break;
19863 }
19864 break;
19865 case 'p':
19866 switch (mode)
19867 {
19868 case E_V8HImode: result = pixel_V8HI_type_node;
19869 default: break;
19870 }
19871 default: break;
19872 }
19873
19874 /* Propagate qualifiers attached to the element type
19875 onto the vector type. */
19876 if (result && result != type && TYPE_QUALS (type))
19877 result = build_qualified_type (result, TYPE_QUALS (type));
19878
19879 *no_add_attrs = true; /* No need to hang on to the attribute. */
19880
19881 if (result)
19882 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19883
19884 return NULL_TREE;
19885 }
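/* For illustration, <altivec.h>-style keywords are spelled via this
   attribute; a minimal sketch, assuming the usual macro spellings:

       #define __vector __attribute__ ((altivec (vector__)))
       #define __bool   __attribute__ ((altivec (bool__))) unsigned
       #define __pixel  __attribute__ ((altivec (pixel__))) unsigned short

       __vector unsigned int v;   // selects unsigned_V4SI_type_node

   Here 'vector unsigned int' reaches the handler with MODE == SImode
   and TYPE_UNSIGNED set, taking the E_SImode arm of the 'v' case.  */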
19886
19887 /* AltiVec defines five built-in scalar types that serve as vector
19888 elements; we must teach the compiler how to mangle them. The 128-bit
19889 floating point mangling is target-specific as well. MMA defines
19890 two built-in types to be used as opaque vector types. */
19891
19892 static const char *
19893 rs6000_mangle_type (const_tree type)
19894 {
19895 type = TYPE_MAIN_VARIANT (type);
19896
19897 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19898 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
19899 && TREE_CODE (type) != OPAQUE_TYPE)
19900 return NULL;
19901
19902 if (type == bool_char_type_node) return "U6__boolc";
19903 if (type == bool_short_type_node) return "U6__bools";
19904 if (type == pixel_type_node) return "u7__pixel";
19905 if (type == bool_int_type_node) return "U6__booli";
19906 if (type == bool_long_long_type_node) return "U6__boolx";
19907
19908 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19909 return "g";
19910 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19911 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19912
19913 if (type == vector_pair_type_node)
19914 return "u13__vector_pair";
19915 if (type == vector_quad_type_node)
19916 return "u13__vector_quad";
19917
19918 /* For all other types, use the default mangling. */
19919 return NULL;
19920 }
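/* Mangling examples implied by the table above (illustrative only,
   using the generic Itanium vector prefix Dv<n>_ for the vector
   itself):

       void f (__vector __bool int);   // parameter: Dv4_U6__booli
       void g (__vector __pixel);      // parameter: Dv8_u7__pixel
       void h (__ieee128);             // u9__ieee128 (U10__float128
                                       // in GCC 8.1 compatibility mode)
*/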
19921
19922 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19923 struct attribute_spec.handler. */
19924
19925 static tree
19926 rs6000_handle_longcall_attribute (tree *node, tree name,
19927 tree args ATTRIBUTE_UNUSED,
19928 int flags ATTRIBUTE_UNUSED,
19929 bool *no_add_attrs)
19930 {
19931 if (TREE_CODE (*node) != FUNCTION_TYPE
19932 && TREE_CODE (*node) != FIELD_DECL
19933 && TREE_CODE (*node) != TYPE_DECL)
19934 {
19935 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19936 name);
19937 *no_add_attrs = true;
19938 }
19939
19940 return NULL_TREE;
19941 }
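/* Typical use, for reference:

       void far_away (void) __attribute__ ((longcall));

   Calls to far_away are then made through a register rather than a
   direct 'bl', so they are not limited to the +/- 32MB displacement
   of the branch instruction.  */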
19942
19943 /* Set longcall attributes on all functions declared when
19944 rs6000_default_long_calls is true. */
19945 static void
19946 rs6000_set_default_type_attributes (tree type)
19947 {
19948 if (rs6000_default_long_calls
19949 && (TREE_CODE (type) == FUNCTION_TYPE
19950 || TREE_CODE (type) == METHOD_TYPE))
19951 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19952 NULL_TREE,
19953 TYPE_ATTRIBUTES (type));
19954
19955 #if TARGET_MACHO
19956 darwin_set_default_type_attributes (type);
19957 #endif
19958 }
19959
19960 /* Return a reference suitable for calling a function with the
19961 longcall attribute. */
19962
19963 static rtx
19964 rs6000_longcall_ref (rtx call_ref, rtx arg)
19965 {
19966 /* System V adds '.' to the internal name, so skip all leading dots.  */
19967 const char *call_name = XSTR (call_ref, 0);
19968 if (*call_name == '.')
19969 {
19970 while (*call_name == '.')
19971 call_name++;
19972
19973 tree node = get_identifier (call_name);
19974 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19975 }
19976
19977 if (TARGET_PLTSEQ)
19978 {
19979 rtx base = const0_rtx;
19980 int regno = 12;
19981 if (rs6000_pcrel_p ())
19982 {
19983 rtx reg = gen_rtx_REG (Pmode, regno);
19984 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
19985 gen_rtvec (3, base, call_ref, arg),
19986 UNSPECV_PLT_PCREL);
19987 emit_insn (gen_rtx_SET (reg, u));
19988 return reg;
19989 }
19990
19991 if (DEFAULT_ABI == ABI_ELFv2)
19992 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19993 else
19994 {
19995 if (flag_pic)
19996 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19997 regno = 11;
19998 }
19999 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20000 may be used by a function global entry point. For SysV4, r11
20001 is used by __glink_PLTresolve lazy resolver entry. */
20002 rtx reg = gen_rtx_REG (Pmode, regno);
20003 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20004 UNSPEC_PLT16_HA);
20005 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20006 gen_rtvec (3, reg, call_ref, arg),
20007 UNSPECV_PLT16_LO);
20008 emit_insn (gen_rtx_SET (reg, hi));
20009 emit_insn (gen_rtx_SET (reg, lo));
20010 return reg;
20011 }
20012
20013 return force_reg (Pmode, call_ref);
20014 }
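/* Reader's note: the PLT16_HA/PLT16_LO UNSPEC pair built above is
   emitted as an addis/load sequence carrying the corresponding PLT16
   relocations against the symbol's PLT entry, while the PCREL variant
   becomes a single prefixed load; the exact instructions come from
   the matching call patterns in rs6000.md.  */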
20015 \f
20016 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20017 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20018 #endif
20019
20020 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20021 struct attribute_spec.handler. */
20022 static tree
20023 rs6000_handle_struct_attribute (tree *node, tree name,
20024 tree args ATTRIBUTE_UNUSED,
20025 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20026 {
20027 tree *type = NULL;
20028 if (DECL_P (*node))
20029 {
20030 if (TREE_CODE (*node) == TYPE_DECL)
20031 type = &TREE_TYPE (*node);
20032 }
20033 else
20034 type = node;
20035
20036 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20037 || TREE_CODE (*type) == UNION_TYPE)))
20038 {
20039 warning (OPT_Wattributes, "%qE attribute ignored", name);
20040 *no_add_attrs = true;
20041 }
20042
20043 else if ((is_attribute_p ("ms_struct", name)
20044 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20045 || ((is_attribute_p ("gcc_struct", name)
20046 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20047 {
20048 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20049 name);
20050 *no_add_attrs = true;
20051 }
20052
20053 return NULL_TREE;
20054 }
20055
20056 static bool
20057 rs6000_ms_bitfield_layout_p (const_tree record_type)
20058 {
20059 return ((TARGET_USE_MS_BITFIELD_LAYOUT
20060 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20061 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20062 }
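/* Usage, for reference; the attributes select per-record layout:

       struct __attribute__ ((ms_struct))  m { char c : 1; int i : 2; };
       struct __attribute__ ((gcc_struct)) g { char c : 1; int i : 2; };

   m uses the Microsoft bitfield rules even when
   TARGET_USE_MS_BITFIELD_LAYOUT is 0, and g always uses the GCC
   rules.  */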
20063 \f
20064 #ifdef USING_ELFOS_H
20065
20066 /* A get_unnamed_section callback, used for switching to toc_section. */
20067
20068 static void
20069 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20070 {
20071 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20072 && TARGET_MINIMAL_TOC)
20073 {
20074 if (!toc_initialized)
20075 {
20076 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20077 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20078 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20079 fprintf (asm_out_file, "\t.tc ");
20080 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20081 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20082 fprintf (asm_out_file, "\n");
20083
20084 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20085 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20086 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20087 fprintf (asm_out_file, " = .+32768\n");
20088 toc_initialized = 1;
20089 }
20090 else
20091 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20092 }
20093 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20094 {
20095 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20096 if (!toc_initialized)
20097 {
20098 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20099 toc_initialized = 1;
20100 }
20101 }
20102 else
20103 {
20104 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20105 if (!toc_initialized)
20106 {
20107 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20108 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20109 fprintf (asm_out_file, " = .+32768\n");
20110 toc_initialized = 1;
20111 }
20112 }
20113 }
20114
20115 /* Implement TARGET_ASM_INIT_SECTIONS. */
20116
20117 static void
20118 rs6000_elf_asm_init_sections (void)
20119 {
20120 toc_section
20121 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20122
20123 sdata2_section
20124 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20125 SDATA2_SECTION_ASM_OP);
20126 }
20127
20128 /* Implement TARGET_SELECT_RTX_SECTION. */
20129
20130 static section *
20131 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20132 unsigned HOST_WIDE_INT align)
20133 {
20134 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20135 return toc_section;
20136 else
20137 return default_elf_select_rtx_section (mode, x, align);
20138 }
20139 \f
20140 /* For a SYMBOL_REF, set generic flags and then perform some
20141 target-specific processing.
20142
20143 When the AIX ABI is requested on a non-AIX system, replace the
20144 function name with the real name (with a leading .) rather than the
20145 function descriptor name. This saves a lot of overriding code to
20146 read the prefixes. */
20147
20148 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20149 static void
20150 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20151 {
20152 default_encode_section_info (decl, rtl, first);
20153
20154 if (first
20155 && TREE_CODE (decl) == FUNCTION_DECL
20156 && !TARGET_AIX
20157 && DEFAULT_ABI == ABI_AIX)
20158 {
20159 rtx sym_ref = XEXP (rtl, 0);
20160 size_t len = strlen (XSTR (sym_ref, 0));
20161 char *str = XALLOCAVEC (char, len + 2);
20162 str[0] = '.';
20163 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20164 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20165 }
20166 }
20167
20168 static inline bool
20169 compare_section_name (const char *section, const char *templ)
20170 {
20171 int len;
20172
20173 len = strlen (templ);
20174 return (strncmp (section, templ, len) == 0
20175 && (section[len] == 0 || section[len] == '.'));
20176 }
20177
20178 bool
20179 rs6000_elf_in_small_data_p (const_tree decl)
20180 {
20181 if (rs6000_sdata == SDATA_NONE)
20182 return false;
20183
20184 /* We want to merge strings, so we never consider them small data. */
20185 if (TREE_CODE (decl) == STRING_CST)
20186 return false;
20187
20188 /* Functions are never in the small data area. */
20189 if (TREE_CODE (decl) == FUNCTION_DECL)
20190 return false;
20191
20192 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20193 {
20194 const char *section = DECL_SECTION_NAME (decl);
20195 if (compare_section_name (section, ".sdata")
20196 || compare_section_name (section, ".sdata2")
20197 || compare_section_name (section, ".gnu.linkonce.s")
20198 || compare_section_name (section, ".sbss")
20199 || compare_section_name (section, ".sbss2")
20200 || compare_section_name (section, ".gnu.linkonce.sb")
20201 || strcmp (section, ".PPC.EMB.sdata0") == 0
20202 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20203 return true;
20204 }
20205 else
20206 {
20207 /* If we are told not to put readonly data in sdata, then don't. */
20208 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20209 && !rs6000_readonly_in_sdata)
20210 return false;
20211
20212 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20213
20214 if (size > 0
20215 && size <= g_switch_value
20216 /* If it's not public, and we're not going to reference it there,
20217 there's no need to put it in the small data section. */
20218 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20219 return true;
20220 }
20221
20222 return false;
20223 }
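/* Worked example: compiling with -msdata=eabi -G 8, a file-scope

       int counter;   // 4 bytes, <= g_switch_value

   is treated as small data and reachable via the small-data base
   register, while an object larger than 8 bytes falls back to
   ordinary .data/.bss placement.  */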
20224
20225 #endif /* USING_ELFOS_H */
20226 \f
20227 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20228
20229 static bool
20230 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20231 {
20232 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20233 }
20234
20235 /* Do not place thread-local symbols refs in the object blocks. */
20236
20237 static bool
20238 rs6000_use_blocks_for_decl_p (const_tree decl)
20239 {
20240 return !DECL_THREAD_LOCAL_P (decl);
20241 }
20242 \f
20243 /* Return a REG that occurs in ADDR with coefficient 1.
20244 ADDR can be effectively incremented by incrementing REG.
20245
20246 r0 is special and we must not select it as an address
20247 register by this routine since our caller will try to
20248 increment the returned register via an "la" instruction. */
20249
20250 rtx
20251 find_addr_reg (rtx addr)
20252 {
20253 while (GET_CODE (addr) == PLUS)
20254 {
20255 if (REG_P (XEXP (addr, 0))
20256 && REGNO (XEXP (addr, 0)) != 0)
20257 addr = XEXP (addr, 0);
20258 else if (REG_P (XEXP (addr, 1))
20259 && REGNO (XEXP (addr, 1)) != 0)
20260 addr = XEXP (addr, 1);
20261 else if (CONSTANT_P (XEXP (addr, 0)))
20262 addr = XEXP (addr, 1);
20263 else if (CONSTANT_P (XEXP (addr, 1)))
20264 addr = XEXP (addr, 0);
20265 else
20266 gcc_unreachable ();
20267 }
20268 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20269 return addr;
20270 }
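/* Example: for ADDR (plus (reg 9) (const_int 8)) this returns
   (reg 9).  For (plus (reg 0) (reg 9)) it must likewise return
   (reg 9), since r0 in the base position of "la"/"addi" reads as the
   literal value zero.  */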
20271
20272 void
20273 rs6000_fatal_bad_address (rtx op)
20274 {
20275 fatal_insn ("bad address", op);
20276 }
20277
20278 #if TARGET_MACHO
20279
20280 vec<branch_island, va_gc> *branch_islands;
20281
20282 /* Remember to generate a branch island for far calls to the given
20283 function. */
20284
20285 static void
20286 add_compiler_branch_island (tree label_name, tree function_name,
20287 int line_number)
20288 {
20289 branch_island bi = {function_name, label_name, line_number};
20290 vec_safe_push (branch_islands, bi);
20291 }
20292
20293 /* NO_PREVIOUS_DEF checks whether the function name is already in
20294 the branch island list. */
20295
20296 static int
20297 no_previous_def (tree function_name)
20298 {
20299 branch_island *bi;
20300 unsigned ix;
20301
20302 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20303 if (function_name == bi->function_name)
20304 return 0;
20305 return 1;
20306 }
20307
20308 /* GET_PREV_LABEL gets the label name from the previous definition of
20309 the function. */
20310
20311 static tree
20312 get_prev_label (tree function_name)
20313 {
20314 branch_island *bi;
20315 unsigned ix;
20316
20317 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20318 if (function_name == bi->function_name)
20319 return bi->label_name;
20320 return NULL_TREE;
20321 }
20322
20323 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20324
20325 void
20326 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20327 {
20328 unsigned int length;
20329 char *symbol_name, *lazy_ptr_name;
20330 char *local_label_0;
20331 static unsigned label = 0;
20332
20333 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20334 symb = (*targetm.strip_name_encoding) (symb);
20335
20336 length = strlen (symb);
20337 symbol_name = XALLOCAVEC (char, length + 32);
20338 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20339
20340 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20341 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20342
20343 if (MACHOPIC_PURE)
20344 {
20345 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20346 fprintf (file, "\t.align 5\n");
20347
20348 fprintf (file, "%s:\n", stub);
20349 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20350
20351 label++;
20352 local_label_0 = XALLOCAVEC (char, 16);
20353 sprintf (local_label_0, "L%u$spb", label);
20354
20355 fprintf (file, "\tmflr r0\n");
20356 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20357 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20358 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20359 lazy_ptr_name, local_label_0);
20360 fprintf (file, "\tmtlr r0\n");
20361 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20362 (TARGET_64BIT ? "ldu" : "lwzu"),
20363 lazy_ptr_name, local_label_0);
20364 fprintf (file, "\tmtctr r12\n");
20365 fprintf (file, "\tbctr\n");
20366 }
20367 else /* mdynamic-no-pic or mkernel. */
20368 {
20369 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20370 fprintf (file, "\t.align 4\n");
20371
20372 fprintf (file, "%s:\n", stub);
20373 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20374
20375 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20376 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20377 (TARGET_64BIT ? "ldu" : "lwzu"),
20378 lazy_ptr_name);
20379 fprintf (file, "\tmtctr r12\n");
20380 fprintf (file, "\tbctr\n");
20381 }
20382
20383 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20384 fprintf (file, "%s:\n", lazy_ptr_name);
20385 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20386 fprintf (file, "%sdyld_stub_binding_helper\n",
20387 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20388 }
20389
20390 /* Legitimize PIC addresses. If the address is already
20391 position-independent, we return ORIG. Newly generated
20392 position-independent addresses go into a reg. This is REG if
20393 nonzero, otherwise we allocate register(s) as necessary. */
20394
20395 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
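/* SMALL_INT tests for a signed 16-bit value: adding 0x8000 maps
   [-0x8000, 0x7fff] onto [0, 0xffff], so the comparison accepts
   exactly the signed 16-bit immediates that fit in one addi or
   D-form displacement.  */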
20396
20397 rtx
20398 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20399 rtx reg)
20400 {
20401 rtx base, offset;
20402
20403 if (reg == NULL && !reload_completed)
20404 reg = gen_reg_rtx (Pmode);
20405
20406 if (GET_CODE (orig) == CONST)
20407 {
20408 rtx reg_temp;
20409
20410 if (GET_CODE (XEXP (orig, 0)) == PLUS
20411 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20412 return orig;
20413
20414 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20415
20416 /* Use a different reg for the intermediate value, as
20417 it will be marked UNCHANGING. */
20418 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20419 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20420 Pmode, reg_temp);
20421 offset
20422 = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20423 Pmode, reg);
20424
20425 if (CONST_INT_P (offset))
20426 {
20427 if (SMALL_INT (offset))
20428 return plus_constant (Pmode, base, INTVAL (offset));
20429 else if (!reload_completed)
20430 offset = force_reg (Pmode, offset);
20431 else
20432 {
20433 rtx mem = force_const_mem (Pmode, orig);
20434 return machopic_legitimize_pic_address (mem, Pmode, reg);
20435 }
20436 }
20437 return gen_rtx_PLUS (Pmode, base, offset);
20438 }
20439
20440 /* Fall back on generic machopic code. */
20441 return machopic_legitimize_pic_address (orig, mode, reg);
20442 }
20443
20444 /* Output a .machine directive for the Darwin assembler, and call
20445 the generic start_file routine. */
20446
20447 static void
20448 rs6000_darwin_file_start (void)
20449 {
20450 static const struct
20451 {
20452 const char *arg;
20453 const char *name;
20454 HOST_WIDE_INT if_set;
20455 } mapping[] = {
20456 { "ppc64", "ppc64", MASK_64BIT },
20457 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20458 { "power4", "ppc970", 0 },
20459 { "G5", "ppc970", 0 },
20460 { "7450", "ppc7450", 0 },
20461 { "7400", "ppc7400", MASK_ALTIVEC },
20462 { "G4", "ppc7400", 0 },
20463 { "750", "ppc750", 0 },
20464 { "740", "ppc750", 0 },
20465 { "G3", "ppc750", 0 },
20466 { "604e", "ppc604e", 0 },
20467 { "604", "ppc604", 0 },
20468 { "603e", "ppc603", 0 },
20469 { "603", "ppc603", 0 },
20470 { "601", "ppc601", 0 },
20471 { NULL, "ppc", 0 } };
20472 const char *cpu_id = "";
20473 size_t i;
20474
20475 rs6000_file_start ();
20476 darwin_file_start ();
20477
20478 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20479
20480 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20481 cpu_id = rs6000_default_cpu;
20482
20483 if (global_options_set.x_rs6000_cpu_index)
20484 cpu_id = processor_target_table[rs6000_cpu_index].name;
20485
20486 /* Look through the mapping array. Pick the first name that either
20487 matches the argument, has a bit set in IF_SET that is also set
20488 in the target flags, or has a NULL name. */
20489
20490 i = 0;
20491 while (mapping[i].arg != NULL
20492 && strcmp (mapping[i].arg, cpu_id) != 0
20493 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20494 i++;
20495
20496 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20497 }
20498
20499 #endif /* TARGET_MACHO */
20500
20501 #if TARGET_ELF
20502 static int
20503 rs6000_elf_reloc_rw_mask (void)
20504 {
20505 if (flag_pic)
20506 return 3;
20507 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20508 return 2;
20509 else
20510 return 0;
20511 }
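/* Reader's note on the TARGET_ASM_RELOC_RW_MASK convention: bit 0 set
   forces data with relocations to local symbols into writable
   sections, bit 1 does the same for global symbols.  So PIC code (3)
   keeps all relocated data writable, while AIX/ELFv2 TOC code (2)
   still permits local relocations in read-only sections.  */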
20512
20513 /* Record an element in the table of global constructors. SYMBOL is
20514 a SYMBOL_REF of the function to be called; PRIORITY is a number
20515 between 0 and MAX_INIT_PRIORITY.
20516
20517 This differs from default_named_section_asm_out_constructor in
20518 that we have special handling for -mrelocatable. */
20519
20520 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20521 static void
20522 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20523 {
20524 const char *section = ".ctors";
20525 char buf[18];
20526
20527 if (priority != DEFAULT_INIT_PRIORITY)
20528 {
20529 sprintf (buf, ".ctors.%.5u",
20530 /* Invert the numbering so the linker puts us in the proper
20531 order; constructors are run from right to left, and the
20532 linker sorts in increasing order. */
20533 MAX_INIT_PRIORITY - priority);
20534 section = buf;
20535 }
20536
20537 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20538 assemble_align (POINTER_SIZE);
20539
20540 if (DEFAULT_ABI == ABI_V4
20541 && (TARGET_RELOCATABLE || flag_pic > 1))
20542 {
20543 fputs ("\t.long (", asm_out_file);
20544 output_addr_const (asm_out_file, symbol);
20545 fputs (")@fixup\n", asm_out_file);
20546 }
20547 else
20548 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20549 }
20550
20551 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20552 static void
20553 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20554 {
20555 const char *section = ".dtors";
20556 char buf[18];
20557
20558 if (priority != DEFAULT_INIT_PRIORITY)
20559 {
20560 sprintf (buf, ".dtors.%.5u",
20561 /* Invert the numbering so the linker puts us in the proper
20562 order; destructors must run in the opposite order from
20563 constructors, and the linker sorts in increasing order. */
20564 MAX_INIT_PRIORITY - priority);
20565 section = buf;
20566 }
20567
20568 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20569 assemble_align (POINTER_SIZE);
20570
20571 if (DEFAULT_ABI == ABI_V4
20572 && (TARGET_RELOCATABLE || flag_pic > 1))
20573 {
20574 fputs ("\t.long (", asm_out_file);
20575 output_addr_const (asm_out_file, symbol);
20576 fputs (")@fixup\n", asm_out_file);
20577 }
20578 else
20579 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20580 }
20581
20582 void
20583 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20584 {
20585 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20586 {
20587 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20588 ASM_OUTPUT_LABEL (file, name);
20589 fputs (DOUBLE_INT_ASM_OP, file);
20590 rs6000_output_function_entry (file, name);
20591 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20592 if (DOT_SYMBOLS)
20593 {
20594 fputs ("\t.size\t", file);
20595 assemble_name (file, name);
20596 fputs (",24\n\t.type\t.", file);
20597 assemble_name (file, name);
20598 fputs (",@function\n", file);
20599 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20600 {
20601 fputs ("\t.globl\t.", file);
20602 assemble_name (file, name);
20603 putc ('\n', file);
20604 }
20605 }
20606 else
20607 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20608 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20609 rs6000_output_function_entry (file, name);
20610 fputs (":\n", file);
20611 return;
20612 }
20613
20614 int uses_toc;
20615 if (DEFAULT_ABI == ABI_V4
20616 && (TARGET_RELOCATABLE || flag_pic > 1)
20617 && !TARGET_SECURE_PLT
20618 && (!constant_pool_empty_p () || crtl->profile)
20619 && (uses_toc = uses_TOC ()))
20620 {
20621 char buf[256];
20622
20623 if (uses_toc == 2)
20624 switch_to_other_text_partition ();
20625 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20626
20627 fprintf (file, "\t.long ");
20628 assemble_name (file, toc_label_name);
20629 need_toc_init = 1;
20630 putc ('-', file);
20631 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20632 assemble_name (file, buf);
20633 putc ('\n', file);
20634 if (uses_toc == 2)
20635 switch_to_other_text_partition ();
20636 }
20637
20638 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20639 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20640
20641 if (TARGET_CMODEL == CMODEL_LARGE
20642 && rs6000_global_entry_point_prologue_needed_p ())
20643 {
20644 char buf[256];
20645
20646 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20647
20648 fprintf (file, "\t.quad .TOC.-");
20649 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20650 assemble_name (file, buf);
20651 putc ('\n', file);
20652 }
20653
20654 if (DEFAULT_ABI == ABI_AIX)
20655 {
20656 const char *desc_name, *orig_name;
20657
20658 orig_name = (*targetm.strip_name_encoding) (name);
20659 desc_name = orig_name;
20660 while (*desc_name == '.')
20661 desc_name++;
20662
20663 if (TREE_PUBLIC (decl))
20664 fprintf (file, "\t.globl %s\n", desc_name);
20665
20666 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20667 fprintf (file, "%s:\n", desc_name);
20668 fprintf (file, "\t.long %s\n", orig_name);
20669 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20670 fputs ("\t.long 0\n", file);
20671 fprintf (file, "\t.previous\n");
20672 }
20673 ASM_OUTPUT_LABEL (file, name);
20674 }
20675
20676 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20677 static void
20678 rs6000_elf_file_end (void)
20679 {
20680 #ifdef HAVE_AS_GNU_ATTRIBUTE
20681 /* ??? The value emitted depends on options active at file end.
20682 Assume anyone using #pragma or attributes that might change
20683 options knows what they are doing. */
20684 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20685 && rs6000_passes_float)
20686 {
20687 int fp;
20688
20689 if (TARGET_HARD_FLOAT)
20690 fp = 1;
20691 else
20692 fp = 2;
20693 if (rs6000_passes_long_double)
20694 {
20695 if (!TARGET_LONG_DOUBLE_128)
20696 fp |= 2 * 4;
20697 else if (TARGET_IEEEQUAD)
20698 fp |= 3 * 4;
20699 else
20700 fp |= 1 * 4;
20701 }
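      /* The resulting .gnu_attribute 4 (Tag_GNU_Power_ABI_FP) value
	 is thus: low 2 bits, 1 = hard float, 2 = soft float; plus 4
	 for 128-bit IBM long double, 8 for 64-bit long double, or 12
	 for IEEE 128-bit long double.  */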
20702 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20703 }
20704 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20705 {
20706 if (rs6000_passes_vector)
20707 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20708 (TARGET_ALTIVEC_ABI ? 2 : 1));
20709 if (rs6000_returns_struct)
20710 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20711 aix_struct_return ? 2 : 1);
20712 }
20713 #endif
20714 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20715 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20716 file_end_indicate_exec_stack ();
20717 #endif
20718
20719 if (flag_split_stack)
20720 file_end_indicate_split_stack ();
20721
20722 if (cpu_builtin_p)
20723 {
20724 /* We have expanded a CPU builtin, so we need to emit a reference to
20725 the special symbol that LIBC uses to declare that it supports
20726 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB. */
20727 switch_to_section (data_section);
20728 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20729 fprintf (asm_out_file, "\t%s %s\n",
20730 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20731 }
20732 }
20733 #endif
20734
20735 #if TARGET_XCOFF
20736
20737 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20738 #define HAVE_XCOFF_DWARF_EXTRAS 0
20739 #endif
20740
20741 static enum unwind_info_type
20742 rs6000_xcoff_debug_unwind_info (void)
20743 {
20744 return UI_NONE;
20745 }
20746
20747 static void
20748 rs6000_xcoff_asm_output_anchor (rtx symbol)
20749 {
20750 char buffer[100];
20751
20752 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20753 SYMBOL_REF_BLOCK_OFFSET (symbol));
20754 fprintf (asm_out_file, "%s", SET_ASM_OP);
20755 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20756 fprintf (asm_out_file, ",");
20757 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20758 fprintf (asm_out_file, "\n");
20759 }
20760
20761 static void
20762 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20763 {
20764 fputs (GLOBAL_ASM_OP, stream);
20765 RS6000_OUTPUT_BASENAME (stream, name);
20766 putc ('\n', stream);
20767 }
20768
20769 /* A get_unnamed_section callback, used for read-only sections.
20770 DIRECTIVE points to the section string variable. */
20771
20772 static void
20773 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20774 {
20775 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20776 *(const char *const *) directive,
20777 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20778 }
20779
20780 /* Likewise for read-write sections. */
20781
20782 static void
20783 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20784 {
20785 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20786 *(const char *const *) directive,
20787 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20788 }
20789
20790 static void
20791 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20792 {
20793 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20794 *(const char *const *) directive,
20795 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20796 }
20797
20798 /* A get_unnamed_section callback, used for switching to toc_section. */
20799
20800 static void
20801 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20802 {
20803 if (TARGET_MINIMAL_TOC)
20804 {
20805 /* toc_section is always selected at least once from
20806 rs6000_xcoff_file_start, so this is guaranteed to
20807 always be defined once and only once in each file. */
20808 if (!toc_initialized)
20809 {
20810 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20811 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20812 toc_initialized = 1;
20813 }
20814 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20815 (TARGET_32BIT ? "" : ",3"));
20816 }
20817 else
20818 fputs ("\t.toc\n", asm_out_file);
20819 }
20820
20821 /* Implement TARGET_ASM_INIT_SECTIONS. */
20822
20823 static void
20824 rs6000_xcoff_asm_init_sections (void)
20825 {
20826 read_only_data_section
20827 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20828 &xcoff_read_only_section_name);
20829
20830 private_data_section
20831 = get_unnamed_section (SECTION_WRITE,
20832 rs6000_xcoff_output_readwrite_section_asm_op,
20833 &xcoff_private_data_section_name);
20834
20835 read_only_private_data_section
20836 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20837 &xcoff_private_rodata_section_name);
20838
20839 tls_data_section
20840 = get_unnamed_section (SECTION_TLS,
20841 rs6000_xcoff_output_tls_section_asm_op,
20842 &xcoff_tls_data_section_name);
20843
20844 tls_private_data_section
20845 = get_unnamed_section (SECTION_TLS,
20846 rs6000_xcoff_output_tls_section_asm_op,
20847 &xcoff_private_data_section_name);
20848
20849 toc_section
20850 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20851
20852 readonly_data_section = read_only_data_section;
20853 }
20854
20855 static int
20856 rs6000_xcoff_reloc_rw_mask (void)
20857 {
20858 return 3;
20859 }
20860
20861 static void
20862 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20863 tree decl ATTRIBUTE_UNUSED)
20864 {
20865 int smclass;
20866 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20867
20868 if (flags & SECTION_EXCLUDE)
20869 smclass = 4;
20870 else if (flags & SECTION_DEBUG)
20871 {
20872 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20873 return;
20874 }
20875 else if (flags & SECTION_CODE)
20876 smclass = 0;
20877 else if (flags & SECTION_TLS)
20878 smclass = 3;
20879 else if (flags & SECTION_WRITE)
20880 smclass = 2;
20881 else
20882 smclass = 1;
20883
20884 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20885 (flags & SECTION_CODE) ? "." : "",
20886 name, suffix[smclass], flags & SECTION_ENTSIZE);
20887 }
20888
20889 #define IN_NAMED_SECTION(DECL) \
20890 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20891 && DECL_SECTION_NAME (DECL) != NULL)
20892
20893 static section *
20894 rs6000_xcoff_select_section (tree decl, int reloc,
20895 unsigned HOST_WIDE_INT align)
20896 {
20897 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20898 a named section. */
20899 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
20900 {
20901 resolve_unique_section (decl, reloc, true);
20902 if (IN_NAMED_SECTION (decl))
20903 return get_named_section (decl, NULL, reloc);
20904 }
20905
20906 if (decl_readonly_section (decl, reloc))
20907 {
20908 if (TREE_PUBLIC (decl))
20909 return read_only_data_section;
20910 else
20911 return read_only_private_data_section;
20912 }
20913 else
20914 {
20915 #if HAVE_AS_TLS
20916 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20917 {
20918 if (TREE_PUBLIC (decl))
20919 return tls_data_section;
20920 else if (bss_initializer_p (decl))
20921 {
20922 /* Convert to COMMON to emit in BSS. */
20923 DECL_COMMON (decl) = 1;
20924 return tls_comm_section;
20925 }
20926 else
20927 return tls_private_data_section;
20928 }
20929 else
20930 #endif
20931 if (TREE_PUBLIC (decl))
20932 return data_section;
20933 else
20934 return private_data_section;
20935 }
20936 }
20937
20938 static void
20939 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20940 {
20941 const char *name;
20942
20943 /* Use select_section for private data and uninitialized data with
20944 alignment <= BIGGEST_ALIGNMENT. */
20945 if (!TREE_PUBLIC (decl)
20946 || DECL_COMMON (decl)
20947 || (DECL_INITIAL (decl) == NULL_TREE
20948 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20949 || DECL_INITIAL (decl) == error_mark_node
20950 || (flag_zero_initialized_in_bss
20951 && initializer_zerop (DECL_INITIAL (decl))))
20952 return;
20953
20954 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20955 name = (*targetm.strip_name_encoding) (name);
20956 set_decl_section_name (decl, name);
20957 }
20958
20959 /* Select section for constant in constant pool.
20960
20961 On RS/6000, all constants are in the private read-only data area.
20962 However, if this is being placed in the TOC it must be output as a
20963 toc entry. */
20964
20965 static section *
20966 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20967 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20968 {
20969 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20970 return toc_section;
20971 else
20972 return read_only_private_data_section;
20973 }
20974
20975 /* Remove any trailing [DS] or the like from the symbol name. */
20976
20977 static const char *
20978 rs6000_xcoff_strip_name_encoding (const char *name)
20979 {
20980 size_t len;
20981 if (*name == '*')
20982 name++;
20983 len = strlen (name);
20984 if (name[len - 1] == ']')
20985 return ggc_alloc_string (name, len - 4);
20986 else
20987 return name;
20988 }
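/* Examples: "foo[DS]" and "foo[RW]" both strip to "foo", and a
   leading '*' is dropped.  Note the fixed len - 4 assumes the
   bracketed storage-mapping class is exactly two characters.  */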
20989
20990 /* Section attributes. AIX is always PIC. */
20991
20992 static unsigned int
20993 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20994 {
20995 unsigned int align;
20996 unsigned int flags = default_section_type_flags (decl, name, reloc);
20997
20998 /* Align to at least UNIT size. */
20999 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
21000 align = MIN_UNITS_PER_WORD;
21001 else
21002 /* Increase alignment of large objects if not already stricter. */
21003 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21004 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21005 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21006
21007 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21008 }
21009
21010 /* Output at beginning of assembler file.
21011
21012 Initialize the section names for the RS/6000 at this point.
21013
21014 Specify filename, including full path, to assembler.
21015
21016 We want to go into the TOC section so at least one .toc will be emitted.
21017 Also, in order to output proper .bs/.es pairs, we need at least one static
21018 [RW] section emitted.
21019
21020 Finally, declare mcount when profiling to make the assembler happy. */
21021
21022 static void
21023 rs6000_xcoff_file_start (void)
21024 {
21025 rs6000_gen_section_name (&xcoff_bss_section_name,
21026 main_input_filename, ".bss_");
21027 rs6000_gen_section_name (&xcoff_private_data_section_name,
21028 main_input_filename, ".rw_");
21029 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21030 main_input_filename, ".rop_");
21031 rs6000_gen_section_name (&xcoff_read_only_section_name,
21032 main_input_filename, ".ro_");
21033 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21034 main_input_filename, ".tls_");
21035 rs6000_gen_section_name (&xcoff_tbss_section_name,
21036 main_input_filename, ".tbss_[UL]");
21037
21038 fputs ("\t.file\t", asm_out_file);
21039 output_quoted_string (asm_out_file, main_input_filename);
21040 fputc ('\n', asm_out_file);
21041 if (write_symbols != NO_DEBUG)
21042 switch_to_section (private_data_section);
21043 switch_to_section (toc_section);
21044 switch_to_section (text_section);
21045 if (profile_flag)
21046 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21047 rs6000_file_start ();
21048 }
21049
21050 /* Output at end of assembler file.
21051 On the RS/6000, referencing data should automatically pull in text. */
21052
21053 static void
21054 rs6000_xcoff_file_end (void)
21055 {
21056 switch_to_section (text_section);
21057 fputs ("_section_.text:\n", asm_out_file);
21058 switch_to_section (data_section);
21059 fputs (TARGET_32BIT
21060 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21061 asm_out_file);
21062 }
21063
21064 struct declare_alias_data
21065 {
21066 FILE *file;
21067 bool function_descriptor;
21068 };
21069
21070 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
21071
21072 static bool
21073 rs6000_declare_alias (struct symtab_node *n, void *d)
21074 {
21075 struct declare_alias_data *data = (struct declare_alias_data *)d;
21076 /* The main symbol is output specially, because the varasm machinery
21077 does part of the job for us; we need not emit .globl/.lglobl for it. */
21078 if (!n->alias || n->weakref)
21079 return false;
21080
21081 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21082 return false;
21083
21084 /* Prevent assemble_alias from trying to use the .set pseudo operation,
21085 which does not behave as the middle-end expects. */
21086 TREE_ASM_WRITTEN (n->decl) = true;
21087
21088 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21089 char *buffer = (char *) alloca (strlen (name) + 2);
21090 char *p;
21091 int dollar_inside = 0;
21092
21093 strcpy (buffer, name);
21094 p = strchr (buffer, '$');
21095 while (p) {
21096 *p = '_';
21097 dollar_inside++;
21098 p = strchr (p + 1, '$');
21099 }
21100 if (TREE_PUBLIC (n->decl))
21101 {
21102 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21103 {
21104 if (dollar_inside) {
21105 if (data->function_descriptor)
21106 fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21107 fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21108 }
21109 if (data->function_descriptor)
21110 {
21111 fputs ("\t.globl .", data->file);
21112 RS6000_OUTPUT_BASENAME (data->file, buffer);
21113 putc ('\n', data->file);
21114 }
21115 fputs ("\t.globl ", data->file);
21116 RS6000_OUTPUT_BASENAME (data->file, buffer);
21117 putc ('\n', data->file);
21118 }
21119 #ifdef ASM_WEAKEN_DECL
21120 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21121 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21122 #endif
21123 }
21124 else
21125 {
21126 if (dollar_inside)
21127 {
21128 if (data->function_descriptor)
21129 fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21130 fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21131 }
21132 if (data->function_descriptor)
21133 {
21134 fputs ("\t.lglobl .", data->file);
21135 RS6000_OUTPUT_BASENAME (data->file, buffer);
21136 putc ('\n', data->file);
21137 }
21138 fputs ("\t.lglobl ", data->file);
21139 RS6000_OUTPUT_BASENAME (data->file, buffer);
21140 putc ('\n', data->file);
21141 }
21142 if (data->function_descriptor)
21143 fputs (".", data->file);
21144 RS6000_OUTPUT_BASENAME (data->file, buffer);
21145 fputs (":\n", data->file);
21146 return false;
21147 }
21148
21149
21150 #ifdef HAVE_GAS_HIDDEN
21151 /* Helper function to calculate visibility of a DECL
21152 and return the value as a const string. */
21153
21154 static const char *
21155 rs6000_xcoff_visibility (tree decl)
21156 {
21157 static const char * const visibility_types[] = {
21158 "", ",protected", ",hidden", ",internal"
21159 };
21160
21161 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21162 return visibility_types[vis];
21163 }
21164 #endif
21165
21166
21167 /* This macro produces the initial definition of a function name.
21168 On the RS/6000, we need to place an extra '.' in the function name and
21169 output the function descriptor.
21170 Dollar signs are converted to underscores.
21171
21172 The csect for the function will have already been created when
21173 text_section was selected. We do have to go back to that csect, however.
21174
21175 The third and fourth parameters to the .function pseudo-op are
21176 placeholders which no longer have any use.
21177
21178 Because the AIX assembler's .set command has unexpected semantics, we output
21179 all aliases as alternative labels in front of the definition. */
21180
21181 void
21182 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21183 {
21184 char *buffer = (char *) alloca (strlen (name) + 1);
21185 char *p;
21186 int dollar_inside = 0;
21187 struct declare_alias_data data = {file, false};
21188
21189 strcpy (buffer, name);
21190 p = strchr (buffer, '$');
21191 while (p) {
21192 *p = '_';
21193 dollar_inside++;
21194 p = strchr (p + 1, '$');
21195 }
21196 if (TREE_PUBLIC (decl))
21197 {
21198 if (!RS6000_WEAK || !DECL_WEAK (decl))
21199 {
21200 if (dollar_inside) {
21201 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
21202 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
21203 }
21204 fputs ("\t.globl .", file);
21205 RS6000_OUTPUT_BASENAME (file, buffer);
21206 #ifdef HAVE_GAS_HIDDEN
21207 fputs (rs6000_xcoff_visibility (decl), file);
21208 #endif
21209 putc ('\n', file);
21210 }
21211 }
21212 else
21213 {
21214 if (dollar_inside) {
21215 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
21216 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
21217 }
21218 fputs ("\t.lglobl .", file);
21219 RS6000_OUTPUT_BASENAME (file, buffer);
21220 putc ('\n', file);
21221 }
21222 fputs ("\t.csect ", file);
21223 RS6000_OUTPUT_BASENAME (file, buffer);
21224 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
21225 RS6000_OUTPUT_BASENAME (file, buffer);
21226 fputs (":\n", file);
21227 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21228 &data, true);
21229 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21230 RS6000_OUTPUT_BASENAME (file, buffer);
21231 fputs (", TOC[tc0], 0\n", file);
21232 in_section = NULL;
21233 switch_to_section (function_section (decl));
21234 putc ('.', file);
21235 RS6000_OUTPUT_BASENAME (file, buffer);
21236 fputs (":\n", file);
21237 data.function_descriptor = true;
21238 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21239 &data, true);
21240 if (!DECL_IGNORED_P (decl))
21241 {
21242 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21243 xcoffout_declare_function (file, decl, buffer);
21244 else if (write_symbols == DWARF2_DEBUG)
21245 {
21246 name = (*targetm.strip_name_encoding) (name);
21247 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21248 }
21249 }
21250 return;
21251 }
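/* For a plain 32-bit public function "foo" with no aliases, the code
   above emits roughly (a sketch; exact csect names depend on the
   selected function section):

       .globl .foo                      # code entry symbol
       .csect foo[DS]                   # function descriptor csect
   foo:
       .long .foo, TOC[tc0], 0          # entry address, TOC anchor
       .csect .text[PR]
   .foo:                                # start of the actual code
*/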
21252
21253
21254 /* Output assembly language to globalize a symbol from a DECL,
21255 possibly with visibility. */
21256
21257 void
21258 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21259 {
21260 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21261 fputs (GLOBAL_ASM_OP, stream);
21262 RS6000_OUTPUT_BASENAME (stream, name);
21263 #ifdef HAVE_GAS_HIDDEN
21264 fputs (rs6000_xcoff_visibility (decl), stream);
21265 #endif
21266 putc ('\n', stream);
21267 }
21268
21269 /* Output assembly language to define a symbol as COMMON from a DECL,
21270 possibly with visibility. */
21271
21272 void
21273 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21274 tree decl ATTRIBUTE_UNUSED,
21275 const char *name,
21276 unsigned HOST_WIDE_INT size,
21277 unsigned HOST_WIDE_INT align)
21278 {
21279 unsigned HOST_WIDE_INT align2 = 2;
21280
21281 if (align > 32)
21282 align2 = floor_log2 (align / BITS_PER_UNIT);
21283 else if (size > 4)
21284 align2 = 3;
21285
21286 fputs (COMMON_ASM_OP, stream);
21287 RS6000_OUTPUT_BASENAME (stream, name);
21288
21289 fprintf (stream,
21290 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
21291 size, align2);
21292
21293 #ifdef HAVE_GAS_HIDDEN
21294 if (decl != NULL)
21295 fputs (rs6000_xcoff_visibility (decl), stream);
21296 #endif
21297 putc ('\n', stream);
21298 }
21299
21300 /* This macro produces the initial definition of an object (variable) name.
21301 Because the AIX assembler's .set command has unexpected semantics, we output
21302 all aliases as alternative labels in front of the definition. */
21303
21304 void
21305 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21306 {
21307 struct declare_alias_data data = {file, false};
21308 RS6000_OUTPUT_BASENAME (file, name);
21309 fputs (":\n", file);
21310 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21311 &data, true);
21312 }
21313
21314 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
21315
21316 void
21317 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21318 {
21319 fputs (integer_asm_op (size, FALSE), file);
21320 assemble_name (file, label);
21321 fputs ("-$", file);
21322 }
21323
21324 /* Output a symbol offset relative to the dbase for the current object.
21325 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21326 signed offsets.
21327
21328 __gcc_unwind_dbase is embedded in all executables/libraries through
21329 libgcc/config/rs6000/crtdbase.S. */
21330
21331 void
21332 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21333 {
21334 fputs (integer_asm_op (size, FALSE), file);
21335 assemble_name (file, label);
21336 fputs ("-__gcc_unwind_dbase", file);
21337 }
21338
21339 #ifdef HAVE_AS_TLS
21340 static void
21341 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21342 {
21343 rtx symbol;
21344 int flags;
21345 const char *symname;
21346
21347 default_encode_section_info (decl, rtl, first);
21348
21349 /* Careful not to prod global register variables. */
21350 if (!MEM_P (rtl))
21351 return;
21352 symbol = XEXP (rtl, 0);
21353 if (!SYMBOL_REF_P (symbol))
21354 return;
21355
21356 flags = SYMBOL_REF_FLAGS (symbol);
21357
21358 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21359 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21360
21361 SYMBOL_REF_FLAGS (symbol) = flags;
21362
21363 /* Append mapping class to extern decls. */
21364 symname = XSTR (symbol, 0);
21365 if (decl /* sync condition with assemble_external () */
21366 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
21367 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
21368 || TREE_CODE (decl) == FUNCTION_DECL)
21369 && symname[strlen (symname) - 1] != ']')
21370 {
21371 char *newname = (char *) alloca (strlen (symname) + 5);
21372 strcpy (newname, symname);
21373 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
21374 ? "[DS]" : "[UA]"));
21375 XSTR (symbol, 0) = ggc_strdup (newname);
21376 }
21377 }
21378 #endif /* HAVE_AS_TLS */
21379 #endif /* TARGET_XCOFF */
21380
21381 void
21382 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21383 const char *name, const char *val)
21384 {
21385 fputs ("\t.weak\t", stream);
21386 RS6000_OUTPUT_BASENAME (stream, name);
21387 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21388 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21389 {
21390 if (TARGET_XCOFF)
21391 fputs ("[DS]", stream);
21392 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21393 if (TARGET_XCOFF)
21394 fputs (rs6000_xcoff_visibility (decl), stream);
21395 #endif
21396 fputs ("\n\t.weak\t.", stream);
21397 RS6000_OUTPUT_BASENAME (stream, name);
21398 }
21399 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21400 if (TARGET_XCOFF)
21401 fputs (rs6000_xcoff_visibility (decl), stream);
21402 #endif
21403 fputc ('\n', stream);
21404 if (val)
21405 {
21406 #ifdef ASM_OUTPUT_DEF
21407 ASM_OUTPUT_DEF (stream, name, val);
21408 #endif
21409 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21410 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21411 {
21412 fputs ("\t.set\t.", stream);
21413 RS6000_OUTPUT_BASENAME (stream, name);
21414 fputs (",.", stream);
21415 RS6000_OUTPUT_BASENAME (stream, val);
21416 fputc ('\n', stream);
21417 }
21418 }
21419 }
21420
21421
21422 /* Return true if INSN should not be copied. */
21423
21424 static bool
21425 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21426 {
21427 return (recog_memoized (insn) >= 0
21428 && get_attr_cannot_copy (insn));
21429 }
21430
21431 /* Compute a (partial) cost for rtx X. Return true if the complete
21432 cost has been computed, and false if subexpressions should be
21433 scanned. In either case, *TOTAL contains the cost result. */
21434
21435 static bool
21436 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21437 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21438 {
21439 int code = GET_CODE (x);
21440
21441 switch (code)
21442 {
21443 /* On the RS/6000, if it is valid in the insn, it is free. */
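      /* For example, (plus (reg) (const_int 100)) costs nothing for
	 the constant, since 100 satisfies constraint "I" (a signed
	 16-bit immediate) and folds into a single addi.  The letters
	 tested below follow the rs6000 constraint definitions: "K" is
	 an unsigned 16-bit constant, "L"/"J" are signed/unsigned
	 16-bit constants shifted left 16 bits, and "P" is a constant
	 whose negation is a signed 16-bit constant.  */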
21444 case CONST_INT:
21445 if (((outer_code == SET
21446 || outer_code == PLUS
21447 || outer_code == MINUS)
21448 && (satisfies_constraint_I (x)
21449 || satisfies_constraint_L (x)))
21450 || (outer_code == AND
21451 && (satisfies_constraint_K (x)
21452 || (mode == SImode
21453 ? satisfies_constraint_L (x)
21454 : satisfies_constraint_J (x))))
21455 || ((outer_code == IOR || outer_code == XOR)
21456 && (satisfies_constraint_K (x)
21457 || (mode == SImode
21458 ? satisfies_constraint_L (x)
21459 : satisfies_constraint_J (x))))
21460 || outer_code == ASHIFT
21461 || outer_code == ASHIFTRT
21462 || outer_code == LSHIFTRT
21463 || outer_code == ROTATE
21464 || outer_code == ROTATERT
21465 || outer_code == ZERO_EXTRACT
21466 || (outer_code == MULT
21467 && satisfies_constraint_I (x))
21468 || ((outer_code == DIV || outer_code == UDIV
21469 || outer_code == MOD || outer_code == UMOD)
21470 && exact_log2 (INTVAL (x)) >= 0)
21471 || (outer_code == COMPARE
21472 && (satisfies_constraint_I (x)
21473 || satisfies_constraint_K (x)))
21474 || ((outer_code == EQ || outer_code == NE)
21475 && (satisfies_constraint_I (x)
21476 || satisfies_constraint_K (x)
21477 || (mode == SImode
21478 ? satisfies_constraint_L (x)
21479 : satisfies_constraint_J (x))))
21480 || (outer_code == GTU
21481 && satisfies_constraint_I (x))
21482 || (outer_code == LTU
21483 && satisfies_constraint_P (x)))
21484 {
21485 *total = 0;
21486 return true;
21487 }
21488 else if ((outer_code == PLUS
21489 && reg_or_add_cint_operand (x, mode))
21490 || (outer_code == MINUS
21491 && reg_or_sub_cint_operand (x, mode))
21492 || ((outer_code == SET
21493 || outer_code == IOR
21494 || outer_code == XOR)
21495 && (INTVAL (x)
21496 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21497 {
21498 *total = COSTS_N_INSNS (1);
21499 return true;
21500 }
21501 /* FALLTHRU */
21502
21503 case CONST_DOUBLE:
21504 case CONST_WIDE_INT:
21505 case CONST:
21506 case HIGH:
21507 case SYMBOL_REF:
21508 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21509 return true;
21510
21511 case MEM:
21512 /* When optimizing for size, MEM should be slightly more expensive
21513 than generating the address, e.g., (plus (reg) (const)).
21514 L1 cache latency is about two instructions. */
21515 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21516 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21517 *total += COSTS_N_INSNS (100);
21518 return true;
21519
21520 case LABEL_REF:
21521 *total = 0;
21522 return true;
21523
21524 case PLUS:
21525 case MINUS:
21526 if (FLOAT_MODE_P (mode))
21527 *total = rs6000_cost->fp;
21528 else
21529 *total = COSTS_N_INSNS (1);
21530 return false;
21531
21532 case MULT:
21533 if (CONST_INT_P (XEXP (x, 1))
21534 && satisfies_constraint_I (XEXP (x, 1)))
21535 {
21536 if (INTVAL (XEXP (x, 1)) >= -256
21537 && INTVAL (XEXP (x, 1)) <= 255)
21538 *total = rs6000_cost->mulsi_const9;
21539 else
21540 *total = rs6000_cost->mulsi_const;
21541 }
21542 else if (mode == SFmode)
21543 *total = rs6000_cost->fp;
21544 else if (FLOAT_MODE_P (mode))
21545 *total = rs6000_cost->dmul;
21546 else if (mode == DImode)
21547 *total = rs6000_cost->muldi;
21548 else
21549 *total = rs6000_cost->mulsi;
21550 return false;
21551
21552 case FMA:
21553 if (mode == SFmode)
21554 *total = rs6000_cost->fp;
21555 else
21556 *total = rs6000_cost->dmul;
21557 break;
21558
21559 case DIV:
21560 case MOD:
21561 if (FLOAT_MODE_P (mode))
21562 {
21563 *total = mode == DFmode ? rs6000_cost->ddiv
21564 : rs6000_cost->sdiv;
21565 return false;
21566 }
21567 /* FALLTHRU */
21568
21569 case UDIV:
21570 case UMOD:
21571 if (CONST_INT_P (XEXP (x, 1))
21572 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21573 {
21574 if (code == DIV || code == MOD)
21575 /* Shift, addze */
21576 *total = COSTS_N_INSNS (2);
21577 else
21578 /* Shift */
21579 *total = COSTS_N_INSNS (1);
21580 }
21581 else
21582 {
21583 if (GET_MODE (XEXP (x, 1)) == DImode)
21584 *total = rs6000_cost->divdi;
21585 else
21586 *total = rs6000_cost->divsi;
21587 }
21588 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21589 if (!TARGET_MODULO && (code == MOD || code == UMOD))
21590 *total += COSTS_N_INSNS (2);
21591 return false;
21592
21593 case CTZ:
21594 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21595 return false;
21596
21597 case FFS:
21598 *total = COSTS_N_INSNS (4);
21599 return false;
21600
21601 case POPCOUNT:
21602 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21603 return false;
21604
21605 case PARITY:
21606 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21607 return false;
21608
21609 case NOT:
21610 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21611 *total = 0;
21612 else
21613 *total = COSTS_N_INSNS (1);
21614 return false;
21615
21616 case AND:
21617 if (CONST_INT_P (XEXP (x, 1)))
21618 {
21619 rtx left = XEXP (x, 0);
21620 rtx_code left_code = GET_CODE (left);
21621
21622 /* rotate-and-mask: 1 insn. */
21623 if ((left_code == ROTATE
21624 || left_code == ASHIFT
21625 || left_code == LSHIFTRT)
21626 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21627 {
21628 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21629 if (!CONST_INT_P (XEXP (left, 1)))
21630 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21631 *total += COSTS_N_INSNS (1);
21632 return true;
21633 }
21634
21635 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21636 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21637 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21638 || (val & 0xffff) == val
21639 || (val & 0xffff0000) == val
21640 || ((val & 0xffff) == 0 && mode == SImode))
21641 {
21642 *total = rtx_cost (left, mode, AND, 0, speed);
21643 *total += COSTS_N_INSNS (1);
21644 return true;
21645 }
21646
21647 /* 2 insns. */
21648 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21649 {
21650 *total = rtx_cost (left, mode, AND, 0, speed);
21651 *total += COSTS_N_INSNS (2);
21652 return true;
21653 }
21654 }
21655
21656 *total = COSTS_N_INSNS (1);
21657 return false;
21658
21659 case IOR:
21660 /* FIXME */
21661 *total = COSTS_N_INSNS (1);
21662 return true;
21663
21664 case CLZ:
21665 case XOR:
21666 case ZERO_EXTRACT:
21667 *total = COSTS_N_INSNS (1);
21668 return false;
21669
21670 case ASHIFT:
21671 /* The EXTSWSLI instruction combines a sign extend and a shift; don't
21672 count the sign extend and the shift separately within the insn. */
21673 if (TARGET_EXTSWSLI && mode == DImode
21674 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21675 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21676 {
21677 *total = 0;
21678 return false;
21679 }
21680 /* fall through */
21681
21682 case ASHIFTRT:
21683 case LSHIFTRT:
21684 case ROTATE:
21685 case ROTATERT:
21686 /* Handle mul_highpart. */
21687 if (outer_code == TRUNCATE
21688 && GET_CODE (XEXP (x, 0)) == MULT)
21689 {
21690 if (mode == DImode)
21691 *total = rs6000_cost->muldi;
21692 else
21693 *total = rs6000_cost->mulsi;
21694 return true;
21695 }
21696 else if (outer_code == AND)
21697 *total = 0;
21698 else
21699 *total = COSTS_N_INSNS (1);
21700 return false;
21701
21702 case SIGN_EXTEND:
21703 case ZERO_EXTEND:
21704 if (MEM_P (XEXP (x, 0)))
21705 *total = 0;
21706 else
21707 *total = COSTS_N_INSNS (1);
21708 return false;
21709
21710 case COMPARE:
21711 case NEG:
21712 case ABS:
21713 if (!FLOAT_MODE_P (mode))
21714 {
21715 *total = COSTS_N_INSNS (1);
21716 return false;
21717 }
21718 /* FALLTHRU */
21719
21720 case FLOAT:
21721 case UNSIGNED_FLOAT:
21722 case FIX:
21723 case UNSIGNED_FIX:
21724 case FLOAT_TRUNCATE:
21725 *total = rs6000_cost->fp;
21726 return false;
21727
21728 case FLOAT_EXTEND:
21729 if (mode == DFmode)
21730 *total = rs6000_cost->sfdf_convert;
21731 else
21732 *total = rs6000_cost->fp;
21733 return false;
21734
21735 case CALL:
21736 case IF_THEN_ELSE:
21737 if (!speed)
21738 {
21739 *total = COSTS_N_INSNS (1);
21740 return true;
21741 }
21742 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21743 {
21744 *total = rs6000_cost->fp;
21745 return false;
21746 }
21747 break;
21748
21749 case NE:
21750 case EQ:
21751 case GTU:
21752 case LTU:
21753 /* Carry bit requires mode == Pmode.
21754 The NEG or PLUS was already counted, so only add one insn. */
21755 if (mode == Pmode
21756 && (outer_code == NEG || outer_code == PLUS))
21757 {
21758 *total = COSTS_N_INSNS (1);
21759 return true;
21760 }
21761 /* FALLTHRU */
21762
21763 case GT:
21764 case LT:
21765 case UNORDERED:
21766 if (outer_code == SET)
21767 {
21768 if (XEXP (x, 1) == const0_rtx)
21769 {
21770 *total = COSTS_N_INSNS (2);
21771 return true;
21772 }
21773 else
21774 {
21775 *total = COSTS_N_INSNS (3);
21776 return false;
21777 }
21778 }
21779 /* CC COMPARE. */
21780 if (outer_code == COMPARE)
21781 {
21782 *total = 0;
21783 return true;
21784 }
21785 break;
21786
21787 case UNSPEC:
21788 if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
21789 {
21790 *total = 0;
21791 return true;
21792 }
21793 break;
21794
21795 default:
21796 break;
21797 }
21798
21799 return false;
21800 }
21801
21802 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21803
21804 static bool
21805 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21806 int opno, int *total, bool speed)
21807 {
21808 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21809
21810 fprintf (stderr,
21811 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21812 "opno = %d, total = %d, speed = %s, x:\n",
21813 ret ? "complete" : "scan inner",
21814 GET_MODE_NAME (mode),
21815 GET_RTX_NAME (outer_code),
21816 opno,
21817 *total,
21818 speed ? "true" : "false");
21819
21820 debug_rtx (x);
21821
21822 return ret;
21823 }
21824
21825 static int
21826 rs6000_insn_cost (rtx_insn *insn, bool speed)
21827 {
21828 if (recog_memoized (insn) < 0)
21829 return 0;
21830
21831 /* If we are optimizing for size, just use the length. */
21832 if (!speed)
21833 return get_attr_length (insn);
21834
21835 /* Use the cost if provided. */
21836 int cost = get_attr_cost (insn);
21837 if (cost > 0)
21838 return cost;
21839
21840 /* If the insn tells us how many insns there are, use that. Otherwise use
21841 the length/4. Adjust the insn length to remove the extra size that
21842 prefixed instructions take. */
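/* For example, a sequence of two ordinary 4-byte instructions has
   length 8, giving n = 8 / 4 = 2; the adjustment below strips the
   extra bytes that prefixed instructions report, so the division
   stays in 4-byte units.  */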
21843 int n = get_attr_num_insns (insn);
21844 if (n == 0)
21845 {
21846 int length = get_attr_length (insn);
21847 if (get_attr_prefixed (insn) == PREFIXED_YES)
21848 {
21849 int adjust = 0;
21850 ADJUST_INSN_LENGTH (insn, adjust);
21851 length -= adjust;
21852 }
21853
21854 n = length / 4;
21855 }
21856
21857 enum attr_type type = get_attr_type (insn);
21858
21859 switch (type)
21860 {
21861 case TYPE_LOAD:
21862 case TYPE_FPLOAD:
21863 case TYPE_VECLOAD:
21864 cost = COSTS_N_INSNS (n + 1);
21865 break;
21866
21867 case TYPE_MUL:
21868 switch (get_attr_size (insn))
21869 {
21870 case SIZE_8:
21871 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21872 break;
21873 case SIZE_16:
21874 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21875 break;
21876 case SIZE_32:
21877 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21878 break;
21879 case SIZE_64:
21880 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21881 break;
21882 default:
21883 gcc_unreachable ();
21884 }
21885 break;
21886 case TYPE_DIV:
21887 switch (get_attr_size (insn))
21888 {
21889 case SIZE_32:
21890 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21891 break;
21892 case SIZE_64:
21893 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21894 break;
21895 default:
21896 gcc_unreachable ();
21897 }
21898 break;
21899
21900 case TYPE_FP:
21901 cost = n * rs6000_cost->fp;
21902 break;
21903 case TYPE_DMUL:
21904 cost = n * rs6000_cost->dmul;
21905 break;
21906 case TYPE_SDIV:
21907 cost = n * rs6000_cost->sdiv;
21908 break;
21909 case TYPE_DDIV:
21910 cost = n * rs6000_cost->ddiv;
21911 break;
21912
21913 case TYPE_SYNC:
21914 case TYPE_LOAD_L:
21915 case TYPE_MFCR:
21916 case TYPE_MFCRF:
21917 cost = COSTS_N_INSNS (n + 2);
21918 break;
21919
21920 default:
21921 cost = COSTS_N_INSNS (n);
21922 }
21923
21924 return cost;
21925 }
21926
21927 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21928
21929 static int
21930 rs6000_debug_address_cost (rtx x, machine_mode mode,
21931 addr_space_t as, bool speed)
21932 {
21933 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21934
21935 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21936 ret, speed ? "true" : "false");
21937 debug_rtx (x);
21938
21939 return ret;
21940 }
21941
21942
21943 /* A C expression returning the cost of moving data from a register of class
21944 FROM to one of class TO. */
21945
21946 static int
21947 rs6000_register_move_cost (machine_mode mode,
21948 reg_class_t from, reg_class_t to)
21949 {
21950 int ret;
21951 reg_class_t rclass;
21952
21953 if (TARGET_DEBUG_COST)
21954 dbg_cost_ctrl++;
21955
21956 /* If we have VSX, we can easily move between FPR or Altivec registers,
21957 otherwise we can only easily move within classes.
21958 Do this first so we give best-case answers for union classes
21959 containing both gprs and vsx regs. */
21960 HARD_REG_SET to_vsx, from_vsx;
21961 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21962 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21963 if (!hard_reg_set_empty_p (to_vsx)
21964 && !hard_reg_set_empty_p (from_vsx)
21965 && (TARGET_VSX
21966 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21967 {
21968 int reg = FIRST_FPR_REGNO;
21969 if (TARGET_VSX
21970 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21971 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21972 reg = FIRST_ALTIVEC_REGNO;
21973 ret = 2 * hard_regno_nregs (reg, mode);
21974 }
21975
21976 /* Moves from/to GENERAL_REGS. */
21977 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21978 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21979 {
21980 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21981 {
21982 if (TARGET_DIRECT_MOVE)
21983 {
21984 /* Keep the cost for direct moves above that for within
21985 a register class even if the actual processor cost is
21986 comparable. We do this because a direct move insn
21987 can't be a nop, whereas with ideal register
21988 allocation a move within the same class might turn
21989 out to be a nop. */
21990 if (rs6000_tune == PROCESSOR_POWER9
21991 || rs6000_tune == PROCESSOR_POWER10)
21992 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21993 else
21994 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21995 /* SFmode requires a conversion when moving between gprs
21996 and vsx. */
21997 if (mode == SFmode)
21998 ret += 2;
21999 }
22000 else
22001 ret = (rs6000_memory_move_cost (mode, rclass, false)
22002 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22003 }
22004
22005 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22006 shift. */
22007 else if (rclass == CR_REGS)
22008 ret = 4;
22009
22010 /* For those processors that have slow LR/CTR moves, make them more
22011 expensive than memory in order to bias spills to memory. */
22012 else if ((rs6000_tune == PROCESSOR_POWER6
22013 || rs6000_tune == PROCESSOR_POWER7
22014 || rs6000_tune == PROCESSOR_POWER8
22015 || rs6000_tune == PROCESSOR_POWER9)
22016 && reg_class_subset_p (rclass, SPECIAL_REGS))
22017 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22018
22019 else
22020 /* A move will cost one instruction per GPR moved. */
22021 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22022 }
22023
22024 /* Everything else has to go through GENERAL_REGS. */
22025 else
22026 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22027 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22028
22029 if (TARGET_DEBUG_COST)
22030 {
22031 if (dbg_cost_ctrl == 1)
22032 fprintf (stderr,
22033 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22034 ret, GET_MODE_NAME (mode), reg_class_names[from],
22035 reg_class_names[to]);
22036 dbg_cost_ctrl--;
22037 }
22038
22039 return ret;
22040 }
22041
22042 /* A C expression returning the cost of moving data of mode MODE between
22043 a register and memory. */
22044
22045 static int
22046 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22047 bool in ATTRIBUTE_UNUSED)
22048 {
22049 int ret;
22050
22051 if (TARGET_DEBUG_COST)
22052 dbg_cost_ctrl++;
22053
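/* Hard register 0 is the first GPR and 32 the first FPR.  The
   4 * hard_regno_nregs terms below charge one instruction per hard
   register the value occupies, since COSTS_N_INSNS (1) is 4.  */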
22054 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22055 ret = 4 * hard_regno_nregs (0, mode);
22056 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22057 || reg_classes_intersect_p (rclass, VSX_REGS)))
22058 ret = 4 * hard_regno_nregs (32, mode);
22059 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22060 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22061 else
22062 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22063
22064 if (TARGET_DEBUG_COST)
22065 {
22066 if (dbg_cost_ctrl == 1)
22067 fprintf (stderr,
22068 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22069 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22070 dbg_cost_ctrl--;
22071 }
22072
22073 return ret;
22074 }
22075
22076 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22077
22078 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22079 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22080 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22081 move cost between GENERAL_REGS and VSX_REGS low.
22082
22083 It might seem reasonable to use a union class. After all, if VSR
22084 usage is low and GPR usage high, it might make sense to spill GPRs to
22085 VSRs rather than to memory. However, in cases where register pressure of
22086 both is high, like the cactus_adm spec test, allowing
22087 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22088 the first scheduling pass. This is partly due to an allocno of
22089 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22090 class, which gives too high a pressure for GENERAL_REGS and too low
22091 for VSX_REGS. So, force a choice of the subclass here.
22092
22093 The best class is also the union if GENERAL_REGS and VSX_REGS have
22094 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22095 allocno class, since trying to narrow down the class by regno mode
22096 is prone to error. For example, SImode is allowed in VSX regs and
22097 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22098 it would be wrong to choose an allocno of GENERAL_REGS based on
22099 SImode. */
22100
22101 static reg_class_t
22102 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22103 reg_class_t allocno_class,
22104 reg_class_t best_class)
22105 {
22106 switch (allocno_class)
22107 {
22108 case GEN_OR_VSX_REGS:
22109 /* best_class must be a subset of allocno_class. */
22110 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22111 || best_class == GEN_OR_FLOAT_REGS
22112 || best_class == VSX_REGS
22113 || best_class == ALTIVEC_REGS
22114 || best_class == FLOAT_REGS
22115 || best_class == GENERAL_REGS
22116 || best_class == BASE_REGS);
22117 /* Use best_class but choose wider classes when copying from the
22118 wider class to best_class is cheap. This mimics IRA choice
22119 of allocno class. */
22120 if (best_class == BASE_REGS)
22121 return GENERAL_REGS;
22122 if (TARGET_VSX
22123 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
22124 return VSX_REGS;
22125 return best_class;
22126
22127 default:
22128 break;
22129 }
22130
22131 return allocno_class;
22132 }
22133
22134 /* Returns a code for a target-specific builtin that implements
22135 reciprocal of the function, or NULL_TREE if not available. */
22136
22137 static tree
22138 rs6000_builtin_reciprocal (tree fndecl)
22139 {
22140 switch (DECL_MD_FUNCTION_CODE (fndecl))
22141 {
22142 case VSX_BUILTIN_XVSQRTDP:
22143 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
22144 return NULL_TREE;
22145
22146 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
22147
22148 case VSX_BUILTIN_XVSQRTSP:
22149 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
22150 return NULL_TREE;
22151
22152 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
22153
22154 default:
22155 return NULL_TREE;
22156 }
22157 }
22158
22159 /* Load up a constant. If the mode is a vector mode, splat the value across
22160 all of the vector elements. */
22161
22162 static rtx
22163 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22164 {
22165 rtx reg;
22166
22167 if (mode == SFmode || mode == DFmode)
22168 {
22169 rtx d = const_double_from_real_value (dconst, mode);
22170 reg = force_reg (mode, d);
22171 }
22172 else if (mode == V4SFmode)
22173 {
22174 rtx d = const_double_from_real_value (dconst, SFmode);
22175 rtvec v = gen_rtvec (4, d, d, d, d);
22176 reg = gen_reg_rtx (mode);
22177 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22178 }
22179 else if (mode == V2DFmode)
22180 {
22181 rtx d = const_double_from_real_value (dconst, DFmode);
22182 rtvec v = gen_rtvec (2, d, d);
22183 reg = gen_reg_rtx (mode);
22184 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22185 }
22186 else
22187 gcc_unreachable ();
22188
22189 return reg;
22190 }
22191
22192 /* Generate an FMA instruction. */
22193
22194 static void
22195 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22196 {
22197 machine_mode mode = GET_MODE (target);
22198 rtx dst;
22199
22200 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22201 gcc_assert (dst != NULL);
22202
22203 if (dst != target)
22204 emit_move_insn (target, dst);
22205 }
22206
22207 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22208
22209 static void
22210 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22211 {
22212 machine_mode mode = GET_MODE (dst);
22213 rtx r;
22214
22215 /* This is a tad more complicated, since the fnma_optab is for
22216 a different expression: fma(-m1, m2, a), which is the same
22217 thing except in the case of signed zeros.
22218
22219 Fortunately we know that if FMA is supported, then FNMSUB is
22220 also supported in the ISA. Just expand it directly. */
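/* Illustrative difference: with m1 = m2 = a = 1.0 under
   round-to-nearest, fma (-m1, m2, a) = -1.0 + 1.0 = +0.0, while
   -fma (m1, m2, -a) = -(1.0 - 1.0) = -0.0; the two forms agree
   except for the sign of an exact zero result.  */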
22221
22222 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22223
22224 r = gen_rtx_NEG (mode, a);
22225 r = gen_rtx_FMA (mode, m1, m2, r);
22226 r = gen_rtx_NEG (mode, r);
22227 emit_insn (gen_rtx_SET (dst, r));
22228 }
22229
22230 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22231 add a reg_note saying that this was a division. Support both scalar and
22232 vector divide. Assumes no trapping math and finite arguments. */
22233
22234 void
22235 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22236 {
22237 machine_mode mode = GET_MODE (dst);
22238 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22239 int i;
22240
22241 /* Low precision estimates guarantee 5 bits of accuracy. High
22242 precision estimates guarantee 14 bits of accuracy. SFmode
22243 requires 23 bits of accuracy. DFmode requires 52 bits of
22244 accuracy. Each pass at least doubles the accuracy, leading
22245 to the following. */
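/* Concretely: from a 5-bit estimate, three passes give 5 -> 10 ->
   20 -> 40 bits, covering SFmode's 23 bits, and a fourth pass
   reaches 80 bits for DFmode's 52.  From a 14-bit estimate, one
   pass gives 28 bits (SFmode) and two passes give 56 bits
   (DFmode).  */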
22246 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22247 if (mode == DFmode || mode == V2DFmode)
22248 passes++;
22249
22250 enum insn_code code = optab_handler (smul_optab, mode);
22251 insn_gen_fn gen_mul = GEN_FCN (code);
22252
22253 gcc_assert (code != CODE_FOR_nothing);
22254
22255 one = rs6000_load_constant_and_splat (mode, dconst1);
22256
22257 /* x0 = 1./d estimate */
22258 x0 = gen_reg_rtx (mode);
22259 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22260 UNSPEC_FRES)));
22261
22262 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22263 if (passes > 1)
22264 {
22265 /* e0 = 1. - d * x0 */
22266 e0 = gen_reg_rtx (mode);
22267 rs6000_emit_nmsub (e0, d, x0, one);
22268
22269 /* x1 = x0 + e0 * x0 */
22270 x1 = gen_reg_rtx (mode);
22271 rs6000_emit_madd (x1, e0, x0, x0);
22272
22273 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22274 ++i, xprev = xnext, eprev = enext)
22275 {
22276 /* enext = eprev * eprev */
22277 enext = gen_reg_rtx (mode);
22278 emit_insn (gen_mul (enext, eprev, eprev));
22279
22280 /* xnext = xprev + enext * xprev */
22281 xnext = gen_reg_rtx (mode);
22282 rs6000_emit_madd (xnext, enext, xprev, xprev);
22283 }
22284 }
22285 else
22286 xprev = x0;
22287
22288 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22289
22290 /* u = n * xprev */
22291 u = gen_reg_rtx (mode);
22292 emit_insn (gen_mul (u, n, xprev));
22293
22294 /* v = n - (d * u) */
22295 v = gen_reg_rtx (mode);
22296 rs6000_emit_nmsub (v, d, u, n);
22297
22298 /* dst = (v * xprev) + u */
22299 rs6000_emit_madd (dst, v, xprev, u);
22300
22301 if (note_p)
22302 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22303 }
22304
22305 /* Goldschmidt's Algorithm for single/double-precision floating point
22306 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
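/* Sketch of the iteration used below: from estimates g ~= sqrt(src)
   and h ~= 1/(2*sqrt(src)), each pass computes the residual
   t = 1/2 - g*h and refines g' = g + g*t, h' = h + h*t, converging
   quadratically.  The rsqrt result is recovered at the end as 2*h.  */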
22307
22308 void
22309 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22310 {
22311 machine_mode mode = GET_MODE (src);
22312 rtx e = gen_reg_rtx (mode);
22313 rtx g = gen_reg_rtx (mode);
22314 rtx h = gen_reg_rtx (mode);
22315
22316 /* Low precision estimates guarantee 5 bits of accuracy. High
22317 precision estimates guarantee 14 bits of accuracy. SFmode
22318 requires 23 bits of accuracy. DFmode requires 52 bits of
22319 accuracy. Each pass at least doubles the accuracy, leading
22320 to the following. */
22321 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22322 if (mode == DFmode || mode == V2DFmode)
22323 passes++;
22324
22325 int i;
22326 rtx mhalf;
22327 enum insn_code code = optab_handler (smul_optab, mode);
22328 insn_gen_fn gen_mul = GEN_FCN (code);
22329
22330 gcc_assert (code != CODE_FOR_nothing);
22331
22332 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22333
22334 /* e = rsqrt estimate */
22335 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22336 UNSPEC_RSQRT)));
22337
22338 /* If src == 0.0, filter out the infinite estimate to prevent a NaN for sqrt (0.0). */
22339 if (!recip)
22340 {
22341 rtx zero = force_reg (mode, CONST0_RTX (mode));
22342
22343 if (mode == SFmode)
22344 {
22345 rtx target = emit_conditional_move (e, GT, src, zero, mode,
22346 e, zero, mode, 0);
22347 if (target != e)
22348 emit_move_insn (e, target);
22349 }
22350 else
22351 {
22352 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22353 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22354 }
22355 }
22356
22357 /* g = sqrt estimate. */
22358 emit_insn (gen_mul (g, e, src));
22359 /* h = 1/(2*sqrt) estimate. */
22360 emit_insn (gen_mul (h, e, mhalf));
22361
22362 if (recip)
22363 {
22364 if (passes == 1)
22365 {
22366 rtx t = gen_reg_rtx (mode);
22367 rs6000_emit_nmsub (t, g, h, mhalf);
22368 /* Apply correction directly to 1/rsqrt estimate. */
22369 rs6000_emit_madd (dst, e, t, e);
22370 }
22371 else
22372 {
22373 for (i = 0; i < passes; i++)
22374 {
22375 rtx t1 = gen_reg_rtx (mode);
22376 rtx g1 = gen_reg_rtx (mode);
22377 rtx h1 = gen_reg_rtx (mode);
22378
22379 rs6000_emit_nmsub (t1, g, h, mhalf);
22380 rs6000_emit_madd (g1, g, t1, g);
22381 rs6000_emit_madd (h1, h, t1, h);
22382
22383 g = g1;
22384 h = h1;
22385 }
22386 /* Multiply by 2 for 1/rsqrt. */
22387 emit_insn (gen_add3_insn (dst, h, h));
22388 }
22389 }
22390 else
22391 {
22392 rtx t = gen_reg_rtx (mode);
22393 rs6000_emit_nmsub (t, g, h, mhalf);
22394 rs6000_emit_madd (dst, g, t, g);
22395 }
22396
22397 return;
22398 }
22399
22400 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22401 (Power7) targets. DST is the target, and SRC is the argument operand. */
22402
22403 void
22404 rs6000_emit_popcount (rtx dst, rtx src)
22405 {
22406 machine_mode mode = GET_MODE (dst);
22407 rtx tmp1, tmp2;
22408
22409 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22410 if (TARGET_POPCNTD)
22411 {
22412 if (mode == SImode)
22413 emit_insn (gen_popcntdsi2 (dst, src));
22414 else
22415 emit_insn (gen_popcntddi2 (dst, src));
22416 return;
22417 }
22418
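/* Fallback sketch: popcntb leaves a per-byte population count in each
   byte of TMP1; multiplying by 0x01010101 (0x0101010101010101 for
   DImode) accumulates all byte counts into the most significant byte,
   which the final shift extracts.  E.g. for SImode src 0x000000ff,
   popcntb gives 0x00000008, the multiply gives 0x08080808, and
   0x08080808 >> 24 = 8.  */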
22419 tmp1 = gen_reg_rtx (mode);
22420
22421 if (mode == SImode)
22422 {
22423 emit_insn (gen_popcntbsi2 (tmp1, src));
22424 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22425 NULL_RTX, 0);
22426 tmp2 = force_reg (SImode, tmp2);
22427 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22428 }
22429 else
22430 {
22431 emit_insn (gen_popcntbdi2 (tmp1, src));
22432 tmp2 = expand_mult (DImode, tmp1,
22433 GEN_INT ((HOST_WIDE_INT)
22434 0x01010101 << 32 | 0x01010101),
22435 NULL_RTX, 0);
22436 tmp2 = force_reg (DImode, tmp2);
22437 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22438 }
22439 }
22440
22441
22442 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22443 target, and SRC is the argument operand. */
22444
22445 void
22446 rs6000_emit_parity (rtx dst, rtx src)
22447 {
22448 machine_mode mode = GET_MODE (dst);
22449 rtx tmp;
22450
22451 tmp = gen_reg_rtx (mode);
22452
22453 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22454 if (TARGET_CMPB)
22455 {
22456 if (mode == SImode)
22457 {
22458 emit_insn (gen_popcntbsi2 (tmp, src));
22459 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22460 }
22461 else
22462 {
22463 emit_insn (gen_popcntbdi2 (tmp, src));
22464 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22465 }
22466 return;
22467 }
22468
22469 if (mode == SImode)
22470 {
22471 /* Is mult+shift >= shift+xor+shift+xor? */
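/* Sketch of the shift+xor path: popcntb leaves per-byte counts in
   tmp1, and parity is the sum of those counts modulo 2.  XOR-folding
   the upper half onto the lower (shift by 16, then by 8) leaves that
   sum modulo 2 in bit 0 of the low byte, which the final AND with 1
   extracts.  */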
22472 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22473 {
22474 rtx tmp1, tmp2, tmp3, tmp4;
22475
22476 tmp1 = gen_reg_rtx (SImode);
22477 emit_insn (gen_popcntbsi2 (tmp1, src));
22478
22479 tmp2 = gen_reg_rtx (SImode);
22480 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22481 tmp3 = gen_reg_rtx (SImode);
22482 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22483
22484 tmp4 = gen_reg_rtx (SImode);
22485 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22486 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22487 }
22488 else
22489 rs6000_emit_popcount (tmp, src);
22490 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22491 }
22492 else
22493 {
22494 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22495 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22496 {
22497 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22498
22499 tmp1 = gen_reg_rtx (DImode);
22500 emit_insn (gen_popcntbdi2 (tmp1, src));
22501
22502 tmp2 = gen_reg_rtx (DImode);
22503 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22504 tmp3 = gen_reg_rtx (DImode);
22505 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22506
22507 tmp4 = gen_reg_rtx (DImode);
22508 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22509 tmp5 = gen_reg_rtx (DImode);
22510 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22511
22512 tmp6 = gen_reg_rtx (DImode);
22513 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22514 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22515 }
22516 else
22517 rs6000_emit_popcount (tmp, src);
22518 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22519 }
22520 }
22521
22522 /* Expand an Altivec constant permutation for little endian mode.
22523 OP0 and OP1 are the input vectors and TARGET is the output vector.
22524 SEL specifies the constant permutation vector.
22525
22526 There are two issues: First, the two input operands must be
22527 swapped so that together they form a double-wide array in LE
22528 order. Second, the vperm instruction has surprising behavior
22529 in LE mode: it interprets the elements of the source vectors
22530 in BE mode ("left to right") and interprets the elements of
22531 the destination vector in LE mode ("right to left"). To
22532 correct for this, we must subtract each element of the permute
22533 control vector from 31.
22534
22535 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22536 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22537 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22538 serve as the permute control vector. Then, in BE mode,
22539
22540 vperm 9,10,11,12
22541
22542 places the desired result in vr9. However, in LE mode the
22543 vector contents will be
22544
22545 vr10 = 00000003 00000002 00000001 00000000
22546 vr11 = 00000007 00000006 00000005 00000004
22547
22548 The result of the vperm using the same permute control vector is
22549
22550 vr9 = 05000000 07000000 01000000 03000000
22551
22552 That is, the leftmost 4 bytes of vr10 are interpreted as the
22553 source for the rightmost 4 bytes of vr9, and so on.
22554
22555 If we change the permute control vector to
22556
22557 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22558
22559 and issue
22560
22561 vperm 9,11,10,12
22562
22563 we get the desired
22564
22565 vr9 = 00000006 00000004 00000002 00000000. */
22566
22567 static void
22568 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22569 const vec_perm_indices &sel)
22570 {
22571 unsigned int i;
22572 rtx perm[16];
22573 rtx constv, unspec;
22574
22575 /* Unpack and adjust the constant selector. */
22576 for (i = 0; i < 16; ++i)
22577 {
22578 unsigned int elt = 31 - (sel[i] & 31);
22579 perm[i] = GEN_INT (elt);
22580 }
22581
22582 /* Expand to a permute, swapping the inputs and using the
22583 adjusted selector. */
22584 if (!REG_P (op0))
22585 op0 = force_reg (V16QImode, op0);
22586 if (!REG_P (op1))
22587 op1 = force_reg (V16QImode, op1);
22588
22589 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22590 constv = force_reg (V16QImode, constv);
22591 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22592 UNSPEC_VPERM);
22593 if (!REG_P (target))
22594 {
22595 rtx tmp = gen_reg_rtx (V16QImode);
22596 emit_move_insn (tmp, unspec);
22597 unspec = tmp;
22598 }
22599
22600 emit_move_insn (target, unspec);
22601 }
22602
22603 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22604 permute control vector. But here it's not a constant, so we must
22605 generate a vector NAND or NOR to do the adjustment. */
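/* A bitwise complement suffices because vperm examines only the low
   five bits of each selector byte, and for any five-bit value x,
   (~x & 31) == 31 - x; so a vnand (or vnor) of the selector with
   itself performs the same "subtract from 31" adjustment that the
   constant case computes directly.  */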
22606
22607 void
22608 altivec_expand_vec_perm_le (rtx operands[4])
22609 {
22610 rtx notx, iorx, unspec;
22611 rtx target = operands[0];
22612 rtx op0 = operands[1];
22613 rtx op1 = operands[2];
22614 rtx sel = operands[3];
22615 rtx tmp = target;
22616 rtx norreg = gen_reg_rtx (V16QImode);
22617 machine_mode mode = GET_MODE (target);
22618
22619 /* Get everything in regs so the pattern matches. */
22620 if (!REG_P (op0))
22621 op0 = force_reg (mode, op0);
22622 if (!REG_P (op1))
22623 op1 = force_reg (mode, op1);
22624 if (!REG_P (sel))
22625 sel = force_reg (V16QImode, sel);
22626 if (!REG_P (target))
22627 tmp = gen_reg_rtx (mode);
22628
22629 if (TARGET_P9_VECTOR)
22630 {
22631 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22632 UNSPEC_VPERMR);
22633 }
22634 else
22635 {
22636 /* Invert the selector with a VNAND if available, else a VNOR.
22637 The VNAND is preferred for future fusion opportunities. */
22638 notx = gen_rtx_NOT (V16QImode, sel);
22639 iorx = (TARGET_P8_VECTOR
22640 ? gen_rtx_IOR (V16QImode, notx, notx)
22641 : gen_rtx_AND (V16QImode, notx, notx));
22642 emit_insn (gen_rtx_SET (norreg, iorx));
22643
22644 /* Permute with operands reversed and adjusted selector. */
22645 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22646 UNSPEC_VPERM);
22647 }
22648
22649 /* Copy into target, possibly by way of a register. */
22650 if (!REG_P (target))
22651 {
22652 emit_move_insn (tmp, unspec);
22653 unspec = tmp;
22654 }
22655
22656 emit_move_insn (target, unspec);
22657 }
22658
22659 /* Expand an Altivec constant permutation. Return true if we match
22660 an efficient implementation; false to fall back to VPERM.
22661
22662 OP0 and OP1 are the input vectors and TARGET is the output vector.
22663 SEL specifies the constant permutation vector. */
22664
22665 static bool
22666 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22667 const vec_perm_indices &sel)
22668 {
22669 struct altivec_perm_insn {
22670 HOST_WIDE_INT mask;
22671 enum insn_code impl;
22672 unsigned char perm[16];
22673 };
22674 static const struct altivec_perm_insn patterns[] = {
22675 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
22676 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
22677 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
22678 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
22679 { OPTION_MASK_ALTIVEC,
22680 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22681 : CODE_FOR_altivec_vmrglb_direct),
22682 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
22683 { OPTION_MASK_ALTIVEC,
22684 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22685 : CODE_FOR_altivec_vmrglh_direct),
22686 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22687 { OPTION_MASK_ALTIVEC,
22688 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22689 : CODE_FOR_altivec_vmrglw_direct),
22690 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22691 { OPTION_MASK_ALTIVEC,
22692 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22693 : CODE_FOR_altivec_vmrghb_direct),
22694 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22695 { OPTION_MASK_ALTIVEC,
22696 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22697 : CODE_FOR_altivec_vmrghh_direct),
22698 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22699 { OPTION_MASK_ALTIVEC,
22700 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22701 : CODE_FOR_altivec_vmrghw_direct),
22702 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22703 { OPTION_MASK_P8_VECTOR,
22704 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22705 : CODE_FOR_p8_vmrgow_v4sf_direct),
22706 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22707 { OPTION_MASK_P8_VECTOR,
22708 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22709 : CODE_FOR_p8_vmrgew_v4sf_direct),
22710 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22711 };
22712
22713 unsigned int i, j, elt, which;
22714 unsigned char perm[16];
22715 rtx x;
22716 bool one_vec;
22717
22718 /* Unpack the constant selector. */
22719 for (i = which = 0; i < 16; ++i)
22720 {
22721 elt = sel[i] & 31;
22722 which |= (elt < 16 ? 1 : 2);
22723 perm[i] = elt;
22724 }
22725
22726 /* Simplify the constant selector based on operands. */
22727 switch (which)
22728 {
22729 default:
22730 gcc_unreachable ();
22731
22732 case 3:
22733 one_vec = false;
22734 if (!rtx_equal_p (op0, op1))
22735 break;
22736 /* FALLTHRU */
22737
22738 case 2:
22739 for (i = 0; i < 16; ++i)
22740 perm[i] &= 15;
22741 op0 = op1;
22742 one_vec = true;
22743 break;
22744
22745 case 1:
22746 op1 = op0;
22747 one_vec = true;
22748 break;
22749 }
22750
22751 /* Look for splat patterns. */
22752 if (one_vec)
22753 {
22754 elt = perm[0];
22755
22756 for (i = 0; i < 16; ++i)
22757 if (perm[i] != elt)
22758 break;
22759 if (i == 16)
22760 {
22761 if (!BYTES_BIG_ENDIAN)
22762 elt = 15 - elt;
22763 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22764 return true;
22765 }
22766
22767 if (elt % 2 == 0)
22768 {
22769 for (i = 0; i < 16; i += 2)
22770 if (perm[i] != elt || perm[i + 1] != elt + 1)
22771 break;
22772 if (i == 16)
22773 {
22774 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22775 x = gen_reg_rtx (V8HImode);
22776 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22777 GEN_INT (field)));
22778 emit_move_insn (target, gen_lowpart (V16QImode, x));
22779 return true;
22780 }
22781 }
22782
22783 if (elt % 4 == 0)
22784 {
22785 for (i = 0; i < 16; i += 4)
22786 if (perm[i] != elt
22787 || perm[i + 1] != elt + 1
22788 || perm[i + 2] != elt + 2
22789 || perm[i + 3] != elt + 3)
22790 break;
22791 if (i == 16)
22792 {
22793 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22794 x = gen_reg_rtx (V4SImode);
22795 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22796 GEN_INT (field)));
22797 emit_move_insn (target, gen_lowpart (V16QImode, x));
22798 return true;
22799 }
22800 }
22801 }
22802
22803 /* Look for merge and pack patterns. */
22804 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22805 {
22806 bool swapped;
22807
22808 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22809 continue;
22810
22811 elt = patterns[j].perm[0];
22812 if (perm[0] == elt)
22813 swapped = false;
22814 else if (perm[0] == elt + 16)
22815 swapped = true;
22816 else
22817 continue;
22818 for (i = 1; i < 16; ++i)
22819 {
22820 elt = patterns[j].perm[i];
22821 if (swapped)
22822 elt = (elt >= 16 ? elt - 16 : elt + 16);
22823 else if (one_vec && elt >= 16)
22824 elt -= 16;
22825 if (perm[i] != elt)
22826 break;
22827 }
22828 if (i == 16)
22829 {
22830 enum insn_code icode = patterns[j].impl;
22831 machine_mode omode = insn_data[icode].operand[0].mode;
22832 machine_mode imode = insn_data[icode].operand[1].mode;
22833
22834 /* For little-endian, don't use vpkuwum and vpkuhum if the
22835 underlying vector type is not V4SI and V8HI, respectively.
22836 For example, using vpkuwum with a V8HI picks up the even
22837 halfwords (BE numbering) when the even halfwords (LE
22838 numbering) are what we need. */
22839 if (!BYTES_BIG_ENDIAN
22840 && icode == CODE_FOR_altivec_vpkuwum_direct
22841 && ((REG_P (op0)
22842 && GET_MODE (op0) != V4SImode)
22843 || (SUBREG_P (op0)
22844 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22845 continue;
22846 if (!BYTES_BIG_ENDIAN
22847 && icode == CODE_FOR_altivec_vpkuhum_direct
22848 && ((REG_P (op0)
22849 && GET_MODE (op0) != V8HImode)
22850 || (SUBREG_P (op0)
22851 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22852 continue;
22853
22854 /* For little-endian, the two input operands must be swapped
22855 (or swapped back) to ensure proper right-to-left numbering
22856 from 0 to 2N-1. */
22857 if (swapped ^ !BYTES_BIG_ENDIAN)
22858 std::swap (op0, op1);
22859 if (imode != V16QImode)
22860 {
22861 op0 = gen_lowpart (imode, op0);
22862 op1 = gen_lowpart (imode, op1);
22863 }
22864 if (omode == V16QImode)
22865 x = target;
22866 else
22867 x = gen_reg_rtx (omode);
22868 emit_insn (GEN_FCN (icode) (x, op0, op1));
22869 if (omode != V16QImode)
22870 emit_move_insn (target, gen_lowpart (V16QImode, x));
22871 return true;
22872 }
22873 }
22874
22875 if (!BYTES_BIG_ENDIAN)
22876 {
22877 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22878 return true;
22879 }
22880
22881 return false;
22882 }
22883
22884 /* Expand a VSX Permute Doubleword constant permutation.
22885 Return true if we match an efficient implementation. */
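/* PERM0 and PERM1 each select one of the four available doublewords:
   values 0 and 1 name the doublewords of OP0, while 2 and 3 name
   those of OP1.  The folds below canonicalize the pair so that PERM0
   selects from OP0 and PERM1 from OP1, which is the form the
   vec_concat/vec_select expansion at the end requires.  */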
22886
22887 static bool
22888 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22889 unsigned char perm0, unsigned char perm1)
22890 {
22891 rtx x;
22892
22893 /* If both selectors come from the same operand, fold to single op. */
22894 if ((perm0 & 2) == (perm1 & 2))
22895 {
22896 if (perm0 & 2)
22897 op0 = op1;
22898 else
22899 op1 = op0;
22900 }
22901 /* If both operands are equal, fold to simpler permutation. */
22902 if (rtx_equal_p (op0, op1))
22903 {
22904 perm0 = perm0 & 1;
22905 perm1 = (perm1 & 1) + 2;
22906 }
22907 /* If the first selector comes from the second operand, swap. */
22908 else if (perm0 & 2)
22909 {
22910 if (perm1 & 2)
22911 return false;
22912 perm0 -= 2;
22913 perm1 += 2;
22914 std::swap (op0, op1);
22915 }
22916 /* If the second selector does not come from the second operand, fail. */
22917 else if ((perm1 & 2) == 0)
22918 return false;
22919
22920 /* Success! */
22921 if (target != NULL)
22922 {
22923 machine_mode vmode, dmode;
22924 rtvec v;
22925
22926 vmode = GET_MODE (target);
22927 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22928 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22929 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22930 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22931 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22932 emit_insn (gen_rtx_SET (target, x));
22933 }
22934 return true;
22935 }
22936
22937 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22938
22939 static bool
22940 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22941 rtx op1, const vec_perm_indices &sel)
22942 {
22943 bool testing_p = !target;
22944
22945 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22946 if (TARGET_ALTIVEC && testing_p)
22947 return true;
22948
22949 if (op0)
22950 {
22951 rtx nop0 = force_reg (vmode, op0);
22952 if (op0 == op1)
22953 op1 = nop0;
22954 op0 = nop0;
22955 }
22956 if (op1)
22957 op1 = force_reg (vmode, op1);
22958
22959 /* Check for ps_merge* or xxpermdi insns. */
22960 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22961 {
22962 if (testing_p)
22963 {
22964 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22965 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22966 }
22967 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22968 return true;
22969 }
22970
22971 if (TARGET_ALTIVEC)
22972 {
22973 /* Force the target-independent code to lower to V16QImode. */
22974 if (vmode != V16QImode)
22975 return false;
22976 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22977 return true;
22978 }
22979
22980 return false;
22981 }
22982
22983 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22984 OP0 and OP1 are the input vectors and TARGET is the output vector.
22985 PERM specifies the constant permutation vector. */
22986
22987 static void
22988 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22989 machine_mode vmode, const vec_perm_builder &perm)
22990 {
22991 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22992 if (x != target)
22993 emit_move_insn (target, x);
22994 }
22995
22996 /* Expand an extract even operation. */
22997
22998 void
22999 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23000 {
23001 machine_mode vmode = GET_MODE (target);
23002 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23003 vec_perm_builder perm (nelt, nelt, 1);
23004
23005 for (i = 0; i < nelt; i++)
23006 perm.quick_push (i * 2);
23007
23008 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23009 }
23010
23011 /* Expand a vector interleave operation. */
23012
23013 void
23014 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23015 {
23016 machine_mode vmode = GET_MODE (target);
23017 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23018 vec_perm_builder perm (nelt, nelt, 1);
23019
23020 high = (highp ? 0 : nelt / 2);
23021 for (i = 0; i < nelt / 2; i++)
23022 {
23023 perm.quick_push (i + high);
23024 perm.quick_push (i + nelt + high);
23025 }
23026
23027 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23028 }
23029
23030 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
23031 void
23032 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23033 {
23034 HOST_WIDE_INT hwi_scale (scale);
23035 REAL_VALUE_TYPE r_pow;
23036 rtvec v = rtvec_alloc (2);
23037 rtx elt;
23038 rtx scale_vec = gen_reg_rtx (V2DFmode);
23039 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23040 elt = const_double_from_real_value (r_pow, DFmode);
23041 RTVEC_ELT (v, 0) = elt;
23042 RTVEC_ELT (v, 1) = elt;
23043 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23044 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23045 }
23046
23047 /* Return an RTX representing where to find the function value of a
23048 function returning MODE. */
23049 static rtx
23050 rs6000_complex_function_value (machine_mode mode)
23051 {
23052 unsigned int regno;
23053 rtx r1, r2;
23054 machine_mode inner = GET_MODE_INNER (mode);
23055 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23056
23057 if (TARGET_FLOAT128_TYPE
23058 && (mode == KCmode
23059 || (mode == TCmode && TARGET_IEEEQUAD)))
23060 regno = ALTIVEC_ARG_RETURN;
23061
23062 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23063 regno = FP_ARG_RETURN;
23064
23065 else
23066 {
23067 regno = GP_ARG_RETURN;
23068
23069 /* 32-bit is OK since it'll go in r3/r4. */
23070 if (TARGET_32BIT && inner_bytes >= 4)
23071 return gen_rtx_REG (mode, regno);
23072 }
23073
23074 if (inner_bytes >= 8)
23075 return gen_rtx_REG (mode, regno);
23076
23077 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23078 const0_rtx);
23079 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23080 GEN_INT (inner_bytes));
23081 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23082 }
23083
23084 /* Return an rtx describing a return value of MODE as a PARALLEL
23085 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23086 stride REG_STRIDE. */
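/* For example, a DImode value returned via two SImode registers with
   stride 1 starting at GP_ARG_RETURN (r3) yields
   (parallel [(expr_list (reg:SI 3) (const_int 0))
              (expr_list (reg:SI 4) (const_int 4))]),
   the second offset being GET_MODE_SIZE (SImode).  */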
23087
23088 static rtx
23089 rs6000_parallel_return (machine_mode mode,
23090 int n_elts, machine_mode elt_mode,
23091 unsigned int regno, unsigned int reg_stride)
23092 {
23093 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23094
23095 int i;
23096 for (i = 0; i < n_elts; i++)
23097 {
23098 rtx r = gen_rtx_REG (elt_mode, regno);
23099 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23100 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23101 regno += reg_stride;
23102 }
23103
23104 return par;
23105 }
23106
23107 /* Target hook for TARGET_FUNCTION_VALUE.
23108
23109 An integer value is in r3 and a floating-point value is in fp1,
23110 unless -msoft-float. */
23111
23112 static rtx
23113 rs6000_function_value (const_tree valtype,
23114 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23115 bool outgoing ATTRIBUTE_UNUSED)
23116 {
23117 machine_mode mode;
23118 unsigned int regno;
23119 machine_mode elt_mode;
23120 int n_elts;
23121
23122 /* Special handling for structs in darwin64. */
23123 if (TARGET_MACHO
23124 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23125 {
23126 CUMULATIVE_ARGS valcum;
23127 rtx valret;
23128
23129 valcum.words = 0;
23130 valcum.fregno = FP_ARG_MIN_REG;
23131 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23132 /* Do a trial code generation as if this were going to be passed as
23133 an argument; if any part goes in memory, we return NULL. */
23134 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23135 if (valret)
23136 return valret;
23137 /* Otherwise fall through to standard ABI rules. */
23138 }
23139
23140 mode = TYPE_MODE (valtype);
23141
23142 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23143 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23144 {
23145 int first_reg, n_regs;
23146
23147 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23148 {
23149 /* _Decimal128 must use even/odd register pairs. */
23150 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23151 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23152 }
23153 else
23154 {
23155 first_reg = ALTIVEC_ARG_RETURN;
23156 n_regs = 1;
23157 }
23158
23159 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23160 }
23161
23162 /* Some return value types need to be split under the 32-bit ABI with -mpowerpc64. */
23163 if (TARGET_32BIT && TARGET_POWERPC64)
23164 switch (mode)
23165 {
23166 default:
23167 break;
23168 case E_DImode:
23169 case E_SCmode:
23170 case E_DCmode:
23171 case E_TCmode:
23172 int count = GET_MODE_SIZE (mode) / 4;
23173 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23174 }
23175
23176 if ((INTEGRAL_TYPE_P (valtype)
23177 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23178 || POINTER_TYPE_P (valtype))
23179 mode = TARGET_32BIT ? SImode : DImode;
23180
23181 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23182 /* _Decimal128 must use an even/odd register pair. */
23183 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23184 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23185 && !FLOAT128_VECTOR_P (mode))
23186 regno = FP_ARG_RETURN;
23187 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23188 && targetm.calls.split_complex_arg)
23189 return rs6000_complex_function_value (mode);
23190 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23191 return register is used in both cases, and we won't see V2DImode/V2DFmode
23192 for pure altivec, combine the two cases. */
23193 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23194 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23195 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23196 regno = ALTIVEC_ARG_RETURN;
23197 else
23198 regno = GP_ARG_RETURN;
23199
23200 return gen_rtx_REG (mode, regno);
23201 }
23202
23203 /* Define how to find the value returned by a library function
23204 assuming the value has mode MODE. */
23205 rtx
23206 rs6000_libcall_value (machine_mode mode)
23207 {
23208 unsigned int regno;
23209
23210 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
23211 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23212 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23213
23214 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23215 /* _Decimal128 must use an even/odd register pair. */
23216 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23217 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23218 regno = FP_ARG_RETURN;
23219 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23220 return register is used in both cases, and we won't see V2DImode/V2DFmode
23221 for pure altivec, combine the two cases. */
23222 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23223 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23224 regno = ALTIVEC_ARG_RETURN;
23225 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23226 return rs6000_complex_function_value (mode);
23227 else
23228 regno = GP_ARG_RETURN;
23229
23230 return gen_rtx_REG (mode, regno);
23231 }
23232
23233 /* Compute register pressure classes. We implement the target hook to avoid
23234 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23235 lead to incorrect estimates of the number of available registers and therefore
23236 increased register pressure/spill. */
23237 static int
23238 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23239 {
23240 int n;
23241
23242 n = 0;
23243 pressure_classes[n++] = GENERAL_REGS;
23244 if (TARGET_VSX)
23245 pressure_classes[n++] = VSX_REGS;
23246 else
23247 {
23248 if (TARGET_ALTIVEC)
23249 pressure_classes[n++] = ALTIVEC_REGS;
23250 if (TARGET_HARD_FLOAT)
23251 pressure_classes[n++] = FLOAT_REGS;
23252 }
23253 pressure_classes[n++] = CR_REGS;
23254 pressure_classes[n++] = SPECIAL_REGS;
23255
23256 return n;
23257 }
23258
23259 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23260 Frame pointer elimination is automatically handled.
23261
23262 For the RS/6000, if frame pointer elimination is being done, we would like
23263 to convert ap into fp, not sp.
23264
23265 We need r30 if -mminimal-toc was specified, and there are constant pool
23266 references. */
23267
23268 static bool
23269 rs6000_can_eliminate (const int from, const int to)
23270 {
23271 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23272 ? ! frame_pointer_needed
23273 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23274 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23275 || constant_pool_empty_p ()
23276 : true);
23277 }
23278
23279 /* Define the offset between two registers, FROM to be eliminated and its
23280 replacement TO, at the start of a routine. */
23281 HOST_WIDE_INT
23282 rs6000_initial_elimination_offset (int from, int to)
23283 {
23284 rs6000_stack_t *info = rs6000_stack_info ();
23285 HOST_WIDE_INT offset;
23286
23287 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23288 offset = info->push_p ? 0 : -info->total_size;
23289 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23290 {
23291 offset = info->push_p ? 0 : -info->total_size;
23292 if (FRAME_GROWS_DOWNWARD)
23293 offset += info->fixed_size + info->vars_size + info->parm_size;
23294 }
23295 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23296 offset = FRAME_GROWS_DOWNWARD
23297 ? info->fixed_size + info->vars_size + info->parm_size
23298 : 0;
23299 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23300 offset = info->total_size;
23301 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23302 offset = info->push_p ? info->total_size : 0;
23303 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23304 offset = 0;
23305 else
23306 gcc_unreachable ();
23307
23308 return offset;
23309 }
23310
23311 /* Fill in sizes of registers used by unwinder. */
23312
23313 static void
23314 rs6000_init_dwarf_reg_sizes_extra (tree address)
23315 {
23316 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23317 {
23318 int i;
23319 machine_mode mode = TYPE_MODE (char_type_node);
23320 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23321 rtx mem = gen_rtx_MEM (BLKmode, addr);
23322 rtx value = gen_int_mode (16, mode);
23323
23324 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23325 The unwinder still needs to know the size of Altivec registers. */
23326
23327 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23328 {
23329 int column = DWARF_REG_TO_UNWIND_COLUMN
23330 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23331 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23332
23333 emit_move_insn (adjust_address (mem, mode, offset), value);
23334 }
23335 }
23336 }
23337
23338 /* Map internal gcc register numbers to debug format register numbers.
23339 FORMAT specifies the type of debug register number to use:
23340 0 -- debug information, except for frame-related sections
23341 1 -- DWARF .debug_frame section
23342 2 -- DWARF .eh_frame section */
23343
23344 unsigned int
23345 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23346 {
23347 /* On some platforms, we use the standard DWARF register
23348 numbering for .debug_info and .debug_frame. */
23349 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
23350 {
23351 #ifdef RS6000_USE_DWARF_NUMBERING
23352 if (regno <= 31)
23353 return regno;
23354 if (FP_REGNO_P (regno))
23355 return regno - FIRST_FPR_REGNO + 32;
23356 if (ALTIVEC_REGNO_P (regno))
23357 return regno - FIRST_ALTIVEC_REGNO + 1124;
23358 if (regno == LR_REGNO)
23359 return 108;
23360 if (regno == CTR_REGNO)
23361 return 109;
23362 if (regno == CA_REGNO)
23363 return 101; /* XER */
23364 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23365 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23366 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23367 to the DWARF reg for CR. */
23368 if (format == 1 && regno == CR2_REGNO)
23369 return 64;
23370 if (CR_REGNO_P (regno))
23371 return regno - CR0_REGNO + 86;
23372 if (regno == VRSAVE_REGNO)
23373 return 356;
23374 if (regno == VSCR_REGNO)
23375 return 67;
23376
23377 /* These do not make much sense. */
23378 if (regno == FRAME_POINTER_REGNUM)
23379 return 111;
23380 if (regno == ARG_POINTER_REGNUM)
23381 return 67;
23382 if (regno == 64)
23383 return 100;
23384
23385 gcc_unreachable ();
23386 #endif
23387 }
23388
23389 /* For non-DWARF debug information and for .eh_frame we use the internal
23390 register numbers of GCC 7 and before, so translate REGNO to its
23391 number in that numbering. */
23392 if (regno <= 31)
23393 return regno;
23394 if (FP_REGNO_P (regno))
23395 return regno - FIRST_FPR_REGNO + 32;
23396 if (ALTIVEC_REGNO_P (regno))
23397 return regno - FIRST_ALTIVEC_REGNO + 77;
23398 if (regno == LR_REGNO)
23399 return 65;
23400 if (regno == CTR_REGNO)
23401 return 66;
23402 if (regno == CA_REGNO)
23403 return 76; /* XER */
23404 if (CR_REGNO_P (regno))
23405 return regno - CR0_REGNO + 68;
23406 if (regno == VRSAVE_REGNO)
23407 return 109;
23408 if (regno == VSCR_REGNO)
23409 return 110;
23410
23411 if (regno == FRAME_POINTER_REGNUM)
23412 return 111;
23413 if (regno == ARG_POINTER_REGNUM)
23414 return 67;
23415 if (regno == 64)
23416 return 64;
23417
23418 gcc_unreachable ();
23419 }
23420
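/* Worked example (illustrative, not compiled): with
   RS6000_USE_DWARF_NUMBERING defined, the same register is emitted under
   different numbers depending on the consumer, e.g. for the link register:

     rs6000_dbx_register_number (LR_REGNO, 1);  // .debug_frame -> 108
     rs6000_dbx_register_number (LR_REGNO, 2);  // .eh_frame -> 65 (GCC 7 numbering)

   so a debug-info consumer must know which section a register column came
   from before decoding it.  */
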
23421 /* target hook eh_return_filter_mode */
23422 static scalar_int_mode
23423 rs6000_eh_return_filter_mode (void)
23424 {
23425 return TARGET_32BIT ? SImode : word_mode;
23426 }
23427
23428 /* Target hook for translate_mode_attribute. */
23429 static machine_mode
23430 rs6000_translate_mode_attribute (machine_mode mode)
23431 {
23432 if ((FLOAT128_IEEE_P (mode)
23433 && ieee128_float_type_node == long_double_type_node)
23434 || (FLOAT128_IBM_P (mode)
23435 && ibm128_float_type_node == long_double_type_node))
23436 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23437 return mode;
23438 }
23439
23440 /* Target hook for scalar_mode_supported_p. */
23441 static bool
23442 rs6000_scalar_mode_supported_p (scalar_mode mode)
23443 {
23444 /* -m32 does not support TImode. This is the default, from
23445 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23446 same ABI as for -m32. But default_scalar_mode_supported_p allows
23447 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23448 for -mpowerpc64. */
23449 if (TARGET_32BIT && mode == TImode)
23450 return false;
23451
23452 if (DECIMAL_FLOAT_MODE_P (mode))
23453 return default_decimal_float_supported_p ();
23454 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23455 return true;
23456 else
23457 return default_scalar_mode_supported_p (mode);
23458 }
23459
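/* Illustrative consequence of the hook above (a sketch): with -mfloat128
   both explicit 128-bit float types are usable regardless of the long
   double format,

     __float128 qf;  // IEEE 128-bit: KFmode (TFmode if long double is IEEE)
     __ibm128 df;    // IBM double-double: IFmode

   while on -m32 a declaration such as "__int128 i;" is rejected, because
   TImode is not a supported scalar mode there.  */
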
23460 /* Target hook for vector_mode_supported_p. */
23461 static bool
23462 rs6000_vector_mode_supported_p (machine_mode mode)
23463 {
23464 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23465 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23466 double-double. */
23467 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23468 return true;
23469
23470 else
23471 return false;
23472 }
23473
23474 /* Target hook for floatn_mode. */
23475 static opt_scalar_float_mode
23476 rs6000_floatn_mode (int n, bool extended)
23477 {
23478 if (extended)
23479 {
23480 switch (n)
23481 {
23482 case 32:
23483 return DFmode;
23484
23485 case 64:
23486 if (TARGET_FLOAT128_TYPE)
23487 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23488 else
23489 return opt_scalar_float_mode ();
23490
23491 case 128:
23492 return opt_scalar_float_mode ();
23493
23494 default:
23495 /* Those are the only valid _FloatNx types. */
23496 gcc_unreachable ();
23497 }
23498 }
23499 else
23500 {
23501 switch (n)
23502 {
23503 case 32:
23504 return SFmode;
23505
23506 case 64:
23507 return DFmode;
23508
23509 case 128:
23510 if (TARGET_FLOAT128_TYPE)
23511 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23512 else
23513 return opt_scalar_float_mode ();
23514
23515 default:
23516 return opt_scalar_float_mode ();
23517 }
23518 }
23519
23520 }
23521
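/* Illustrative mapping implied by the switch above, assuming -mfloat128
   (so TARGET_FLOAT128_TYPE is set):

     _Float32   -> SFmode        _Float32x -> DFmode
     _Float64   -> DFmode        _Float64x -> TFmode or KFmode
     _Float128  -> TFmode or KFmode (whichever is IEEE 128-bit)
     _Float128x -> unsupported (empty opt_scalar_float_mode)  */
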
23522 /* Target hook for c_mode_for_suffix. */
23523 static machine_mode
23524 rs6000_c_mode_for_suffix (char suffix)
23525 {
23526 if (TARGET_FLOAT128_TYPE)
23527 {
23528 if (suffix == 'q' || suffix == 'Q')
23529 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23530
23531 /* At the moment, we are not defining a suffix for IBM extended double.
23532 If/when the default for -mabi=ieeelongdouble is changed, and we want
23533 to support __ibm128 constants in legacy library code, we may need to
23534 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
23535 'q' as machine-dependent suffixes. The x86_64 port uses 'w' for
23536 __float80 constants. */
23537 }
23538
23539 return VOIDmode;
23540 }
23541
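/* Illustrative use of the suffix handled above (a sketch): with
   -mfloat128,

     __float128 pi = 3.14159265358979323846264338327950288q;

   gives the constant the IEEE 128-bit mode (TFmode when long double is
   IEEE 128-bit, KFmode otherwise); 'Q' behaves the same.  */
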
23542 /* Target hook for invalid_arg_for_unprototyped_fn. */
23543 static const char *
23544 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23545 {
23546 return (!rs6000_darwin64_abi
23547 && typelist == 0
23548 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23549 && (funcdecl == NULL_TREE
23550 || (TREE_CODE (funcdecl) == FUNCTION_DECL
23551 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23552 ? N_("AltiVec argument passed to unprototyped function")
23553 : NULL;
23554 }
23555
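/* Illustrative trigger for the diagnostic above (a sketch; assumes
   AltiVec vector support is enabled):

     extern void f ();               // unprototyped: empty parameter list
     vector int v = { 1, 2, 3, 4 };
     f (v);  // diagnosed: "AltiVec argument passed to unprototyped function"
*/
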
23556 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid setting up the
23557 PIC register by using the hidden function __stack_chk_fail_local
23558 instead of calling __stack_chk_fail directly. Otherwise it is
23559 better to call __stack_chk_fail directly. */
23560
23561 static tree ATTRIBUTE_UNUSED
23562 rs6000_stack_protect_fail (void)
23563 {
23564 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23565 ? default_hidden_stack_protect_fail ()
23566 : default_external_stack_protect_fail ();
23567 }
23568
23569 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23570
23571 #if TARGET_ELF
23572 static unsigned HOST_WIDE_INT
23573 rs6000_asan_shadow_offset (void)
23574 {
23575 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23576 }
23577 #endif
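
/* Illustrative shadow-address computation implied by the offsets above
   (a sketch, assuming ASan's usual 8-byte granularity, i.e. a shadow
   scale shift of 3):

     64-bit: shadow_addr = (addr >> 3) + ((unsigned HOST_WIDE_INT) 1 << 41)
     32-bit: shadow_addr = (addr >> 3) + ((unsigned HOST_WIDE_INT) 1 << 29)  */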
23578 \f
23579 /* Mask options that we want to support inside of attribute((target)) and
23580 #pragma GCC target operations. Note, we do not include things like
23581 64/32-bit, endianness, hard/soft floating point, etc. that would have
23582 different calling sequences. */
23583
23584 struct rs6000_opt_mask {
23585 const char *name; /* option name */
23586 HOST_WIDE_INT mask; /* mask to set */
23587 bool invert; /* invert sense of mask */
23588 bool valid_target; /* option is a target option */
23589 };
23590
23591 static struct rs6000_opt_mask const rs6000_opt_masks[] =
23592 {
23593 { "altivec", OPTION_MASK_ALTIVEC, false, true },
23594 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23595 false, true },
23596 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23597 false, true },
23598 { "cmpb", OPTION_MASK_CMPB, false, true },
23599 { "crypto", OPTION_MASK_CRYPTO, false, true },
23600 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
23601 { "dlmzb", OPTION_MASK_DLMZB, false, true },
23602 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23603 false, true },
23604 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
23605 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
23606 { "fprnd", OPTION_MASK_FPRND, false, true },
23607 { "power10", OPTION_MASK_POWER10, false, true },
23608 { "hard-dfp", OPTION_MASK_DFP, false, true },
23609 { "htm", OPTION_MASK_HTM, false, true },
23610 { "isel", OPTION_MASK_ISEL, false, true },
23611 { "mfcrf", OPTION_MASK_MFCRF, false, true },
23612 { "mfpgpr", 0, false, true },
23613 { "mma", OPTION_MASK_MMA, false, true },
23614 { "modulo", OPTION_MASK_MODULO, false, true },
23615 { "mulhw", OPTION_MASK_MULHW, false, true },
23616 { "multiple", OPTION_MASK_MULTIPLE, false, true },
23617 { "pcrel", OPTION_MASK_PCREL, false, true },
23618 { "popcntb", OPTION_MASK_POPCNTB, false, true },
23619 { "popcntd", OPTION_MASK_POPCNTD, false, true },
23620 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
23621 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
23622 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
23623 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
23624 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
23625 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
23626 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
23627 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
23628 { "prefixed", OPTION_MASK_PREFIXED, false, true },
23629 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
23630 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
23631 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
23632 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
23633 { "string", 0, false, true },
23634 { "update", OPTION_MASK_NO_UPDATE, true , true },
23635 { "vsx", OPTION_MASK_VSX, false, true },
23636 #ifdef OPTION_MASK_64BIT
23637 #if TARGET_AIX_OS
23638 { "aix64", OPTION_MASK_64BIT, false, false },
23639 { "aix32", OPTION_MASK_64BIT, true, false },
23640 #else
23641 { "64", OPTION_MASK_64BIT, false, false },
23642 { "32", OPTION_MASK_64BIT, true, false },
23643 #endif
23644 #endif
23645 #ifdef OPTION_MASK_EABI
23646 { "eabi", OPTION_MASK_EABI, false, false },
23647 #endif
23648 #ifdef OPTION_MASK_LITTLE_ENDIAN
23649 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
23650 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
23651 #endif
23652 #ifdef OPTION_MASK_RELOCATABLE
23653 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
23654 #endif
23655 #ifdef OPTION_MASK_STRICT_ALIGN
23656 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
23657 #endif
23658 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
23659 { "string", 0, false, false },
23660 };
23661
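/* Illustrative use of the mask table above (a sketch): each name can be
   toggled from source code, with the "no-" prefix handled by
   rs6000_inner_target_options further below:

     #pragma GCC target ("vsx,no-crypto")
     __attribute__((target("power9-vector"))) void f (void);

   Entries with valid_target == false (e.g. "64"/"32", where defined) are
   listed so the flags can be printed and so a clearer "not allowed" error
   can be issued if they appear in a pragma or attribute.  */
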
23662 /* Builtin mask mapping for printing the flags. */
23663 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
23664 {
23665 { "altivec", RS6000_BTM_ALTIVEC, false, false },
23666 { "vsx", RS6000_BTM_VSX, false, false },
23667 { "fre", RS6000_BTM_FRE, false, false },
23668 { "fres", RS6000_BTM_FRES, false, false },
23669 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
23670 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
23671 { "popcntd", RS6000_BTM_POPCNTD, false, false },
23672 { "cell", RS6000_BTM_CELL, false, false },
23673 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
23674 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
23675 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
23676 { "crypto", RS6000_BTM_CRYPTO, false, false },
23677 { "htm", RS6000_BTM_HTM, false, false },
23678 { "hard-dfp", RS6000_BTM_DFP, false, false },
23679 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
23680 { "long-double-128", RS6000_BTM_LDBL128, false, false },
23681 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
23682 { "float128", RS6000_BTM_FLOAT128, false, false },
23683 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
23684 { "mma", RS6000_BTM_MMA, false, false },
23685 { "power10", RS6000_BTM_P10, false, false },
23686 };
23687
23688 /* Option variables that we want to support inside attribute((target)) and
23689 #pragma GCC target operations. */
23690
23691 struct rs6000_opt_var {
23692 const char *name; /* option name */
23693 size_t global_offset; /* offset of the option in global_options. */
23694 size_t target_offset; /* offset of the option in target options. */
23695 };
23696
23697 static struct rs6000_opt_var const rs6000_opt_vars[] =
23698 {
23699 { "friz",
23700 offsetof (struct gcc_options, x_TARGET_FRIZ),
23701 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
23702 { "avoid-indexed-addresses",
23703 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
23704 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
23705 { "longcall",
23706 offsetof (struct gcc_options, x_rs6000_default_long_calls),
23707 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
23708 { "optimize-swaps",
23709 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
23710 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
23711 { "allow-movmisalign",
23712 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
23713 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
23714 { "sched-groups",
23715 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
23716 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
23717 { "always-hint",
23718 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
23719 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
23720 { "align-branch-targets",
23721 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
23722 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
23723 { "sched-prolog",
23724 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23725 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23726 { "sched-epilog",
23727 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23728 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23729 { "speculate-indirect-jumps",
23730 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
23731 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
23732 };
23733
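/* Illustrative use of the variable table above (a sketch): these names
   toggle an int option variable through its offset into global_options
   rather than an ISA mask bit, e.g.

     __attribute__((target("longcall"))) void far_call (void);
     #pragma GCC target ("no-speculate-indirect-jumps")
*/
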
23734 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23735 parsing. Return true if there were no errors. */
23736
23737 static bool
23738 rs6000_inner_target_options (tree args, bool attr_p)
23739 {
23740 bool ret = true;
23741
23742 if (args == NULL_TREE)
23743 ;
23744
23745 else if (TREE_CODE (args) == STRING_CST)
23746 {
23747 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23748 char *q;
23749
23750 while ((q = strtok (p, ",")) != NULL)
23751 {
23752 bool error_p = false;
23753 bool not_valid_p = false;
23754 const char *cpu_opt = NULL;
23755
23756 p = NULL;
23757 if (strncmp (q, "cpu=", 4) == 0)
23758 {
23759 int cpu_index = rs6000_cpu_name_lookup (q+4);
23760 if (cpu_index >= 0)
23761 rs6000_cpu_index = cpu_index;
23762 else
23763 {
23764 error_p = true;
23765 cpu_opt = q+4;
23766 }
23767 }
23768 else if (strncmp (q, "tune=", 5) == 0)
23769 {
23770 int tune_index = rs6000_cpu_name_lookup (q+5);
23771 if (tune_index >= 0)
23772 rs6000_tune_index = tune_index;
23773 else
23774 {
23775 error_p = true;
23776 cpu_opt = q+5;
23777 }
23778 }
23779 else
23780 {
23781 size_t i;
23782 bool invert = false;
23783 char *r = q;
23784
23785 error_p = true;
23786 if (strncmp (r, "no-", 3) == 0)
23787 {
23788 invert = true;
23789 r += 3;
23790 }
23791
23792 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23793 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23794 {
23795 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23796
23797 if (!rs6000_opt_masks[i].valid_target)
23798 not_valid_p = true;
23799 else
23800 {
23801 error_p = false;
23802 rs6000_isa_flags_explicit |= mask;
23803
23804 /* VSX needs altivec, so -mvsx automagically sets
23805 altivec and disables -mavoid-indexed-addresses. */
23806 if (!invert)
23807 {
23808 if (mask == OPTION_MASK_VSX)
23809 {
23810 mask |= OPTION_MASK_ALTIVEC;
23811 TARGET_AVOID_XFORM = 0;
23812 }
23813 }
23814
23815 if (rs6000_opt_masks[i].invert)
23816 invert = !invert;
23817
23818 if (invert)
23819 rs6000_isa_flags &= ~mask;
23820 else
23821 rs6000_isa_flags |= mask;
23822 }
23823 break;
23824 }
23825
23826 if (error_p && !not_valid_p)
23827 {
23828 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23829 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23830 {
23831 size_t j = rs6000_opt_vars[i].global_offset;
23832 *((int *) ((char *)&global_options + j)) = !invert;
23833 error_p = false;
23834 not_valid_p = false;
23835 break;
23836 }
23837 }
23838 }
23839
23840 if (error_p)
23841 {
23842 const char *eprefix, *esuffix;
23843
23844 ret = false;
23845 if (attr_p)
23846 {
23847 eprefix = "__attribute__((__target__(";
23848 esuffix = ")))";
23849 }
23850 else
23851 {
23852 eprefix = "#pragma GCC target ";
23853 esuffix = "";
23854 }
23855
23856 if (cpu_opt)
23857 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23858 q, esuffix);
23859 else if (not_valid_p)
23860 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23861 else
23862 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23863 }
23864 }
23865 }
23866
23867 else if (TREE_CODE (args) == TREE_LIST)
23868 {
23869 do
23870 {
23871 tree value = TREE_VALUE (args);
23872 if (value)
23873 {
23874 bool ret2 = rs6000_inner_target_options (value, attr_p);
23875 if (!ret2)
23876 ret = false;
23877 }
23878 args = TREE_CHAIN (args);
23879 }
23880 while (args != NULL_TREE);
23881 }
23882
23883 else
23884 {
23885 error ("attribute %<target%> argument not a string");
23886 return false;
23887 }
23888
23889 return ret;
23890 }
23891
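/* Illustrative diagnostics from the parser above (a sketch; message text
   shown approximately):

     #pragma GCC target ("cpu=bogus")     // invalid cpu 'bogus' for ...
     #pragma GCC target ("64")            // ... is not allowed (valid_target
                                          // is false, where "64" is defined)
     void f (void) __attribute__((__target__("frob")));  // ... is invalid
*/
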
23892 /* Print out the target options as a list for -mdebug=target. */
23893
23894 static void
23895 rs6000_debug_target_options (tree args, const char *prefix)
23896 {
23897 if (args == NULL_TREE)
23898 fprintf (stderr, "%s<NULL>", prefix);
23899
23900 else if (TREE_CODE (args) == STRING_CST)
23901 {
23902 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23903 char *q;
23904
23905 while ((q = strtok (p, ",")) != NULL)
23906 {
23907 p = NULL;
23908 fprintf (stderr, "%s\"%s\"", prefix, q);
23909 prefix = ", ";
23910 }
23911 }
23912
23913 else if (TREE_CODE (args) == TREE_LIST)
23914 {
23915 do
23916 {
23917 tree value = TREE_VALUE (args);
23918 if (value)
23919 {
23920 rs6000_debug_target_options (value, prefix);
23921 prefix = ", ";
23922 }
23923 args = TREE_CHAIN (args);
23924 }
23925 while (args != NULL_TREE);
23926 }
23927
23928 else
23929 gcc_unreachable ();
23930
23931 return;
23932 }
23933
23934 \f
23935 /* Hook to validate attribute((target("..."))). */
23936
23937 static bool
23938 rs6000_valid_attribute_p (tree fndecl,
23939 tree ARG_UNUSED (name),
23940 tree args,
23941 int flags)
23942 {
23943 struct cl_target_option cur_target;
23944 bool ret;
23945 tree old_optimize;
23946 tree new_target, new_optimize;
23947 tree func_optimize;
23948
23949 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23950
23951 if (TARGET_DEBUG_TARGET)
23952 {
23953 tree tname = DECL_NAME (fndecl);
23954 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23955 if (tname)
23956 fprintf (stderr, "function: %.*s\n",
23957 (int) IDENTIFIER_LENGTH (tname),
23958 IDENTIFIER_POINTER (tname));
23959 else
23960 fprintf (stderr, "function: unknown\n");
23961
23962 fprintf (stderr, "args:");
23963 rs6000_debug_target_options (args, " ");
23964 fprintf (stderr, "\n");
23965
23966 if (flags)
23967 fprintf (stderr, "flags: 0x%x\n", flags);
23968
23969 fprintf (stderr, "--------------------\n");
23970 }
23971
23972 /* attribute((target("default"))) does nothing, beyond
23973 affecting multi-versioning. */
23974 if (TREE_VALUE (args)
23975 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23976 && TREE_CHAIN (args) == NULL_TREE
23977 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23978 return true;
23979
23980 old_optimize = build_optimization_node (&global_options,
23981 &global_options_set);
23982 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23983
23984 /* If the function changed the optimization levels as well as setting target
23985 options, start with the optimizations specified. */
23986 if (func_optimize && func_optimize != old_optimize)
23987 cl_optimization_restore (&global_options, &global_options_set,
23988 TREE_OPTIMIZATION (func_optimize));
23989
23990 /* The target attributes may also change some optimization flags, so update
23991 the optimization options if necessary. */
23992 cl_target_option_save (&cur_target, &global_options, &global_options_set);
23993 rs6000_cpu_index = rs6000_tune_index = -1;
23994 ret = rs6000_inner_target_options (args, true);
23995
23996 /* Set up any additional state. */
23997 if (ret)
23998 {
23999 ret = rs6000_option_override_internal (false);
24000 new_target = build_target_option_node (&global_options,
24001 &global_options_set);
24002 }
24003 else
24004 new_target = NULL;
24005
24006 new_optimize = build_optimization_node (&global_options,
24007 &global_options_set);
24008
24009 if (!new_target)
24010 ret = false;
24011
24012 else if (fndecl)
24013 {
24014 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24015
24016 if (old_optimize != new_optimize)
24017 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24018 }
24019
24020 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24021
24022 if (old_optimize != new_optimize)
24023 cl_optimization_restore (&global_options, &global_options_set,
24024 TREE_OPTIMIZATION (old_optimize));
24025
24026 return ret;
24027 }
24028
24029 \f
24030 /* Hook to validate the current #pragma GCC target and set the state, and
24031 update the macros based on what was changed. If ARGS is NULL, then
24032 POP_TARGET is used to reset the options. */
24033
24034 bool
24035 rs6000_pragma_target_parse (tree args, tree pop_target)
24036 {
24037 tree prev_tree = build_target_option_node (&global_options,
24038 &global_options_set);
24039 tree cur_tree;
24040 struct cl_target_option *prev_opt, *cur_opt;
24041 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24042 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24043
24044 if (TARGET_DEBUG_TARGET)
24045 {
24046 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24047 fprintf (stderr, "args:");
24048 rs6000_debug_target_options (args, " ");
24049 fprintf (stderr, "\n");
24050
24051 if (pop_target)
24052 {
24053 fprintf (stderr, "pop_target:\n");
24054 debug_tree (pop_target);
24055 }
24056 else
24057 fprintf (stderr, "pop_target: <NULL>\n");
24058
24059 fprintf (stderr, "--------------------\n");
24060 }
24061
24062 if (! args)
24063 {
24064 cur_tree = ((pop_target)
24065 ? pop_target
24066 : target_option_default_node);
24067 cl_target_option_restore (&global_options, &global_options_set,
24068 TREE_TARGET_OPTION (cur_tree));
24069 }
24070 else
24071 {
24072 rs6000_cpu_index = rs6000_tune_index = -1;
24073 if (!rs6000_inner_target_options (args, false)
24074 || !rs6000_option_override_internal (false)
24075 || (cur_tree = build_target_option_node (&global_options,
24076 &global_options_set))
24077 == NULL_TREE)
24078 {
24079 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24080 fprintf (stderr, "invalid pragma\n");
24081
24082 return false;
24083 }
24084 }
24085
24086 target_option_current_node = cur_tree;
24087 rs6000_activate_target_options (target_option_current_node);
24088
24089 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24090 change the macros that are defined. */
24091 if (rs6000_target_modify_macros_ptr)
24092 {
24093 prev_opt = TREE_TARGET_OPTION (prev_tree);
24094 prev_bumask = prev_opt->x_rs6000_builtin_mask;
24095 prev_flags = prev_opt->x_rs6000_isa_flags;
24096
24097 cur_opt = TREE_TARGET_OPTION (cur_tree);
24098 cur_flags = cur_opt->x_rs6000_isa_flags;
24099 cur_bumask = cur_opt->x_rs6000_builtin_mask;
24100
24101 diff_bumask = (prev_bumask ^ cur_bumask);
24102 diff_flags = (prev_flags ^ cur_flags);
24103
24104 if ((diff_flags != 0) || (diff_bumask != 0))
24105 {
24106 /* Delete old macros. */
24107 rs6000_target_modify_macros_ptr (false,
24108 prev_flags & diff_flags,
24109 prev_bumask & diff_bumask);
24110
24111 /* Define new macros. */
24112 rs6000_target_modify_macros_ptr (true,
24113 cur_flags & diff_flags,
24114 cur_bumask & diff_bumask);
24115 }
24116 }
24117
24118 return true;
24119 }
24120
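/* Illustrative use from C code (a sketch; __POWER9_VECTOR__ is one of the
   macros maintained through rs6000_target_modify_macros_ptr):

     #pragma GCC push_options
     #pragma GCC target ("power9-vector")
     // __POWER9_VECTOR__ is defined here; ISA 3.0 vector builtins usable.
     #pragma GCC pop_options
     // The hook runs with ARGS == NULL and restores the saved options.
*/
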
24121 \f
24122 /* Remember the last target of rs6000_set_current_function. */
24123 static GTY(()) tree rs6000_previous_fndecl;
24124
24125 /* Restore target's globals from NEW_TREE and invalidate the
24126 rs6000_previous_fndecl cache. */
24127
24128 void
24129 rs6000_activate_target_options (tree new_tree)
24130 {
24131 cl_target_option_restore (&global_options, &global_options_set,
24132 TREE_TARGET_OPTION (new_tree));
24133 if (TREE_TARGET_GLOBALS (new_tree))
24134 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24135 else if (new_tree == target_option_default_node)
24136 restore_target_globals (&default_target_globals);
24137 else
24138 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24139 rs6000_previous_fndecl = NULL_TREE;
24140 }
24141
24142 /* Establish appropriate back-end context for processing the function
24143 FNDECL. The argument might be NULL to indicate processing at top
24144 level, outside of any function scope. */
24145 static void
24146 rs6000_set_current_function (tree fndecl)
24147 {
24148 if (TARGET_DEBUG_TARGET)
24149 {
24150 fprintf (stderr, "\n==================== rs6000_set_current_function");
24151
24152 if (fndecl)
24153 fprintf (stderr, ", fndecl %s (%p)",
24154 (DECL_NAME (fndecl)
24155 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24156 : "<unknown>"), (void *)fndecl);
24157
24158 if (rs6000_previous_fndecl)
24159 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24160
24161 fprintf (stderr, "\n");
24162 }
24163
24164 /* Only change the context if the function changes. This hook is called
24165 several times in the course of compiling a function, and we don't want to
24166 slow things down too much or call target_reinit when it isn't safe. */
24167 if (fndecl == rs6000_previous_fndecl)
24168 return;
24169
24170 tree old_tree;
24171 if (rs6000_previous_fndecl == NULL_TREE)
24172 old_tree = target_option_current_node;
24173 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24174 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24175 else
24176 old_tree = target_option_default_node;
24177
24178 tree new_tree;
24179 if (fndecl == NULL_TREE)
24180 {
24181 if (old_tree != target_option_current_node)
24182 new_tree = target_option_current_node;
24183 else
24184 new_tree = NULL_TREE;
24185 }
24186 else
24187 {
24188 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24189 if (new_tree == NULL_TREE)
24190 new_tree = target_option_default_node;
24191 }
24192
24193 if (TARGET_DEBUG_TARGET)
24194 {
24195 if (new_tree)
24196 {
24197 fprintf (stderr, "\nnew fndecl target specific options:\n");
24198 debug_tree (new_tree);
24199 }
24200
24201 if (old_tree)
24202 {
24203 fprintf (stderr, "\nold fndecl target specific options:\n");
24204 debug_tree (old_tree);
24205 }
24206
24207 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24208 fprintf (stderr, "--------------------\n");
24209 }
24210
24211 if (new_tree && old_tree != new_tree)
24212 rs6000_activate_target_options (new_tree);
24213
24214 if (fndecl)
24215 rs6000_previous_fndecl = fndecl;
24216 }
24217
24218 \f
24219 /* Save the current options */
24220
24221 static void
24222 rs6000_function_specific_save (struct cl_target_option *ptr,
24223 struct gcc_options *opts,
24224 struct gcc_options */* opts_set */)
24225 {
24226 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24227 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24228 }
24229
24230 /* Restore the current options */
24231
24232 static void
24233 rs6000_function_specific_restore (struct gcc_options *opts,
24234 struct gcc_options */* opts_set */,
24235 struct cl_target_option *ptr)
24236
24237 {
24238 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24239 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24240 (void) rs6000_option_override_internal (false);
24241 }
24242
24243 /* Print the current options */
24244
24245 static void
24246 rs6000_function_specific_print (FILE *file, int indent,
24247 struct cl_target_option *ptr)
24248 {
24249 rs6000_print_isa_options (file, indent, "Isa options set",
24250 ptr->x_rs6000_isa_flags);
24251
24252 rs6000_print_isa_options (file, indent, "Isa options explicit",
24253 ptr->x_rs6000_isa_flags_explicit);
24254 }
24255
24256 /* Helper function to print the current isa or misc options on a line. */
24257
24258 static void
24259 rs6000_print_options_internal (FILE *file,
24260 int indent,
24261 const char *string,
24262 HOST_WIDE_INT flags,
24263 const char *prefix,
24264 const struct rs6000_opt_mask *opts,
24265 size_t num_elements)
24266 {
24267 size_t i;
24268 size_t start_column = 0;
24269 size_t cur_column;
24270 size_t max_column = 120;
24271 size_t prefix_len = strlen (prefix);
24272 size_t comma_len = 0;
24273 const char *comma = "";
24274
24275 if (indent)
24276 start_column += fprintf (file, "%*s", indent, "");
24277
24278 if (!flags)
24279 {
24280 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
24281 return;
24282 }
24283
24284 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
24285
24286 /* Print the various mask options. */
24287 cur_column = start_column;
24288 for (i = 0; i < num_elements; i++)
24289 {
24290 bool invert = opts[i].invert;
24291 const char *name = opts[i].name;
24292 const char *no_str = "";
24293 HOST_WIDE_INT mask = opts[i].mask;
24294 size_t len = comma_len + prefix_len + strlen (name);
24295
24296 if (!invert)
24297 {
24298 if ((flags & mask) == 0)
24299 {
24300 no_str = "no-";
24301 len += strlen ("no-");
24302 }
24303
24304 flags &= ~mask;
24305 }
24306
24307 else
24308 {
24309 if ((flags & mask) != 0)
24310 {
24311 no_str = "no-";
24312 len += strlen ("no-");
24313 }
24314
24315 flags |= mask;
24316 }
24317
24318 cur_column += len;
24319 if (cur_column > max_column)
24320 {
24321 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
24322 cur_column = start_column + len;
24323 comma = "";
24324 }
24325
24326 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24327 comma = ", ";
24328 comma_len = strlen (", ");
24329 }
24330
24331 fputs ("\n", file);
24332 }
24333
24334 /* Helper function to print the current isa options on a line. */
24335
24336 static void
24337 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24338 HOST_WIDE_INT flags)
24339 {
24340 rs6000_print_options_internal (file, indent, string, flags, "-m",
24341 &rs6000_opt_masks[0],
24342 ARRAY_SIZE (rs6000_opt_masks));
24343 }
24344
24345 static void
24346 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24347 HOST_WIDE_INT flags)
24348 {
24349 rs6000_print_options_internal (file, indent, string, flags, "",
24350 &rs6000_builtin_mask_names[0],
24351 ARRAY_SIZE (rs6000_builtin_mask_names));
24352 }
24353
24354 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24355 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24356 -mupper-regs-df, etc.).
24357
24358 If the user used -mno-power8-vector, we need to turn off all of the implicit
24359 ISA 2.07 and 3.0 options that relate to the vector unit.
24360
24361 If the user used -mno-power9-vector, we need to turn off all of the implicit
24362 ISA 3.0 options that relate to the vector unit.
24363
24364 This function does not handle explicit options such as the user specifying
24365 -mdirect-move. These are handled in rs6000_option_override_internal, and
24366 the appropriate error is given if needed.
24367
24368 We return a mask of all of the implicit options that should not be enabled
24369 by default. */
24370
24371 static HOST_WIDE_INT
24372 rs6000_disable_incompatible_switches (void)
24373 {
24374 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24375 size_t i, j;
24376
24377 static const struct {
24378 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24379 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24380 const char *const name; /* name of the switch. */
24381 } flags[] = {
24382 { OPTION_MASK_POWER10, OTHER_POWER10_MASKS, "power10" },
24383 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24384 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24385 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24386 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24387 };
24388
24389 for (i = 0; i < ARRAY_SIZE (flags); i++)
24390 {
24391 HOST_WIDE_INT no_flag = flags[i].no_flag;
24392
24393 if ((rs6000_isa_flags & no_flag) == 0
24394 && (rs6000_isa_flags_explicit & no_flag) != 0)
24395 {
24396 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24397 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24398 & rs6000_isa_flags
24399 & dep_flags);
24400
24401 if (set_flags)
24402 {
24403 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24404 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24405 {
24406 set_flags &= ~rs6000_opt_masks[j].mask;
24407 error ("%<-mno-%s%> turns off %<-m%s%>",
24408 flags[i].name,
24409 rs6000_opt_masks[j].name);
24410 }
24411
24412 gcc_assert (!set_flags);
24413 }
24414
24415 rs6000_isa_flags &= ~dep_flags;
24416 ignore_masks |= no_flag | dep_flags;
24417 }
24418 }
24419
24420 return ignore_masks;
24421 }
24422
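/* Illustrative diagnostic from the dependency table above (a sketch):

     gcc -mcpu=power9 -mno-vsx -mpower9-vector ...
       error: '-mno-vsx' turns off '-mpower9-vector'

   whereas plain -mno-vsx silently clears dependent flags that were only
   enabled implicitly by the -mcpu= default.  */
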
24423 \f
24424 /* Helper function for printing the function name when debugging. */
24425
24426 static const char *
24427 get_decl_name (tree fn)
24428 {
24429 tree name;
24430
24431 if (!fn)
24432 return "<null>";
24433
24434 name = DECL_NAME (fn);
24435 if (!name)
24436 return "<no-name>";
24437
24438 return IDENTIFIER_POINTER (name);
24439 }
24440
24441 /* Return the clone id of the target we are compiling code for in a target
24442 clone. Clone ids run from 0 (default) to CLONE_MAX-1 and form the
24443 priority list for the target clones, ordered from lowest to highest
24444 priority. */
24445
24446 static int
24447 rs6000_clone_priority (tree fndecl)
24448 {
24449 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24450 HOST_WIDE_INT isa_masks;
24451 int ret = CLONE_DEFAULT;
24452 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24453 const char *attrs_str = NULL;
24454
24455 attrs = TREE_VALUE (TREE_VALUE (attrs));
24456 attrs_str = TREE_STRING_POINTER (attrs);
24457
24458 /* Return priority zero for the default function; otherwise return the
24459 ISA needed for the function. */
24460 if (strcmp (attrs_str, "default") != 0)
24461 {
24462 if (fn_opts == NULL_TREE)
24463 fn_opts = target_option_default_node;
24464
24465 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24466 isa_masks = rs6000_isa_flags;
24467 else
24468 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24469
24470 for (ret = CLONE_MAX - 1; ret != 0; ret--)
24471 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24472 break;
24473 }
24474
24475 if (TARGET_DEBUG_TARGET)
24476 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24477 get_decl_name (fndecl), ret);
24478
24479 return ret;
24480 }
24481
24482 /* This compares the priority of target features in function DECL1 and DECL2.
24483 It returns positive value if DECL1 is higher priority, negative value if
24484 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24485 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24486
24487 static int
24488 rs6000_compare_version_priority (tree decl1, tree decl2)
24489 {
24490 int priority1 = rs6000_clone_priority (decl1);
24491 int priority2 = rs6000_clone_priority (decl2);
24492 int ret = priority1 - priority2;
24493
24494 if (TARGET_DEBUG_TARGET)
24495 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24496 get_decl_name (decl1), get_decl_name (decl2), ret);
24497
24498 return ret;
24499 }
24500
24501 /* Make a dispatcher declaration for the multi-versioned function DECL.
24502 Calls to DECL function will be replaced with calls to the dispatcher
24503 by the front-end. Returns the decl of the dispatcher function. */
24504
24505 static tree
24506 rs6000_get_function_versions_dispatcher (void *decl)
24507 {
24508 tree fn = (tree) decl;
24509 struct cgraph_node *node = NULL;
24510 struct cgraph_node *default_node = NULL;
24511 struct cgraph_function_version_info *node_v = NULL;
24512 struct cgraph_function_version_info *first_v = NULL;
24513
24514 tree dispatch_decl = NULL;
24515
24516 struct cgraph_function_version_info *default_version_info = NULL;
24517 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24518
24519 if (TARGET_DEBUG_TARGET)
24520 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24521 get_decl_name (fn));
24522
24523 node = cgraph_node::get (fn);
24524 gcc_assert (node != NULL);
24525
24526 node_v = node->function_version ();
24527 gcc_assert (node_v != NULL);
24528
24529 if (node_v->dispatcher_resolver != NULL)
24530 return node_v->dispatcher_resolver;
24531
24532 /* Find the default version and make it the first node. */
24533 first_v = node_v;
24534 /* Go to the beginning of the chain. */
24535 while (first_v->prev != NULL)
24536 first_v = first_v->prev;
24537
24538 default_version_info = first_v;
24539 while (default_version_info != NULL)
24540 {
24541 const tree decl2 = default_version_info->this_node->decl;
24542 if (is_function_default_version (decl2))
24543 break;
24544 default_version_info = default_version_info->next;
24545 }
24546
24547 /* If there is no default node, just return NULL. */
24548 if (default_version_info == NULL)
24549 return NULL;
24550
24551 /* Make default info the first node. */
24552 if (first_v != default_version_info)
24553 {
24554 default_version_info->prev->next = default_version_info->next;
24555 if (default_version_info->next)
24556 default_version_info->next->prev = default_version_info->prev;
24557 first_v->prev = default_version_info;
24558 default_version_info->next = first_v;
24559 default_version_info->prev = NULL;
24560 }
24561
24562 default_node = default_version_info->this_node;
24563
24564 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24565 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24566 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24567 "exports hardware capability bits");
24568 #else
24569
24570 if (targetm.has_ifunc_p ())
24571 {
24572 struct cgraph_function_version_info *it_v = NULL;
24573 struct cgraph_node *dispatcher_node = NULL;
24574 struct cgraph_function_version_info *dispatcher_version_info = NULL;
24575
24576 /* Right now, the dispatching is done via ifunc. */
24577 dispatch_decl = make_dispatcher_decl (default_node->decl);
24578
24579 dispatcher_node = cgraph_node::get_create (dispatch_decl);
24580 gcc_assert (dispatcher_node != NULL);
24581 dispatcher_node->dispatcher_function = 1;
24582 dispatcher_version_info
24583 = dispatcher_node->insert_new_function_version ();
24584 dispatcher_version_info->next = default_version_info;
24585 dispatcher_node->definition = 1;
24586
24587 /* Set the dispatcher for all the versions. */
24588 it_v = default_version_info;
24589 while (it_v != NULL)
24590 {
24591 it_v->dispatcher_resolver = dispatch_decl;
24592 it_v = it_v->next;
24593 }
24594 }
24595 else
24596 {
24597 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24598 "multiversioning needs ifunc which is not supported "
24599 "on this target");
24600 }
24601 #endif
24602
24603 return dispatch_decl;
24604 }
24605
24606 /* Make the resolver function decl to dispatch the versions of a multi-
24607 versioned function, DEFAULT_DECL. Create an empty basic block in the
24608 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
24609 function. */
24610
24611 static tree
24612 make_resolver_func (const tree default_decl,
24613 const tree dispatch_decl,
24614 basic_block *empty_bb)
24615 {
24616 /* Make the resolver function static. The resolver function returns
24617 void *. */
24618 tree decl_name = clone_function_name (default_decl, "resolver");
24619 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
24620 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
24621 tree decl = build_fn_decl (resolver_name, type);
24622 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
24623
24624 DECL_NAME (decl) = decl_name;
24625 TREE_USED (decl) = 1;
24626 DECL_ARTIFICIAL (decl) = 1;
24627 DECL_IGNORED_P (decl) = 0;
24628 TREE_PUBLIC (decl) = 0;
24629 DECL_UNINLINABLE (decl) = 1;
24630
24631 /* Resolver is not external, body is generated. */
24632 DECL_EXTERNAL (decl) = 0;
24633 DECL_EXTERNAL (dispatch_decl) = 0;
24634
24635 DECL_CONTEXT (decl) = NULL_TREE;
24636 DECL_INITIAL (decl) = make_node (BLOCK);
24637 DECL_STATIC_CONSTRUCTOR (decl) = 0;
24638
24639 if (DECL_COMDAT_GROUP (default_decl)
24640 || TREE_PUBLIC (default_decl))
24641 {
24642 /* In this case, each translation unit with a call to this
24643 versioned function will put out a resolver. Ensure it
24644 is comdat to keep just one copy. */
24645 DECL_COMDAT (decl) = 1;
24646 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
24647 }
24648 else
24649 TREE_PUBLIC (dispatch_decl) = 0;
24650
24651 /* Build result decl and add to function_decl. */
24652 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
24653 DECL_CONTEXT (t) = decl;
24654 DECL_ARTIFICIAL (t) = 1;
24655 DECL_IGNORED_P (t) = 1;
24656 DECL_RESULT (decl) = t;
24657
24658 gimplify_function_tree (decl);
24659 push_cfun (DECL_STRUCT_FUNCTION (decl));
24660 *empty_bb = init_lowered_empty_function (decl, false,
24661 profile_count::uninitialized ());
24662
24663 cgraph_node::add_new_function (decl, true);
24664 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
24665
24666 pop_cfun ();
24667
24668 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
24669 DECL_ATTRIBUTES (dispatch_decl)
24670 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
24671
24672 cgraph_node::create_same_body_alias (dispatch_decl, decl);
24673
24674 return decl;
24675 }
24676
24677 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
24678 return a pointer to VERSION_DECL if we are running on a machine that
24679 supports the index CLONE_ISA hardware architecture bits. This function will
24680 be called during version dispatch to decide which function version to
24681 execute. It returns the basic block at the end, to which more conditions
24682 can be added. */
24683
24684 static basic_block
24685 add_condition_to_bb (tree function_decl, tree version_decl,
24686 int clone_isa, basic_block new_bb)
24687 {
24688 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
24689
24690 gcc_assert (new_bb != NULL);
24691 gimple_seq gseq = bb_seq (new_bb);
24692
24694 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
24695 build_fold_addr_expr (version_decl));
24696 tree result_var = create_tmp_var (ptr_type_node);
24697 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
24698 gimple *return_stmt = gimple_build_return (result_var);
24699
24700 if (clone_isa == CLONE_DEFAULT)
24701 {
24702 gimple_seq_add_stmt (&gseq, convert_stmt);
24703 gimple_seq_add_stmt (&gseq, return_stmt);
24704 set_bb_seq (new_bb, gseq);
24705 gimple_set_bb (convert_stmt, new_bb);
24706 gimple_set_bb (return_stmt, new_bb);
24707 pop_cfun ();
24708 return new_bb;
24709 }
24710
24711 tree bool_zero = build_int_cst (bool_int_type_node, 0);
24712 tree cond_var = create_tmp_var (bool_int_type_node);
24713 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
24714 const char *arg_str = rs6000_clone_map[clone_isa].name;
24715 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24716 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24717 gimple_call_set_lhs (call_cond_stmt, cond_var);
24718
24719 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
24720 gimple_set_bb (call_cond_stmt, new_bb);
24721 gimple_seq_add_stmt (&gseq, call_cond_stmt);
24722
24723 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
24724 NULL_TREE, NULL_TREE);
24725 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
24726 gimple_set_bb (if_else_stmt, new_bb);
24727 gimple_seq_add_stmt (&gseq, if_else_stmt);
24728
24729 gimple_seq_add_stmt (&gseq, convert_stmt);
24730 gimple_seq_add_stmt (&gseq, return_stmt);
24731 set_bb_seq (new_bb, gseq);
24732
24733 basic_block bb1 = new_bb;
24734 edge e12 = split_block (bb1, if_else_stmt);
24735 basic_block bb2 = e12->dest;
24736 e12->flags &= ~EDGE_FALLTHRU;
24737 e12->flags |= EDGE_TRUE_VALUE;
24738
24739 edge e23 = split_block (bb2, return_stmt);
24740 gimple_set_bb (convert_stmt, bb2);
24741 gimple_set_bb (return_stmt, bb2);
24742
24743 basic_block bb3 = e23->dest;
24744 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24745
24746 remove_edge (e23);
24747 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24748
24749 pop_cfun ();
24750 return bb3;
24751 }
24752
24753 /* This function generates the dispatch function for multi-versioned functions.
24754 DISPATCH_DECL is the function which will contain the dispatch logic.
24755 FNDECLS are the function choices for dispatch, and is a tree chain.
24756 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24757 code is generated. */
24758
24759 static int
24760 dispatch_function_versions (tree dispatch_decl,
24761 void *fndecls_p,
24762 basic_block *empty_bb)
24763 {
24764 int ix;
24765 tree ele;
24766 vec<tree> *fndecls;
24767 tree clones[CLONE_MAX];
24768
24769 if (TARGET_DEBUG_TARGET)
24770 fputs ("dispatch_function_versions, top\n", stderr);
24771
24772 gcc_assert (dispatch_decl != NULL
24773 && fndecls_p != NULL
24774 && empty_bb != NULL);
24775
24776 /* fndecls_p is actually a vector. */
24777 fndecls = static_cast<vec<tree> *> (fndecls_p);
24778
24779 /* At least one more version other than the default. */
24780 gcc_assert (fndecls->length () >= 2);
24781
24782 /* The first version in the vector is the default decl. */
24783 memset ((void *) clones, '\0', sizeof (clones));
24784 clones[CLONE_DEFAULT] = (*fndecls)[0];
24785
24786 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a
24787 NOP here (on the x86_64, it is not a NOP). The builtin function
24788 __builtin_cpu_supports ensures that the TOC fields are set up by
24789 requiring a recent glibc. If we ever need to call __builtin_cpu_init,
24790 we would need to insert the code here to do the call. */
24791
24792 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24793 {
24794 int priority = rs6000_clone_priority (ele);
24795 if (!clones[priority])
24796 clones[priority] = ele;
24797 }
24798
24799 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24800 if (clones[ix])
24801 {
24802 if (TARGET_DEBUG_TARGET)
24803 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24804 ix, get_decl_name (clones[ix]));
24805
24806 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24807 *empty_bb);
24808 }
24809
24810 return 0;
24811 }
24812
24813 /* Generate the dispatching code body to dispatch multi-versioned function
24814 DECL. The target hook is called to process the "target" attributes and
24815 provide the code to dispatch the right function at run-time. NODE points
24816 to the dispatcher decl whose body will be created. */
24817
24818 static tree
24819 rs6000_generate_version_dispatcher_body (void *node_p)
24820 {
24821 tree resolver;
24822 basic_block empty_bb;
24823 struct cgraph_node *node = (cgraph_node *) node_p;
24824 struct cgraph_function_version_info *ninfo = node->function_version ();
24825
24826 if (ninfo->dispatcher_resolver)
24827 return ninfo->dispatcher_resolver;
24828
24829 /* node is going to be an alias, so remove the finalized bit. */
24830 node->definition = false;
24831
24832 /* The first version in the chain corresponds to the default version. */
24833 ninfo->dispatcher_resolver = resolver
24834 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24835
24836 if (TARGET_DEBUG_TARGET)
24837 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
24838 get_decl_name (resolver));
24839
24840 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24841 auto_vec<tree, 2> fn_ver_vec;
24842
24843 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24844 vinfo;
24845 vinfo = vinfo->next)
24846 {
24847 struct cgraph_node *version = vinfo->this_node;
24848 /* Check for virtual functions here again, as by this time it should
24849 have been determined if this function needs a vtable index or
24850 not. This happens for methods in derived classes that override
24851 virtual methods in base classes but are not explicitly marked as
24852 virtual. */
24853 if (DECL_VINDEX (version->decl))
24854 sorry ("Virtual function multiversioning not supported");
24855
24856 fn_ver_vec.safe_push (version->decl);
24857 }
24858
24859 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24860 cgraph_edge::rebuild_edges ();
24861 pop_cfun ();
24862 return resolver;
24863 }
24864
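/* Illustrative use of the dispatcher machinery above (a sketch; requires
   the GLIBC hwcap support diagnosed in
   rs6000_get_function_versions_dispatcher):

     __attribute__((target_clones ("cpu=power9", "default")))
     long sum (const long *p, long n);

   The generated resolver tests __builtin_cpu_supports at load time (via
   ifunc) and returns the power9 clone on ISA 3.0 hardware, falling back
   to the default clone otherwise.  */
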
24865 \f
24866 /* Hook to determine if one function can safely inline another. */
24867
24868 static bool
24869 rs6000_can_inline_p (tree caller, tree callee)
24870 {
24871 bool ret = false;
24872 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24873 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24874
24875 /* If the callee has no option attributes, then it is ok to inline. */
24876 if (!callee_tree)
24877 ret = true;
24878
24879 else
24880 {
24881 HOST_WIDE_INT caller_isa;
24882 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24883 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24884 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24885
24886 /* If the caller has option attributes, then use them.
24887 Otherwise, use the command line options. */
24888 if (caller_tree)
24889 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24890 else
24891 caller_isa = rs6000_isa_flags;
24892
24893 /* The callee's options must be a subset of the caller's options, i.e.
24894 a vsx function may inline an altivec function, but a no-vsx function
24895 must not inline a vsx function. However, for those options that the
24896 callee has explicitly enabled or disabled, then we must enforce that
24897 the callee's and caller's options match exactly; see PR70010. */
24898 if (((caller_isa & callee_isa) == callee_isa)
24899 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24900 ret = true;
24901 }
24902
24903 if (TARGET_DEBUG_TARGET)
24904 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
24905 get_decl_name (caller), get_decl_name (callee),
24906 (ret ? "can" : "cannot"));
24907
24908 return ret;
24909 }
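
/* Illustrative consequences of the subset rule above (a sketch):

     __attribute__((target("vsx")))    void callee_vsx (void);
     __attribute__((target("no-vsx"))) void callee_novsx (void);

   callee_vsx cannot be inlined into a caller built without VSX (its ISA
   flags are not a subset of the caller's), and callee_novsx cannot be
   inlined into a VSX caller either: an explicitly set callee flag must
   match the caller exactly (see PR70010).  */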
24910 \f
24911 /* Allocate a stack temp and fix up the address so it meets the particular
24912 memory requirements (either offsettable or REG+REG addressing). */
24913
24914 rtx
24915 rs6000_allocate_stack_temp (machine_mode mode,
24916 bool offsettable_p,
24917 bool reg_reg_p)
24918 {
24919 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24920 rtx addr = XEXP (stack, 0);
24921 int strict_p = reload_completed;
24922
24923 if (!legitimate_indirect_address_p (addr, strict_p))
24924 {
24925 if (offsettable_p
24926 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24927 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24928
24929 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24930 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24931 }
24932
24933 return stack;
24934 }
24935
24936 /* Given a memory reference X, if its address is not reg or reg+reg
24937 addressing, convert it to such a form, for memory reference
24938 instructions like STFIWX and LDBRX that only accept reg+reg addressing. */
24939
24940 rtx
24941 rs6000_force_indexed_or_indirect_mem (rtx x)
24942 {
24943 machine_mode mode = GET_MODE (x);
24944
24945 gcc_assert (MEM_P (x));
24946 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24947 {
24948 rtx addr = XEXP (x, 0);
24949 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24950 {
24951 rtx reg = XEXP (addr, 0);
24952 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24953 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24954 gcc_assert (REG_P (reg));
24955 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24956 addr = reg;
24957 }
24958 else if (GET_CODE (addr) == PRE_MODIFY)
24959 {
24960 rtx reg = XEXP (addr, 0);
24961 rtx expr = XEXP (addr, 1);
24962 gcc_assert (REG_P (reg));
24963 gcc_assert (GET_CODE (expr) == PLUS);
24964 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24965 addr = reg;
24966 }
24967
24968 if (GET_CODE (addr) == PLUS)
24969 {
24970 rtx op0 = XEXP (addr, 0);
24971 rtx op1 = XEXP (addr, 1);
24972 op0 = force_reg (Pmode, op0);
24973 op1 = force_reg (Pmode, op1);
24974 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24975 }
24976 else
24977 x = replace_equiv_address (x, force_reg (Pmode, addr));
24978 }
24979
24980 return x;
24981 }
24982
24983 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24984
24985 On the RS/6000, all integer constants are acceptable, most won't be valid
24986 for particular insns, though. Only easy FP constants are acceptable. */
24987
24988 static bool
24989 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24990 {
24991 if (TARGET_ELF && tls_referenced_p (x))
24992 return false;
24993
24994 if (CONST_DOUBLE_P (x))
24995 return easy_fp_constant (x, mode);
24996
24997 if (GET_CODE (x) == CONST_VECTOR)
24998 return easy_vector_constant (x, mode);
24999
25000 return true;
25001 }
25002
25003 \f
25004 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25005
25006 static bool
25007 chain_already_loaded (rtx_insn *last)
25008 {
25009 for (; last != NULL; last = PREV_INSN (last))
25010 {
25011 if (NONJUMP_INSN_P (last))
25012 {
25013 rtx patt = PATTERN (last);
25014
25015 if (GET_CODE (patt) == SET)
25016 {
25017 rtx lhs = XEXP (patt, 0);
25018
25019 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25020 return true;
25021 }
25022 }
25023 }
25024 return false;
25025 }
25026
25027 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25028
25029 void
25030 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25031 {
25032 rtx func = func_desc;
25033 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25034 rtx toc_load = NULL_RTX;
25035 rtx toc_restore = NULL_RTX;
25036 rtx func_addr;
25037 rtx abi_reg = NULL_RTX;
25038 rtx call[5];
25039 int n_call;
25040 rtx insn;
25041 bool is_pltseq_longcall;
25042
25043 if (global_tlsarg)
25044 tlsarg = global_tlsarg;
25045
25046 /* Handle longcall attributes. */
25047 is_pltseq_longcall = false;
25048 if ((INTVAL (cookie) & CALL_LONG) != 0
25049 && GET_CODE (func_desc) == SYMBOL_REF)
25050 {
25051 func = rs6000_longcall_ref (func_desc, tlsarg);
25052 if (TARGET_PLTSEQ)
25053 is_pltseq_longcall = true;
25054 }
25055
25056 /* Handle indirect calls. */
25057 if (!SYMBOL_REF_P (func)
25058 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25059 {
25060 if (!rs6000_pcrel_p ())
25061 {
25062 /* Save the TOC into its reserved slot before the call,
25063 and prepare to restore it after the call. */
25064 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25065 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25066 gen_rtvec (1, stack_toc_offset),
25067 UNSPEC_TOCSLOT);
25068 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25069
25070 /* Can we optimize saving the TOC in the prologue or
25071 do we need to do it at every call? */
25072 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25073 cfun->machine->save_toc_in_prologue = true;
25074 else
25075 {
25076 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25077 rtx stack_toc_mem = gen_frame_mem (Pmode,
25078 gen_rtx_PLUS (Pmode, stack_ptr,
25079 stack_toc_offset));
25080 MEM_VOLATILE_P (stack_toc_mem) = 1;
25081 if (is_pltseq_longcall)
25082 {
25083 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25084 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25085 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25086 }
25087 else
25088 emit_move_insn (stack_toc_mem, toc_reg);
25089 }
25090 }
25091
25092 if (DEFAULT_ABI == ABI_ELFv2)
25093 {
25094 /* A function pointer in the ELFv2 ABI is just a plain address, but
25095 the ABI requires it to be loaded into r12 before the call. */
25096 func_addr = gen_rtx_REG (Pmode, 12);
25097 emit_move_insn (func_addr, func);
25098 abi_reg = func_addr;
25099 /* Indirect calls via CTR are strongly preferred over indirect
25100 calls via LR, so move the address there. Needed to mark
25101 this insn for linker plt sequence editing too. */
25102 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25103 if (is_pltseq_longcall)
25104 {
25105 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25106 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25107 emit_insn (gen_rtx_SET (func_addr, mark_func));
25108 v = gen_rtvec (2, func_addr, func_desc);
25109 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25110 }
25111 else
25112 emit_move_insn (func_addr, abi_reg);
25113 }
25114 else
25115 {
25116 /* A function pointer under AIX is a pointer to a data area whose
25117 first word contains the actual address of the function, whose
25118 second word contains a pointer to its TOC, and whose third word
25119 contains a value to place in the static chain register (r11).
25120 Note that if we load the static chain, our "trampoline" need
25121 not have any executable code. */
25122
25123 /* Load up address of the actual function. */
25124 func = force_reg (Pmode, func);
25125 func_addr = gen_reg_rtx (Pmode);
25126 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25127
25128 /* Indirect calls via CTR are strongly preferred over indirect
25129 calls via LR, so move the address there. */
25130 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25131 emit_move_insn (ctr_reg, func_addr);
25132 func_addr = ctr_reg;
25133
25134 /* Prepare to load the TOC of the called function. Note that the
25135 TOC load must happen immediately before the actual call so
25136 that unwinding the TOC registers works correctly. See the
25137 comment in frob_update_context. */
25138 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25139 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25140 gen_rtx_PLUS (Pmode, func,
25141 func_toc_offset));
25142 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25143
25144 /* If we have a static chain, load it up. But, if the call was
25145 originally direct, the 3rd word has not been written since no
25146 trampoline has been built, so we ought not to load it, lest we
25147 overwrite a static chain value. */
25148 if (!(GET_CODE (func_desc) == SYMBOL_REF
25149 && SYMBOL_REF_FUNCTION_P (func_desc))
25150 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25151 && !chain_already_loaded (get_current_sequence ()->next->last))
25152 {
25153 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25154 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25155 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25156 gen_rtx_PLUS (Pmode, func,
25157 func_sc_offset));
25158 emit_move_insn (sc_reg, func_sc_mem);
25159 abi_reg = sc_reg;
25160 }
25161 }
25162 }
25163 else
25164 {
25165 /* No TOC register needed for calls from PC-relative callers. */
25166 if (!rs6000_pcrel_p ())
25167 /* Direct calls use the TOC: for local calls, the callee will
25168 assume the TOC register is set; for non-local calls, the
25169 PLT stub needs the TOC register. */
25170 abi_reg = toc_reg;
25171 func_addr = func;
25172 }
25173
25174 /* Create the call. */
25175 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25176 if (value != NULL_RTX)
25177 call[0] = gen_rtx_SET (value, call[0]);
25178 call[1] = gen_rtx_USE (VOIDmode, cookie);
25179 n_call = 2;
25180
25181 if (toc_load)
25182 call[n_call++] = toc_load;
25183 if (toc_restore)
25184 call[n_call++] = toc_restore;
25185
25186 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25187
25188 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25189 insn = emit_call_insn (insn);
25190
25191 /* Mention all registers defined by the ABI to hold information
25192 as uses in CALL_INSN_FUNCTION_USAGE. */
25193 if (abi_reg)
25194 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25195 }
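
/* For illustration only (not part of the build): the AIX function
   descriptor described in the comments above can be pictured as a plain
   C struct.  The struct and function names here are hypothetical.  */
#if 0
struct example_aix_func_desc
{
  void *entry;		/* word 0: address of the function's code.  */
  void *toc;		/* word 1: the function's TOC pointer (-> r2).  */
  void *static_chain;	/* word 2: static chain value (-> r11).  */
};

static void
example_indirect_call (const struct example_aix_func_desc *desc)
{
  /* An indirect call loads word 0 and branches to it via CTR; the RTL
     above additionally loads word 1 into r2 right before the call and,
     for pointers to nested functions, word 2 into r11.  */
  void (*entry) (void) = (void (*) (void)) desc->entry;
  entry ();
}
#endif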
25196
25197 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25198
25199 void
25200 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25201 {
25202 rtx call[2];
25203 rtx insn;
25204 rtx r12 = NULL_RTX;
25205 rtx func_addr = func_desc;
25206
25207 gcc_assert (INTVAL (cookie) == 0);
25208
25209 if (global_tlsarg)
25210 tlsarg = global_tlsarg;
25211
25212 /* For ELFv2, r12 and CTR need to hold the function address
25213 for an indirect call. */
25214 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25215 {
25216 r12 = gen_rtx_REG (Pmode, 12);
25217 emit_move_insn (r12, func_desc);
25218 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25219 emit_move_insn (func_addr, r12);
25220 }
25221
25222 /* Create the call. */
25223 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25224 if (value != NULL_RTX)
25225 call[0] = gen_rtx_SET (value, call[0]);
25226
25227 call[1] = simple_return_rtx;
25228
25229 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25230 insn = emit_call_insn (insn);
25231
25232 /* Note use of the TOC register. */
25233 if (!rs6000_pcrel_p ())
25234 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25235 gen_rtx_REG (Pmode, TOC_REGNUM));
25236
25237 /* Note use of r12. */
25238 if (r12)
25239 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25240 }
25241
25242 /* Expand code to perform a call under the SYSV4 ABI. */
25243
25244 void
25245 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25246 {
25247 rtx func = func_desc;
25248 rtx func_addr;
25249 rtx call[4];
25250 rtx insn;
25251 rtx abi_reg = NULL_RTX;
25252 int n;
25253
25254 if (global_tlsarg)
25255 tlsarg = global_tlsarg;
25256
25257 /* Handle longcall attributes. */
25258 if ((INTVAL (cookie) & CALL_LONG) != 0
25259 && GET_CODE (func_desc) == SYMBOL_REF)
25260 {
25261 func = rs6000_longcall_ref (func_desc, tlsarg);
25262 /* If the longcall was implemented as an inline PLT call using
25263 PLT unspecs then func will be REG:r11. If not, func will be
25264 a pseudo reg. The inline PLT call sequence supports lazy
25265 linking (and longcalls to functions in dlopen'd libraries).
25266 The other style of longcall doesn't. The lazy linking entry
25267 to the dynamic symbol resolver requires r11 be the function
25268 address (as it is for linker generated PLT stubs). Ensure
25269 r11 stays valid to the bctrl by marking r11 used by the call. */
25270 if (TARGET_PLTSEQ)
25271 abi_reg = func;
25272 }
25273
25274 /* Handle indirect calls. */
25275 if (GET_CODE (func) != SYMBOL_REF)
25276 {
25277 func = force_reg (Pmode, func);
25278
25279 /* Indirect calls via CTR are strongly preferred over indirect
25280 calls via LR, so move the address there. That can't be left
25281 to reload because we want to mark every instruction in an
25282 inline PLT call sequence with a reloc, enabling the linker to
25283 edit the sequence back to a direct call when that makes sense. */
25284 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25285 if (abi_reg)
25286 {
25287 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25288 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25289 emit_insn (gen_rtx_SET (func_addr, mark_func));
25290 v = gen_rtvec (2, func_addr, func_desc);
25291 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25292 }
25293 else
25294 emit_move_insn (func_addr, func);
25295 }
25296 else
25297 func_addr = func;
25298
25299 /* Create the call. */
25300 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25301 if (value != NULL_RTX)
25302 call[0] = gen_rtx_SET (value, call[0]);
25303
25304 call[1] = gen_rtx_USE (VOIDmode, cookie);
25305 n = 2;
25306 if (TARGET_SECURE_PLT
25307 && flag_pic
25308 && GET_CODE (func_addr) == SYMBOL_REF
25309 && !SYMBOL_REF_LOCAL_P (func_addr))
25310 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25311
25312 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25313
25314 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25315 insn = emit_call_insn (insn);
25316 if (abi_reg)
25317 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25318 }
25319
25320 /* Expand code to perform a sibling call under the SysV4 ABI. */
25321
25322 void
25323 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25324 {
25325 rtx func = func_desc;
25326 rtx func_addr;
25327 rtx call[3];
25328 rtx insn;
25329 rtx abi_reg = NULL_RTX;
25330
25331 if (global_tlsarg)
25332 tlsarg = global_tlsarg;
25333
25334 /* Handle longcall attributes. */
25335 if ((INTVAL (cookie) & CALL_LONG) != 0
25336 && GET_CODE (func_desc) == SYMBOL_REF)
25337 {
25338 func = rs6000_longcall_ref (func_desc, tlsarg);
25339 /* If the longcall was implemented as an inline PLT call using
25340 PLT unspecs then func will be REG:r11. If not, func will be
25341 a pseudo reg. The inline PLT call sequence supports lazy
25342 linking (and longcalls to functions in dlopen'd libraries).
25343 The other style of longcall doesn't. The lazy linking entry
25344 to the dynamic symbol resolver requires r11 be the function
25345 address (as it is for linker generated PLT stubs). Ensure
25346 r11 stays valid to the bctr by marking r11 used by the call. */
25347 if (TARGET_PLTSEQ)
25348 abi_reg = func;
25349 }
25350
25351 /* Handle indirect calls. */
25352 if (GET_CODE (func) != SYMBOL_REF)
25353 {
25354 func = force_reg (Pmode, func);
25355
25356 /* Indirect sibcalls must go via CTR. That can't be left to
25357 reload because we want to mark every instruction in an inline
25358 PLT call sequence with a reloc, enabling the linker to edit
25359 the sequence back to a direct call when that makes sense. */
25360 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25361 if (abi_reg)
25362 {
25363 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25364 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25365 emit_insn (gen_rtx_SET (func_addr, mark_func));
25366 v = gen_rtvec (2, func_addr, func_desc);
25367 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25368 }
25369 else
25370 emit_move_insn (func_addr, func);
25371 }
25372 else
25373 func_addr = func;
25374
25375 /* Create the call. */
25376 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25377 if (value != NULL_RTX)
25378 call[0] = gen_rtx_SET (value, call[0]);
25379
25380 call[1] = gen_rtx_USE (VOIDmode, cookie);
25381 call[2] = simple_return_rtx;
25382
25383 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25384 insn = emit_call_insn (insn);
25385 if (abi_reg)
25386 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25387 }
25388
25389 #if TARGET_MACHO
25390
25391 /* Expand code to perform a call under the Darwin ABI.
25392 Modulo handling of mlongcall, this is much the same as sysv.
25393 If/when the longcall optimisation is removed, we could drop this
25394 code and use the sysv case (taking care to avoid the tls stuff).
25395
25396 We can use this for sibcalls too, if needed. */
25397
25398 void
25399 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25400 rtx cookie, bool sibcall)
25401 {
25402 rtx func = func_desc;
25403 rtx func_addr;
25404 rtx call[3];
25405 rtx insn;
25406 int cookie_val = INTVAL (cookie);
25407 bool make_island = false;
25408
25409 /* Handle longcall attributes; there are two cases for Darwin:
25410 1) Newer linkers are capable of synthesising any branch islands needed.
25411 2) We need a helper branch island synthesised by the compiler.
25412 The second case has mostly been retired and we don't use it for m64.
25413 In fact, it is only an optimisation; we could just indirect as sysv
25414 does, but we keep it for backwards compatibility for now.
25415 If we're going to use this, then we need to keep the CALL_LONG bit set,
25416 so that we can pick up the special insn form later. */
25417 if ((cookie_val & CALL_LONG) != 0
25418 && GET_CODE (func_desc) == SYMBOL_REF)
25419 {
25420 /* FIXME: the longcall opt should not hang off this flag; it is most
25421 likely incorrect for kernel-mode code generation. */
25422 if (darwin_symbol_stubs && TARGET_32BIT)
25423 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25424 else
25425 {
25426 /* The linker is capable of doing this, but the user explicitly
25427 asked for -mlongcall, so we'll do the 'normal' version. */
25428 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25429 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25430 }
25431 }
25432
25433 /* Handle indirect calls. */
25434 if (GET_CODE (func) != SYMBOL_REF)
25435 {
25436 func = force_reg (Pmode, func);
25437
25438 /* Indirect calls via CTR are strongly preferred over indirect
25439 calls via LR, and are required for indirect sibcalls, so move
25440 the address there. */
25441 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25442 emit_move_insn (func_addr, func);
25443 }
25444 else
25445 func_addr = func;
25446
25447 /* Create the call. */
25448 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25449 if (value != NULL_RTX)
25450 call[0] = gen_rtx_SET (value, call[0]);
25451
25452 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25453
25454 if (sibcall)
25455 call[2] = simple_return_rtx;
25456 else
25457 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25458
25459 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25460 insn = emit_call_insn (insn);
25461 /* Now that we have the debug info in the insn, we can set up the
25462 branch island if we're using one. */
25463 if (make_island)
25464 {
25465 tree funname = get_identifier (XSTR (func_desc, 0));
25466
25467 if (no_previous_def (funname))
25468 {
25469 rtx label_rtx = gen_label_rtx ();
25470 char *label_buf, temp_buf[256];
25471 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25472 CODE_LABEL_NUMBER (label_rtx));
25473 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25474 tree labelname = get_identifier (label_buf);
25475 add_compiler_branch_island (labelname, funname,
25476 insn_line ((const rtx_insn*)insn));
25477 }
25478 }
25479 }
25480 #endif
25481
25482 void
25483 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25484 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25485 {
25486 #if TARGET_MACHO
25487 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25488 #else
25489 gcc_unreachable();
25490 #endif
25491 }
25492
25493
25494 void
25495 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25496 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25497 {
25498 #if TARGET_MACHO
25499 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25500 #else
25501 gcc_unreachable();
25502 #endif
25503 }
25504
25505 /* Return whether we should generate PC-relative code for FNDECL. */
25506 bool
25507 rs6000_fndecl_pcrel_p (const_tree fndecl)
25508 {
25509 if (DEFAULT_ABI != ABI_ELFv2)
25510 return false;
25511
25512 struct cl_target_option *opts = target_opts_for_fn (fndecl);
25513
25514 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25515 && TARGET_CMODEL == CMODEL_MEDIUM);
25516 }
25517
25518 /* Return whether we should generate PC-relative code for *FN. */
25519 bool
25520 rs6000_function_pcrel_p (struct function *fn)
25521 {
25522 if (DEFAULT_ABI != ABI_ELFv2)
25523 return false;
25524
25525 /* Optimize usual case. */
25526 if (fn == cfun)
25527 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25528 && TARGET_CMODEL == CMODEL_MEDIUM);
25529
25530 return rs6000_fndecl_pcrel_p (fn->decl);
25531 }
25532
25533 /* Return whether we should generate PC-relative code for the current
25534 function. */
25535 bool
25536 rs6000_pcrel_p ()
25537 {
25538 return (DEFAULT_ABI == ABI_ELFv2
25539 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25540 && TARGET_CMODEL == CMODEL_MEDIUM);
25541 }
25542
25543 \f
25544 /* Given an address (ADDR), a mode (MODE), and what the format of the
25545 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
25546 for the address. */
25547
25548 enum insn_form
25549 address_to_insn_form (rtx addr,
25550 machine_mode mode,
25551 enum non_prefixed_form non_prefixed_format)
25552 {
25553 /* Single register is easy. */
25554 if (REG_P (addr) || SUBREG_P (addr))
25555 return INSN_FORM_BASE_REG;
25556
25557 /* If the non-prefixed instruction format doesn't support offset addressing,
25558 make sure only indexed addressing is allowed.
25559
25560 We special case SDmode so that the register allocator does not try to move
25561 SDmode through GPR registers, but instead uses the 32-bit integer load and
25562 store instructions for the floating point registers. */
25563 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
25564 {
25565 if (GET_CODE (addr) != PLUS)
25566 return INSN_FORM_BAD;
25567
25568 rtx op0 = XEXP (addr, 0);
25569 rtx op1 = XEXP (addr, 1);
25570 if (!REG_P (op0) && !SUBREG_P (op0))
25571 return INSN_FORM_BAD;
25572
25573 if (!REG_P (op1) && !SUBREG_P (op1))
25574 return INSN_FORM_BAD;
25575
25576 return INSN_FORM_X;
25577 }
25578
25579 /* Deal with update forms. */
25580 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
25581 return INSN_FORM_UPDATE;
25582
25583 /* Handle PC-relative symbols and labels. Check for both local and
25584 external symbols. Assume labels are always local. TLS symbols
25585 are not PC-relative for rs6000. */
25586 if (TARGET_PCREL)
25587 {
25588 if (LABEL_REF_P (addr))
25589 return INSN_FORM_PCREL_LOCAL;
25590
25591 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
25592 {
25593 if (!SYMBOL_REF_LOCAL_P (addr))
25594 return INSN_FORM_PCREL_EXTERNAL;
25595 else
25596 return INSN_FORM_PCREL_LOCAL;
25597 }
25598 }
25599
25600 if (GET_CODE (addr) == CONST)
25601 addr = XEXP (addr, 0);
25602
25603 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
25604 if (GET_CODE (addr) == LO_SUM)
25605 return INSN_FORM_LO_SUM;
25606
25607 /* Everything below must be an offset address of some form. */
25608 if (GET_CODE (addr) != PLUS)
25609 return INSN_FORM_BAD;
25610
25611 rtx op0 = XEXP (addr, 0);
25612 rtx op1 = XEXP (addr, 1);
25613
25614 /* Check for indexed addresses. */
25615 if (REG_P (op1) || SUBREG_P (op1))
25616 {
25617 if (REG_P (op0) || SUBREG_P (op0))
25618 return INSN_FORM_X;
25619
25620 return INSN_FORM_BAD;
25621 }
25622
25623 if (!CONST_INT_P (op1))
25624 return INSN_FORM_BAD;
25625
25626 HOST_WIDE_INT offset = INTVAL (op1);
25627 if (!SIGNED_INTEGER_34BIT_P (offset))
25628 return INSN_FORM_BAD;
25629
25630 /* Check for local and external PC-relative addresses. Labels are always
25631 local. TLS symbols are not PC-relative for rs6000. */
25632 if (TARGET_PCREL)
25633 {
25634 if (LABEL_REF_P (op0))
25635 return INSN_FORM_PCREL_LOCAL;
25636
25637 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
25638 {
25639 if (!SYMBOL_REF_LOCAL_P (op0))
25640 return INSN_FORM_PCREL_EXTERNAL;
25641 else
25642 return INSN_FORM_PCREL_LOCAL;
25643 }
25644 }
25645
25646 /* If it isn't PC-relative, the address must use a base register. */
25647 if (!REG_P (op0) && !SUBREG_P (op0))
25648 return INSN_FORM_BAD;
25649
25650 /* Large offsets must be prefixed. */
25651 if (!SIGNED_INTEGER_16BIT_P (offset))
25652 {
25653 if (TARGET_PREFIXED)
25654 return INSN_FORM_PREFIXED_NUMERIC;
25655
25656 return INSN_FORM_BAD;
25657 }
25658
25659 /* We have a 16-bit offset; see what default instruction format to use. */
25660 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
25661 {
25662 unsigned size = GET_MODE_SIZE (mode);
25663
25664 /* On 64-bit systems, assume 64-bit integers need to use DS form
25665 addresses (for LD/STD). VSX vectors need to use DQ form addresses
25666 (for LXV and STXV). TImode is problematic in that it is normally
25667 expected to live in GPRs, where it wants a DS instruction format, but if
25668 it goes into the vector registers, it wants a DQ instruction
25669 format. */
25670 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
25671 non_prefixed_format = NON_PREFIXED_DS;
25672
25673 else if (TARGET_VSX && size >= 16
25674 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
25675 non_prefixed_format = NON_PREFIXED_DQ;
25676
25677 else
25678 non_prefixed_format = NON_PREFIXED_D;
25679 }
25680
25681 /* Classify the D/DS/DQ-form addresses. */
25682 switch (non_prefixed_format)
25683 {
25684 /* Instruction format D, all 16 bits are valid. */
25685 case NON_PREFIXED_D:
25686 return INSN_FORM_D;
25687
25688 /* Instruction format DS, bottom 2 bits must be 0. */
25689 case NON_PREFIXED_DS:
25690 if ((offset & 3) == 0)
25691 return INSN_FORM_DS;
25692
25693 else if (TARGET_PREFIXED)
25694 return INSN_FORM_PREFIXED_NUMERIC;
25695
25696 else
25697 return INSN_FORM_BAD;
25698
25699 /* Instruction format DQ, bottom 4 bits must be 0. */
25700 case NON_PREFIXED_DQ:
25701 if ((offset & 15) == 0)
25702 return INSN_FORM_DQ;
25703
25704 else if (TARGET_PREFIXED)
25705 return INSN_FORM_PREFIXED_NUMERIC;
25706
25707 else
25708 return INSN_FORM_BAD;
25709
25710 default:
25711 break;
25712 }
25713
25714 return INSN_FORM_BAD;
25715 }
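
/* For illustration only (not built): the classification above reduces to
   range and alignment checks on the offset.  A stand-alone sketch with
   hypothetical names; the D form takes any 16-bit signed offset, DS needs
   the bottom 2 bits clear, DQ the bottom 4, and the prefixed forms allow
   any 34-bit signed offset.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

enum example_form { EX_D, EX_DS, EX_DQ, EX_PREFIXED, EX_BAD };

static enum example_form
example_classify_offset (int64_t offset, int align_mask, bool have_prefixed)
{
  const int64_t lim34 = INT64_C (1) << 33;	/* 34-bit signed range.  */

  if (offset < -lim34 || offset >= lim34)
    return EX_BAD;				/* No single instruction.  */

  if (offset >= -32768 && offset <= 32767 && (offset & align_mask) == 0)
    return (align_mask == 0 ? EX_D
	    : align_mask == 3 ? EX_DS
	    : EX_DQ);

  return have_prefixed ? EX_PREFIXED : EX_BAD;
}
#endif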
25716
25717 /* Helper function to see if we're potentially looking at lfs/stfs.
25718 - PARALLEL containing a SET and a CLOBBER
25719 - stfs:
25720 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
25721 - CLOBBER is a V4SF
25722 - lfs:
25723 - SET is from UNSPEC_SF_FROM_SI to REG:SF
25724 - CLOBBER is a DI
25725 */
25726
25727 static bool
25728 is_lfs_stfs_insn (rtx_insn *insn)
25729 {
25730 rtx pattern = PATTERN (insn);
25731 if (GET_CODE (pattern) != PARALLEL)
25732 return false;
25733
25734 /* This should be a parallel with exactly one set and one clobber. */
25735 if (XVECLEN (pattern, 0) != 2)
25736 return false;
25737
25738 rtx set = XVECEXP (pattern, 0, 0);
25739 if (GET_CODE (set) != SET)
25740 return false;
25741
25742 rtx clobber = XVECEXP (pattern, 0, 1);
25743 if (GET_CODE (clobber) != CLOBBER)
25744 return false;
25745
25746 /* All we care about is that the destination of the SET is a mem:SI,
25747 the source should be an UNSPEC_SI_FROM_SF, and the clobber
25748 should be a scratch:V4SF. */
25749
25750 rtx dest = SET_DEST (set);
25751 rtx src = SET_SRC (set);
25752 rtx scratch = SET_DEST (clobber);
25753
25754 if (GET_CODE (src) != UNSPEC)
25755 return false;
25756
25757 /* stfs case. */
25758 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
25759 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
25760 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
25761 return true;
25762
25763 /* lfs case. */
25764 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
25765 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
25766 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
25767 return true;
25768
25769 return false;
25770 }
25771
25772 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
25773 instruction format (D/DS/DQ) used for offset memory. */
25774
25775 static enum non_prefixed_form
25776 reg_to_non_prefixed (rtx reg, machine_mode mode)
25777 {
25778 /* If it isn't a register, use the defaults. */
25779 if (!REG_P (reg) && !SUBREG_P (reg))
25780 return NON_PREFIXED_DEFAULT;
25781
25782 unsigned int r = reg_or_subregno (reg);
25783
25784 /* If we have a pseudo, use the default instruction format. */
25785 if (!HARD_REGISTER_NUM_P (r))
25786 return NON_PREFIXED_DEFAULT;
25787
25788 unsigned size = GET_MODE_SIZE (mode);
25789
25790 /* FPR registers use D-form addresses for scalars, and DQ-form for vectors,
25791 IEEE 128-bit floating point, and 128-bit integers. Before power9, only
25792 indexed addressing was available for vectors. */
25793 if (FP_REGNO_P (r))
25794 {
25795 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25796 return NON_PREFIXED_D;
25797
25798 else if (size < 8)
25799 return NON_PREFIXED_X;
25800
25801 else if (TARGET_VSX && size >= 16
25802 && (VECTOR_MODE_P (mode)
25803 || VECTOR_ALIGNMENT_P (mode)
25804 || mode == TImode || mode == CTImode))
25805 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
25806
25807 else
25808 return NON_PREFIXED_DEFAULT;
25809 }
25810
25811 /* Altivec registers use DS-form addresses for scalars, and DQ-form for
25812 vectors, IEEE 128-bit floating point, and 128-bit integers. Before power9,
25813 only indexed addressing was available. */
25814 else if (ALTIVEC_REGNO_P (r))
25815 {
25816 if (!TARGET_P9_VECTOR)
25817 return NON_PREFIXED_X;
25818
25819 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25820 return NON_PREFIXED_DS;
25821
25822 else if (size < 8)
25823 return NON_PREFIXED_X;
25824
25825 else if (TARGET_VSX && size >= 16
25826 && (VECTOR_MODE_P (mode)
25827 || VECTOR_ALIGNMENT_P (mode)
25828 || mode == TImode || mode == CTImode))
25829 return NON_PREFIXED_DQ;
25830
25831 else
25832 return NON_PREFIXED_DEFAULT;
25833 }
25834
25835 /* GPR registers use DS-form addresses for 64-bit items on 64-bit systems,
25836 and D-form otherwise. Assume that any other register, such as LR, CRs,
25837 etc. will go through the GPR registers for memory operations. */
25838 else if (TARGET_POWERPC64 && size >= 8)
25839 return NON_PREFIXED_DS;
25840
25841 return NON_PREFIXED_D;
25842 }
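
/* For illustration only (not built): a condensed view of the decision
   tree above, mapping register file and operand size to a default
   non-prefixed form.  Names are hypothetical; this assumes a 64-bit
   target and glosses over special cases the real code handles
   (e.g. SFmode in FPRs uses D form despite its 4-byte size).  */
#if 0
enum example_form2 { EX2_D, EX2_DS, EX2_DQ, EX2_X };
enum example_file { EX_GPR, EX_FPR, EX_VR };

static enum example_form2
example_default_form (enum example_file file, unsigned size, int p9_vector)
{
  if (file == EX_VR && !p9_vector)
    return EX2_X;			  /* Pre-power9 Altivec: indexed only.  */
  if (size >= 16)
    return p9_vector ? EX2_DQ : EX2_X;	  /* LXV/STXV want DQ form.  */
  if (size == 8)
    return file == EX_FPR ? EX2_D : EX2_DS; /* LFD vs. LD/LXSD.  */
  return file == EX_FPR ? EX2_X : EX2_D;  /* Small FPR values are indexed.  */
}
#endif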
25843
25844 \f
25845 /* Whether a load instruction is a prefixed instruction. This is called from
25846 the prefixed attribute processing. */
25847
25848 bool
25849 prefixed_load_p (rtx_insn *insn)
25850 {
25851 /* Validate the insn to make sure it is a normal load insn. */
25852 extract_insn_cached (insn);
25853 if (recog_data.n_operands < 2)
25854 return false;
25855
25856 rtx reg = recog_data.operand[0];
25857 rtx mem = recog_data.operand[1];
25858
25859 if (!REG_P (reg) && !SUBREG_P (reg))
25860 return false;
25861
25862 if (!MEM_P (mem))
25863 return false;
25864
25865 /* Prefixed load instructions do not support update or indexed forms. */
25866 if (get_attr_indexed (insn) == INDEXED_YES
25867 || get_attr_update (insn) == UPDATE_YES)
25868 return false;
25869
25870 /* LWA uses the DS format instead of the D format that LWZ uses. */
25871 enum non_prefixed_form non_prefixed;
25872 machine_mode reg_mode = GET_MODE (reg);
25873 machine_mode mem_mode = GET_MODE (mem);
25874
25875 if (mem_mode == SImode && reg_mode == DImode
25876 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25877 non_prefixed = NON_PREFIXED_DS;
25878
25879 else
25880 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25881
25882 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25883 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
25884 else
25885 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25886 }
25887
25888 /* Whether a store instruction is a prefixed instruction. This is called from
25889 the prefixed attribute processing. */
25890
25891 bool
25892 prefixed_store_p (rtx_insn *insn)
25893 {
25894 /* Validate the insn to make sure it is a normal store insn. */
25895 extract_insn_cached (insn);
25896 if (recog_data.n_operands < 2)
25897 return false;
25898
25899 rtx mem = recog_data.operand[0];
25900 rtx reg = recog_data.operand[1];
25901
25902 if (!REG_P (reg) && !SUBREG_P (reg))
25903 return false;
25904
25905 if (!MEM_P (mem))
25906 return false;
25907
25908 /* Prefixed store instructions do not support update or indexed forms. */
25909 if (get_attr_indexed (insn) == INDEXED_YES
25910 || get_attr_update (insn) == UPDATE_YES)
25911 return false;
25912
25913 machine_mode mem_mode = GET_MODE (mem);
25914 rtx addr = XEXP (mem, 0);
25915 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25916
25917 /* Need to make sure we aren't looking at a stfs, which doesn't look
25918 like the other patterns that reg_to_non_prefixed/address_is_prefixed
25919 look for. */
25920 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25921 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
25922 else
25923 return address_is_prefixed (addr, mem_mode, non_prefixed);
25924 }
25925
25926 /* Whether a load immediate or add instruction is a prefixed instruction. This
25927 is called from the prefixed attribute processing. */
25928
25929 bool
25930 prefixed_paddi_p (rtx_insn *insn)
25931 {
25932 rtx set = single_set (insn);
25933 if (!set)
25934 return false;
25935
25936 rtx dest = SET_DEST (set);
25937 rtx src = SET_SRC (set);
25938
25939 if (!REG_P (dest) && !SUBREG_P (dest))
25940 return false;
25941
25942 /* Is this a load immediate that can't be done with a simple ADDI or
25943 ADDIS? */
25944 if (CONST_INT_P (src))
25945 return (satisfies_constraint_eI (src)
25946 && !satisfies_constraint_I (src)
25947 && !satisfies_constraint_L (src));
25948
25949 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25950 ADDIS? */
25951 if (GET_CODE (src) == PLUS)
25952 {
25953 rtx op1 = XEXP (src, 1);
25954
25955 return (CONST_INT_P (op1)
25956 && satisfies_constraint_eI (op1)
25957 && !satisfies_constraint_I (op1)
25958 && !satisfies_constraint_L (op1));
25959 }
25960
25961 /* If not, is it a load of a PC-relative address? */
25962 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25963 return false;
25964
25965 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25966 return false;
25967
25968 enum insn_form iform = address_to_insn_form (src, Pmode,
25969 NON_PREFIXED_DEFAULT);
25970
25971 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25972 }
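
/* For illustration only (not built): the constraint tests above are
   integer range checks.  ADDI takes a 16-bit signed immediate, ADDIS a
   16-bit signed immediate shifted left 16, and PADDI a 34-bit signed
   immediate.  Function names are hypothetical; the right shift assumes
   the usual arithmetic behaviour for signed values.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
fits_addi (int64_t v)		/* Constraint I.  */
{
  return v >= -32768 && v <= 32767;
}

static bool
fits_addis (int64_t v)		/* Constraint L.  */
{
  return (v & 0xffff) == 0 && fits_addi (v >> 16);
}

static bool
needs_paddi (int64_t v)		/* Constraint eI but not I or L.  */
{
  const int64_t lim34 = INT64_C (1) << 33;
  return v >= -lim34 && v < lim34 && !fits_addi (v) && !fits_addis (v);
}
#endif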
25973
25974 /* Whether the next instruction needs a 'p' prefix issued before the
25975 instruction is printed out. */
25976 static bool next_insn_prefixed_p;
25977
25978 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25979 outputting the assembler code. On the PowerPC, we remember if the current
25980 insn is a prefixed insn where we need to emit a 'p' before the insn.
25981
25982 In addition, if the insn is part of a PC-relative reference to an external
25983 label optimization, this is recorded also. */
25984 void
25985 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25986 {
25987 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25988 return;
25989 }
25990
25991 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25992 We use it to emit a 'p' for prefixed insns that is set in
25993 FINAL_PRESCAN_INSN. */
25994 void
25995 rs6000_asm_output_opcode (FILE *stream)
25996 {
25997 if (next_insn_prefixed_p)
25998 fprintf (stream, "p");
25999
26000 return;
26001 }
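
/* For illustration only (not built): the two hooks above cooperate
   through one flag.  The prescan step records whether the insn is
   prefixed, and the opcode hook then turns e.g. "ld" into "pld".
   Names here are hypothetical.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool example_next_prefixed;

static void
example_prescan (bool insn_is_prefixed)
{
  example_next_prefixed = insn_is_prefixed;
}

static void
example_output_opcode (FILE *stream, const char *opcode)
{
  if (example_next_prefixed)
    fputc ('p', stream);	/* "ld" is printed as "pld", etc.  */
  fputs (opcode, stream);
}
#endif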
26002
26003 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26004 should be adjusted to reflect any required changes. This macro is used when
26005 there is some systematic length adjustment required that would be difficult
26006 to express in the length attribute.
26007
26008 In the PowerPC, we use this to adjust the length of an instruction if one or
26009 more prefixed instructions are generated, using the attribute
26010 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26011 hardware requires that a prefixed instruction does not cross a 64-byte
26012 boundary. This means the compiler has to assume the length of the first
26013 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26014 already set for the non-prefixed instruction, we just need to update for the
26015 difference. */
26016
26017 int
26018 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26019 {
26020 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26021 {
26022 rtx pattern = PATTERN (insn);
26023 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26024 && get_attr_prefixed (insn) == PREFIXED_YES)
26025 {
26026 int num_prefixed = get_attr_max_prefixed_insns (insn);
26027 length += 4 * (num_prefixed + 1);
26028 }
26029 }
26030
26031 return length;
26032 }
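
/* For illustration only (not built): the adjustment above in isolation.
   A prefixed insn is 8 bytes rather than the 4 already counted, plus a
   budgeted 4-byte NOP in case the assembler must realign it to avoid
   crossing a 64-byte boundary, hence 4 * (num_prefixed + 1).  */
#if 0
static int
example_adjust_length (int length, int num_prefixed)
{
  /* 4 extra bytes per prefixed insn for the prefix word, plus one
     worst-case 4-byte alignment NOP.  */
  return length + 4 * (num_prefixed + 1);
}
#endif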
26033
26034 \f
26035 #ifdef HAVE_GAS_HIDDEN
26036 # define USE_HIDDEN_LINKONCE 1
26037 #else
26038 # define USE_HIDDEN_LINKONCE 0
26039 #endif
26040
26041 /* Fills in the label name that should be used for a 476 link stack thunk. */
26042
26043 void
26044 get_ppc476_thunk_name (char name[32])
26045 {
26046 gcc_assert (TARGET_LINK_STACK);
26047
26048 if (USE_HIDDEN_LINKONCE)
26049 sprintf (name, "__ppc476.get_thunk");
26050 else
26051 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26052 }
26053
26054 /* This function emits the simple thunk routine that is used to preserve
26055 the link stack on the 476 cpu. */
26056
26057 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26058 static void
26059 rs6000_code_end (void)
26060 {
26061 char name[32];
26062 tree decl;
26063
26064 if (!TARGET_LINK_STACK)
26065 return;
26066
26067 get_ppc476_thunk_name (name);
26068
26069 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26070 build_function_type_list (void_type_node, NULL_TREE));
26071 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26072 NULL_TREE, void_type_node);
26073 TREE_PUBLIC (decl) = 1;
26074 TREE_STATIC (decl) = 1;
26075
26076 #if RS6000_WEAK
26077 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26078 {
26079 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26080 targetm.asm_out.unique_section (decl, 0);
26081 switch_to_section (get_named_section (decl, NULL, 0));
26082 DECL_WEAK (decl) = 1;
26083 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26084 targetm.asm_out.globalize_label (asm_out_file, name);
26085 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26086 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26087 }
26088 else
26089 #endif
26090 {
26091 switch_to_section (text_section);
26092 ASM_OUTPUT_LABEL (asm_out_file, name);
26093 }
26094
26095 DECL_INITIAL (decl) = make_node (BLOCK);
26096 current_function_decl = decl;
26097 allocate_struct_function (decl, false);
26098 init_function_start (decl);
26099 first_function_block_is_cold = false;
26100 /* Make sure unwind info is emitted for the thunk if needed. */
26101 final_start_function (emit_barrier (), asm_out_file, 1);
26102
26103 fputs ("\tblr\n", asm_out_file);
26104
26105 final_end_function ();
26106 init_insn_lengths ();
26107 free_after_compilation (cfun);
26108 set_cfun (NULL);
26109 current_function_decl = NULL;
26110 }
26111
26112 /* Add r30 to hard reg set if the prologue sets it up and it is not
26113 pic_offset_table_rtx. */
26114
26115 static void
26116 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26117 {
26118 if (!TARGET_SINGLE_PIC_BASE
26119 && TARGET_TOC
26120 && TARGET_MINIMAL_TOC
26121 && !constant_pool_empty_p ())
26122 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26123 if (cfun->machine->split_stack_argp_used)
26124 add_to_hard_reg_set (&set->set, Pmode, 12);
26125
26126 /* Make sure the hard reg set doesn't include r2, which was possibly added
26127 via PIC_OFFSET_TABLE_REGNUM. */
26128 if (TARGET_TOC)
26129 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26130 }
26131
26132 \f
26133 /* Helper function for rs6000_split_logical to emit a logical instruction after
26134 splitting the operation into single GPR registers.
26135
26136 DEST is the destination register.
26137 OP1 and OP2 are the input source registers.
26138 CODE is the base operation (AND, IOR, XOR, NOT).
26139 MODE is the machine mode.
26140 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26141 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26142 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26143
26144 static void
26145 rs6000_split_logical_inner (rtx dest,
26146 rtx op1,
26147 rtx op2,
26148 enum rtx_code code,
26149 machine_mode mode,
26150 bool complement_final_p,
26151 bool complement_op1_p,
26152 bool complement_op2_p)
26153 {
26154 rtx bool_rtx;
26155
26156 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26157 if (op2 && CONST_INT_P (op2)
26158 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26159 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26160 {
26161 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26162 HOST_WIDE_INT value = INTVAL (op2) & mask;
26163
26164 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26165 if (code == AND)
26166 {
26167 if (value == 0)
26168 {
26169 emit_insn (gen_rtx_SET (dest, const0_rtx));
26170 return;
26171 }
26172
26173 else if (value == mask)
26174 {
26175 if (!rtx_equal_p (dest, op1))
26176 emit_insn (gen_rtx_SET (dest, op1));
26177 return;
26178 }
26179 }
26180
26181 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26182 into separate ORI/ORIS or XORI/XORIS instructions. */
26183 else if (code == IOR || code == XOR)
26184 {
26185 if (value == 0)
26186 {
26187 if (!rtx_equal_p (dest, op1))
26188 emit_insn (gen_rtx_SET (dest, op1));
26189 return;
26190 }
26191 }
26192 }
26193
26194 if (code == AND && mode == SImode
26195 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26196 {
26197 emit_insn (gen_andsi3 (dest, op1, op2));
26198 return;
26199 }
26200
26201 if (complement_op1_p)
26202 op1 = gen_rtx_NOT (mode, op1);
26203
26204 if (complement_op2_p)
26205 op2 = gen_rtx_NOT (mode, op2);
26206
26207 /* For canonical RTL, if only one arm is inverted it is the first. */
26208 if (!complement_op1_p && complement_op2_p)
26209 std::swap (op1, op2);
26210
26211 bool_rtx = ((code == NOT)
26212 ? gen_rtx_NOT (mode, op1)
26213 : gen_rtx_fmt_ee (code, mode, op1, op2));
26214
26215 if (complement_final_p)
26216 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26217
26218 emit_insn (gen_rtx_SET (dest, bool_rtx));
26219 }
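
/* For illustration only (not built): the complement_* flags above map
   onto the machine's complemented logical instructions via ordinary
   Boolean identities, which can be sanity-checked in plain C:  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
example_complement_identities (uint32_t a, uint32_t b)
{
  assert (~(a & b) == (~a | ~b));	/* nand via De Morgan.  */
  assert ((a & ~b) == ~(~a | b));	/* andc via De Morgan.  */
  assert (~(a ^ b) == (a ^ ~b));	/* eqv: inverting one xor arm
					   inverts the result.  */
}
#endif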
26220
26221 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26222 operations are split immediately during RTL generation to allow for more
26223 optimizations of the AND/IOR/XOR.
26224
26225 OPERANDS is an array containing the destination and two input operands.
26226 CODE is the base operation (AND, IOR, XOR, NOT).
26228 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26229 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26230 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26233
26234 static void
26235 rs6000_split_logical_di (rtx operands[3],
26236 enum rtx_code code,
26237 bool complement_final_p,
26238 bool complement_op1_p,
26239 bool complement_op2_p)
26240 {
26241 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26242 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26243 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26244 enum hi_lo { hi = 0, lo = 1 };
26245 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26246 size_t i;
26247
26248 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26249 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26250 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26251 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26252
26253 if (code == NOT)
26254 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26255 else
26256 {
26257 if (!CONST_INT_P (operands[2]))
26258 {
26259 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26260 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26261 }
26262 else
26263 {
26264 HOST_WIDE_INT value = INTVAL (operands[2]);
26265 HOST_WIDE_INT value_hi_lo[2];
26266
26267 gcc_assert (!complement_final_p);
26268 gcc_assert (!complement_op1_p);
26269 gcc_assert (!complement_op2_p);
26270
26271 value_hi_lo[hi] = value >> 32;
26272 value_hi_lo[lo] = value & lower_32bits;
26273
26274 for (i = 0; i < 2; i++)
26275 {
26276 HOST_WIDE_INT sub_value = value_hi_lo[i];
26277
26278 if (sub_value & sign_bit)
26279 sub_value |= upper_32bits;
26280
26281 op2_hi_lo[i] = GEN_INT (sub_value);
26282
26283 /* If this is an AND instruction, check to see if we need to load
26284 the value in a register. */
26285 if (code == AND && sub_value != -1 && sub_value != 0
26286 && !and_operand (op2_hi_lo[i], SImode))
26287 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26288 }
26289 }
26290 }
26291
26292 for (i = 0; i < 2; i++)
26293 {
26294 /* Split large IOR/XOR operations. */
26295 if ((code == IOR || code == XOR)
26296 && CONST_INT_P (op2_hi_lo[i])
26297 && !complement_final_p
26298 && !complement_op1_p
26299 && !complement_op2_p
26300 && !logical_const_operand (op2_hi_lo[i], SImode))
26301 {
26302 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26303 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
26304 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
26305 rtx tmp = gen_reg_rtx (SImode);
26306
26307 /* Make sure the constant is sign extended. */
26308 if ((hi_16bits & sign_bit) != 0)
26309 hi_16bits |= upper_32bits;
26310
26311 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26312 code, SImode, false, false, false);
26313
26314 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26315 code, SImode, false, false, false);
26316 }
26317 else
26318 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26319 code, SImode, complement_final_p,
26320 complement_op1_p, complement_op2_p);
26321 }
26322
26323 return;
26324 }
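
/* For illustration only (not built): the constant handling above splits
   a 64-bit value into two sign-extended 32-bit halves.  The same
   arithmetic in stand-alone C, assuming arithmetic right shift and
   wrap-around narrowing of signed values (as GCC provides):  */
#if 0
#include <stdint.h>

static void
example_split_di_constant (int64_t value, int32_t *hi, int32_t *lo)
{
  *hi = (int32_t) (value >> 32);	   /* Bits 32-63.  */
  *lo = (int32_t) (value & 0xffffffff);	   /* Bits 0-31, sign re-extended
					      by the narrowing store.  */
}
#endif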
26325
26326 /* Split the insns that make up boolean operations operating on multiple GPR
26327 registers. The boolean MD patterns ensure that the inputs either are
26328 exactly the same as the output registers, or there is no overlap.
26329
26330 OPERANDS is an array containing the destination and two input operands.
26331 CODE is the base operation (AND, IOR, XOR, NOT).
26332 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26333 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26334 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26335
26336 void
26337 rs6000_split_logical (rtx operands[3],
26338 enum rtx_code code,
26339 bool complement_final_p,
26340 bool complement_op1_p,
26341 bool complement_op2_p)
26342 {
26343 machine_mode mode = GET_MODE (operands[0]);
26344 machine_mode sub_mode;
26345 rtx op0, op1, op2;
26346 int sub_size, regno0, regno1, nregs, i;
26347
26348 /* If this is DImode, use the specialized version that can run before
26349 register allocation. */
26350 if (mode == DImode && !TARGET_POWERPC64)
26351 {
26352 rs6000_split_logical_di (operands, code, complement_final_p,
26353 complement_op1_p, complement_op2_p);
26354 return;
26355 }
26356
26357 op0 = operands[0];
26358 op1 = operands[1];
26359 op2 = (code == NOT) ? NULL_RTX : operands[2];
26360 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26361 sub_size = GET_MODE_SIZE (sub_mode);
26362 regno0 = REGNO (op0);
26363 regno1 = REGNO (op1);
26364
26365 gcc_assert (reload_completed);
26366 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26367 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26368
26369 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26370 gcc_assert (nregs > 1);
26371
26372 if (op2 && REG_P (op2))
26373 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26374
26375 for (i = 0; i < nregs; i++)
26376 {
26377 int offset = i * sub_size;
26378 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26379 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26380 rtx sub_op2 = ((code == NOT)
26381 ? NULL_RTX
26382 : simplify_subreg (sub_mode, op2, mode, offset));
26383
26384 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26385 complement_final_p, complement_op1_p,
26386 complement_op2_p);
26387 }
26388
26389 return;
26390 }
26391
26392 \f
26393 /* Return true if the peephole2 pass can combine an addis instruction
26394 and a load with an offset into a pair that can be fused together on
26395 a power8. */
26396
26397 bool
26398 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
26399 rtx addis_value, /* addis value. */
26400 rtx target, /* target register that is loaded. */
26401 rtx mem) /* bottom part of the memory addr. */
26402 {
26403 rtx addr;
26404 rtx base_reg;
26405
26406 /* Validate arguments. */
26407 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
26408 return false;
26409
26410 if (!base_reg_operand (target, GET_MODE (target)))
26411 return false;
26412
26413 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
26414 return false;
26415
26416 /* Allow sign/zero extension. */
26417 if (GET_CODE (mem) == ZERO_EXTEND
26418 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
26419 mem = XEXP (mem, 0);
26420
26421 if (!MEM_P (mem))
26422 return false;
26423
26424 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
26425 return false;
26426
26427 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
26428 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
26429 return false;
26430
26431 /* Validate that the register used to load the high value is either the
26432 register being loaded, or we can safely replace its use.
26433
26434 This function is only called from the peephole2 pass and we assume that
26435 there are 2 instructions in the peephole (addis and load), so we check
26436 that the target register is not used in the memory address and that the
26437 register holding the addis result is dead after the peephole. */
26438 if (REGNO (addis_reg) != REGNO (target))
26439 {
26440 if (reg_mentioned_p (target, mem))
26441 return false;
26442
26443 if (!peep2_reg_dead_p (2, addis_reg))
26444 return false;
26445
26446 /* If the target register being loaded is the stack pointer, we must
26447 avoid loading any other value into it, even temporarily. */
26448 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
26449 return false;
26450 }
26451
26452 base_reg = XEXP (addr, 0);
26453 return REGNO (addis_reg) == REGNO (base_reg);
26454 }
26455
26456 /* During the peephole2 pass, adjust and expand the insns for a load fusion
26457 sequence. We adjust the addis register to use the target register. If the
26458 load sign extends, we adjust the code to do the zero extending load, and an
26459 explicit sign extension later since the fusion only covers zero extending
26460 loads.
26461
26462 The operands are:
26463 operands[0] register set with addis (to be replaced with target)
26464 operands[1] value set via addis
26465 operands[2] target register being loaded
26466 operands[3] D-form memory reference using operands[0]. */
26467
26468 void
26469 expand_fusion_gpr_load (rtx *operands)
26470 {
26471 rtx addis_value = operands[1];
26472 rtx target = operands[2];
26473 rtx orig_mem = operands[3];
26474 rtx new_addr, new_mem, orig_addr, offset;
26475 enum rtx_code plus_or_lo_sum;
26476 machine_mode target_mode = GET_MODE (target);
26477 machine_mode extend_mode = target_mode;
26478 machine_mode ptr_mode = Pmode;
26479 enum rtx_code extend = UNKNOWN;
26480
26481 if (GET_CODE (orig_mem) == ZERO_EXTEND
26482 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
26483 {
26484 extend = GET_CODE (orig_mem);
26485 orig_mem = XEXP (orig_mem, 0);
26486 target_mode = GET_MODE (orig_mem);
26487 }
26488
26489 gcc_assert (MEM_P (orig_mem));
26490
26491 orig_addr = XEXP (orig_mem, 0);
26492 plus_or_lo_sum = GET_CODE (orig_addr);
26493 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
26494
26495 offset = XEXP (orig_addr, 1);
26496 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
26497 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
26498
26499 if (extend != UNKNOWN)
26500 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
26501
26502 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
26503 UNSPEC_FUSION_GPR);
26504 emit_insn (gen_rtx_SET (target, new_mem));
26505
26506 if (extend == SIGN_EXTEND)
26507 {
26508 int sub_off = ((BYTES_BIG_ENDIAN)
26509 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
26510 : 0);
26511 rtx sign_reg
26512 = simplify_subreg (target_mode, target, extend_mode, sub_off);
26513
26514 emit_insn (gen_rtx_SET (target,
26515 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
26516 }
26517
26518 return;
26519 }
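
/* For illustration only (not built): the fixup above relies on a
   zero-extending load followed by an explicit sign extension of the low
   part being equivalent to a sign-extending load.  In plain C:  */
#if 0
#include <stdint.h>

static int64_t
example_sext_via_zext (const int32_t *p)
{
  uint64_t zext = *(const uint32_t *) p;   /* What the fused LWZ does.  */
  return (int64_t) (int32_t) zext;	   /* The explicit EXTSW step;
					      equals (int64_t) *p.  */
}
#endif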
26520
26521 /* Emit the addis instruction that will be part of a fused instruction
26522 sequence. */
26523
26524 void
26525 emit_fusion_addis (rtx target, rtx addis_value)
26526 {
26527 rtx fuse_ops[10];
26528 const char *addis_str = NULL;
26529
26530 /* Emit the addis instruction. */
26531 fuse_ops[0] = target;
26532 if (satisfies_constraint_L (addis_value))
26533 {
26534 fuse_ops[1] = addis_value;
26535 addis_str = "lis %0,%v1";
26536 }
26537
26538 else if (GET_CODE (addis_value) == PLUS)
26539 {
26540 rtx op0 = XEXP (addis_value, 0);
26541 rtx op1 = XEXP (addis_value, 1);
26542
26543 if (REG_P (op0) && CONST_INT_P (op1)
26544 && satisfies_constraint_L (op1))
26545 {
26546 fuse_ops[1] = op0;
26547 fuse_ops[2] = op1;
26548 addis_str = "addis %0,%1,%v2";
26549 }
26550 }
26551
26552 else if (GET_CODE (addis_value) == HIGH)
26553 {
26554 rtx value = XEXP (addis_value, 0);
26555 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
26556 {
26557 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
26558 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
26559 if (TARGET_ELF)
26560 addis_str = "addis %0,%2,%1@toc@ha";
26561
26562 else if (TARGET_XCOFF)
26563 addis_str = "addis %0,%1@u(%2)";
26564
26565 else
26566 gcc_unreachable ();
26567 }
26568
26569 else if (GET_CODE (value) == PLUS)
26570 {
26571 rtx op0 = XEXP (value, 0);
26572 rtx op1 = XEXP (value, 1);
26573
26574 if (GET_CODE (op0) == UNSPEC
26575 && XINT (op0, 1) == UNSPEC_TOCREL
26576 && CONST_INT_P (op1))
26577 {
26578 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
26579 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
26580 fuse_ops[3] = op1;
26581 if (TARGET_ELF)
26582 addis_str = "addis %0,%2,%1+%3@toc@ha";
26583
26584 else if (TARGET_XCOFF)
26585 addis_str = "addis %0,%1+%3@u(%2)";
26586
26587 else
26588 gcc_unreachable ();
26589 }
26590 }
26591
26592 else if (satisfies_constraint_L (value))
26593 {
26594 fuse_ops[1] = value;
26595 addis_str = "lis %0,%v1";
26596 }
26597
26598 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
26599 {
26600 fuse_ops[1] = value;
26601 addis_str = "lis %0,%1@ha";
26602 }
26603 }
26604
26605 if (!addis_str)
26606 fatal_insn ("Could not generate addis value for fusion", addis_value);
26607
26608 output_asm_insn (addis_str, fuse_ops);
26609 }
26610
26611 /* Emit a D-form load or store instruction that is the second instruction
26612 of a fusion sequence. */
26613
26614 static void
26615 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
26616 {
26617 rtx fuse_ops[10];
26618 char insn_template[80];
26619
26620 fuse_ops[0] = load_reg;
26621 fuse_ops[1] = addis_reg;
26622
26623 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
26624 {
26625 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
26626 fuse_ops[2] = offset;
26627 output_asm_insn (insn_template, fuse_ops);
26628 }
26629
26630 else if (GET_CODE (offset) == UNSPEC
26631 && XINT (offset, 1) == UNSPEC_TOCREL)
26632 {
26633 if (TARGET_ELF)
26634 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
26635
26636 else if (TARGET_XCOFF)
26637 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
26638
26639 else
26640 gcc_unreachable ();
26641
26642 fuse_ops[2] = XVECEXP (offset, 0, 0);
26643 output_asm_insn (insn_template, fuse_ops);
26644 }
26645
26646 else if (GET_CODE (offset) == PLUS
26647 && GET_CODE (XEXP (offset, 0)) == UNSPEC
26648 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
26649 && CONST_INT_P (XEXP (offset, 1)))
26650 {
26651 rtx tocrel_unspec = XEXP (offset, 0);
26652 if (TARGET_ELF)
26653 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
26654
26655 else if (TARGET_XCOFF)
26656 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
26657
26658 else
26659 gcc_unreachable ();
26660
26661 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
26662 fuse_ops[3] = XEXP (offset, 1);
26663 output_asm_insn (insn_template, fuse_ops);
26664 }
26665
26666 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
26667 {
26668 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
26669
26670 fuse_ops[2] = offset;
26671 output_asm_insn (insn_template, fuse_ops);
26672 }
26673
26674 else
26675 fatal_insn ("Unable to generate load/store offset for fusion", offset);
26676
26677 return;
26678 }
26679
26680 /* Given an address, convert it into the addis and load offset parts. Addresses
26681 created during the peephole2 process look like:
26682 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
26683 (unspec [(...)] UNSPEC_TOCREL)) */
26684
26685 static void
26686 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
26687 {
26688 rtx hi, lo;
26689
26690 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
26691 {
26692 hi = XEXP (addr, 0);
26693 lo = XEXP (addr, 1);
26694 }
26695 else
26696 gcc_unreachable ();
26697
26698 *p_hi = hi;
26699 *p_lo = lo;
26700 }
26701
26702 /* Return a string to fuse an addis instruction with a GPR load into the
26703 same register that the addis instruction set. The address that is used
26704 is the logical address that was formed during peephole2:
26705 (lo_sum (high) (low-part))
26706
26707 The code is complicated, so we call output_asm_insn directly, and just
26708 return "". */
26709
26710 const char *
26711 emit_fusion_gpr_load (rtx target, rtx mem)
26712 {
26713 rtx addis_value;
26714 rtx addr;
26715 rtx load_offset;
26716 const char *load_str = NULL;
26717 machine_mode mode;
26718
26719 if (GET_CODE (mem) == ZERO_EXTEND)
26720 mem = XEXP (mem, 0);
26721
26722 gcc_assert (REG_P (target) && MEM_P (mem));
26723
26724 addr = XEXP (mem, 0);
26725 fusion_split_address (addr, &addis_value, &load_offset);
26726
26727 /* Now emit the load instruction to the same register. */
26728 mode = GET_MODE (mem);
26729 switch (mode)
26730 {
26731 case E_QImode:
26732 load_str = "lbz";
26733 break;
26734
26735 case E_HImode:
26736 load_str = "lhz";
26737 break;
26738
26739 case E_SImode:
26740 case E_SFmode:
26741 load_str = "lwz";
26742 break;
26743
26744 case E_DImode:
26745 case E_DFmode:
26746 gcc_assert (TARGET_POWERPC64);
26747 load_str = "ld";
26748 break;
26749
26750 default:
26751 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
26752 }
26753
26754 /* Emit the addis instruction. */
26755 emit_fusion_addis (target, addis_value);
26756
26757 /* Emit the D-form load instruction. */
26758 emit_fusion_load (target, target, load_offset, load_str);
26759
26760 return "";
26761 }
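
/* For illustration only (not built): the addis/load pair above relies on
   the standard high-adjusted/low split of an address: the low part is the
   sign-extended bottom 16 bits, and the high part is adjusted so that
   (ha << 16) + lo reconstructs the original value.  Unsigned arithmetic
   keeps the sketch free of overflow:  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
example_ha_lo_split (uint32_t addr)
{
  int32_t lo = (int16_t) (addr & 0xffff);	  /* The @l operand.  */
  uint32_t ha = (addr - (uint32_t) lo) >> 16;	  /* The @ha operand.  */

  assert ((uint32_t) ((ha << 16) + (uint32_t) lo) == addr);
}
#endif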
26762 \f
26763
26764 #ifdef RS6000_GLIBC_ATOMIC_FENV
26765 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
26766 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
26767 #endif
26768
26769 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
26770
26771 static void
26772 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
26773 {
26774 if (!TARGET_HARD_FLOAT)
26775 {
26776 #ifdef RS6000_GLIBC_ATOMIC_FENV
26777 if (atomic_hold_decl == NULL_TREE)
26778 {
26779 atomic_hold_decl
26780 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26781 get_identifier ("__atomic_feholdexcept"),
26782 build_function_type_list (void_type_node,
26783 double_ptr_type_node,
26784 NULL_TREE));
26785 TREE_PUBLIC (atomic_hold_decl) = 1;
26786 DECL_EXTERNAL (atomic_hold_decl) = 1;
26787 }
26788
26789 if (atomic_clear_decl == NULL_TREE)
26790 {
26791 atomic_clear_decl
26792 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26793 get_identifier ("__atomic_feclearexcept"),
26794 build_function_type_list (void_type_node,
26795 NULL_TREE));
26796 TREE_PUBLIC (atomic_clear_decl) = 1;
26797 DECL_EXTERNAL (atomic_clear_decl) = 1;
26798 }
26799
26800 tree const_double = build_qualified_type (double_type_node,
26801 TYPE_QUAL_CONST);
26802 tree const_double_ptr = build_pointer_type (const_double);
26803 if (atomic_update_decl == NULL_TREE)
26804 {
26805 atomic_update_decl
26806 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26807 get_identifier ("__atomic_feupdateenv"),
26808 build_function_type_list (void_type_node,
26809 const_double_ptr,
26810 NULL_TREE));
26811 TREE_PUBLIC (atomic_update_decl) = 1;
26812 DECL_EXTERNAL (atomic_update_decl) = 1;
26813 }
26814
26815 tree fenv_var = create_tmp_var_raw (double_type_node);
26816 TREE_ADDRESSABLE (fenv_var) = 1;
26817 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
26818 build4 (TARGET_EXPR, double_type_node, fenv_var,
26819 void_node, NULL_TREE, NULL_TREE));
26820
26821 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
26822 *clear = build_call_expr (atomic_clear_decl, 0);
26823 *update = build_call_expr (atomic_update_decl, 1,
26824 fold_convert (const_double_ptr, fenv_addr));
26825 #endif
26826 return;
26827 }
26828
26829 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
26830 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
26831 tree call_mffs = build_call_expr (mffs, 0);
26832
26833 /* Generates the equivalent of feholdexcept (&fenv_var)
26834
26835 *fenv_var = __builtin_mffs ();
26836 double fenv_hold;
26837 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26838 __builtin_mtfsf (0xff, fenv_hold); */
26839
26840 /* Mask to clear everything except for the rounding modes and non-IEEE
26841 arithmetic flag. */
26842 const unsigned HOST_WIDE_INT hold_exception_mask
26843 = HOST_WIDE_INT_C (0xffffffff00000007);
26844
26845 tree fenv_var = create_tmp_var_raw (double_type_node);
26846
26847 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
26848 NULL_TREE, NULL_TREE);
26849
26850 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
26851 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26852 build_int_cst (uint64_type_node,
26853 hold_exception_mask));
26854
26855 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26856 fenv_llu_and);
26857
26858 tree hold_mtfsf = build_call_expr (mtfsf, 2,
26859 build_int_cst (unsigned_type_node, 0xff),
26860 fenv_hold_mtfsf);
26861
26862 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
26863
26864 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
26865
26866 double fenv_clear = __builtin_mffs ();
26867 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
26868 __builtin_mtfsf (0xff, fenv_clear); */
26869
26870 /* Mask that clears the entire lower FPSCR word, i.e. all of the
26871 exception status and enable bits. */
26872 const unsigned HOST_WIDE_INT clear_exception_mask
26873 = HOST_WIDE_INT_C (0xffffffff00000000);
26874
26875 tree fenv_clear = create_tmp_var_raw (double_type_node);
26876
26877 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
26878 call_mffs, NULL_TREE, NULL_TREE);
26879
26880 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
26881 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
26882 fenv_clean_llu,
26883 build_int_cst (uint64_type_node,
26884 clear_exception_mask));
26885
26886 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26887 fenv_clear_llu_and);
26888
26889 tree clear_mtfsf = build_call_expr (mtfsf, 2,
26890 build_int_cst (unsigned_type_node, 0xff),
26891 fenv_clear_mtfsf);
26892
26893 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
26894
26895 /* Generates the equivalent of feupdateenv (&fenv_var):
26896
26897 double old_fenv = __builtin_mffs ();
26898 double fenv_update;
26899 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
26900 | (*(uint64_t*)&fenv_var & 0x1ff80fff);
26901 __builtin_mtfsf (0xff, fenv_update); */
26902
26903 const unsigned HOST_WIDE_INT update_exception_mask
26904 = HOST_WIDE_INT_C (0xffffffff1fffff00);
26905 const unsigned HOST_WIDE_INT new_exception_mask
26906 = HOST_WIDE_INT_C (0x1ff80fff);
26907
26908 tree old_fenv = create_tmp_var_raw (double_type_node);
26909 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
26910 call_mffs, NULL_TREE, NULL_TREE);
26911
26912 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
26913 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
26914 build_int_cst (uint64_type_node,
26915 update_exception_mask));
26916
26917 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26918 build_int_cst (uint64_type_node,
26919 new_exception_mask));
26920
26921 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
26922 old_llu_and, new_llu_and);
26923
26924 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26925 new_llu_mask);
26926
26927 tree update_mtfsf = build_call_expr (mtfsf, 2,
26928 build_int_cst (unsigned_type_node, 0xff),
26929 fenv_update_mtfsf);
26930
26931 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
26932 }
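/* For illustration (hypothetical user code): this hook is used when
   expanding an atomic compound assignment on a floating-point object,
   e.g.

       _Atomic double d;
       void add (double x) { d += x; }

   The expansion retries the addition in a compare-and-swap loop, so
   HOLD saves the FP environment before the first attempt, CLEAR
   discards exceptions raised by a failed attempt, and UPDATE merges
   the exceptions of the successful attempt back into the saved
   environment.  */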
26933
26934 void
26935 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26936 {
26937 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26938
26939 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26940 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26941
26942 /* The element layout of the vmrgew instruction's destination is:
26943 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26944 Set up rtx_tmp0 and rtx_tmp1 so that the elements are in the correct
26945 order after the vmrgew instruction. */
26946 if (BYTES_BIG_ENDIAN)
26947 {
26948 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26949 GEN_INT (0)));
26950 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26951 GEN_INT (3)));
26952 }
26953 else
26954 {
26955 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26956 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26957 }
26958
26959 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26960 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26961
26962 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26963 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26964
26965 if (BYTES_BIG_ENDIAN)
26966 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26967 else
26968 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26969 }
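/* For illustration (assuming the PVIPR semantics of the vec_float2
   built-in, which this helper expands):

       vector double a, b;
       vector float f = vec_float2 (a, b);

   yields f = { (float) a[0], (float) a[1], (float) b[0], (float) b[1] },
   with the xxpermdi/xvcvdpsp/vmrgew sequence above producing that
   element order on both endiannesses.  */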
26970
26971 void
26972 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26973 {
26974 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26975
26976 rtx_tmp0 = gen_reg_rtx (V2DImode);
26977 rtx_tmp1 = gen_reg_rtx (V2DImode);
26978
26979 /* The element layout of the vmrgew instruction's destination is:
26980 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26981 Set up rtx_tmp0 and rtx_tmp1 so that the elements are in the correct
26982 order after the vmrgew instruction. */
26983 if (BYTES_BIG_ENDIAN)
26984 {
26985 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26986 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26987 }
26988 else
26989 {
26990 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26991 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26992 }
26993
26994 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26995 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26996
26997 if (signed_convert)
26998 {
26999 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
27000 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
27001 }
27002 else
27003 {
27004 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
27005 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
27006 }
27007
27008 if (BYTES_BIG_ENDIAN)
27009 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27010 else
27011 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27012 }
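/* For illustration (hypothetical user code): the same packing for
   64-bit integer sources, e.g.

       vector signed long long a, b;
       vector float f = vec_float2 (a, b);

   where SIGNED_CONVERT selects xvcvsxdsp (signed) versus xvcvuxdsp
   (unsigned) for the conversions.  */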
27013
27014 void
27015 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
27016 rtx src2)
27017 {
27018 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27019
27020 rtx_tmp0 = gen_reg_rtx (V2DFmode);
27021 rtx_tmp1 = gen_reg_rtx (V2DFmode);
27022
27023 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
27024 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
27025
27026 rtx_tmp2 = gen_reg_rtx (V4SImode);
27027 rtx_tmp3 = gen_reg_rtx (V4SImode);
27028
27029 if (signed_convert)
27030 {
27031 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
27032 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
27033 }
27034 else
27035 {
27036 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
27037 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
27038 }
27039
27040 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
27041 }
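/* For illustration (assuming the PVIPR semantics of vec_signed2 and
   vec_unsigned2, which this helper expands):

       vector double a, b;
       vector signed int s = vec_signed2 (a, b);

   yields s = { (int) a[0], (int) a[1], (int) b[0], (int) b[1] }.  */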
27042
27043 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
27044
27045 static bool
27046 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
27047 optimization_type opt_type)
27048 {
27049 switch (op)
27050 {
27051 case rsqrt_optab:
27052 return (opt_type == OPTIMIZE_FOR_SPEED
27053 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
27054
27055 default:
27056 return true;
27057 }
27058 }
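/* For illustration (hypothetical user code): with -ffast-math,

       double rsqrt (double x) { return 1.0 / __builtin_sqrt (x); }

   may be expanded through the rsqrt optab (frsqrte plus Newton-Raphson
   refinement), but only in code optimized for speed and only for modes
   where RS6000_RECIP_AUTO_RSQRTE_P holds; otherwise the ordinary
   sqrt-and-divide sequence is kept.  */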
27059
27060 /* Implement TARGET_CONSTANT_ALIGNMENT. */
27061
27062 static HOST_WIDE_INT
27063 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
27064 {
27065 if (TREE_CODE (exp) == STRING_CST
27066 && (STRICT_ALIGNMENT || !optimize_size))
27067 return MAX (align, BITS_PER_WORD);
27068 return align;
27069 }
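/* For illustration: a string constant such as "hello" is given at
   least BITS_PER_WORD alignment (64 bits on a 64-bit target) unless we
   are optimizing for size on a non-strict-alignment target, which
   keeps word-sized accesses to the constant cheap.  */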
27070
27071 /* Implement TARGET_STARTING_FRAME_OFFSET. */
27072
27073 static HOST_WIDE_INT
27074 rs6000_starting_frame_offset (void)
27075 {
27076 if (FRAME_GROWS_DOWNWARD)
27077 return 0;
27078 return RS6000_STARTING_FRAME_OFFSET;
27079 }
27080 \f
27081
27082 /* Create an alias for a mangled name where we have changed the mangling (in
27083 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
27084 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
27085
27086 #if TARGET_ELF && RS6000_WEAK
27087 static void
27088 rs6000_globalize_decl_name (FILE * stream, tree decl)
27089 {
27090 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
27091
27092 targetm.asm_out.globalize_label (stream, name);
27093
27094 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
27095 {
27096 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
27097 const char *old_name;
27098
27099 ieee128_mangling_gcc_8_1 = true;
27100 lang_hooks.set_decl_assembler_name (decl);
27101 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
27102 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
27103 ieee128_mangling_gcc_8_1 = false;
27104
27105 if (strcmp (name, old_name) != 0)
27106 {
27107 fprintf (stream, "\t.weak %s\n", old_name);
27108 fprintf (stream, "\t.set %s,%s\n", old_name, name);
27109 }
27110 }
27111 }
27112 #endif
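/* For illustration (hypothetical C++ function): for

       void foo (__ieee128);

   the current mangled name is _Z3foou9__ieee128 while GCC 8.1 produced
   _Z3fooU10__float128, so the hook above emits

       .weak _Z3fooU10__float128
       .set  _Z3fooU10__float128,_Z3foou9__ieee128

   letting objects built with the old mangling still link.  */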
27113
27114 \f
27115 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
27116 function names from <foo>l to <foo>f128 if the default long double type is
27117 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
27118 include file switches the names on systems that support long double as IEEE
27119 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
27120 In the future, glibc will export names like __ieee128_sinf128 and we can
27121 switch to using those instead of sinf128, which pollutes the user's
27122 namespace.
27123
27124 This also switches the names of the Fortran math functions (Fortran does not
27125 use math.h). However, Fortran needs other compiler and library changes
27126 before the real*16 type can be switched at compile time.
27127
27128 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the name. We
27129 only do this if the default is that long double is IBM extended double and
27130 the user asked for IEEE 128-bit. */
27131
27132 static tree
27133 rs6000_mangle_decl_assembler_name (tree decl, tree id)
27134 {
27135 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
27136 && TREE_CODE (decl) == FUNCTION_DECL
27137 && DECL_IS_UNDECLARED_BUILTIN (decl))
27138 {
27139 size_t len = IDENTIFIER_LENGTH (id);
27140 const char *name = IDENTIFIER_POINTER (id);
27141
27142 if (name[len - 1] == 'l')
27143 {
27144 bool uses_ieee128_p = false;
27145 tree type = TREE_TYPE (decl);
27146 machine_mode ret_mode = TYPE_MODE (type);
27147
27148 /* See if the function returns an IEEE 128-bit floating-point type or
27149 complex type. */
27150 if (ret_mode == TFmode || ret_mode == TCmode)
27151 uses_ieee128_p = true;
27152 else
27153 {
27154 function_args_iterator args_iter;
27155 tree arg;
27156
27157 /* See if the function is passed an IEEE 128-bit floating-point
27158 type or complex type. */
27159 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
27160 {
27161 machine_mode arg_mode = TYPE_MODE (arg);
27162 if (arg_mode == TFmode || arg_mode == TCmode)
27163 {
27164 uses_ieee128_p = true;
27165 break;
27166 }
27167 }
27168 }
27169
27170 /* If we passed or returned an IEEE 128-bit floating point type,
27171 change the name. */
27172 if (uses_ieee128_p)
27173 {
27174 char *name2 = (char *) alloca (len + 4);
27175 memcpy (name2, name, len - 1);
27176 strcpy (name2 + len - 1, "f128");
27177 id = get_identifier (name2);
27178 }
27179 }
27180 }
27181
27182 return id;
27183 }
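/* For illustration (hypothetical user code): compiling

       long double f (long double x) { return __builtin_sinl (x); }

   with -mabi=ieeelongdouble on a target whose default long double is
   IBM extended double gives __builtin_sinl a TFmode return value, so
   its assembler name is rewritten from "sinl" to "sinf128" here.  */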
27184
27185 /* Predict whether the given loop in gimple will be transformed in the RTL
27186 doloop_optimize pass. */
27187
27188 static bool
27189 rs6000_predict_doloop_p (struct loop *loop)
27190 {
27191 gcc_assert (loop);
27192
27193 /* On rs6000, targetm.can_use_doloop_p is actually
27194 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
27195 if (loop->inner != NULL)
27196 {
27197 if (dump_file && (dump_flags & TDF_DETAILS))
27198 fprintf (dump_file, "Predict doloop failure due to"
27199 " loop nesting.\n");
27200 return false;
27201 }
27202
27203 return true;
27204 }
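/* For illustration: in a nest such as

       for (i = 0; i < n; i++)
         for (j = 0; j < m; j++)
           ...

   only the innermost loop is predicted to become a counted (bdnz)
   loop; the outer loop is rejected above because it is not
   innermost.  */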
27205
27206 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
27207
27208 static bool
27209 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
27210 {
27211 gcc_assert (MEM_P (mem));
27212
27213 /* curr_insn_transform()'s handling of subregs cannot handle AltiVec
27214 AND-masked addresses, so don't allow MEMs with those address types to
27215 be substituted as an equivalent expression. See PR93974 for details. */
27216 if (GET_CODE (XEXP (mem, 0)) == AND)
27217 return true;
27218
27219 return false;
27220 }
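/* For illustration: the rejected addresses look like

       (mem (and (reg) (const_int -16)))

   where the AND masks off the low four bits, as used for AltiVec
   memory accesses that ignore the low address bits.  */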
27221
27222 /* Implement TARGET_INVALID_CONVERSION. */
27223
27224 static const char *
27225 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
27226 {
27227 /* Make sure we're working with the canonical types. */
27228 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
27229 fromtype = TYPE_CANONICAL (fromtype);
27230 if (TYPE_CANONICAL (totype) != NULL_TREE)
27231 totype = TYPE_CANONICAL (totype);
27232
27233 machine_mode frommode = TYPE_MODE (fromtype);
27234 machine_mode tomode = TYPE_MODE (totype);
27235
27236 if (frommode != tomode)
27237 {
27238 /* Do not allow conversions to/from XOmode and OOmode types. */
27239 if (frommode == XOmode)
27240 return N_("invalid conversion from type %<__vector_quad%>");
27241 if (tomode == XOmode)
27242 return N_("invalid conversion to type %<__vector_quad%>");
27243 if (frommode == OOmode)
27244 return N_("invalid conversion from type %<__vector_pair%>");
27245 if (tomode == OOmode)
27246 return N_("invalid conversion to type %<__vector_pair%>");
27247 }
27248 else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
27249 {
27250 /* We really care about the modes of the base types. */
27251 frommode = TYPE_MODE (TREE_TYPE (fromtype));
27252 tomode = TYPE_MODE (TREE_TYPE (totype));
27253
27254 /* Do not allow conversions to/from XOmode and OOmode pointer
27255 types, except to/from void pointers. */
27256 if (frommode != tomode
27257 && frommode != VOIDmode
27258 && tomode != VOIDmode)
27259 {
27260 if (frommode == XOmode)
27261 return N_("invalid conversion from type %<* __vector_quad%>");
27262 if (tomode == XOmode)
27263 return N_("invalid conversion to type %<* __vector_quad%>");
27264 if (frommode == OOmode)
27265 return N_("invalid conversion from type %<* __vector_pair%>");
27266 if (tomode == OOmode)
27267 return N_("invalid conversion to type %<* __vector_pair%>");
27268 }
27269 }
27270
27271 /* Conversion allowed. */
27272 return NULL;
27273 }
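/* For illustration (hypothetical user code): given

       __vector_quad *q;
       vector double *v = (vector double *) q;   // rejected above
       void *ok = (void *) q;                    // allowed

   the first conversion is diagnosed with "invalid conversion from type
   * __vector_quad", while conversions to and from void pointers remain
   valid.  */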
27274
27275 long long
27276 rs6000_const_f32_to_i32 (rtx operand)
27277 {
27278 long long value;
27279 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
27280
27281 gcc_assert (GET_MODE (operand) == SFmode);
27282 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
27283 return value;
27284 }
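/* For illustration, two worked values: the SFmode constant 1.0 has the
   single-precision image 0x3f800000, and -2.0 has 0xc0000000.  */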
27285
27286 void
27287 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
27288 {
27289 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
27290 inform (input_location,
27291 "the result for the xxspltidp instruction "
27292 "is undefined for subnormal input values");
27293 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
27294 }
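/* For illustration: a value such as 0x00400000 has a zero exponent
   field and a non-zero mantissa, i.e. a single-precision subnormal, so
   it triggers the note above; XXSPLTIDP's widening of subnormal inputs
   to double is undefined.  */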
27295
27296 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
27297
27298 static bool
27299 rs6000_gen_pic_addr_diff_vec (void)
27300 {
27301 return rs6000_relative_jumptables;
27302 }
27303
27304 void
27305 rs6000_output_addr_vec_elt (FILE *file, int value)
27306 {
27307 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
27308 char buf[100];
27309
27310 fprintf (file, "%s", directive);
27311 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
27312 assemble_name (file, buf);
27313 fprintf (file, "\n");
27314 }
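/* For illustration (assuming ELF conventions, where DOUBLE_INT_ASM_OP
   is typically "\t.quad\t" and internal labels are spelled ".LNN"):
   for VALUE == 42 this prints

       .quad .L42

   on 64-bit targets and ".long .L42" on 32-bit targets, with the label
   string produced by ASM_GENERATE_INTERNAL_LABEL.  */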
27315
27316 struct gcc_target targetm = TARGET_INITIALIZER;
27317
27318 #include "gt-rs6000.h"