rs6000: Fix rs6000_emit_le_vsx_store (PR98549)
[gcc.git] / gcc / config / rs6000 / rs6000.c
1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2021 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "ira.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "print-tree.h"
51 #include "varasm.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "output.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-walk.h"
63 #include "ssa.h"
64 #include "tree-vectorizer.h"
65 #include "tree-ssa-propagate.h"
66 #include "intl.h"
67 #include "tm-constrs.h"
68 #include "target-globals.h"
69 #include "builtins.h"
70 #include "tree-vector-builder.h"
71 #include "context.h"
72 #include "tree-pass.h"
73 #include "except.h"
74 #if TARGET_XCOFF
75 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
76 #endif
77 #include "case-cfn-macros.h"
78 #include "ppc-auxv.h"
79 #include "rs6000-internal.h"
80 #include "opts.h"
81
82 /* This file should be included last. */
83 #include "target-def.h"
84
85 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
86 systems will also set long double to be IEEE 128-bit. AIX and Darwin
87 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
88 those systems will not pick up this default. This needs to be after all
89 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
90 properly defined. */
91 #ifndef TARGET_IEEEQUAD_DEFAULT
92 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
93 #define TARGET_IEEEQUAD_DEFAULT 1
94 #else
95 #define TARGET_IEEEQUAD_DEFAULT 0
96 #endif
97 #endif
98
99 /* Don't enable PC-relative addressing if the target does not support it. */
100 #ifndef PCREL_SUPPORTED_BY_OS
101 #define PCREL_SUPPORTED_BY_OS 0
102 #endif
103
104 /* Support targetm.vectorize.builtin_mask_for_load. */
105 tree altivec_builtin_mask_for_load;
106
107 #ifdef USING_ELFOS_H
108 /* Counter for labels which are to be placed in .fixup. */
109 int fixuplabelno = 0;
110 #endif
111
112 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
113 int dot_symbols;
114
115 /* Specify the machine mode that pointers have. After generation of rtl, the
116 compiler makes no further distinction between pointers and any other objects
117 of this machine mode. */
118 scalar_int_mode rs6000_pmode;
119
120 #if TARGET_ELF
121 /* Note whether IEEE 128-bit floating point was passed or returned, either as
122 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
123 floating point. We changed the default C++ mangling for these types and we
124 may want to generate a weak alias of the old mangling (U10__float128) to the
125 new mangling (u9__ieee128). */
126 bool rs6000_passes_ieee128 = false;
127 #endif
128
 129 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
130 name used in current releases (i.e. u9__ieee128). */
131 static bool ieee128_mangling_gcc_8_1;
132
133 /* Width in bits of a pointer. */
134 unsigned rs6000_pointer_size;
135
136 #ifdef HAVE_AS_GNU_ATTRIBUTE
137 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
138 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
139 # endif
140 /* Flag whether floating point values have been passed/returned.
141 Note that this doesn't say whether fprs are used, since the
142 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
143 should be set for soft-float values passed in gprs and ieee128
144 values passed in vsx registers. */
145 bool rs6000_passes_float = false;
146 bool rs6000_passes_long_double = false;
147 /* Flag whether vector values have been passed/returned. */
148 bool rs6000_passes_vector = false;
149 /* Flag whether small (<= 8 byte) structures have been returned. */
150 bool rs6000_returns_struct = false;
151 #endif
152
153 /* Value is TRUE if register/mode pair is acceptable. */
154 static bool rs6000_hard_regno_mode_ok_p
155 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
156
157 /* Maximum number of registers needed for a given register class and mode. */
158 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
159
160 /* How many registers are needed for a given register and mode. */
161 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
162
163 /* Map register number to register class. */
164 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
165
166 static int dbg_cost_ctrl;
167
168 /* Built in types. */
169 tree rs6000_builtin_types[RS6000_BTI_MAX];
170 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
171
 172 /* Flag to say the TOC is initialized.  */
173 int toc_initialized, need_toc_init;
174 char toc_label_name[10];
175
176 /* Cached value of rs6000_variable_issue. This is cached in
177 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
178 static short cached_can_issue_more;
179
180 static GTY(()) section *read_only_data_section;
181 static GTY(()) section *private_data_section;
182 static GTY(()) section *tls_data_section;
183 static GTY(()) section *tls_private_data_section;
184 static GTY(()) section *read_only_private_data_section;
185 static GTY(()) section *sdata2_section;
186
187 section *toc_section = 0;
188
189 /* Describe the vector unit used for modes. */
190 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
191 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
192
193 /* Register classes for various constraints that are based on the target
194 switches. */
195 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
196
197 /* Describe the alignment of a vector. */
198 int rs6000_vector_align[NUM_MACHINE_MODES];
199
200 /* Map selected modes to types for builtins. */
201 tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
202
203 /* What modes to automatically generate reciprocal divide estimate (fre) and
204 reciprocal sqrt (frsqrte) for. */
205 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
206
 207 /* Masks to determine which reciprocal estimate instructions to generate
208 automatically. */
209 enum rs6000_recip_mask {
210 RECIP_SF_DIV = 0x001, /* Use divide estimate */
211 RECIP_DF_DIV = 0x002,
212 RECIP_V4SF_DIV = 0x004,
213 RECIP_V2DF_DIV = 0x008,
214
215 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
216 RECIP_DF_RSQRT = 0x020,
217 RECIP_V4SF_RSQRT = 0x040,
218 RECIP_V2DF_RSQRT = 0x080,
219
 220 /* Various combinations of flags for -mrecip=xxx. */
221 RECIP_NONE = 0,
222 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
223 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
224 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
225
226 RECIP_HIGH_PRECISION = RECIP_ALL,
227
228 /* On low precision machines like the power5, don't enable double precision
229 reciprocal square root estimate, since it isn't accurate enough. */
230 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
231 };
232
233 /* -mrecip options. */
234 static struct
235 {
236 const char *string; /* option name */
237 unsigned int mask; /* mask bits to set */
238 } recip_options[] = {
239 { "all", RECIP_ALL },
240 { "none", RECIP_NONE },
241 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
242 | RECIP_V2DF_DIV) },
243 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
244 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
245 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
246 | RECIP_V2DF_RSQRT) },
247 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
248 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
249 };
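
#if 0
/* Illustrative sketch only, not part of GCC: matching one token of a
   -mrecip=<opt>[,<opt>...] string against the recip_options table above
   to accumulate a mask.  example_recip_mask_for_token is a hypothetical
   helper; the real parsing is done by the option override handling
   later in this file.  */
static unsigned int
example_recip_mask_for_token (const char *token)
{
  for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
    if (strcmp (token, recip_options[i].string) == 0)
      return recip_options[i].mask;
  return 0;	/* Unknown token; a real parser would diagnose this.  */
}
#endif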
250
251 /* On PowerPC, we have a limited number of target clones that we care about
252 which means we can use an array to hold the options, rather than having more
253 elaborate data structures to identify each possible variation. Order the
254 clones from the default to the highest ISA. */
255 enum {
256 CLONE_DEFAULT = 0, /* default clone. */
257 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
258 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
259 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
260 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
261 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
262 CLONE_MAX
263 };
264
265 /* Map compiler ISA bits into HWCAP names. */
266 struct clone_map {
267 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
268 const char *name; /* name to use in __builtin_cpu_supports. */
269 };
270
271 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
272 { 0, "" }, /* Default options. */
273 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
274 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
275 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
276 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
277 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
278 };
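
#if 0
/* Illustrative sketch only, not part of GCC: selecting the best clone
   at run time by walking rs6000_clone_map from the highest ISA down,
   which is conceptually what a target_clones resolver does with the
   HWCAP names above.  example_pick_clone is a hypothetical helper.  */
static int
example_pick_clone (void)
{
  for (int i = CLONE_MAX - 1; i > CLONE_DEFAULT; i--)
    if (__builtin_cpu_supports (rs6000_clone_map[i].name))
      return i;
  return CLONE_DEFAULT;
}
#endif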
279
280
281 /* Newer LIBCs explicitly export this symbol to declare that they provide
282 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
283 reference to this symbol whenever we expand a CPU builtin, so that
284 we never link against an old LIBC. */
285 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
286
287 /* True if we have expanded a CPU builtin. */
288 bool cpu_builtin_p = false;
289
290 /* Pointer to function (in rs6000-c.c) that can define or undefine target
291 macros that have changed. Languages that don't support the preprocessor
292 don't link in rs6000-c.c, so we can't call it directly. */
293 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
294
 295 /* Simplify register classes into simpler classifications. We assume
296 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
297 check for standard register classes (gpr/floating/altivec/vsx) and
298 floating/vector classes (float/altivec/vsx). */
299
300 enum rs6000_reg_type {
301 NO_REG_TYPE,
302 PSEUDO_REG_TYPE,
303 GPR_REG_TYPE,
304 VSX_REG_TYPE,
305 ALTIVEC_REG_TYPE,
306 FPR_REG_TYPE,
307 SPR_REG_TYPE,
308 CR_REG_TYPE
309 };
310
311 /* Map register class to register type. */
312 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
313
314 /* First/last register type for the 'normal' register types (i.e. general
315 purpose, floating point, altivec, and VSX registers). */
316 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
317
318 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
319
320
 321 /* Register classes we care about in secondary reload or in legitimate
 322 address checks. We only need to worry about GPR, FPR, and Altivec registers
 323 here, along with an ANY field that is the OR of the 3 register classes. */
324
325 enum rs6000_reload_reg_type {
326 RELOAD_REG_GPR, /* General purpose registers. */
327 RELOAD_REG_FPR, /* Traditional floating point regs. */
328 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
329 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
330 N_RELOAD_REG
331 };
332
333 /* For setting up register classes, loop through the 3 register classes mapping
334 into real registers, and skip the ANY class, which is just an OR of the
335 bits. */
336 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
337 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
338
339 /* Map reload register type to a register in the register class. */
340 struct reload_reg_map_type {
341 const char *name; /* Register class name. */
342 int reg; /* Register in the register class. */
343 };
344
345 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
346 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
347 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
348 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
349 { "Any", -1 }, /* RELOAD_REG_ANY. */
350 };
351
 352 /* Mask bits for each register class, indexed per mode. Historically the
 353 compiler has been more restrictive about which types can do PRE_MODIFY
 354 instead of PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
355 typedef unsigned char addr_mask_type;
356
 357 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
358 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
359 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
360 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
361 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
362 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
363 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
364 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
365
 366 /* Reload insn codes and, per register type, the mask of valid addressing modes. */
367 struct rs6000_reg_addr {
368 enum insn_code reload_load; /* INSN to reload for loading. */
369 enum insn_code reload_store; /* INSN to reload for storing. */
370 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
371 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
372 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
373 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
374 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
375 };
376
377 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
378
379 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
380 static inline bool
381 mode_supports_pre_incdec_p (machine_mode mode)
382 {
383 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
384 != 0);
385 }
386
387 /* Helper function to say whether a mode supports PRE_MODIFY. */
388 static inline bool
389 mode_supports_pre_modify_p (machine_mode mode)
390 {
391 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
392 != 0);
393 }
394
395 /* Return true if we have D-form addressing in altivec registers. */
396 static inline bool
397 mode_supports_vmx_dform (machine_mode mode)
398 {
399 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
400 }
401
402 /* Return true if we have D-form addressing in VSX registers. This addressing
403 is more limited than normal d-form addressing in that the offset must be
404 aligned on a 16-byte boundary. */
405 static inline bool
406 mode_supports_dq_form (machine_mode mode)
407 {
408 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
409 != 0);
410 }
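
#if 0
/* Illustrative sketch only, a hypothetical helper not used by GCC: the
   same addr_mask scheme answers other addressing questions, e.g.
   whether a mode supports reg+offset addressing in GPRs.  */
static inline bool
example_mode_supports_gpr_offset_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_GPR] & RELOAD_REG_OFFSET)
	  != 0);
}
#endif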
411
412 /* Given that there exists at least one variable that is set (produced)
413 by OUT_INSN and read (consumed) by IN_INSN, return true iff
414 IN_INSN represents one or more memory store operations and none of
415 the variables set by OUT_INSN is used by IN_INSN as the address of a
416 store operation. If either IN_INSN or OUT_INSN does not represent
417 a "single" RTL SET expression (as loosely defined by the
418 implementation of the single_set function) or a PARALLEL with only
419 SETs, CLOBBERs, and USEs inside, this function returns false.
420
421 This rs6000-specific version of store_data_bypass_p checks for
422 certain conditions that result in assertion failures (and internal
423 compiler errors) in the generic store_data_bypass_p function and
424 returns false rather than calling store_data_bypass_p if one of the
425 problematic conditions is detected. */
426
427 int
428 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
429 {
430 rtx out_set, in_set;
431 rtx out_pat, in_pat;
432 rtx out_exp, in_exp;
433 int i, j;
434
435 in_set = single_set (in_insn);
436 if (in_set)
437 {
438 if (MEM_P (SET_DEST (in_set)))
439 {
440 out_set = single_set (out_insn);
441 if (!out_set)
442 {
443 out_pat = PATTERN (out_insn);
444 if (GET_CODE (out_pat) == PARALLEL)
445 {
446 for (i = 0; i < XVECLEN (out_pat, 0); i++)
447 {
448 out_exp = XVECEXP (out_pat, 0, i);
449 if ((GET_CODE (out_exp) == CLOBBER)
450 || (GET_CODE (out_exp) == USE))
451 continue;
452 else if (GET_CODE (out_exp) != SET)
453 return false;
454 }
455 }
456 }
457 }
458 }
459 else
460 {
461 in_pat = PATTERN (in_insn);
462 if (GET_CODE (in_pat) != PARALLEL)
463 return false;
464
465 for (i = 0; i < XVECLEN (in_pat, 0); i++)
466 {
467 in_exp = XVECEXP (in_pat, 0, i);
468 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
469 continue;
470 else if (GET_CODE (in_exp) != SET)
471 return false;
472
473 if (MEM_P (SET_DEST (in_exp)))
474 {
475 out_set = single_set (out_insn);
476 if (!out_set)
477 {
478 out_pat = PATTERN (out_insn);
479 if (GET_CODE (out_pat) != PARALLEL)
480 return false;
481 for (j = 0; j < XVECLEN (out_pat, 0); j++)
482 {
483 out_exp = XVECEXP (out_pat, 0, j);
484 if ((GET_CODE (out_exp) == CLOBBER)
485 || (GET_CODE (out_exp) == USE))
486 continue;
487 else if (GET_CODE (out_exp) != SET)
488 return false;
489 }
490 }
491 }
492 }
493 }
494 return store_data_bypass_p (out_insn, in_insn);
495 }
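
/* Note: rs6000_store_data_bypass_p is referenced from define_bypass
   conditions in the scheduling descriptions (the power*.md files),
   where it guards store-data bypass latencies.  */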
496
497 \f
498 /* Processor costs (relative to an add) */
499
500 const struct processor_costs *rs6000_cost;
501
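/* All entries below are expressed via COSTS_N_INSNS, which scales an
   instruction count into the middle-end's relative cost units, where
   COSTS_N_INSNS (1) is the cost of one integer add.  */
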
 502 /* Instruction size costs on 32-bit processors. */
503 static const
504 struct processor_costs size32_cost = {
505 COSTS_N_INSNS (1), /* mulsi */
506 COSTS_N_INSNS (1), /* mulsi_const */
507 COSTS_N_INSNS (1), /* mulsi_const9 */
508 COSTS_N_INSNS (1), /* muldi */
509 COSTS_N_INSNS (1), /* divsi */
510 COSTS_N_INSNS (1), /* divdi */
511 COSTS_N_INSNS (1), /* fp */
512 COSTS_N_INSNS (1), /* dmul */
513 COSTS_N_INSNS (1), /* sdiv */
514 COSTS_N_INSNS (1), /* ddiv */
515 32, /* cache line size */
516 0, /* l1 cache */
517 0, /* l2 cache */
518 0, /* streams */
519 0, /* SF->DF convert */
520 };
521
 522 /* Instruction size costs on 64-bit processors. */
523 static const
524 struct processor_costs size64_cost = {
525 COSTS_N_INSNS (1), /* mulsi */
526 COSTS_N_INSNS (1), /* mulsi_const */
527 COSTS_N_INSNS (1), /* mulsi_const9 */
528 COSTS_N_INSNS (1), /* muldi */
529 COSTS_N_INSNS (1), /* divsi */
530 COSTS_N_INSNS (1), /* divdi */
531 COSTS_N_INSNS (1), /* fp */
532 COSTS_N_INSNS (1), /* dmul */
533 COSTS_N_INSNS (1), /* sdiv */
534 COSTS_N_INSNS (1), /* ddiv */
535 128, /* cache line size */
536 0, /* l1 cache */
537 0, /* l2 cache */
538 0, /* streams */
539 0, /* SF->DF convert */
540 };
541
542 /* Instruction costs on RS64A processors. */
543 static const
544 struct processor_costs rs64a_cost = {
545 COSTS_N_INSNS (20), /* mulsi */
546 COSTS_N_INSNS (12), /* mulsi_const */
547 COSTS_N_INSNS (8), /* mulsi_const9 */
548 COSTS_N_INSNS (34), /* muldi */
549 COSTS_N_INSNS (65), /* divsi */
550 COSTS_N_INSNS (67), /* divdi */
551 COSTS_N_INSNS (4), /* fp */
552 COSTS_N_INSNS (4), /* dmul */
553 COSTS_N_INSNS (31), /* sdiv */
554 COSTS_N_INSNS (31), /* ddiv */
555 128, /* cache line size */
556 128, /* l1 cache */
557 2048, /* l2 cache */
558 1, /* streams */
559 0, /* SF->DF convert */
560 };
561
562 /* Instruction costs on MPCCORE processors. */
563 static const
564 struct processor_costs mpccore_cost = {
565 COSTS_N_INSNS (2), /* mulsi */
566 COSTS_N_INSNS (2), /* mulsi_const */
567 COSTS_N_INSNS (2), /* mulsi_const9 */
568 COSTS_N_INSNS (2), /* muldi */
569 COSTS_N_INSNS (6), /* divsi */
570 COSTS_N_INSNS (6), /* divdi */
571 COSTS_N_INSNS (4), /* fp */
572 COSTS_N_INSNS (5), /* dmul */
573 COSTS_N_INSNS (10), /* sdiv */
574 COSTS_N_INSNS (17), /* ddiv */
575 32, /* cache line size */
576 4, /* l1 cache */
577 16, /* l2 cache */
578 1, /* streams */
579 0, /* SF->DF convert */
580 };
581
582 /* Instruction costs on PPC403 processors. */
583 static const
584 struct processor_costs ppc403_cost = {
585 COSTS_N_INSNS (4), /* mulsi */
586 COSTS_N_INSNS (4), /* mulsi_const */
587 COSTS_N_INSNS (4), /* mulsi_const9 */
588 COSTS_N_INSNS (4), /* muldi */
589 COSTS_N_INSNS (33), /* divsi */
590 COSTS_N_INSNS (33), /* divdi */
591 COSTS_N_INSNS (11), /* fp */
592 COSTS_N_INSNS (11), /* dmul */
593 COSTS_N_INSNS (11), /* sdiv */
594 COSTS_N_INSNS (11), /* ddiv */
595 32, /* cache line size */
596 4, /* l1 cache */
597 16, /* l2 cache */
598 1, /* streams */
599 0, /* SF->DF convert */
600 };
601
602 /* Instruction costs on PPC405 processors. */
603 static const
604 struct processor_costs ppc405_cost = {
605 COSTS_N_INSNS (5), /* mulsi */
606 COSTS_N_INSNS (4), /* mulsi_const */
607 COSTS_N_INSNS (3), /* mulsi_const9 */
608 COSTS_N_INSNS (5), /* muldi */
609 COSTS_N_INSNS (35), /* divsi */
610 COSTS_N_INSNS (35), /* divdi */
611 COSTS_N_INSNS (11), /* fp */
612 COSTS_N_INSNS (11), /* dmul */
613 COSTS_N_INSNS (11), /* sdiv */
614 COSTS_N_INSNS (11), /* ddiv */
615 32, /* cache line size */
616 16, /* l1 cache */
617 128, /* l2 cache */
618 1, /* streams */
619 0, /* SF->DF convert */
620 };
621
622 /* Instruction costs on PPC440 processors. */
623 static const
624 struct processor_costs ppc440_cost = {
625 COSTS_N_INSNS (3), /* mulsi */
626 COSTS_N_INSNS (2), /* mulsi_const */
627 COSTS_N_INSNS (2), /* mulsi_const9 */
628 COSTS_N_INSNS (3), /* muldi */
629 COSTS_N_INSNS (34), /* divsi */
630 COSTS_N_INSNS (34), /* divdi */
631 COSTS_N_INSNS (5), /* fp */
632 COSTS_N_INSNS (5), /* dmul */
633 COSTS_N_INSNS (19), /* sdiv */
634 COSTS_N_INSNS (33), /* ddiv */
635 32, /* cache line size */
636 32, /* l1 cache */
637 256, /* l2 cache */
638 1, /* streams */
639 0, /* SF->DF convert */
640 };
641
642 /* Instruction costs on PPC476 processors. */
643 static const
644 struct processor_costs ppc476_cost = {
645 COSTS_N_INSNS (4), /* mulsi */
646 COSTS_N_INSNS (4), /* mulsi_const */
647 COSTS_N_INSNS (4), /* mulsi_const9 */
648 COSTS_N_INSNS (4), /* muldi */
649 COSTS_N_INSNS (11), /* divsi */
650 COSTS_N_INSNS (11), /* divdi */
651 COSTS_N_INSNS (6), /* fp */
652 COSTS_N_INSNS (6), /* dmul */
653 COSTS_N_INSNS (19), /* sdiv */
654 COSTS_N_INSNS (33), /* ddiv */
655 32, /* l1 cache line size */
656 32, /* l1 cache */
657 512, /* l2 cache */
658 1, /* streams */
659 0, /* SF->DF convert */
660 };
661
662 /* Instruction costs on PPC601 processors. */
663 static const
664 struct processor_costs ppc601_cost = {
665 COSTS_N_INSNS (5), /* mulsi */
666 COSTS_N_INSNS (5), /* mulsi_const */
667 COSTS_N_INSNS (5), /* mulsi_const9 */
668 COSTS_N_INSNS (5), /* muldi */
669 COSTS_N_INSNS (36), /* divsi */
670 COSTS_N_INSNS (36), /* divdi */
671 COSTS_N_INSNS (4), /* fp */
672 COSTS_N_INSNS (5), /* dmul */
673 COSTS_N_INSNS (17), /* sdiv */
674 COSTS_N_INSNS (31), /* ddiv */
675 32, /* cache line size */
676 32, /* l1 cache */
677 256, /* l2 cache */
678 1, /* streams */
679 0, /* SF->DF convert */
680 };
681
682 /* Instruction costs on PPC603 processors. */
683 static const
684 struct processor_costs ppc603_cost = {
685 COSTS_N_INSNS (5), /* mulsi */
686 COSTS_N_INSNS (3), /* mulsi_const */
687 COSTS_N_INSNS (2), /* mulsi_const9 */
688 COSTS_N_INSNS (5), /* muldi */
689 COSTS_N_INSNS (37), /* divsi */
690 COSTS_N_INSNS (37), /* divdi */
691 COSTS_N_INSNS (3), /* fp */
692 COSTS_N_INSNS (4), /* dmul */
693 COSTS_N_INSNS (18), /* sdiv */
694 COSTS_N_INSNS (33), /* ddiv */
695 32, /* cache line size */
696 8, /* l1 cache */
697 64, /* l2 cache */
698 1, /* streams */
699 0, /* SF->DF convert */
700 };
701
702 /* Instruction costs on PPC604 processors. */
703 static const
704 struct processor_costs ppc604_cost = {
705 COSTS_N_INSNS (4), /* mulsi */
706 COSTS_N_INSNS (4), /* mulsi_const */
707 COSTS_N_INSNS (4), /* mulsi_const9 */
708 COSTS_N_INSNS (4), /* muldi */
709 COSTS_N_INSNS (20), /* divsi */
710 COSTS_N_INSNS (20), /* divdi */
711 COSTS_N_INSNS (3), /* fp */
712 COSTS_N_INSNS (3), /* dmul */
713 COSTS_N_INSNS (18), /* sdiv */
714 COSTS_N_INSNS (32), /* ddiv */
715 32, /* cache line size */
716 16, /* l1 cache */
717 512, /* l2 cache */
718 1, /* streams */
719 0, /* SF->DF convert */
720 };
721
722 /* Instruction costs on PPC604e processors. */
723 static const
724 struct processor_costs ppc604e_cost = {
725 COSTS_N_INSNS (2), /* mulsi */
726 COSTS_N_INSNS (2), /* mulsi_const */
727 COSTS_N_INSNS (2), /* mulsi_const9 */
728 COSTS_N_INSNS (2), /* muldi */
729 COSTS_N_INSNS (20), /* divsi */
730 COSTS_N_INSNS (20), /* divdi */
731 COSTS_N_INSNS (3), /* fp */
732 COSTS_N_INSNS (3), /* dmul */
733 COSTS_N_INSNS (18), /* sdiv */
734 COSTS_N_INSNS (32), /* ddiv */
735 32, /* cache line size */
736 32, /* l1 cache */
737 1024, /* l2 cache */
738 1, /* streams */
739 0, /* SF->DF convert */
740 };
741
742 /* Instruction costs on PPC620 processors. */
743 static const
744 struct processor_costs ppc620_cost = {
745 COSTS_N_INSNS (5), /* mulsi */
746 COSTS_N_INSNS (4), /* mulsi_const */
747 COSTS_N_INSNS (3), /* mulsi_const9 */
748 COSTS_N_INSNS (7), /* muldi */
749 COSTS_N_INSNS (21), /* divsi */
750 COSTS_N_INSNS (37), /* divdi */
751 COSTS_N_INSNS (3), /* fp */
752 COSTS_N_INSNS (3), /* dmul */
753 COSTS_N_INSNS (18), /* sdiv */
754 COSTS_N_INSNS (32), /* ddiv */
755 128, /* cache line size */
756 32, /* l1 cache */
757 1024, /* l2 cache */
758 1, /* streams */
759 0, /* SF->DF convert */
760 };
761
762 /* Instruction costs on PPC630 processors. */
763 static const
764 struct processor_costs ppc630_cost = {
765 COSTS_N_INSNS (5), /* mulsi */
766 COSTS_N_INSNS (4), /* mulsi_const */
767 COSTS_N_INSNS (3), /* mulsi_const9 */
768 COSTS_N_INSNS (7), /* muldi */
769 COSTS_N_INSNS (21), /* divsi */
770 COSTS_N_INSNS (37), /* divdi */
771 COSTS_N_INSNS (3), /* fp */
772 COSTS_N_INSNS (3), /* dmul */
773 COSTS_N_INSNS (17), /* sdiv */
774 COSTS_N_INSNS (21), /* ddiv */
775 128, /* cache line size */
776 64, /* l1 cache */
777 1024, /* l2 cache */
778 1, /* streams */
779 0, /* SF->DF convert */
780 };
781
782 /* Instruction costs on Cell processor. */
783 /* COSTS_N_INSNS (1) ~ one add. */
784 static const
785 struct processor_costs ppccell_cost = {
786 COSTS_N_INSNS (9/2)+2, /* mulsi */
787 COSTS_N_INSNS (6/2), /* mulsi_const */
788 COSTS_N_INSNS (6/2), /* mulsi_const9 */
789 COSTS_N_INSNS (15/2)+2, /* muldi */
790 COSTS_N_INSNS (38/2), /* divsi */
791 COSTS_N_INSNS (70/2), /* divdi */
792 COSTS_N_INSNS (10/2), /* fp */
793 COSTS_N_INSNS (10/2), /* dmul */
794 COSTS_N_INSNS (74/2), /* sdiv */
795 COSTS_N_INSNS (74/2), /* ddiv */
796 128, /* cache line size */
797 32, /* l1 cache */
798 512, /* l2 cache */
799 6, /* streams */
800 0, /* SF->DF convert */
801 };
802
803 /* Instruction costs on PPC750 and PPC7400 processors. */
804 static const
805 struct processor_costs ppc750_cost = {
806 COSTS_N_INSNS (5), /* mulsi */
807 COSTS_N_INSNS (3), /* mulsi_const */
808 COSTS_N_INSNS (2), /* mulsi_const9 */
809 COSTS_N_INSNS (5), /* muldi */
810 COSTS_N_INSNS (17), /* divsi */
811 COSTS_N_INSNS (17), /* divdi */
812 COSTS_N_INSNS (3), /* fp */
813 COSTS_N_INSNS (3), /* dmul */
814 COSTS_N_INSNS (17), /* sdiv */
815 COSTS_N_INSNS (31), /* ddiv */
816 32, /* cache line size */
817 32, /* l1 cache */
818 512, /* l2 cache */
819 1, /* streams */
820 0, /* SF->DF convert */
821 };
822
823 /* Instruction costs on PPC7450 processors. */
824 static const
825 struct processor_costs ppc7450_cost = {
826 COSTS_N_INSNS (4), /* mulsi */
827 COSTS_N_INSNS (3), /* mulsi_const */
828 COSTS_N_INSNS (3), /* mulsi_const9 */
829 COSTS_N_INSNS (4), /* muldi */
830 COSTS_N_INSNS (23), /* divsi */
831 COSTS_N_INSNS (23), /* divdi */
832 COSTS_N_INSNS (5), /* fp */
833 COSTS_N_INSNS (5), /* dmul */
834 COSTS_N_INSNS (21), /* sdiv */
835 COSTS_N_INSNS (35), /* ddiv */
836 32, /* cache line size */
837 32, /* l1 cache */
838 1024, /* l2 cache */
839 1, /* streams */
840 0, /* SF->DF convert */
841 };
842
843 /* Instruction costs on PPC8540 processors. */
844 static const
845 struct processor_costs ppc8540_cost = {
846 COSTS_N_INSNS (4), /* mulsi */
847 COSTS_N_INSNS (4), /* mulsi_const */
848 COSTS_N_INSNS (4), /* mulsi_const9 */
849 COSTS_N_INSNS (4), /* muldi */
850 COSTS_N_INSNS (19), /* divsi */
851 COSTS_N_INSNS (19), /* divdi */
852 COSTS_N_INSNS (4), /* fp */
853 COSTS_N_INSNS (4), /* dmul */
854 COSTS_N_INSNS (29), /* sdiv */
855 COSTS_N_INSNS (29), /* ddiv */
856 32, /* cache line size */
857 32, /* l1 cache */
858 256, /* l2 cache */
 859 1, /* prefetch streams */
860 0, /* SF->DF convert */
861 };
862
863 /* Instruction costs on E300C2 and E300C3 cores. */
864 static const
865 struct processor_costs ppce300c2c3_cost = {
866 COSTS_N_INSNS (4), /* mulsi */
867 COSTS_N_INSNS (4), /* mulsi_const */
868 COSTS_N_INSNS (4), /* mulsi_const9 */
869 COSTS_N_INSNS (4), /* muldi */
870 COSTS_N_INSNS (19), /* divsi */
871 COSTS_N_INSNS (19), /* divdi */
872 COSTS_N_INSNS (3), /* fp */
873 COSTS_N_INSNS (4), /* dmul */
874 COSTS_N_INSNS (18), /* sdiv */
875 COSTS_N_INSNS (33), /* ddiv */
 876 32, /* cache line size */
877 16, /* l1 cache */
878 16, /* l2 cache */
 879 1, /* prefetch streams */
880 0, /* SF->DF convert */
881 };
882
883 /* Instruction costs on PPCE500MC processors. */
884 static const
885 struct processor_costs ppce500mc_cost = {
886 COSTS_N_INSNS (4), /* mulsi */
887 COSTS_N_INSNS (4), /* mulsi_const */
888 COSTS_N_INSNS (4), /* mulsi_const9 */
889 COSTS_N_INSNS (4), /* muldi */
890 COSTS_N_INSNS (14), /* divsi */
891 COSTS_N_INSNS (14), /* divdi */
892 COSTS_N_INSNS (8), /* fp */
893 COSTS_N_INSNS (10), /* dmul */
894 COSTS_N_INSNS (36), /* sdiv */
895 COSTS_N_INSNS (66), /* ddiv */
896 64, /* cache line size */
897 32, /* l1 cache */
898 128, /* l2 cache */
 899 1, /* prefetch streams */
900 0, /* SF->DF convert */
901 };
902
903 /* Instruction costs on PPCE500MC64 processors. */
904 static const
905 struct processor_costs ppce500mc64_cost = {
906 COSTS_N_INSNS (4), /* mulsi */
907 COSTS_N_INSNS (4), /* mulsi_const */
908 COSTS_N_INSNS (4), /* mulsi_const9 */
909 COSTS_N_INSNS (4), /* muldi */
910 COSTS_N_INSNS (14), /* divsi */
911 COSTS_N_INSNS (14), /* divdi */
912 COSTS_N_INSNS (4), /* fp */
913 COSTS_N_INSNS (10), /* dmul */
914 COSTS_N_INSNS (36), /* sdiv */
915 COSTS_N_INSNS (66), /* ddiv */
916 64, /* cache line size */
917 32, /* l1 cache */
918 128, /* l2 cache */
 919 1, /* prefetch streams */
920 0, /* SF->DF convert */
921 };
922
923 /* Instruction costs on PPCE5500 processors. */
924 static const
925 struct processor_costs ppce5500_cost = {
926 COSTS_N_INSNS (5), /* mulsi */
927 COSTS_N_INSNS (5), /* mulsi_const */
928 COSTS_N_INSNS (4), /* mulsi_const9 */
929 COSTS_N_INSNS (5), /* muldi */
930 COSTS_N_INSNS (14), /* divsi */
931 COSTS_N_INSNS (14), /* divdi */
932 COSTS_N_INSNS (7), /* fp */
933 COSTS_N_INSNS (10), /* dmul */
934 COSTS_N_INSNS (36), /* sdiv */
935 COSTS_N_INSNS (66), /* ddiv */
936 64, /* cache line size */
937 32, /* l1 cache */
938 128, /* l2 cache */
 939 1, /* prefetch streams */
940 0, /* SF->DF convert */
941 };
942
943 /* Instruction costs on PPCE6500 processors. */
944 static const
945 struct processor_costs ppce6500_cost = {
946 COSTS_N_INSNS (5), /* mulsi */
947 COSTS_N_INSNS (5), /* mulsi_const */
948 COSTS_N_INSNS (4), /* mulsi_const9 */
949 COSTS_N_INSNS (5), /* muldi */
950 COSTS_N_INSNS (14), /* divsi */
951 COSTS_N_INSNS (14), /* divdi */
952 COSTS_N_INSNS (7), /* fp */
953 COSTS_N_INSNS (10), /* dmul */
954 COSTS_N_INSNS (36), /* sdiv */
955 COSTS_N_INSNS (66), /* ddiv */
956 64, /* cache line size */
957 32, /* l1 cache */
958 128, /* l2 cache */
 959 1, /* prefetch streams */
960 0, /* SF->DF convert */
961 };
962
963 /* Instruction costs on AppliedMicro Titan processors. */
964 static const
965 struct processor_costs titan_cost = {
966 COSTS_N_INSNS (5), /* mulsi */
967 COSTS_N_INSNS (5), /* mulsi_const */
968 COSTS_N_INSNS (5), /* mulsi_const9 */
969 COSTS_N_INSNS (5), /* muldi */
970 COSTS_N_INSNS (18), /* divsi */
971 COSTS_N_INSNS (18), /* divdi */
972 COSTS_N_INSNS (10), /* fp */
973 COSTS_N_INSNS (10), /* dmul */
974 COSTS_N_INSNS (46), /* sdiv */
975 COSTS_N_INSNS (72), /* ddiv */
976 32, /* cache line size */
977 32, /* l1 cache */
978 512, /* l2 cache */
 979 1, /* prefetch streams */
980 0, /* SF->DF convert */
981 };
982
983 /* Instruction costs on POWER4 and POWER5 processors. */
984 static const
985 struct processor_costs power4_cost = {
986 COSTS_N_INSNS (3), /* mulsi */
987 COSTS_N_INSNS (2), /* mulsi_const */
988 COSTS_N_INSNS (2), /* mulsi_const9 */
989 COSTS_N_INSNS (4), /* muldi */
990 COSTS_N_INSNS (18), /* divsi */
991 COSTS_N_INSNS (34), /* divdi */
992 COSTS_N_INSNS (3), /* fp */
993 COSTS_N_INSNS (3), /* dmul */
994 COSTS_N_INSNS (17), /* sdiv */
995 COSTS_N_INSNS (17), /* ddiv */
996 128, /* cache line size */
997 32, /* l1 cache */
998 1024, /* l2 cache */
 999 8, /* prefetch streams */
1000 0, /* SF->DF convert */
1001 };
1002
1003 /* Instruction costs on POWER6 processors. */
1004 static const
1005 struct processor_costs power6_cost = {
1006 COSTS_N_INSNS (8), /* mulsi */
1007 COSTS_N_INSNS (8), /* mulsi_const */
1008 COSTS_N_INSNS (8), /* mulsi_const9 */
1009 COSTS_N_INSNS (8), /* muldi */
1010 COSTS_N_INSNS (22), /* divsi */
1011 COSTS_N_INSNS (28), /* divdi */
1012 COSTS_N_INSNS (3), /* fp */
1013 COSTS_N_INSNS (3), /* dmul */
1014 COSTS_N_INSNS (13), /* sdiv */
1015 COSTS_N_INSNS (16), /* ddiv */
1016 128, /* cache line size */
1017 64, /* l1 cache */
1018 2048, /* l2 cache */
1019 16, /* prefetch streams */
1020 0, /* SF->DF convert */
1021 };
1022
1023 /* Instruction costs on POWER7 processors. */
1024 static const
1025 struct processor_costs power7_cost = {
1026 COSTS_N_INSNS (2), /* mulsi */
1027 COSTS_N_INSNS (2), /* mulsi_const */
1028 COSTS_N_INSNS (2), /* mulsi_const9 */
1029 COSTS_N_INSNS (2), /* muldi */
1030 COSTS_N_INSNS (18), /* divsi */
1031 COSTS_N_INSNS (34), /* divdi */
1032 COSTS_N_INSNS (3), /* fp */
1033 COSTS_N_INSNS (3), /* dmul */
1034 COSTS_N_INSNS (13), /* sdiv */
1035 COSTS_N_INSNS (16), /* ddiv */
1036 128, /* cache line size */
1037 32, /* l1 cache */
1038 256, /* l2 cache */
1039 12, /* prefetch streams */
1040 COSTS_N_INSNS (3), /* SF->DF convert */
1041 };
1042
1043 /* Instruction costs on POWER8 processors. */
1044 static const
1045 struct processor_costs power8_cost = {
1046 COSTS_N_INSNS (3), /* mulsi */
1047 COSTS_N_INSNS (3), /* mulsi_const */
1048 COSTS_N_INSNS (3), /* mulsi_const9 */
1049 COSTS_N_INSNS (3), /* muldi */
1050 COSTS_N_INSNS (19), /* divsi */
1051 COSTS_N_INSNS (35), /* divdi */
1052 COSTS_N_INSNS (3), /* fp */
1053 COSTS_N_INSNS (3), /* dmul */
1054 COSTS_N_INSNS (14), /* sdiv */
1055 COSTS_N_INSNS (17), /* ddiv */
1056 128, /* cache line size */
1057 32, /* l1 cache */
1058 256, /* l2 cache */
1059 12, /* prefetch streams */
1060 COSTS_N_INSNS (3), /* SF->DF convert */
1061 };
1062
1063 /* Instruction costs on POWER9 processors. */
1064 static const
1065 struct processor_costs power9_cost = {
1066 COSTS_N_INSNS (3), /* mulsi */
1067 COSTS_N_INSNS (3), /* mulsi_const */
1068 COSTS_N_INSNS (3), /* mulsi_const9 */
1069 COSTS_N_INSNS (3), /* muldi */
1070 COSTS_N_INSNS (8), /* divsi */
1071 COSTS_N_INSNS (12), /* divdi */
1072 COSTS_N_INSNS (3), /* fp */
1073 COSTS_N_INSNS (3), /* dmul */
1074 COSTS_N_INSNS (13), /* sdiv */
1075 COSTS_N_INSNS (18), /* ddiv */
1076 128, /* cache line size */
1077 32, /* l1 cache */
1078 512, /* l2 cache */
1079 8, /* prefetch streams */
1080 COSTS_N_INSNS (3), /* SF->DF convert */
1081 };
1082
1083 /* Instruction costs on POWER A2 processors. */
1084 static const
1085 struct processor_costs ppca2_cost = {
1086 COSTS_N_INSNS (16), /* mulsi */
1087 COSTS_N_INSNS (16), /* mulsi_const */
1088 COSTS_N_INSNS (16), /* mulsi_const9 */
1089 COSTS_N_INSNS (16), /* muldi */
1090 COSTS_N_INSNS (22), /* divsi */
1091 COSTS_N_INSNS (28), /* divdi */
1092 COSTS_N_INSNS (3), /* fp */
1093 COSTS_N_INSNS (3), /* dmul */
1094 COSTS_N_INSNS (59), /* sdiv */
1095 COSTS_N_INSNS (72), /* ddiv */
 1096 64, /* cache line size */
1097 16, /* l1 cache */
1098 2048, /* l2 cache */
1099 16, /* prefetch streams */
1100 0, /* SF->DF convert */
1101 };
1102
1103 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1104 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1105
1106 \f
1107 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1108 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1109 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1110 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1111 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1112 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1113 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1114 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1115 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1116 bool);
1117 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1118 unsigned int);
1119 static bool is_microcoded_insn (rtx_insn *);
1120 static bool is_nonpipeline_insn (rtx_insn *);
1121 static bool is_cracked_insn (rtx_insn *);
1122 static bool is_load_insn (rtx, rtx *);
1123 static bool is_store_insn (rtx, rtx *);
1124 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1125 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1126 static bool insn_must_be_first_in_group (rtx_insn *);
1127 static bool insn_must_be_last_in_group (rtx_insn *);
1128 int easy_vector_constant (rtx, machine_mode);
1129 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1130 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1131 #if TARGET_MACHO
1132 static tree get_prev_label (tree);
1133 #endif
1134 static bool rs6000_mode_dependent_address (const_rtx);
1135 static bool rs6000_debug_mode_dependent_address (const_rtx);
1136 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1137 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1138 machine_mode, rtx);
1139 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1140 machine_mode,
1141 rtx);
1142 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1143 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1144 enum reg_class);
1145 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1146 reg_class_t,
1147 reg_class_t);
1148 static bool rs6000_debug_can_change_mode_class (machine_mode,
1149 machine_mode,
1150 reg_class_t);
1151
1152 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1153 = rs6000_mode_dependent_address;
1154
1155 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1156 machine_mode, rtx)
1157 = rs6000_secondary_reload_class;
1158
1159 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1160 = rs6000_preferred_reload_class;
1161
1162 const int INSN_NOT_AVAILABLE = -1;
1163
1164 static void rs6000_print_isa_options (FILE *, int, const char *,
1165 HOST_WIDE_INT);
1166 static void rs6000_print_builtin_options (FILE *, int, const char *,
1167 HOST_WIDE_INT);
1168 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1169
1170 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1171 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1172 enum rs6000_reg_type,
1173 machine_mode,
1174 secondary_reload_info *,
1175 bool);
1176 static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
1177 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1178
1179 /* Hash table stuff for keeping track of TOC entries. */
1180
1181 struct GTY((for_user)) toc_hash_struct
1182 {
1183 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1184 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1185 rtx key;
1186 machine_mode key_mode;
1187 int labelno;
1188 };
1189
1190 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1191 {
1192 static hashval_t hash (toc_hash_struct *);
1193 static bool equal (toc_hash_struct *, toc_hash_struct *);
1194 };
1195
1196 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1197
1198
1199 \f
1200 /* Default register names. */
1201 char rs6000_reg_names[][8] =
1202 {
1203 /* GPRs */
1204 "0", "1", "2", "3", "4", "5", "6", "7",
1205 "8", "9", "10", "11", "12", "13", "14", "15",
1206 "16", "17", "18", "19", "20", "21", "22", "23",
1207 "24", "25", "26", "27", "28", "29", "30", "31",
1208 /* FPRs */
1209 "0", "1", "2", "3", "4", "5", "6", "7",
1210 "8", "9", "10", "11", "12", "13", "14", "15",
1211 "16", "17", "18", "19", "20", "21", "22", "23",
1212 "24", "25", "26", "27", "28", "29", "30", "31",
1213 /* VRs */
1214 "0", "1", "2", "3", "4", "5", "6", "7",
1215 "8", "9", "10", "11", "12", "13", "14", "15",
1216 "16", "17", "18", "19", "20", "21", "22", "23",
1217 "24", "25", "26", "27", "28", "29", "30", "31",
1218 /* lr ctr ca ap */
1219 "lr", "ctr", "ca", "ap",
1220 /* cr0..cr7 */
1221 "0", "1", "2", "3", "4", "5", "6", "7",
1222 /* vrsave vscr sfp */
1223 "vrsave", "vscr", "sfp",
1224 };
1225
1226 #ifdef TARGET_REGNAMES
1227 static const char alt_reg_names[][8] =
1228 {
1229 /* GPRs */
1230 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1231 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1232 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1233 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1234 /* FPRs */
1235 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1236 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1237 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1238 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1239 /* VRs */
1240 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1241 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1242 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1243 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1244 /* lr ctr ca ap */
1245 "lr", "ctr", "ca", "ap",
1246 /* cr0..cr7 */
1247 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1248 /* vrsave vscr sfp */
1249 "vrsave", "vscr", "sfp",
1250 };
1251 #endif
1252
1253 /* Table of valid machine attributes. */
1254
1255 static const struct attribute_spec rs6000_attribute_table[] =
1256 {
1257 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1258 affects_type_identity, handler, exclude } */
1259 { "altivec", 1, 1, false, true, false, false,
1260 rs6000_handle_altivec_attribute, NULL },
1261 { "longcall", 0, 0, false, true, true, false,
1262 rs6000_handle_longcall_attribute, NULL },
1263 { "shortcall", 0, 0, false, true, true, false,
1264 rs6000_handle_longcall_attribute, NULL },
1265 { "ms_struct", 0, 0, false, false, false, false,
1266 rs6000_handle_struct_attribute, NULL },
1267 { "gcc_struct", 0, 0, false, false, false, false,
1268 rs6000_handle_struct_attribute, NULL },
1269 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1270 SUBTARGET_ATTRIBUTE_TABLE,
1271 #endif
1272 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1273 };
1274 \f
1275 #ifndef TARGET_PROFILE_KERNEL
1276 #define TARGET_PROFILE_KERNEL 0
1277 #endif
1278 \f
1279 /* Initialize the GCC target structure. */
1280 #undef TARGET_ATTRIBUTE_TABLE
1281 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1282 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1283 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1284 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1285 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1286
1287 #undef TARGET_ASM_ALIGNED_DI_OP
1288 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1289
1290 /* Default unaligned ops are only provided for ELF. Find the ops needed
1291 for non-ELF systems. */
1292 #ifndef OBJECT_FORMAT_ELF
1293 #if TARGET_XCOFF
1294 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1295 64-bit targets. */
1296 #undef TARGET_ASM_UNALIGNED_HI_OP
1297 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1298 #undef TARGET_ASM_UNALIGNED_SI_OP
1299 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1300 #undef TARGET_ASM_UNALIGNED_DI_OP
1301 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1302 #else
1303 /* For Darwin. */
1304 #undef TARGET_ASM_UNALIGNED_HI_OP
1305 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1306 #undef TARGET_ASM_UNALIGNED_SI_OP
1307 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1308 #undef TARGET_ASM_UNALIGNED_DI_OP
1309 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1310 #undef TARGET_ASM_ALIGNED_DI_OP
1311 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1312 #endif
1313 #endif
1314
1315 /* This hook deals with fixups for relocatable code and DI-mode objects
1316 in 64-bit code. */
1317 #undef TARGET_ASM_INTEGER
1318 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1319
1320 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1321 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1322 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1323 #endif
1324
1325 #undef TARGET_SET_UP_BY_PROLOGUE
1326 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1327
1328 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1329 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1330 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1331 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1332 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1333 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1334 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1336 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1337 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1338 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1339 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1340
1341 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1342 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1343
1344 #undef TARGET_INTERNAL_ARG_POINTER
1345 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1346
1347 #undef TARGET_HAVE_TLS
1348 #define TARGET_HAVE_TLS HAVE_AS_TLS
1349
1350 #undef TARGET_CANNOT_FORCE_CONST_MEM
1351 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1352
1353 #undef TARGET_DELEGITIMIZE_ADDRESS
1354 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1355
1356 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1357 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1358
1359 #undef TARGET_LEGITIMATE_COMBINED_INSN
1360 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1361
1362 #undef TARGET_ASM_FUNCTION_PROLOGUE
1363 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1364 #undef TARGET_ASM_FUNCTION_EPILOGUE
1365 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1366
1367 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1368 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1369
1370 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1371 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1372
1373 #undef TARGET_LEGITIMIZE_ADDRESS
1374 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1375
1376 #undef TARGET_SCHED_VARIABLE_ISSUE
1377 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1378
1379 #undef TARGET_SCHED_ISSUE_RATE
1380 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1381 #undef TARGET_SCHED_ADJUST_COST
1382 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1383 #undef TARGET_SCHED_ADJUST_PRIORITY
1384 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1385 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1386 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1387 #undef TARGET_SCHED_INIT
1388 #define TARGET_SCHED_INIT rs6000_sched_init
1389 #undef TARGET_SCHED_FINISH
1390 #define TARGET_SCHED_FINISH rs6000_sched_finish
1391 #undef TARGET_SCHED_REORDER
1392 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1393 #undef TARGET_SCHED_REORDER2
1394 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1395
1396 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1397 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1398
1399 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1400 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1401
1402 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1403 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1404 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1405 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1406 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1407 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1408 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1409 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1410
1411 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1412 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1413
1414 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1415 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1416 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1417 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1418 rs6000_builtin_support_vector_misalignment
1419 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1420 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1421 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1422 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1423 rs6000_builtin_vectorization_cost
1424 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1425 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1426 rs6000_preferred_simd_mode
1427 #undef TARGET_VECTORIZE_INIT_COST
1428 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1429 #undef TARGET_VECTORIZE_ADD_STMT_COST
1430 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1431 #undef TARGET_VECTORIZE_FINISH_COST
1432 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1433 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1434 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1435
1436 #undef TARGET_LOOP_UNROLL_ADJUST
1437 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1438
1439 #undef TARGET_INIT_BUILTINS
1440 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1441 #undef TARGET_BUILTIN_DECL
1442 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1443
1444 #undef TARGET_FOLD_BUILTIN
1445 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1446 #undef TARGET_GIMPLE_FOLD_BUILTIN
1447 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1448
1449 #undef TARGET_EXPAND_BUILTIN
1450 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1451
1452 #undef TARGET_MANGLE_TYPE
1453 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1454
1455 #undef TARGET_INIT_LIBFUNCS
1456 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1457
1458 #if TARGET_MACHO
1459 #undef TARGET_BINDS_LOCAL_P
1460 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1461 #endif
1462
1463 #undef TARGET_MS_BITFIELD_LAYOUT_P
1464 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1465
1466 #undef TARGET_ASM_OUTPUT_MI_THUNK
1467 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1468
1469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1471
1472 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1473 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1474
1475 #undef TARGET_REGISTER_MOVE_COST
1476 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1477 #undef TARGET_MEMORY_MOVE_COST
1478 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1479 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1480 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1481 rs6000_ira_change_pseudo_allocno_class
1482 #undef TARGET_CANNOT_COPY_INSN_P
1483 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1484 #undef TARGET_RTX_COSTS
1485 #define TARGET_RTX_COSTS rs6000_rtx_costs
1486 #undef TARGET_ADDRESS_COST
1487 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1488 #undef TARGET_INSN_COST
1489 #define TARGET_INSN_COST rs6000_insn_cost
1490
1491 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1492 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1493
1494 #undef TARGET_PROMOTE_FUNCTION_MODE
1495 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1496
1497 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1498 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1499
1500 #undef TARGET_RETURN_IN_MEMORY
1501 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1502
1503 #undef TARGET_RETURN_IN_MSB
1504 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1505
1506 #undef TARGET_SETUP_INCOMING_VARARGS
1507 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1508
1509 /* Always strict argument naming on rs6000. */
1510 #undef TARGET_STRICT_ARGUMENT_NAMING
1511 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1512 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1513 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1514 #undef TARGET_SPLIT_COMPLEX_ARG
1515 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1516 #undef TARGET_MUST_PASS_IN_STACK
1517 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1518 #undef TARGET_PASS_BY_REFERENCE
1519 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1520 #undef TARGET_ARG_PARTIAL_BYTES
1521 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1522 #undef TARGET_FUNCTION_ARG_ADVANCE
1523 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1524 #undef TARGET_FUNCTION_ARG
1525 #define TARGET_FUNCTION_ARG rs6000_function_arg
1526 #undef TARGET_FUNCTION_ARG_PADDING
1527 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1528 #undef TARGET_FUNCTION_ARG_BOUNDARY
1529 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1530
1531 #undef TARGET_BUILD_BUILTIN_VA_LIST
1532 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1533
1534 #undef TARGET_EXPAND_BUILTIN_VA_START
1535 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1536
1537 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1538 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1539
1540 #undef TARGET_EH_RETURN_FILTER_MODE
1541 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1542
1543 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1544 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1545
1546 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1547 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1548
1549 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1550 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1551
1552 #undef TARGET_FLOATN_MODE
1553 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1554
1555 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1556 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1557
1558 #undef TARGET_MD_ASM_ADJUST
1559 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1560
1561 #undef TARGET_OPTION_OVERRIDE
1562 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1563
1564 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1565 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1566 rs6000_builtin_vectorized_function
1567
1568 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1569 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1570 rs6000_builtin_md_vectorized_function
1571
1572 #undef TARGET_STACK_PROTECT_GUARD
1573 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1574
1575 #if !TARGET_MACHO
1576 #undef TARGET_STACK_PROTECT_FAIL
1577 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1578 #endif
1579
1580 #ifdef HAVE_AS_TLS
1581 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1582 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1583 #endif
1584
1585 /* Use a 32-bit anchor range. This leads to sequences like:
1586
1587 addis tmp,anchor,high
1588 add dest,tmp,low
1589
1590 where tmp itself acts as an anchor, and can be shared between
1591 accesses to the same 64k page. */
1592 #undef TARGET_MIN_ANCHOR_OFFSET
1593 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
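/* Note: the minimum is spelled -0x7fffffff - 1 rather than -0x80000000,
   since with a 32-bit int the literal 0x80000000 does not fit in a signed
   int and would be treated as unsigned before negation.  */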
1594 #undef TARGET_MAX_ANCHOR_OFFSET
1595 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1596 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1597 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1598 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1599 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1600
1601 #undef TARGET_BUILTIN_RECIPROCAL
1602 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1603
1604 #undef TARGET_SECONDARY_RELOAD
1605 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1606 #undef TARGET_SECONDARY_MEMORY_NEEDED
1607 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1608 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1609 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1610
1611 #undef TARGET_LEGITIMATE_ADDRESS_P
1612 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1613
1614 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1615 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1616
1617 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1618 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1619
1620 #undef TARGET_CAN_ELIMINATE
1621 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1622
1623 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1624 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1625
1626 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1627 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1628
1629 #undef TARGET_TRAMPOLINE_INIT
1630 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1631
1632 #undef TARGET_FUNCTION_VALUE
1633 #define TARGET_FUNCTION_VALUE rs6000_function_value
1634
1635 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1636 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1637
1638 #undef TARGET_OPTION_SAVE
1639 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1640
1641 #undef TARGET_OPTION_RESTORE
1642 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1643
1644 #undef TARGET_OPTION_PRINT
1645 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1646
1647 #undef TARGET_CAN_INLINE_P
1648 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1649
1650 #undef TARGET_SET_CURRENT_FUNCTION
1651 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1652
1653 #undef TARGET_LEGITIMATE_CONSTANT_P
1654 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1655
1656 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1657 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1658
1659 #undef TARGET_CAN_USE_DOLOOP_P
1660 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1661
1662 #undef TARGET_PREDICT_DOLOOP_P
1663 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1664
1665 #undef TARGET_HAVE_COUNT_REG_DECR_P
1666 #define TARGET_HAVE_COUNT_REG_DECR_P true
1667
1668 /* 1000000000 is infinite cost in IVOPTs. */
1669 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1670 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1671
1672 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1673 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1674
1675 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1676 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1677
1678 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1679 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1680 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1681 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1682 #undef TARGET_UNWIND_WORD_MODE
1683 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1684
1685 #undef TARGET_OFFLOAD_OPTIONS
1686 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1687
1688 #undef TARGET_C_MODE_FOR_SUFFIX
1689 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1690
1691 #undef TARGET_INVALID_BINARY_OP
1692 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1693
1694 #undef TARGET_OPTAB_SUPPORTED_P
1695 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1696
1697 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1698 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1699
1700 #undef TARGET_COMPARE_VERSION_PRIORITY
1701 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1702
1703 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1704 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1705 rs6000_generate_version_dispatcher_body
1706
1707 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1708 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1709 rs6000_get_function_versions_dispatcher
1710
1711 #undef TARGET_OPTION_FUNCTION_VERSIONS
1712 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1713
1714 #undef TARGET_HARD_REGNO_NREGS
1715 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1716 #undef TARGET_HARD_REGNO_MODE_OK
1717 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1718
1719 #undef TARGET_MODES_TIEABLE_P
1720 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1721
1722 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1723 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1724 rs6000_hard_regno_call_part_clobbered
1725
1726 #undef TARGET_SLOW_UNALIGNED_ACCESS
1727 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1728
1729 #undef TARGET_CAN_CHANGE_MODE_CLASS
1730 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1731
1732 #undef TARGET_CONSTANT_ALIGNMENT
1733 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1734
1735 #undef TARGET_STARTING_FRAME_OFFSET
1736 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1737
1738 #if TARGET_ELF && RS6000_WEAK
1739 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1740 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1741 #endif
1742
1743 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1744 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1745
1746 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1747 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1748
1749 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1750 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1751 rs6000_cannot_substitute_mem_equiv_p
1752
1753 #undef TARGET_INVALID_CONVERSION
1754 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1755 \f
1756
1757 /* Processor table. */
1758 struct rs6000_ptt
1759 {
1760 const char *const name; /* Canonical processor name. */
1761 const enum processor_type processor; /* Processor type enum value. */
1762 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1763 };
1764
1765 static struct rs6000_ptt const processor_target_table[] =
1766 {
1767 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1768 #include "rs6000-cpus.def"
1769 #undef RS6000_CPU
1770 };
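/* The table above is filled in by an X-macro: each
   RS6000_CPU (NAME, CPU, FLAGS) line in rs6000-cpus.def expands to one
   { NAME, CPU, FLAGS } initializer.  As an illustrative sketch (not a
   verbatim entry from rs6000-cpus.def), a line such as
   RS6000_CPU ("power9", PROCESSOR_POWER9, ISA_3_0_MASKS_SERVER)
   would contribute { "power9", PROCESSOR_POWER9, ISA_3_0_MASKS_SERVER }.  */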
1771
1772 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1773 name is invalid. */
1774
1775 static int
1776 rs6000_cpu_name_lookup (const char *name)
1777 {
1778 size_t i;
1779
1780 if (name != NULL)
1781 {
1782 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1783 if (! strcmp (name, processor_target_table[i].name))
1784 return (int)i;
1785 }
1786
1787 return -1;
1788 }
1789
1790 \f
1791 /* Return number of consecutive hard regs needed starting at reg REGNO
1792 to hold something of mode MODE.
1793 This is ordinarily the length in words of a value of mode MODE
1794 but can be less for certain modes in special long registers.
1795
1796 POWER and PowerPC GPRs hold 32 bits worth;
1797 PowerPC64 GPRs and FPRs hold 64 bits worth. */
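/* A small worked example (illustrative only): with 8-byte FPRs
   (UNITS_PER_FP_WORD == 8), a 16-byte IBM long double needs
   (16 + 8 - 1) / 8 = 2 consecutive registers, which is exactly the
   rounding-up division performed at the end of this function.  */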
1798
1799 static int
1800 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1801 {
1802 unsigned HOST_WIDE_INT reg_size;
1803
1804 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1805 128-bit floating point, which can go in vector registers and has VSX
1806 memory addressing. */
1807 if (FP_REGNO_P (regno))
1808 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1809 ? UNITS_PER_VSX_WORD
1810 : UNITS_PER_FP_WORD);
1811
1812 else if (ALTIVEC_REGNO_P (regno))
1813 reg_size = UNITS_PER_ALTIVEC_WORD;
1814
1815 else
1816 reg_size = UNITS_PER_WORD;
1817
1818 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1819 }
1820
1821 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1822 MODE. */
1823 static int
1824 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1825 {
1826 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1827
1828 if (COMPLEX_MODE_P (mode))
1829 mode = GET_MODE_INNER (mode);
1830
1831 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1832 registers. */
1833 if (mode == OOmode)
1834 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1835
1836 /* MMA accumulator modes need FPR registers divisible by 4. */
1837 if (mode == XOmode)
1838 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1839
1840 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1841 register combinations; we use PTImode where we need to deal with quad
1842 word memory operations. Don't allow quad words in the argument or frame
1843 pointer registers, just registers 0..31. */
1844 if (mode == PTImode)
1845 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1846 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1847 && ((regno & 1) == 0));
1848
1849 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1850 implementations. Don't allow an item to be split between a FP register
1851 and an Altivec register. Allow TImode in all VSX registers if the user
1852 asked for it. */
1853 if (TARGET_VSX && VSX_REGNO_P (regno)
1854 && (VECTOR_MEM_VSX_P (mode)
1855 || VECTOR_ALIGNMENT_P (mode)
1856 || reg_addr[mode].scalar_in_vmx_p
1857 || mode == TImode
1858 || (TARGET_VADDUQM && mode == V1TImode)))
1859 {
1860 if (FP_REGNO_P (regno))
1861 return FP_REGNO_P (last_regno);
1862
1863 if (ALTIVEC_REGNO_P (regno))
1864 {
1865 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1866 return 0;
1867
1868 return ALTIVEC_REGNO_P (last_regno);
1869 }
1870 }
1871
1872 /* The GPRs can hold any mode, but values bigger than one register
1873 cannot go past R31. */
1874 if (INT_REGNO_P (regno))
1875 return INT_REGNO_P (last_regno);
1876
1877 /* The float registers (except for VSX vector modes) can only hold floating
1878 modes and DImode. */
1879 if (FP_REGNO_P (regno))
1880 {
1881 if (VECTOR_ALIGNMENT_P (mode))
1882 return false;
1883
1884 if (SCALAR_FLOAT_MODE_P (mode)
1885 && (mode != TDmode || (regno % 2) == 0)
1886 && FP_REGNO_P (last_regno))
1887 return 1;
1888
1889 if (GET_MODE_CLASS (mode) == MODE_INT)
1890 {
1891 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1892 return 1;
1893
1894 if (TARGET_P8_VECTOR && (mode == SImode))
1895 return 1;
1896
1897 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1898 return 1;
1899 }
1900
1901 return 0;
1902 }
1903
1904 /* The CR register can only hold CC modes. */
1905 if (CR_REGNO_P (regno))
1906 return GET_MODE_CLASS (mode) == MODE_CC;
1907
1908 if (CA_REGNO_P (regno))
1909 return mode == Pmode || mode == SImode;
1910
1911 /* AltiVec only in AltiVec registers. */
1912 if (ALTIVEC_REGNO_P (regno))
1913 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1914 || mode == V1TImode);
1915
1916 /* We cannot put non-VSX TImode or PTImode anywhere except general
1917 registers, and it must be able to fit within the register set. */
1918
1919 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1920 }
1921
1922 /* Implement TARGET_HARD_REGNO_NREGS. */
1923
1924 static unsigned int
1925 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1926 {
1927 return rs6000_hard_regno_nregs[mode][regno];
1928 }
1929
1930 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1931
1932 static bool
1933 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1934 {
1935 return rs6000_hard_regno_mode_ok_p[mode][regno];
1936 }
1937
1938 /* Implement TARGET_MODES_TIEABLE_P.
1939
1940 PTImode cannot tie with other modes because PTImode is restricted to even
1941 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1942 57744).
1943
1944 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1945 registers) or XOmode (vector quad, restricted to FPR registers divisible
1946 by 4) to tie with other modes.
1947
1948 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1949 128-bit floating point on VSX systems ties with other vectors. */
1950
1951 static bool
1952 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1953 {
1954 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1955 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1956 return mode1 == mode2;
1957
1958 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1959 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1960 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1961 return false;
1962
1963 if (SCALAR_FLOAT_MODE_P (mode1))
1964 return SCALAR_FLOAT_MODE_P (mode2);
1965 if (SCALAR_FLOAT_MODE_P (mode2))
1966 return false;
1967
1968 if (GET_MODE_CLASS (mode1) == MODE_CC)
1969 return GET_MODE_CLASS (mode2) == MODE_CC;
1970 if (GET_MODE_CLASS (mode2) == MODE_CC)
1971 return false;
1972
1973 return true;
1974 }
1975
1976 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1977
1978 static bool
1979 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1980 machine_mode mode)
1981 {
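  /* With a 32-bit ABI on a 64-bit ISA (e.g. -m32 -mpowerpc64), only the
     low 32 bits of a GPR are preserved across calls, so any GPR value
     wider than 4 bytes is partially clobbered.  */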
1982 if (TARGET_32BIT
1983 && TARGET_POWERPC64
1984 && GET_MODE_SIZE (mode) > 4
1985 && INT_REGNO_P (regno))
1986 return true;
1987
1988 if (TARGET_VSX
1989 && FP_REGNO_P (regno)
1990 && GET_MODE_SIZE (mode) > 8
1991 && !FLOAT128_2REG_P (mode))
1992 return true;
1993
1994 return false;
1995 }
1996
1997 /* Print interesting facts about registers. */
1998 static void
1999 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2000 {
2001 int r, m;
2002
2003 for (r = first_regno; r <= last_regno; ++r)
2004 {
2005 const char *comma = "";
2006 int len;
2007
2008 if (first_regno == last_regno)
2009 fprintf (stderr, "%s:\t", reg_name);
2010 else
2011 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2012
2013 len = 8;
2014 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2015 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2016 {
2017 if (len > 70)
2018 {
2019 fprintf (stderr, ",\n\t");
2020 len = 8;
2021 comma = "";
2022 }
2023
2024 if (rs6000_hard_regno_nregs[m][r] > 1)
2025 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2026 rs6000_hard_regno_nregs[m][r]);
2027 else
2028 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2029
2030 comma = ", ";
2031 }
2032
2033 if (call_used_or_fixed_reg_p (r))
2034 {
2035 if (len > 70)
2036 {
2037 fprintf (stderr, ",\n\t");
2038 len = 8;
2039 comma = "";
2040 }
2041
2042 len += fprintf (stderr, "%s%s", comma, "call-used");
2043 comma = ", ";
2044 }
2045
2046 if (fixed_regs[r])
2047 {
2048 if (len > 70)
2049 {
2050 fprintf (stderr, ",\n\t");
2051 len = 8;
2052 comma = "";
2053 }
2054
2055 len += fprintf (stderr, "%s%s", comma, "fixed");
2056 comma = ", ";
2057 }
2058
2059 if (len > 70)
2060 {
2061 fprintf (stderr, ",\n\t");
2062 comma = "";
2063 }
2064
2065 len += fprintf (stderr, "%sreg-class = %s", comma,
2066 reg_class_names[(int)rs6000_regno_regclass[r]]);
2067 comma = ", ";
2068
2069 if (len > 70)
2070 {
2071 fprintf (stderr, ",\n\t");
2072 comma = "";
2073 }
2074
2075 fprintf (stderr, "%sregno = %d\n", comma, r);
2076 }
2077 }
2078
2079 static const char *
2080 rs6000_debug_vector_unit (enum rs6000_vector v)
2081 {
2082 const char *ret;
2083
2084 switch (v)
2085 {
2086 case VECTOR_NONE: ret = "none"; break;
2087 case VECTOR_ALTIVEC: ret = "altivec"; break;
2088 case VECTOR_VSX: ret = "vsx"; break;
2089 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2090 default: ret = "unknown"; break;
2091 }
2092
2093 return ret;
2094 }
2095
2096 /* Inner function printing just the address mask for a particular reload
2097 register class. */
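/* For example, with keep_spaces set, a mask that has RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET yields "v io   ": one column
   each for valid, multiple, indexed, offset (or 'O' for quad offset),
   pre-inc/dec, pre-modify and AND-masked addressing.  */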
2098 DEBUG_FUNCTION char *
2099 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2100 {
2101 static char ret[8];
2102 char *p = ret;
2103
2104 if ((mask & RELOAD_REG_VALID) != 0)
2105 *p++ = 'v';
2106 else if (keep_spaces)
2107 *p++ = ' ';
2108
2109 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2110 *p++ = 'm';
2111 else if (keep_spaces)
2112 *p++ = ' ';
2113
2114 if ((mask & RELOAD_REG_INDEXED) != 0)
2115 *p++ = 'i';
2116 else if (keep_spaces)
2117 *p++ = ' ';
2118
2119 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2120 *p++ = 'O';
2121 else if ((mask & RELOAD_REG_OFFSET) != 0)
2122 *p++ = 'o';
2123 else if (keep_spaces)
2124 *p++ = ' ';
2125
2126 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2127 *p++ = '+';
2128 else if (keep_spaces)
2129 *p++ = ' ';
2130
2131 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2132 *p++ = '+';
2133 else if (keep_spaces)
2134 *p++ = ' ';
2135
2136 if ((mask & RELOAD_REG_AND_M16) != 0)
2137 *p++ = '&';
2138 else if (keep_spaces)
2139 *p++ = ' ';
2140
2141 *p = '\0';
2142
2143 return ret;
2144 }
2145
2146 /* Print the address masks in a human readable fashion. */
2147 DEBUG_FUNCTION void
2148 rs6000_debug_print_mode (ssize_t m)
2149 {
2150 ssize_t rc;
2151 int spaces = 0;
2152
2153 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2154 for (rc = 0; rc < N_RELOAD_REG; rc++)
2155 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2156 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2157
2158 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2159 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2160 {
2161 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2162 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2163 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2164 spaces = 0;
2165 }
2166 else
2167 spaces += strlen (" Reload=sl");
2168
2169 if (reg_addr[m].scalar_in_vmx_p)
2170 {
2171 fprintf (stderr, "%*s Upper=y", spaces, "");
2172 spaces = 0;
2173 }
2174 else
2175 spaces += strlen (" Upper=y");
2176
2177 if (rs6000_vector_unit[m] != VECTOR_NONE
2178 || rs6000_vector_mem[m] != VECTOR_NONE)
2179 {
2180 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2181 spaces, "",
2182 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2183 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2184 }
2185
2186 fputs ("\n", stderr);
2187 }
2188
2189 #define DEBUG_FMT_ID "%-32s= "
2190 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2191 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2192 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
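/* Sketch of the resulting output: fprintf (stderr, DEBUG_FMT_S, "abi",
   "ELFv2") prints the key left-justified in a 32-column field followed
   by "= ELFv2", so the values line up in the -mdebug=reg dump.  */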
2193
2194 /* Print various interesting information with -mdebug=reg. */
2195 static void
2196 rs6000_debug_reg_global (void)
2197 {
2198 static const char *const tf[2] = { "false", "true" };
2199 const char *nl = (const char *)0;
2200 int m;
2201 size_t m1, m2, v;
2202 char costly_num[20];
2203 char nop_num[20];
2204 char flags_buffer[40];
2205 const char *costly_str;
2206 const char *nop_str;
2207 const char *trace_str;
2208 const char *abi_str;
2209 const char *cmodel_str;
2210 struct cl_target_option cl_opts;
2211
2212 /* Modes we want tieable information on. */
2213 static const machine_mode print_tieable_modes[] = {
2214 QImode,
2215 HImode,
2216 SImode,
2217 DImode,
2218 TImode,
2219 PTImode,
2220 SFmode,
2221 DFmode,
2222 TFmode,
2223 IFmode,
2224 KFmode,
2225 SDmode,
2226 DDmode,
2227 TDmode,
2228 V2SImode,
2229 V2SFmode,
2230 V16QImode,
2231 V8HImode,
2232 V4SImode,
2233 V2DImode,
2234 V1TImode,
2235 V32QImode,
2236 V16HImode,
2237 V8SImode,
2238 V4DImode,
2239 V2TImode,
2240 V4SFmode,
2241 V2DFmode,
2242 V8SFmode,
2243 V4DFmode,
2244 OOmode,
2245 XOmode,
2246 CCmode,
2247 CCUNSmode,
2248 CCEQmode,
2249 CCFPmode,
2250 };
2251
2252 /* Virtual regs we are interested in. */
2253 const static struct {
2254 int regno; /* register number. */
2255 const char *name; /* register name. */
2256 } virtual_regs[] = {
2257 { STACK_POINTER_REGNUM, "stack pointer:" },
2258 { TOC_REGNUM, "toc: " },
2259 { STATIC_CHAIN_REGNUM, "static chain: " },
2260 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2261 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2262 { ARG_POINTER_REGNUM, "arg pointer: " },
2263 { FRAME_POINTER_REGNUM, "frame pointer:" },
2264 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2265 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2266 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2267 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2268 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2269 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2270 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2271 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2272 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2273 };
2274
2275 fputs ("\nHard register information:\n", stderr);
2276 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2277 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2278 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2279 LAST_ALTIVEC_REGNO,
2280 "vs");
2281 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2282 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2283 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2284 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2285 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2286 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2287
2288 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2289 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2290 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2291
2292 fprintf (stderr,
2293 "\n"
2294 "d reg_class = %s\n"
2295 "f reg_class = %s\n"
2296 "v reg_class = %s\n"
2297 "wa reg_class = %s\n"
2298 "we reg_class = %s\n"
2299 "wr reg_class = %s\n"
2300 "wx reg_class = %s\n"
2301 "wA reg_class = %s\n"
2302 "\n",
2303 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2304 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2305 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2306 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2307 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2308 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2309 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2310 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2311
2312 nl = "\n";
2313 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2314 rs6000_debug_print_mode (m);
2315
2316 fputs ("\n", stderr);
2317
2318 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2319 {
2320 machine_mode mode1 = print_tieable_modes[m1];
2321 bool first_time = true;
2322
2323 nl = (const char *)0;
2324 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2325 {
2326 machine_mode mode2 = print_tieable_modes[m2];
2327 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2328 {
2329 if (first_time)
2330 {
2331 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2332 nl = "\n";
2333 first_time = false;
2334 }
2335
2336 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2337 }
2338 }
2339
2340 if (!first_time)
2341 fputs ("\n", stderr);
2342 }
2343
2344 if (nl)
2345 fputs (nl, stderr);
2346
2347 if (rs6000_recip_control)
2348 {
2349 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2350
2351 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2352 if (rs6000_recip_bits[m])
2353 {
2354 fprintf (stderr,
2355 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2356 GET_MODE_NAME (m),
2357 (RS6000_RECIP_AUTO_RE_P (m)
2358 ? "auto"
2359 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2360 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2361 ? "auto"
2362 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2363 }
2364
2365 fputs ("\n", stderr);
2366 }
2367
2368 if (rs6000_cpu_index >= 0)
2369 {
2370 const char *name = processor_target_table[rs6000_cpu_index].name;
2371 HOST_WIDE_INT flags
2372 = processor_target_table[rs6000_cpu_index].target_enable;
2373
2374 sprintf (flags_buffer, "-mcpu=%s flags", name);
2375 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2376 }
2377 else
2378 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2379
2380 if (rs6000_tune_index >= 0)
2381 {
2382 const char *name = processor_target_table[rs6000_tune_index].name;
2383 HOST_WIDE_INT flags
2384 = processor_target_table[rs6000_tune_index].target_enable;
2385
2386 sprintf (flags_buffer, "-mtune=%s flags", name);
2387 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2388 }
2389 else
2390 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2391
2392 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2393 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2394 rs6000_isa_flags);
2395
2396 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2397 rs6000_isa_flags_explicit);
2398
2399 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2400 rs6000_builtin_mask);
2401
2402 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2403
2404 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2405 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2406
2407 switch (rs6000_sched_costly_dep)
2408 {
2409 case max_dep_latency:
2410 costly_str = "max_dep_latency";
2411 break;
2412
2413 case no_dep_costly:
2414 costly_str = "no_dep_costly";
2415 break;
2416
2417 case all_deps_costly:
2418 costly_str = "all_deps_costly";
2419 break;
2420
2421 case true_store_to_load_dep_costly:
2422 costly_str = "true_store_to_load_dep_costly";
2423 break;
2424
2425 case store_to_load_dep_costly:
2426 costly_str = "store_to_load_dep_costly";
2427 break;
2428
2429 default:
2430 costly_str = costly_num;
2431 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2432 break;
2433 }
2434
2435 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2436
2437 switch (rs6000_sched_insert_nops)
2438 {
2439 case sched_finish_regroup_exact:
2440 nop_str = "sched_finish_regroup_exact";
2441 break;
2442
2443 case sched_finish_pad_groups:
2444 nop_str = "sched_finish_pad_groups";
2445 break;
2446
2447 case sched_finish_none:
2448 nop_str = "sched_finish_none";
2449 break;
2450
2451 default:
2452 nop_str = nop_num;
2453 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2454 break;
2455 }
2456
2457 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2458
2459 switch (rs6000_sdata)
2460 {
2461 default:
2462 case SDATA_NONE:
2463 break;
2464
2465 case SDATA_DATA:
2466 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2467 break;
2468
2469 case SDATA_SYSV:
2470 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2471 break;
2472
2473 case SDATA_EABI:
2474 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2475 break;
2476
2477 }
2478
2479 switch (rs6000_traceback)
2480 {
2481 case traceback_default: trace_str = "default"; break;
2482 case traceback_none: trace_str = "none"; break;
2483 case traceback_part: trace_str = "part"; break;
2484 case traceback_full: trace_str = "full"; break;
2485 default: trace_str = "unknown"; break;
2486 }
2487
2488 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2489
2490 switch (rs6000_current_cmodel)
2491 {
2492 case CMODEL_SMALL: cmodel_str = "small"; break;
2493 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2494 case CMODEL_LARGE: cmodel_str = "large"; break;
2495 default: cmodel_str = "unknown"; break;
2496 }
2497
2498 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2499
2500 switch (rs6000_current_abi)
2501 {
2502 case ABI_NONE: abi_str = "none"; break;
2503 case ABI_AIX: abi_str = "aix"; break;
2504 case ABI_ELFv2: abi_str = "ELFv2"; break;
2505 case ABI_V4: abi_str = "V4"; break;
2506 case ABI_DARWIN: abi_str = "darwin"; break;
2507 default: abi_str = "unknown"; break;
2508 }
2509
2510 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2511
2512 if (rs6000_altivec_abi)
2513 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2514
2515 if (rs6000_darwin64_abi)
2516 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2517
2518 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2519 (TARGET_SOFT_FLOAT ? "true" : "false"));
2520
2521 if (TARGET_LINK_STACK)
2522 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2523
2524 if (TARGET_P8_FUSION)
2525 {
2526 char options[80];
2527
2528 strcpy (options, "power8");
2529 if (TARGET_P8_FUSION_SIGN)
2530 strcat (options, ", sign");
2531
2532 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2533 }
2534
2535 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2536 TARGET_SECURE_PLT ? "secure" : "bss");
2537 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2538 aix_struct_return ? "aix" : "sysv");
2539 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2540 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2541 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2542 tf[!!rs6000_align_branch_targets]);
2543 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2544 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2545 rs6000_long_double_type_size);
2546 if (rs6000_long_double_type_size > 64)
2547 {
2548 fprintf (stderr, DEBUG_FMT_S, "long double type",
2549 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2550 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2551 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2552 }
2553 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2554 (int)rs6000_sched_restricted_insns_priority);
2555 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2556 (int)END_BUILTINS);
2557 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2558 (int)RS6000_BUILTIN_COUNT);
2559
2560 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2561 (int)TARGET_FLOAT128_ENABLE_TYPE);
2562
2563 if (TARGET_VSX)
2564 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2565 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2566
2567 if (TARGET_DIRECT_MOVE_128)
2568 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2569 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2570 }
2571
2572 \f
2573 /* Update the addr mask bits in reg_addr to help secondary reload and the
2574 legitimate address support figure out the appropriate addressing to
2575 use. */
2576
2577 static void
2578 rs6000_setup_reg_addr_masks (void)
2579 {
2580 ssize_t rc, reg, m, nregs;
2581 addr_mask_type any_addr_mask, addr_mask;
2582
2583 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2584 {
2585 machine_mode m2 = (machine_mode) m;
2586 bool complex_p = false;
2587 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2588 size_t msize;
2589
2590 if (COMPLEX_MODE_P (m2))
2591 {
2592 complex_p = true;
2593 m2 = GET_MODE_INNER (m2);
2594 }
2595
2596 msize = GET_MODE_SIZE (m2);
2597
2598 /* SDmode is special in that we want to access it only via REG+REG
2599 addressing on power7 and above, since we want to use the LFIWZX and
2600 STFIWX instructions to load and store it. */
2601 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2602
2603 any_addr_mask = 0;
2604 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2605 {
2606 addr_mask = 0;
2607 reg = reload_reg_map[rc].reg;
2608
2609 /* Can mode values go in the GPR/FPR/Altivec registers? */
2610 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2611 {
2612 bool small_int_vsx_p = (small_int_p
2613 && (rc == RELOAD_REG_FPR
2614 || rc == RELOAD_REG_VMX));
2615
2616 nregs = rs6000_hard_regno_nregs[m][reg];
2617 addr_mask |= RELOAD_REG_VALID;
2618
2619 /* Indicate if the mode takes more than 1 physical register. If
2620 it takes a single register, indicate it can do REG+REG
2621 addressing. Small integers in VSX registers can only do
2622 REG+REG addressing. */
2623 if (small_int_vsx_p)
2624 addr_mask |= RELOAD_REG_INDEXED;
2625 else if (nregs > 1 || m == BLKmode || complex_p)
2626 addr_mask |= RELOAD_REG_MULTIPLE;
2627 else
2628 addr_mask |= RELOAD_REG_INDEXED;
2629
2630 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2631 addressing. If we allow scalars into Altivec registers,
2632 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2633
2634 For VSX systems, we don't allow update addressing for
2635 DFmode/SFmode if those registers can go in both the
2636 traditional floating point registers and Altivec registers.
2637 The load/store instructions for the Altivec registers do not
2638 have update forms. If we allowed update addressing, it seems
2639 to break IV-OPT code using floating point if the index type is
2640 int instead of long (PR target/81550 and target/84042). */
2641
2642 if (TARGET_UPDATE
2643 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2644 && msize <= 8
2645 && !VECTOR_MODE_P (m2)
2646 && !VECTOR_ALIGNMENT_P (m2)
2647 && !complex_p
2648 && (m != E_DFmode || !TARGET_VSX)
2649 && (m != E_SFmode || !TARGET_P8_VECTOR)
2650 && !small_int_vsx_p)
2651 {
2652 addr_mask |= RELOAD_REG_PRE_INCDEC;
2653
2654 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2655 we don't allow PRE_MODIFY for some multi-register
2656 operations. */
2657 switch (m)
2658 {
2659 default:
2660 addr_mask |= RELOAD_REG_PRE_MODIFY;
2661 break;
2662
2663 case E_DImode:
2664 if (TARGET_POWERPC64)
2665 addr_mask |= RELOAD_REG_PRE_MODIFY;
2666 break;
2667
2668 case E_DFmode:
2669 case E_DDmode:
2670 if (TARGET_HARD_FLOAT)
2671 addr_mask |= RELOAD_REG_PRE_MODIFY;
2672 break;
2673 }
2674 }
2675 }
2676
2677 /* GPR and FPR registers can do REG+OFFSET addressing, except
2678 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2679 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2680 if ((addr_mask != 0) && !indexed_only_p
2681 && msize <= 8
2682 && (rc == RELOAD_REG_GPR
2683 || ((msize == 8 || m2 == SFmode)
2684 && (rc == RELOAD_REG_FPR
2685 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2686 addr_mask |= RELOAD_REG_OFFSET;
2687
2688 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2689 instructions are enabled. The offset for 128-bit VSX registers is
2690 only 12 bits. While GPRs can handle the full offset range, VSX
2691 registers can only handle the restricted range. */
2692 else if ((addr_mask != 0) && !indexed_only_p
2693 && msize == 16 && TARGET_P9_VECTOR
2694 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2695 || (m2 == TImode && TARGET_VSX)))
2696 {
2697 addr_mask |= RELOAD_REG_OFFSET;
2698 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2699 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2700 }
2701
2702 /* Vector pairs can do both indexed and offset loads if the
2703 instructions are enabled; otherwise they can only do offset loads,
2704 since the access will be broken into two vector moves. Vector quads
2705 can only do offset loads. */
2706 else if ((addr_mask != 0) && TARGET_MMA
2707 && (m2 == OOmode || m2 == XOmode))
2708 {
2709 addr_mask |= RELOAD_REG_OFFSET;
2710 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2711 {
2712 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2713 if (m2 == OOmode)
2714 addr_mask |= RELOAD_REG_INDEXED;
2715 }
2716 }
2717
2718 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2719 addressing on 128-bit types. */
2720 if (rc == RELOAD_REG_VMX && msize == 16
2721 && (addr_mask & RELOAD_REG_VALID) != 0)
2722 addr_mask |= RELOAD_REG_AND_M16;
2723
2724 reg_addr[m].addr_mask[rc] = addr_mask;
2725 any_addr_mask |= addr_mask;
2726 }
2727
2728 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2729 }
2730 }
2731
2732 \f
2733 /* Initialize the various global tables that are based on register size. */
2734 static void
2735 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2736 {
2737 ssize_t r, m, c;
2738 int align64;
2739 int align32;
2740
2741 /* Precalculate REGNO_REG_CLASS. */
2742 rs6000_regno_regclass[0] = GENERAL_REGS;
2743 for (r = 1; r < 32; ++r)
2744 rs6000_regno_regclass[r] = BASE_REGS;
2745
2746 for (r = 32; r < 64; ++r)
2747 rs6000_regno_regclass[r] = FLOAT_REGS;
2748
2749 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2750 rs6000_regno_regclass[r] = NO_REGS;
2751
2752 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2753 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2754
2755 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2756 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2757 rs6000_regno_regclass[r] = CR_REGS;
2758
2759 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2760 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2761 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2762 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2763 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2764 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2765 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2766
2767 /* Precalculate the mapping from register class to simpler reload register
2768 class. We don't need all of the register classes that are combinations
2769 of different classes, just the simple ones that have constraint letters. */
2770 for (c = 0; c < N_REG_CLASSES; c++)
2771 reg_class_to_reg_type[c] = NO_REG_TYPE;
2772
2773 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2774 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2775 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2776 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2777 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2778 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2779 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2780 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2781 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2782 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2783
2784 if (TARGET_VSX)
2785 {
2786 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2787 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2788 }
2789 else
2790 {
2791 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2792 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2793 }
2794
2795 /* Precalculate the valid memory formats as well as the vector information;
2796 this must be set up before the rs6000_hard_regno_nregs_internal calls
2797 below. */
2798 gcc_assert ((int)VECTOR_NONE == 0);
2799 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2800 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2801
2802 gcc_assert ((int)CODE_FOR_nothing == 0);
2803 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2804
2805 gcc_assert ((int)NO_REGS == 0);
2806 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2807
2808 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2809 controls whether the compiler assumes native alignment or still uses 128-bit alignment. */
2810 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2811 {
2812 align64 = 64;
2813 align32 = 32;
2814 }
2815 else
2816 {
2817 align64 = 128;
2818 align32 = 128;
2819 }
2820
2821 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2822 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2823 if (TARGET_FLOAT128_TYPE)
2824 {
2825 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2826 rs6000_vector_align[KFmode] = 128;
2827
2828 if (FLOAT128_IEEE_P (TFmode))
2829 {
2830 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2831 rs6000_vector_align[TFmode] = 128;
2832 }
2833 }
2834
2835 /* V2DF mode, VSX only. */
2836 if (TARGET_VSX)
2837 {
2838 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2839 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2840 rs6000_vector_align[V2DFmode] = align64;
2841 }
2842
2843 /* V4SF mode, either VSX or Altivec. */
2844 if (TARGET_VSX)
2845 {
2846 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2847 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2848 rs6000_vector_align[V4SFmode] = align32;
2849 }
2850 else if (TARGET_ALTIVEC)
2851 {
2852 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2853 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2854 rs6000_vector_align[V4SFmode] = align32;
2855 }
2856
2857 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2858 and stores. */
2859 if (TARGET_ALTIVEC)
2860 {
2861 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2862 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2863 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2864 rs6000_vector_align[V4SImode] = align32;
2865 rs6000_vector_align[V8HImode] = align32;
2866 rs6000_vector_align[V16QImode] = align32;
2867
2868 if (TARGET_VSX)
2869 {
2870 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2871 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2872 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2873 }
2874 else
2875 {
2876 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2877 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2878 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2879 }
2880 }
2881
2882 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2883 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2884 if (TARGET_VSX)
2885 {
2886 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2887 rs6000_vector_unit[V2DImode]
2888 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2889 rs6000_vector_align[V2DImode] = align64;
2890
2891 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2892 rs6000_vector_unit[V1TImode]
2893 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2894 rs6000_vector_align[V1TImode] = 128;
2895 }
2896
2897 /* DFmode, see if we want to use the VSX unit. Memory is handled
2898 differently, so don't set rs6000_vector_mem. */
2899 if (TARGET_VSX)
2900 {
2901 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2902 rs6000_vector_align[DFmode] = 64;
2903 }
2904
2905 /* SFmode, see if we want to use the VSX unit. */
2906 if (TARGET_P8_VECTOR)
2907 {
2908 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2909 rs6000_vector_align[SFmode] = 32;
2910 }
2911
2912 /* Allow TImode in VSX register and set the VSX memory macros. */
2913 if (TARGET_VSX)
2914 {
2915 rs6000_vector_mem[TImode] = VECTOR_VSX;
2916 rs6000_vector_align[TImode] = align64;
2917 }
2918
2919 /* Add support for vector pairs and vector quad registers. */
2920 if (TARGET_MMA)
2921 {
2922 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2923 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2924 rs6000_vector_align[OOmode] = 256;
2925
2926 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2927 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2928 rs6000_vector_align[XOmode] = 512;
2929 }
2930
2931 /* Register class constraints for the constraints that depend on compile
2932 switches. When the VSX code was added, different constraints were added
2933 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2934 of the VSX registers are used. The register classes for scalar floating
2935 point types are set based on whether we allow that type into the upper
2936 (Altivec) registers. GCC has register classes to target the Altivec
2937 registers for load/store operations, to select using a VSX memory
2938 operation instead of the traditional floating point operation. The
2939 constraints are:
2940
2941 d - Register class to use with traditional DFmode instructions.
2942 f - Register class to use with traditional SFmode instructions.
2943 v - Altivec register.
2944 wa - Any VSX register.
2945 wc - Reserved to represent individual CR bits (used in LLVM).
2946 wn - always NO_REGS.
2947 wr - GPR if 64-bit mode is permitted.
2948 wx - Float register if we can do 32-bit int stores. */
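/* Since rs6000_constraints[] was zeroed above and NO_REGS is 0, any
   letter whose option is not enabled stays NO_REGS, so a pattern
   operand using e.g. "wa" simply becomes unallocatable rather than
   wrong when VSX is disabled.  */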
2949
2950 if (TARGET_HARD_FLOAT)
2951 {
2952 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2953 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2954 }
2955
2956 if (TARGET_VSX)
2957 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2958
2959 /* Add conditional constraints based on various options, to allow us to
2960 collapse multiple insn patterns. */
2961 if (TARGET_ALTIVEC)
2962 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2963
2964 if (TARGET_POWERPC64)
2965 {
2966 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2967 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2968 }
2969
2970 if (TARGET_STFIWX)
2971 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2972
2973 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2974 if (TARGET_DIRECT_MOVE_128)
2975 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2976
2977 /* Set up the reload helper and direct move functions. */
2978 if (TARGET_VSX || TARGET_ALTIVEC)
2979 {
2980 if (TARGET_64BIT)
2981 {
2982 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2983 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2984 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2985 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2986 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2987 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2988 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2989 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2990 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2991 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2992 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2993 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2994 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2995 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2996 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2997 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2998 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2999 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3000 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3001 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3002
3003 if (FLOAT128_VECTOR_P (KFmode))
3004 {
3005 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3006 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3007 }
3008
3009 if (FLOAT128_VECTOR_P (TFmode))
3010 {
3011 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3012 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3013 }
3014
3015 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3016 available. */
3017 if (TARGET_NO_SDMODE_STACK)
3018 {
3019 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3020 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3021 }
3022
3023 if (TARGET_VSX)
3024 {
3025 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3026 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3027 }
3028
3029 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3030 {
3031 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3032 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3033 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3034 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3035 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3036 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3037 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3038 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3039 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3040
3041 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3042 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3043 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3044 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3045 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3046 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3047 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3048 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3049 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3050
3051 if (FLOAT128_VECTOR_P (KFmode))
3052 {
3053 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3054 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3055 }
3056
3057 if (FLOAT128_VECTOR_P (TFmode))
3058 {
3059 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3060 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3061 }
3062
3063 if (TARGET_MMA)
3064 {
3065 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3066 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3067 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3068 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3069 }
3070 }
3071 }
3072 else
3073 {
3074 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3075 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3076 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3077 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3078 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3079 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3080 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3081 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3082 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3083 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3084 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3085 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3086 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3087 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3088 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3089 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3090 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3091 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3092 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3093 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3094
3095 if (FLOAT128_VECTOR_P (KFmode))
3096 {
3097 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3098 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3099 }
3100
3101 if (FLOAT128_IEEE_P (TFmode))
3102 {
3103 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3104 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3105 }
3106
3107 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3108 available. */
3109 if (TARGET_NO_SDMODE_STACK)
3110 {
3111 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3112 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3113 }
3114
3115 if (TARGET_VSX)
3116 {
3117 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3118 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3119 }
3120
3121 if (TARGET_DIRECT_MOVE)
3122 {
3123 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3124 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3125 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3126 }
3127 }
3128
3129 reg_addr[DFmode].scalar_in_vmx_p = true;
3130 reg_addr[DImode].scalar_in_vmx_p = true;
3131
3132 if (TARGET_P8_VECTOR)
3133 {
3134 reg_addr[SFmode].scalar_in_vmx_p = true;
3135 reg_addr[SImode].scalar_in_vmx_p = true;
3136
3137 if (TARGET_P9_VECTOR)
3138 {
3139 reg_addr[HImode].scalar_in_vmx_p = true;
3140 reg_addr[QImode].scalar_in_vmx_p = true;
3141 }
3142 }
3143 }
3144
3145 /* Precalculate HARD_REGNO_NREGS. */
3146 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3147 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3148 rs6000_hard_regno_nregs[m][r]
3149 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3150
3151 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3152 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3153 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3154 rs6000_hard_regno_mode_ok_p[m][r]
3155 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3156
3157 /* Precalculate CLASS_MAX_NREGS sizes. */
3158 for (c = 0; c < LIM_REG_CLASSES; ++c)
3159 {
3160 int reg_size;
3161
3162 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3163 reg_size = UNITS_PER_VSX_WORD;
3164
3165 else if (c == ALTIVEC_REGS)
3166 reg_size = UNITS_PER_ALTIVEC_WORD;
3167
3168 else if (c == FLOAT_REGS)
3169 reg_size = UNITS_PER_FP_WORD;
3170
3171 else
3172 reg_size = UNITS_PER_WORD;
3173
3174 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3175 {
3176 machine_mode m2 = (machine_mode)m;
3177 int reg_size2 = reg_size;
3178
3179 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3180 in VSX. */
3181 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3182 reg_size2 = UNITS_PER_FP_WORD;
3183
3184 rs6000_class_max_nregs[m][c]
3185 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3186 }
3187 }
3188
3189 /* Calculate which modes to automatically generate code to use the
3190 reciprocal divide and square root instructions. In the future, possibly
3191 automatically generate the instructions even if the user did not specify
3192 -mrecip. The older machines' double-precision reciprocal sqrt estimate
3193 is not accurate enough. */
3194 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3195 if (TARGET_FRES)
3196 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3197 if (TARGET_FRE)
3198 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3199 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3200 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3201 if (VECTOR_UNIT_VSX_P (V2DFmode))
3202 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3203
3204 if (TARGET_FRSQRTES)
3205 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3206 if (TARGET_FRSQRTE)
3207 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3208 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3209 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3210 if (VECTOR_UNIT_VSX_P (V2DFmode))
3211 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3212
3213 if (rs6000_recip_control)
3214 {
3215 if (!flag_finite_math_only)
3216 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3217 "-ffast-math");
3218 if (flag_trapping_math)
3219 warning (0, "%qs requires %qs or %qs", "-mrecip",
3220 "-fno-trapping-math", "-ffast-math");
3221 if (!flag_reciprocal_math)
3222 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3223 "-ffast-math");
3224 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3225 {
3226 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3227 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3228 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3229
3230 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3231 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3232 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3233
3234 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3235 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3236 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3237
3238 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3239 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3240 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3241
3242 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3243 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3244 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3245
3246 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3247 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3248 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3249
3250 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3251 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3252 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3253
3254 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3255 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3256 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3257 }
3258 }
3259
3260 /* Update the addr mask bits in reg_addr to help secondary reload and the
3261 legitimate address support figure out the appropriate addressing to
3262 use. */
3263 rs6000_setup_reg_addr_masks ();
3264
3265 if (global_init_p || TARGET_DEBUG_TARGET)
3266 {
3267 if (TARGET_DEBUG_REG)
3268 rs6000_debug_reg_global ();
3269
3270 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3271 fprintf (stderr,
3272 "SImode variable mult cost = %d\n"
3273 "SImode constant mult cost = %d\n"
3274 "SImode short constant mult cost = %d\n"
3275 "DImode multipliciation cost = %d\n"
3276 "SImode division cost = %d\n"
3277 "DImode division cost = %d\n"
3278 "Simple fp operation cost = %d\n"
3279 "DFmode multiplication cost = %d\n"
3280 "SFmode division cost = %d\n"
3281 "DFmode division cost = %d\n"
3282 "cache line size = %d\n"
3283 "l1 cache size = %d\n"
3284 "l2 cache size = %d\n"
3285 "simultaneous prefetches = %d\n"
3286 "\n",
3287 rs6000_cost->mulsi,
3288 rs6000_cost->mulsi_const,
3289 rs6000_cost->mulsi_const9,
3290 rs6000_cost->muldi,
3291 rs6000_cost->divsi,
3292 rs6000_cost->divdi,
3293 rs6000_cost->fp,
3294 rs6000_cost->dmul,
3295 rs6000_cost->sdiv,
3296 rs6000_cost->ddiv,
3297 rs6000_cost->cache_line_size,
3298 rs6000_cost->l1_cache_size,
3299 rs6000_cost->l2_cache_size,
3300 rs6000_cost->simultaneous_prefetches);
3301 }
3302 }
3303
3304 #if TARGET_MACHO
3305 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3306
3307 static void
3308 darwin_rs6000_override_options (void)
3309 {
3310 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3311 off. */
3312 rs6000_altivec_abi = 1;
3313 TARGET_ALTIVEC_VRSAVE = 1;
3314 rs6000_current_abi = ABI_DARWIN;
3315
3316 if (DEFAULT_ABI == ABI_DARWIN
3317 && TARGET_64BIT)
3318 darwin_one_byte_bool = 1;
3319
3320 if (TARGET_64BIT && ! TARGET_POWERPC64)
3321 {
3322 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3323 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3324 }
3325
3326 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3327 optimisation, and it will not work in the most generic case (where the
3328 symbol is an undefined external, but there is no symbol stub). */
3329 if (TARGET_64BIT)
3330 rs6000_default_long_calls = 0;
3331
3332 /* ld_classic is (so far) still used for kernel (static) code, and supports
3333 the JBSR longcall / branch islands. */
3334 if (flag_mkernel)
3335 {
3336 rs6000_default_long_calls = 1;
3337
3338 /* Allow a kext author to do -mkernel -mhard-float. */
3339 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3340 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3341 }
3342
3343 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3344 Altivec. */
3345 if (!flag_mkernel && !flag_apple_kext
3346 && TARGET_64BIT
3347 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3348 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3349
3350 /* Unless the user (not the configurer) has explicitly overridden
3351 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3352 G4 unless targeting the kernel. */
3353 if (!flag_mkernel
3354 && !flag_apple_kext
3355 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3356 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3357 && ! global_options_set.x_rs6000_cpu_index)
3358 {
3359 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3360 }
3361 }
3362 #endif
3363
3364 /* If not otherwise specified by a target, make 'long double' equivalent to
3365 'double'. */
3366
3367 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3368 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3369 #endif
3370
3371 /* Return the builtin mask of the various options used that could affect which
3372 builtins were used. In the past we used target_flags, but we've run out of
3373 bits, and some options are no longer in target_flags. */
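/* For example, compiling with -mvsx contributes RS6000_BTM_VSX to the
   returned mask; each term below pairs one TARGET_* predicate with its
   RS6000_BTM_* bit.  */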
3374
3375 HOST_WIDE_INT
3376 rs6000_builtin_mask_calculate (void)
3377 {
3378 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3379 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3380 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3381 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3382 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3383 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3384 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3385 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3386 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3387 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3388 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3389 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3390 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3391 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3392 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3393 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3394 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3395 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3396 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3397 | ((TARGET_LONG_DOUBLE_128
3398 && TARGET_HARD_FLOAT
3399 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3400 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3401 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
3402 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
3403 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0));
3404 }
3405
3406 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3407 to clobber the XER[CA] bit because clobbering that bit without telling
3408 the compiler worked just fine with versions of GCC before GCC 5, and
3409 breaking a lot of older code in ways that are hard to track down is
3410 not such a great idea. */
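/* (Example: an asm body using carry-setting or carry-consuming
   instructions such as addic, subfc, or addze alters or depends on
   XER[CA]; assuming the clobber for every asm keeps such legacy code
   correct without an explicit clobber.)  */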
3411
3412 static rtx_insn *
3413 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3414 vec<const char *> &/*constraints*/,
3415 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3416 {
3417 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3418 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3419 return NULL;
3420 }
3421
3422 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3423 but is called when the optimize level is changed via an attribute or
3424 pragma or when it is reset at the end of the code affected by the
3425 attribute or pragma. It is not called at the beginning of compilation
3426 when TARGET_OPTION_OVERRIDE is called, so if you want to perform these
3427 actions then, you should have TARGET_OPTION_OVERRIDE call
3428 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3429
3430 static void
3431 rs6000_override_options_after_change (void)
3432 {
3433 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3434 turns -frename-registers on. */
3435 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
3436 || (global_options_set.x_flag_unroll_all_loops
3437 && flag_unroll_all_loops))
3438 {
3439 if (!global_options_set.x_unroll_only_small_loops)
3440 unroll_only_small_loops = 0;
3441 if (!global_options_set.x_flag_rename_registers)
3442 flag_rename_registers = 1;
3443 if (!global_options_set.x_flag_cunroll_grow_size)
3444 flag_cunroll_grow_size = 1;
3445 }
3446 else if (!global_options_set.x_flag_cunroll_grow_size)
3447 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3448 }
3449
3450 #ifdef TARGET_USES_LINUX64_OPT
3451 static void
3452 rs6000_linux64_override_options ()
3453 {
3454 if (!global_options_set.x_rs6000_alignment_flags)
3455 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3456 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3457 {
3458 if (DEFAULT_ABI != ABI_AIX)
3459 {
3460 rs6000_current_abi = ABI_AIX;
3461 error (INVALID_64BIT, "call");
3462 }
3463 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3464 if (ELFv2_ABI_CHECK)
3465 {
3466 rs6000_current_abi = ABI_ELFv2;
3467 if (dot_symbols)
3468 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3469 }
3470 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3471 {
3472 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3473 error (INVALID_64BIT, "relocatable");
3474 }
3475 if (rs6000_isa_flags & OPTION_MASK_EABI)
3476 {
3477 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3478 error (INVALID_64BIT, "eabi");
3479 }
3480 if (TARGET_PROTOTYPE)
3481 {
3482 target_prototype = 0;
3483 error (INVALID_64BIT, "prototype");
3484 }
3485 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3486 {
3487 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3488 error ("%<-m64%> requires a PowerPC64 cpu");
3489 }
3490 if (!global_options_set.x_rs6000_current_cmodel)
3491 SET_CMODEL (CMODEL_MEDIUM);
3492 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3493 {
3494 if (global_options_set.x_rs6000_current_cmodel
3495 && rs6000_current_cmodel != CMODEL_SMALL)
3496 error ("%<-mcmodel incompatible with other toc options%>");
3497 if (TARGET_MINIMAL_TOC)
3498 SET_CMODEL (CMODEL_SMALL);
3499 else if (TARGET_PCREL
3500 || (PCREL_SUPPORTED_BY_OS
3501 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3502 /* Ignore -mno-minimal-toc. */
3503 ;
3504 else
3505 SET_CMODEL (CMODEL_SMALL);
3506 }
3507 if (rs6000_current_cmodel != CMODEL_SMALL)
3508 {
3509 if (!global_options_set.x_TARGET_NO_FP_IN_TOC)
3510 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3511 if (!global_options_set.x_TARGET_NO_SUM_IN_TOC)
3512 TARGET_NO_SUM_IN_TOC = 0;
3513 }
3514 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3515 {
3516 if (global_options_set.x_rs6000_pltseq)
3517 warning (0, "%qs unsupported for this ABI",
3518 "-mpltseq");
3519 rs6000_pltseq = false;
3520 }
3521 }
3522 else if (TARGET_64BIT)
3523 error (INVALID_32BIT, "32");
3524 else
3525 {
3526 if (TARGET_PROFILE_KERNEL)
3527 {
3528 profile_kernel = 0;
3529 error (INVALID_32BIT, "profile-kernel");
3530 }
3531 if (global_options_set.x_rs6000_current_cmodel)
3532 {
3533 SET_CMODEL (CMODEL_SMALL);
3534 error (INVALID_32BIT, "cmodel");
3535 }
3536 }
3537 }
3538 #endif
3539
3540 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3541 This support is only in little endian GLIBC 2.32 or newer. */
3542 static bool
3543 glibc_supports_ieee_128bit (void)
3544 {
3545 #ifdef OPTION_GLIBC
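/* The version test encodes major.minor as major * 1000 + minor, so
   glibc 2.32 maps to 2 * 1000 + 32 = 2032.  */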
3546 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3547 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3548 return true;
3549 #endif /* OPTION_GLIBC. */
3550
3551 return false;
3552 }
3553
3554 /* Override command line options.
3555
3556 Combine build-specific configuration information with options
3557 specified on the command line to set various state variables which
3558 influence code generation, optimization, and expansion of built-in
3559 functions. Assure that command-line configuration preferences are
3560 compatible with each other and with the build configuration; issue
3561 warnings while adjusting configuration or error messages while
3562 rejecting configuration.
3563
3564 Upon entry to this function:
3565
3566 This function is called once at the beginning of
3567 compilation, and then again at the start and end of compiling
3568 each section of code that has a different configuration, as
3569 indicated, for example, by adding the
3570
3571 __attribute__((__target__("cpu=power9")))
3572
3573 qualifier to a function definition or, for example, by bracketing
3574 code between
3575
3576 #pragma GCC target("altivec")
3577
3578 and
3579
3580 #pragma GCC reset_options
3581
3582 directives. Parameter global_init_p is true for the initial
3583 invocation, which initializes global variables, and false for all
3584 subsequent invocations.
3585
3586
3587 Various global state information is assumed to be valid. This
3588 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3589 default CPU specified at build configure time, TARGET_DEFAULT,
3590 representing the default set of option flags for the default
3591 target, and global_options_set.x_rs6000_isa_flags, representing
3592 which options were requested on the command line.
3593
3594 Upon return from this function:
3595
3596 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3597 was set by name on the command line. Additionally, if certain
3598 attributes are automatically enabled or disabled by this function
3599 in order to assure compatibility between options and
3600 configuration, the flags associated with those attributes are
3601 also set. By setting these "explicit bits", we avoid the risk
3602 that other code might accidentally overwrite these particular
3603 attributes with "default values".
3604
3605 The various bits of rs6000_isa_flags are set to indicate the
3606 target options that have been selected for the most current
3607 compilation efforts. This has the effect of also turning on the
3608 associated TARGET_XXX values since these are macros which are
3609 generally defined to test the corresponding bit of the
3610 rs6000_isa_flags variable.
3611
3612 The variable rs6000_builtin_mask is set to represent the target
3613 options for the most current compilation efforts, consistent with
3614 the current contents of rs6000_isa_flags. This variable controls
3615 expansion of built-in functions.
3616
3617 Various other global variables and fields of global structures
3618 (over 50 in all) are initialized to reflect the desired options
3619 for the most current compilation efforts. */
3620
3621 static bool
3622 rs6000_option_override_internal (bool global_init_p)
3623 {
3624 bool ret = true;
3625
3626 HOST_WIDE_INT set_masks;
3627 HOST_WIDE_INT ignore_masks;
3628 int cpu_index = -1;
3629 int tune_index;
3630 struct cl_target_option *main_target_opt
3631 = ((global_init_p || target_option_default_node == NULL)
3632 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3633
3634 /* Print defaults. */
3635 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3636 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3637
3638 /* Remember the explicit arguments. */
3639 if (global_init_p)
3640 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3641
3642 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3643 library functions, so warn about it. The flag may be useful for
3644 performance studies from time to time though, so don't disable it
3645 entirely. */
3646 if (global_options_set.x_rs6000_alignment_flags
3647 && rs6000_alignment_flags == MASK_ALIGN_POWER
3648 && DEFAULT_ABI == ABI_DARWIN
3649 && TARGET_64BIT)
3650 warning (0, "%qs is not supported for 64-bit Darwin;"
3651 " it is incompatible with the installed C and C++ libraries",
3652 "-malign-power");
3653
3654 /* Numerous experiments show that IRA-based loop pressure
3655 calculation works better for RTL loop invariant motion on targets
3656 with enough (>= 32) registers. It is an expensive optimization,
3657 so it is on only for peak performance. */
3658 if (optimize >= 3 && global_init_p
3659 && !global_options_set.x_flag_ira_loop_pressure)
3660 flag_ira_loop_pressure = 1;
3661
3662 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3663 for tracebacks to be complete, but not if any -fasynchronous-unwind-tables
3664 option was already specified. */
3665 if (flag_sanitize & SANITIZE_USER_ADDRESS
3666 && !global_options_set.x_flag_asynchronous_unwind_tables)
3667 flag_asynchronous_unwind_tables = 1;
3668
3669 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3670 loop unroller is active. It is only checked during unrolling, so
3671 we can just set it on by default. */
3672 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3673 flag_variable_expansion_in_unroller = 1;
3674
3675 /* Set the pointer size. */
3676 if (TARGET_64BIT)
3677 {
3678 rs6000_pmode = DImode;
3679 rs6000_pointer_size = 64;
3680 }
3681 else
3682 {
3683 rs6000_pmode = SImode;
3684 rs6000_pointer_size = 32;
3685 }
3686
3687 /* Some OSs don't support saving the high part of 64-bit registers on context
3688 switch. Other OSs don't support saving Altivec registers. On those OSs,
3689 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3690 if the user wants either, the user must explicitly specify them and we
3691 won't interfere with the user's specification. */
3692
3693 set_masks = POWERPC_MASKS;
3694 #ifdef OS_MISSING_POWERPC64
3695 if (OS_MISSING_POWERPC64)
3696 set_masks &= ~OPTION_MASK_POWERPC64;
3697 #endif
3698 #ifdef OS_MISSING_ALTIVEC
3699 if (OS_MISSING_ALTIVEC)
3700 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3701 | OTHER_VSX_VECTOR_MASKS);
3702 #endif
3703
3704 /* Don't let the processor default override options given explicitly. */
3705 set_masks &= ~rs6000_isa_flags_explicit;
3706
3707 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3708 the cpu in a target attribute or pragma, but did not specify a tuning
3709 option, use the cpu for the tuning option rather than the option specified
3710 with -mtune on the command line. Process a '--with-cpu' configuration
3711 request as an implicit -mcpu. */
3712 if (rs6000_cpu_index >= 0)
3713 cpu_index = rs6000_cpu_index;
3714 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3715 cpu_index = main_target_opt->x_rs6000_cpu_index;
3716 else if (OPTION_TARGET_CPU_DEFAULT)
3717 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3718
3719 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3720 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3721 with those from the cpu, except for options that were explicitly set. If
3722 we don't have a cpu, do not override the target bits set in
3723 TARGET_DEFAULT. */
3724 if (cpu_index >= 0)
3725 {
3726 rs6000_cpu_index = cpu_index;
3727 rs6000_isa_flags &= ~set_masks;
3728 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3729 & set_masks);
3730 }
3731 else
3732 {
3733 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3734 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3735 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3736 to using rs6000_isa_flags, we need to do the initialization here.
3737
3738 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3739 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3740 HOST_WIDE_INT flags;
3741 if (TARGET_DEFAULT)
3742 flags = TARGET_DEFAULT;
3743 else
3744 {
3745 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3746 const char *default_cpu = (!TARGET_POWERPC64
3747 ? "powerpc"
3748 : (BYTES_BIG_ENDIAN
3749 ? "powerpc64"
3750 : "powerpc64le"));
3751 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3752 flags = processor_target_table[default_cpu_index].target_enable;
3753 }
3754 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3755 }
3756
3757 if (rs6000_tune_index >= 0)
3758 tune_index = rs6000_tune_index;
3759 else if (cpu_index >= 0)
3760 rs6000_tune_index = tune_index = cpu_index;
3761 else
3762 {
3763 size_t i;
3764 enum processor_type tune_proc
3765 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3766
3767 tune_index = -1;
3768 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3769 if (processor_target_table[i].processor == tune_proc)
3770 {
3771 tune_index = i;
3772 break;
3773 }
3774 }
3775
3776 if (cpu_index >= 0)
3777 rs6000_cpu = processor_target_table[cpu_index].processor;
3778 else
3779 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3780
3781 gcc_assert (tune_index >= 0);
3782 rs6000_tune = processor_target_table[tune_index].processor;
3783
3784 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3785 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3786 || rs6000_cpu == PROCESSOR_PPCE5500)
3787 {
3788 if (TARGET_ALTIVEC)
3789 error ("AltiVec not supported in this target");
3790 }
3791
3792 /* If we are optimizing big endian systems for space, use the load/store
3793 multiple instructions. */
3794 if (BYTES_BIG_ENDIAN && optimize_size)
3795 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3796
3797 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3798 because the hardware doesn't support the instructions used in little
3799 endian mode, and they cause an alignment trap. The 750 does not cause an
3800 alignment trap (except when the target is unaligned). */
3801
3802 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3803 {
3804 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3805 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3806 warning (0, "%qs is not supported on little endian systems",
3807 "-mmultiple");
3808 }
3809
3810 /* If little-endian, default to -mstrict-align on older processors.
3811 Testing for direct_move matches power8 and later. */
3812 if (!BYTES_BIG_ENDIAN
3813 && !(processor_target_table[tune_index].target_enable
3814 & OPTION_MASK_DIRECT_MOVE))
3815 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3816
3817 if (!rs6000_fold_gimple)
3818 fprintf (stderr,
3819 "gimple folding of rs6000 builtins has been disabled.\n");
3820
3821 /* Add some warnings for VSX. */
3822 if (TARGET_VSX)
3823 {
3824 const char *msg = NULL;
3825 if (!TARGET_HARD_FLOAT)
3826 {
3827 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3828 msg = N_("%<-mvsx%> requires hardware floating point");
3829 else
3830 {
3831 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3832 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3833 }
3834 }
3835 else if (TARGET_AVOID_XFORM > 0)
3836 msg = N_("%<-mvsx%> needs indexed addressing");
3837 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3838 & OPTION_MASK_ALTIVEC))
3839 {
3840 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3841 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3842 else
3843 msg = N_("%<-mno-altivec%> disables vsx");
3844 }
3845
3846 if (msg)
3847 {
3848 warning (0, msg);
3849 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3850 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3851 }
3852 }
3853
3854 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3855 the -mcpu setting to enable options that conflict. */
3856 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3857 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3858 | OPTION_MASK_ALTIVEC
3859 | OPTION_MASK_VSX)) != 0)
3860 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3861 | OPTION_MASK_DIRECT_MOVE)
3862 & ~rs6000_isa_flags_explicit);
3863
3864 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3865 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3866
3867 #ifdef XCOFF_DEBUGGING_INFO
3868 /* For AIX default to 64-bit DWARF. */
3869 if (!global_options_set.x_dwarf_offset_size)
3870 dwarf_offset_size = POINTER_SIZE_UNITS;
3871 #endif
3872
3873 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3874 off all of the options that depend on those flags. */
3875 ignore_masks = rs6000_disable_incompatible_switches ();
3876
3877 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3878 unless the user explicitly used the -mno-<option> to disable the code. */
3879 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3880 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3881 else if (TARGET_P9_MINMAX)
3882 {
3883 if (cpu_index >= 0)
3884 {
3885 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3886 {
3887 /* Legacy behavior: allow -mcpu=power9 with certain
3888 capabilities explicitly disabled. */
3889 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3890 }
3891 else
3892 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3893 "for <xxx> less than power9", "-mcpu");
3894 }
3895 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3896 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3897 & rs6000_isa_flags_explicit))
3898 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3899 were explicitly cleared. */
3900 error ("%qs incompatible with explicitly disabled options",
3901 "-mpower9-minmax");
3902 else
3903 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3904 }
3905 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3906 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3907 else if (TARGET_VSX)
3908 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3909 else if (TARGET_POPCNTD)
3910 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3911 else if (TARGET_DFP)
3912 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3913 else if (TARGET_CMPB)
3914 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3915 else if (TARGET_FPRND)
3916 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3917 else if (TARGET_POPCNTB)
3918 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3919 else if (TARGET_ALTIVEC)
3920 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3921
3922 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3923 {
3924 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3925 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3926 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3927 }
3928
3929 if (!TARGET_FPRND && TARGET_VSX)
3930 {
3931 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3932 /* TARGET_VSX = 1 implies power7 and newer. */
3933 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3934 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3935 }
3936
3937 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3938 {
3939 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3940 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3941 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3942 }
3943
3944 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3945 {
3946 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3947 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3948 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3949 }
3950
3951 if (TARGET_P8_VECTOR && !TARGET_VSX)
3952 {
3953 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3954 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3955 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3956 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3957 {
3958 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3959 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3960 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3961 }
3962 else
3963 {
3964 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3965 not explicit. */
3966 rs6000_isa_flags |= OPTION_MASK_VSX;
3967 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3968 }
3969 }
3970
3971 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3972 {
3973 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3974 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3975 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3976 }
3977
3978 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3979 silently turn off quad memory mode. */
3980 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3981 {
3982 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3983 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3984
3985 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3986 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3987
3988 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3989 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3990 }
3991
3992 /* Non-atomic quad memory load/store are disabled for little endian, since
3993 the words are reversed, but atomic operations can still be done by
3994 swapping the words. */
3995 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3996 {
3997 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3998 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3999 "mode"));
4000
4001 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4002 }
4003
4004 /* Assume that if the user asked for normal quad memory instructions, they
4005 want the atomic versions as well, unless they explicitly told us not to use
4006 quad word atomic instructions. */
4007 if (TARGET_QUAD_MEMORY
4008 && !TARGET_QUAD_MEMORY_ATOMIC
4009 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4010 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4011
4012 /* If we can shrink-wrap the TOC register save separately, then use
4013 -msave-toc-indirect unless explicitly disabled. */
4014 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4015 && flag_shrink_wrap_separate
4016 && optimize_function_for_speed_p (cfun))
4017 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4018
4019 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4020 generating power8 instructions. Power9 does not optimize power8 fusion
4021 cases. */
4022 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4023 {
4024 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4025 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4026 else
4027 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4028 }
4029
4030 /* Setting additional fusion flags turns on base fusion. */
4031 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4032 {
4033 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4034 {
4035 if (TARGET_P8_FUSION_SIGN)
4036 error ("%qs requires %qs", "-mpower8-fusion-sign",
4037 "-mpower8-fusion");
4038
4039 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4040 }
4041 else
4042 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4043 }
4044
4045 /* Power8 does not fuse sign extended loads with the addis. If we are
4046 optimizing at high levels for speed, convert a sign extended load into a
4047 zero extending load, and an explicit sign extension. */
4048 if (TARGET_P8_FUSION
4049 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4050 && optimize_function_for_speed_p (cfun)
4051 && optimize >= 3)
4052 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4053
4054 /* ISA 3.0 vector instructions include ISA 2.07. */
4055 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4056 {
4057 /* We prefer to not mention undocumented options in
4058 error messages. However, if users have managed to select
4059 power9-vector without selecting power8-vector, they
4060 already know about undocumented flags. */
4061 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4062 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4063 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4064 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4065 {
4066 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4067 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4068 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4069 }
4070 else
4071 {
4072 /* OPTION_MASK_P9_VECTOR is explicit and
4073 OPTION_MASK_P8_VECTOR is not explicit. */
4074 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4075 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4076 }
4077 }
4078
4079 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4080 support. If we only have ISA 2.06 support, and the user did not specify
4081 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4082 but we don't enable the full vectorization support. */
4083 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4084 TARGET_ALLOW_MOVMISALIGN = 1;
4085
4086 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4087 {
4088 if (TARGET_ALLOW_MOVMISALIGN > 0
4089 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4090 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4091
4092 TARGET_ALLOW_MOVMISALIGN = 0;
4093 }
4094
4095 /* Determine when unaligned vector accesses are permitted, and when
4096 they are preferred over masked Altivec loads. Note that if
4097 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4098 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4099 not true. */
4100 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4101 {
4102 if (!TARGET_VSX)
4103 {
4104 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4105 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4106
4107 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4108 }
4109
4110 else if (!TARGET_ALLOW_MOVMISALIGN)
4111 {
4112 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4113 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4114 "-mallow-movmisalign");
4115
4116 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4117 }
4118 }
4119
4120 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4121 {
4122 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4123 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4124 else
4125 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4126 }
4127
4128 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
4129 {
4130 if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
4131 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4132 else
4133 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4134 }
4135
4136 /* Use long double size to select the appropriate long double. We use
4137 TYPE_PRECISION to differentiate the 3 different long double types. We map
4138 128 into the precision used for TFmode. */
4139 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4140 ? 64
4141 : FLOAT_PRECISION_TFmode);
4142
4143 /* Set long double size before the IEEE 128-bit tests. */
4144 if (!global_options_set.x_rs6000_long_double_type_size)
4145 {
4146 if (main_target_opt != NULL
4147 && (main_target_opt->x_rs6000_long_double_type_size
4148 != default_long_double_size))
4149 error ("target attribute or pragma changes %<long double%> size");
4150 else
4151 rs6000_long_double_type_size = default_long_double_size;
4152 }
4153 else if (rs6000_long_double_type_size == 128)
4154 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4155 else if (global_options_set.x_rs6000_ieeequad)
4156 {
4157 if (global_options.x_rs6000_ieeequad)
4158 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4159 else
4160 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4161 }
4162
4163 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4164 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4165 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4166 those systems will not pick up this default. Warn if the user changes the
4167 default unless -Wno-psabi. */
4168 if (!global_options_set.x_rs6000_ieeequad)
4169 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4170
4171 else
4172 {
4173 if (global_options.x_rs6000_ieeequad
4174 && (!TARGET_POPCNTD || !TARGET_VSX))
4175 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4176
4177 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4178 {
4179 /* Determine if the user can change the default long double type at
4180 compilation time. Only C and C++ support this, and you need GLIBC
4181 2.32 or newer. Only issue one warning. */
4182 static bool warned_change_long_double;
4183
4184 if (!warned_change_long_double
4185 && (!glibc_supports_ieee_128bit ()
4186 || (!lang_GNU_C () && !lang_GNU_CXX ())))
4187 {
4188 warned_change_long_double = true;
4189 if (TARGET_IEEEQUAD)
4190 warning (OPT_Wpsabi, "Using IEEE extended precision "
4191 "%<long double%>");
4192 else
4193 warning (OPT_Wpsabi, "Using IBM extended precision "
4194 "%<long double%>");
4195 }
4196 }
4197 }
4198
4199 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4200 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4201 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4202 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4203 the keyword as well as the type. */
4204 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4205
4206 /* IEEE 128-bit floating point requires VSX support. */
4207 if (TARGET_FLOAT128_KEYWORD)
4208 {
4209 if (!TARGET_VSX)
4210 {
4211 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4212 error ("%qs requires VSX support", "-mfloat128");
4213
4214 TARGET_FLOAT128_TYPE = 0;
4215 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4216 | OPTION_MASK_FLOAT128_HW);
4217 }
4218 else if (!TARGET_FLOAT128_TYPE)
4219 {
4220 TARGET_FLOAT128_TYPE = 1;
4221 warning (0, "The %<-mfloat128%> option may not be fully supported");
4222 }
4223 }
4224
4225 /* Enable the __float128 keyword under Linux by default. */
4226 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4227 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4228 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4229
4230 /* If we are supporting the float128 type and have full ISA 3.0 support,
4231 enable -mfloat128-hardware by default. However, don't enable the
4232 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4233 because sometimes the compiler wants to put things in an integer
4234 container, and if we don't have __int128 support, it is impossible. */
4235 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4236 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4237 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4238 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4239
4240 if (TARGET_FLOAT128_HW
4241 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4242 {
4243 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4244 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4245
4246 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4247 }
4248
4249 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4250 {
4251 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4252 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4253
4254 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4255 }
4256
4257 /* Enable -mprefixed by default on power10 systems. */
4258 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4259 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4260
4261 /* -mprefixed requires -mcpu=power10 (or later). */
4262 else if (TARGET_PREFIXED && !TARGET_POWER10)
4263 {
4264 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4265 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4266
4267 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4268 }
4269
4270 /* -mpcrel requires prefixed load/store addressing. */
4271 if (TARGET_PCREL && !TARGET_PREFIXED)
4272 {
4273 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4274 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4275
4276 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4277 }
4278
4279 /* Print the options after updating the defaults. */
4280 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4281 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4282
4283 /* E500mc does "better" if we inline more aggressively. Respect the
4284 user's opinion, though. */
4285 if (rs6000_block_move_inline_limit == 0
4286 && (rs6000_tune == PROCESSOR_PPCE500MC
4287 || rs6000_tune == PROCESSOR_PPCE500MC64
4288 || rs6000_tune == PROCESSOR_PPCE5500
4289 || rs6000_tune == PROCESSOR_PPCE6500))
4290 rs6000_block_move_inline_limit = 128;
4291
4292 /* store_one_arg depends on expand_block_move to handle at least the
4293 size of reg_parm_stack_space. */
4294 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4295 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4296
4297 if (global_init_p)
4298 {
4299 /* If the appropriate debug option is enabled, replace the target hooks
4300 with debug versions that call the real version and then prints
4301 debugging information. */
4302 if (TARGET_DEBUG_COST)
4303 {
4304 targetm.rtx_costs = rs6000_debug_rtx_costs;
4305 targetm.address_cost = rs6000_debug_address_cost;
4306 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4307 }
4308
4309 if (TARGET_DEBUG_ADDR)
4310 {
4311 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4312 targetm.legitimize_address = rs6000_debug_legitimize_address;
4313 rs6000_secondary_reload_class_ptr
4314 = rs6000_debug_secondary_reload_class;
4315 targetm.secondary_memory_needed
4316 = rs6000_debug_secondary_memory_needed;
4317 targetm.can_change_mode_class
4318 = rs6000_debug_can_change_mode_class;
4319 rs6000_preferred_reload_class_ptr
4320 = rs6000_debug_preferred_reload_class;
4321 rs6000_mode_dependent_address_ptr
4322 = rs6000_debug_mode_dependent_address;
4323 }
4324
4325 if (rs6000_veclibabi_name)
4326 {
4327 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4328 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4329 else
4330 {
4331 error ("unknown vectorization library ABI type (%qs) for "
4332 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4333 ret = false;
4334 }
4335 }
4336 }
4337
4338 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4339 target attribute or pragma which automatically enables both options,
4340 unless the altivec ABI was set. This is set by default for 64-bit, but
4341 not for 32-bit. */
4342 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4343 {
4344 TARGET_FLOAT128_TYPE = 0;
4345 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4346 | OPTION_MASK_FLOAT128_KEYWORD)
4347 & ~rs6000_isa_flags_explicit);
4348 }
4349
4350 /* Enable Altivec ABI for AIX -maltivec. */
4351 if (TARGET_XCOFF
4352 && (TARGET_ALTIVEC || TARGET_VSX)
4353 && !global_options_set.x_rs6000_altivec_abi)
4354 {
4355 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4356 error ("target attribute or pragma changes AltiVec ABI");
4357 else
4358 rs6000_altivec_abi = 1;
4359 }
4360
4361 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4362 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4363 be explicitly overridden in either case. */
4364 if (TARGET_ELF)
4365 {
4366 if (!global_options_set.x_rs6000_altivec_abi
4367 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4368 {
4369 if (main_target_opt != NULL
4370 && !main_target_opt->x_rs6000_altivec_abi)
4371 error ("target attribute or pragma changes AltiVec ABI");
4372 else
4373 rs6000_altivec_abi = 1;
4374 }
4375 }
4376
4377 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4378 So far, the only darwin64 targets are also Mach-O. */
4379 if (TARGET_MACHO
4380 && DEFAULT_ABI == ABI_DARWIN
4381 && TARGET_64BIT)
4382 {
4383 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4384 error ("target attribute or pragma changes darwin64 ABI");
4385 else
4386 {
4387 rs6000_darwin64_abi = 1;
4388 /* Default to natural alignment, for better performance. */
4389 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4390 }
4391 }
4392
4393 /* Place FP constants in the constant pool instead of the TOC
4394 if section anchors are enabled. */
4395 if (flag_section_anchors
4396 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4397 TARGET_NO_FP_IN_TOC = 1;
4398
4399 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4400 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4401
4402 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4403 SUBTARGET_OVERRIDE_OPTIONS;
4404 #endif
4405 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4406 SUBSUBTARGET_OVERRIDE_OPTIONS;
4407 #endif
4408 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4409 SUB3TARGET_OVERRIDE_OPTIONS;
4410 #endif
4411
4412 /* If the ABI has support for PC-relative relocations, enable it by default.
4413 This test depends on the sub-target tests above setting the code model to
4414 medium for ELF v2 systems. */
4415 if (PCREL_SUPPORTED_BY_OS
4416 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4417 rs6000_isa_flags |= OPTION_MASK_PCREL;
4418
4419 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4420 after the subtarget override options are done. */
4421 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4422 {
4423 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4424 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4425
4426 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4427 }
4428
4429 /* Enable -mmma by default on power10 systems. */
4430 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4431 rs6000_isa_flags |= OPTION_MASK_MMA;
4432
4433 /* Turn off vector pair/mma options on non-power10 systems. */
4434 else if (!TARGET_POWER10 && TARGET_MMA)
4435 {
4436 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4437 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4438
4439 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4440 }
4441
4442 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4443 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4444
4445 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4446 && rs6000_tune != PROCESSOR_POWER5
4447 && rs6000_tune != PROCESSOR_POWER6
4448 && rs6000_tune != PROCESSOR_POWER7
4449 && rs6000_tune != PROCESSOR_POWER8
4450 && rs6000_tune != PROCESSOR_POWER9
4451 && rs6000_tune != PROCESSOR_POWER10
4452 && rs6000_tune != PROCESSOR_PPCA2
4453 && rs6000_tune != PROCESSOR_CELL
4454 && rs6000_tune != PROCESSOR_PPC476);
4455 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4456 || rs6000_tune == PROCESSOR_POWER5
4457 || rs6000_tune == PROCESSOR_POWER7
4458 || rs6000_tune == PROCESSOR_POWER8);
4459 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4460 || rs6000_tune == PROCESSOR_POWER5
4461 || rs6000_tune == PROCESSOR_POWER6
4462 || rs6000_tune == PROCESSOR_POWER7
4463 || rs6000_tune == PROCESSOR_POWER8
4464 || rs6000_tune == PROCESSOR_POWER9
4465 || rs6000_tune == PROCESSOR_POWER10
4466 || rs6000_tune == PROCESSOR_PPCE500MC
4467 || rs6000_tune == PROCESSOR_PPCE500MC64
4468 || rs6000_tune == PROCESSOR_PPCE5500
4469 || rs6000_tune == PROCESSOR_PPCE6500);
4470
4471 /* Allow debug switches to override the above settings. These are set to -1
4472 in rs6000.opt to indicate the user hasn't directly set the switch. */
4473 if (TARGET_ALWAYS_HINT >= 0)
4474 rs6000_always_hint = TARGET_ALWAYS_HINT;
4475
4476 if (TARGET_SCHED_GROUPS >= 0)
4477 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4478
4479 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4480 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4481
4482 rs6000_sched_restricted_insns_priority
4483 = (rs6000_sched_groups ? 1 : 0);
4484
4485 /* Handle -msched-costly-dep option. */
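/* Recognized values, handled below: "no", "all", "true_store_to_load",
   "store_to_load", or a bare number parsed with atoi, e.g.
   -msched-costly-dep=2 (the numeric value is illustrative).  */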
4486 rs6000_sched_costly_dep
4487 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4488
4489 if (rs6000_sched_costly_dep_str)
4490 {
4491 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4492 rs6000_sched_costly_dep = no_dep_costly;
4493 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4494 rs6000_sched_costly_dep = all_deps_costly;
4495 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4496 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4497 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4498 rs6000_sched_costly_dep = store_to_load_dep_costly;
4499 else
4500 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4501 atoi (rs6000_sched_costly_dep_str));
4502 }
4503
4504 /* Handle -minsert-sched-nops option. */
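/* Recognized values, handled below: "no", "pad", "regroup_exact", or a
   bare number parsed with atoi, e.g. -minsert-sched-nops=3 (the numeric
   value is illustrative).  */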
4505 rs6000_sched_insert_nops
4506 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4507
4508 if (rs6000_sched_insert_nops_str)
4509 {
4510 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4511 rs6000_sched_insert_nops = sched_finish_none;
4512 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4513 rs6000_sched_insert_nops = sched_finish_pad_groups;
4514 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4515 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4516 else
4517 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4518 atoi (rs6000_sched_insert_nops_str));
4519 }
4520
4521 /* Handle the stack protector options. */
4522 if (!global_options_set.x_rs6000_stack_protector_guard)
4523 #ifdef TARGET_THREAD_SSP_OFFSET
4524 rs6000_stack_protector_guard = SSP_TLS;
4525 #else
4526 rs6000_stack_protector_guard = SSP_GLOBAL;
4527 #endif
4528
4529 #ifdef TARGET_THREAD_SSP_OFFSET
4530 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4531 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4532 #endif
4533
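/* Parse, e.g., -mstack-protector-guard-offset=0x28 (an illustrative
   value). The offset must fit in a signed 16-bit displacement and, in
   64-bit mode, be a multiple of 4 so it is usable as a DS-form memory
   displacement; both constraints are checked below.  */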
4534 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4535 {
4536 char *endp;
4537 const char *str = rs6000_stack_protector_guard_offset_str;
4538
4539 errno = 0;
4540 long offset = strtol (str, &endp, 0);
4541 if (!*str || *endp || errno)
4542 error ("%qs is not a valid number in %qs", str,
4543 "-mstack-protector-guard-offset=");
4544
4545 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4546 || (TARGET_64BIT && (offset & 3)))
4547 error ("%qs is not a valid offset in %qs", str,
4548 "-mstack-protector-guard-offset=");
4549
4550 rs6000_stack_protector_guard_offset = offset;
4551 }
4552
4553 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4554 {
4555 const char *str = rs6000_stack_protector_guard_reg_str;
4556 int reg = decode_reg_name (str);
4557
4558 if (!IN_RANGE (reg, 1, 31))
4559 error ("%qs is not a valid base register in %qs", str,
4560 "-mstack-protector-guard-reg=");
4561
4562 rs6000_stack_protector_guard_reg = reg;
4563 }
4564
4565 if (rs6000_stack_protector_guard == SSP_TLS
4566 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4567 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4568
4569 if (global_init_p)
4570 {
4571 #ifdef TARGET_REGNAMES
4572 /* If the user desires alternate register names, copy in the
4573 alternate names now. */
4574 if (TARGET_REGNAMES)
4575 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4576 #endif
4577
4578 /* Set aix_struct_return last, after the ABI is determined.
4579 If -maix-struct-return or -msvr4-struct-return was explicitly
4580 used, don't override with the ABI default. */
4581 if (!global_options_set.x_aix_struct_return)
4582 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4583
4584 #if 0
4585 /* IBM XL compiler defaults to unsigned bitfields. */
4586 if (TARGET_XL_COMPAT)
4587 flag_signed_bitfields = 0;
4588 #endif
4589
4590 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4591 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4592
4593 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4594
4595 /* We can only guarantee the availability of DI pseudo-ops when
4596 assembling for 64-bit targets. */
4597 if (!TARGET_64BIT)
4598 {
4599 targetm.asm_out.aligned_op.di = NULL;
4600 targetm.asm_out.unaligned_op.di = NULL;
4601 }
4602
4603
4604 /* Set branch target alignment, if not optimizing for size. */
4605 if (!optimize_size)
4606 {
4607 /* Cell wants 8-byte alignment for dual issue. Titan wants 8-byte
4608 alignment to avoid misprediction by the branch predictor. */
4609 if (rs6000_tune == PROCESSOR_TITAN
4610 || rs6000_tune == PROCESSOR_CELL)
4611 {
4612 if (flag_align_functions && !str_align_functions)
4613 str_align_functions = "8";
4614 if (flag_align_jumps && !str_align_jumps)
4615 str_align_jumps = "8";
4616 if (flag_align_loops && !str_align_loops)
4617 str_align_loops = "8";
4618 }
4619 if (rs6000_align_branch_targets)
4620 {
4621 if (flag_align_functions && !str_align_functions)
4622 str_align_functions = "16";
4623 if (flag_align_jumps && !str_align_jumps)
4624 str_align_jumps = "16";
4625 if (flag_align_loops && !str_align_loops)
4626 {
4627 can_override_loop_align = 1;
4628 str_align_loops = "16";
4629 }
4630 }
4631 }
4632
4633 /* Arrange to save and restore machine status around nested functions. */
4634 init_machine_status = rs6000_init_machine_status;
4635
4636 /* We should always be splitting complex arguments, but we can't break
4637 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4638 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4639 targetm.calls.split_complex_arg = NULL;
4640
4641 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4642 if (DEFAULT_ABI == ABI_AIX)
4643 targetm.calls.custom_function_descriptors = 0;
4644 }
4645
4646 /* Initialize rs6000_cost with the appropriate target costs. */
4647 if (optimize_size)
4648 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4649 else
4650 switch (rs6000_tune)
4651 {
4652 case PROCESSOR_RS64A:
4653 rs6000_cost = &rs64a_cost;
4654 break;
4655
4656 case PROCESSOR_MPCCORE:
4657 rs6000_cost = &mpccore_cost;
4658 break;
4659
4660 case PROCESSOR_PPC403:
4661 rs6000_cost = &ppc403_cost;
4662 break;
4663
4664 case PROCESSOR_PPC405:
4665 rs6000_cost = &ppc405_cost;
4666 break;
4667
4668 case PROCESSOR_PPC440:
4669 rs6000_cost = &ppc440_cost;
4670 break;
4671
4672 case PROCESSOR_PPC476:
4673 rs6000_cost = &ppc476_cost;
4674 break;
4675
4676 case PROCESSOR_PPC601:
4677 rs6000_cost = &ppc601_cost;
4678 break;
4679
4680 case PROCESSOR_PPC603:
4681 rs6000_cost = &ppc603_cost;
4682 break;
4683
4684 case PROCESSOR_PPC604:
4685 rs6000_cost = &ppc604_cost;
4686 break;
4687
4688 case PROCESSOR_PPC604e:
4689 rs6000_cost = &ppc604e_cost;
4690 break;
4691
4692 case PROCESSOR_PPC620:
4693 rs6000_cost = &ppc620_cost;
4694 break;
4695
4696 case PROCESSOR_PPC630:
4697 rs6000_cost = &ppc630_cost;
4698 break;
4699
4700 case PROCESSOR_CELL:
4701 rs6000_cost = &ppccell_cost;
4702 break;
4703
4704 case PROCESSOR_PPC750:
4705 case PROCESSOR_PPC7400:
4706 rs6000_cost = &ppc750_cost;
4707 break;
4708
4709 case PROCESSOR_PPC7450:
4710 rs6000_cost = &ppc7450_cost;
4711 break;
4712
4713 case PROCESSOR_PPC8540:
4714 case PROCESSOR_PPC8548:
4715 rs6000_cost = &ppc8540_cost;
4716 break;
4717
4718 case PROCESSOR_PPCE300C2:
4719 case PROCESSOR_PPCE300C3:
4720 rs6000_cost = &ppce300c2c3_cost;
4721 break;
4722
4723 case PROCESSOR_PPCE500MC:
4724 rs6000_cost = &ppce500mc_cost;
4725 break;
4726
4727 case PROCESSOR_PPCE500MC64:
4728 rs6000_cost = &ppce500mc64_cost;
4729 break;
4730
4731 case PROCESSOR_PPCE5500:
4732 rs6000_cost = &ppce5500_cost;
4733 break;
4734
4735 case PROCESSOR_PPCE6500:
4736 rs6000_cost = &ppce6500_cost;
4737 break;
4738
4739 case PROCESSOR_TITAN:
4740 rs6000_cost = &titan_cost;
4741 break;
4742
4743 case PROCESSOR_POWER4:
4744 case PROCESSOR_POWER5:
4745 rs6000_cost = &power4_cost;
4746 break;
4747
4748 case PROCESSOR_POWER6:
4749 rs6000_cost = &power6_cost;
4750 break;
4751
4752 case PROCESSOR_POWER7:
4753 rs6000_cost = &power7_cost;
4754 break;
4755
4756 case PROCESSOR_POWER8:
4757 rs6000_cost = &power8_cost;
4758 break;
4759
4760 case PROCESSOR_POWER9:
4761 case PROCESSOR_POWER10:
4762 rs6000_cost = &power9_cost;
4763 break;
4764
4765 case PROCESSOR_PPCA2:
4766 rs6000_cost = &ppca2_cost;
4767 break;
4768
4769 default:
4770 gcc_unreachable ();
4771 }
4772
4773 if (global_init_p)
4774 {
4775 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4776 param_simultaneous_prefetches,
4777 rs6000_cost->simultaneous_prefetches);
4778 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4779 param_l1_cache_size,
4780 rs6000_cost->l1_cache_size);
4781 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4782 param_l1_cache_line_size,
4783 rs6000_cost->cache_line_size);
4784 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4785 param_l2_cache_size,
4786 rs6000_cost->l2_cache_size);
4787
4788 /* Increase loop peeling limits based on performance analysis. */
4789 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4790 param_max_peeled_insns, 400);
4791 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4792 param_max_completely_peeled_insns, 400);
4793
4794 /* The lxvl/stxvl instructions don't perform well before Power10. */
4795 if (TARGET_POWER10)
4796 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4797 param_vect_partial_vector_usage, 1);
4798 else
4799 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4800 param_vect_partial_vector_usage, 0);
4801
4802 /* Use the 'model' -fsched-pressure algorithm by default. */
4803 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4804 param_sched_pressure_algorithm,
4805 SCHED_PRESSURE_MODEL);
4806
4807 /* If using typedef char *va_list, signal that
4808 __builtin_va_start (&ap, 0) can be optimized to
4809 ap = __builtin_next_arg (0). */
4810 if (DEFAULT_ABI != ABI_V4)
4811 targetm.expand_builtin_va_start = NULL;
4812 }
4813
4814 rs6000_override_options_after_change ();
4815
4816 /* If not explicitly specified via option, decide whether to generate indexed
4817 load/store instructions. A value of -1 indicates that the
4818 initial value of this variable has not been overwritten. During
4819 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4820 if (TARGET_AVOID_XFORM == -1)
4821 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4822 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4823 need indexed accesses and the type used is the scalar type of the element
4824 being loaded or stored. */
4825 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4826 && !TARGET_ALTIVEC);
4827
4828 /* Set the -mrecip options. */
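/* -mrecip takes a comma-separated list of keywords from recip_options,
   each optionally prefixed with '!' to turn that estimate off, plus the
   special keyword "default"; e.g. -mrecip=all,!rsqrtd (illustrative --
   the accepted names are whatever recip_options defines).  */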
4829 if (rs6000_recip_name)
4830 {
4831 char *p = ASTRDUP (rs6000_recip_name);
4832 char *q;
4833 unsigned int mask, i;
4834 bool invert;
4835
4836 while ((q = strtok (p, ",")) != NULL)
4837 {
4838 p = NULL;
4839 if (*q == '!')
4840 {
4841 invert = true;
4842 q++;
4843 }
4844 else
4845 invert = false;
4846
4847 if (!strcmp (q, "default"))
4848 mask = ((TARGET_RECIP_PRECISION)
4849 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4850 else
4851 {
4852 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4853 if (!strcmp (q, recip_options[i].string))
4854 {
4855 mask = recip_options[i].mask;
4856 break;
4857 }
4858
4859 if (i == ARRAY_SIZE (recip_options))
4860 {
4861 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4862 invert = false;
4863 mask = 0;
4864 ret = false;
4865 }
4866 }
4867
4868 if (invert)
4869 rs6000_recip_control &= ~mask;
4870 else
4871 rs6000_recip_control |= mask;
4872 }
4873 }
4874
4875 /* Set the builtin mask based on the various options that could affect which
4876 builtins are available. In the past we used target_flags, but we've run
4877 out of bits, and some options are no longer in target_flags. */
4878 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4879 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4880 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4881 rs6000_builtin_mask);
4882
4883 /* Initialize all of the registers. */
4884 rs6000_init_hard_regno_mode_ok (global_init_p);
4885
4886 /* Save the initial options in case the user uses function specific options. */
4887 if (global_init_p)
4888 target_option_default_node = target_option_current_node
4889 = build_target_option_node (&global_options, &global_options_set);
4890
4891 /* If not explicitly specified via option, decide whether to generate the
4892 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4893 if (TARGET_LINK_STACK == -1)
4894 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4895
4896 /* Deprecate use of -mno-speculate-indirect-jumps. */
4897 if (!rs6000_speculate_indirect_jumps)
4898 warning (0, "%qs is deprecated and not recommended in any circumstances",
4899 "-mno-speculate-indirect-jumps");
4900
4901 return ret;
4902 }
4903
4904 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4905 define the target cpu type. */
4906
4907 static void
4908 rs6000_option_override (void)
4909 {
4910 (void) rs6000_option_override_internal (true);
4911 }
4912
4913 \f
4914 /* Implement targetm.vectorize.builtin_mask_for_load. */
4915 static tree
4916 rs6000_builtin_mask_for_load (void)
4917 {
4918 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4919 if ((TARGET_ALTIVEC && !TARGET_VSX)
4920 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4921 return altivec_builtin_mask_for_load;
4922 else
4923 return 0;
4924 }
4925
4926 /* Implement LOOP_ALIGN. */
4927 align_flags
4928 rs6000_loop_align (rtx label)
4929 {
4930 basic_block bb;
4931 int ninsns;
4932
4933 /* Don't override loop alignment if -falign-loops was specified. */
4934 if (!can_override_loop_align)
4935 return align_loops;
4936
4937 bb = BLOCK_FOR_INSN (label);
4938 ninsns = num_loop_insns(bb->loop_father);
4939
4940 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4941 if (ninsns > 4 && ninsns <= 8
4942 && (rs6000_tune == PROCESSOR_POWER4
4943 || rs6000_tune == PROCESSOR_POWER5
4944 || rs6000_tune == PROCESSOR_POWER6
4945 || rs6000_tune == PROCESSOR_POWER7
4946 || rs6000_tune == PROCESSOR_POWER8))
4947 return align_flags (5);
4948 else
4949 return align_loops;
4950 }
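/* Note: align_flags (5) encodes the alignment as a log2 value, so the
   function requests 2**5 == 32-byte alignment for such small loops.  */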
4951
4952 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4953 after applying N iterations. This routine does not determine
4954 how many iterations are required to reach the desired alignment. */
4955
4956 static bool
4957 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4958 {
4959 if (is_packed)
4960 return false;
4961
4962 if (TARGET_32BIT)
4963 {
4964 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4965 return true;
4966
4967 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4968 return true;
4969
4970 return false;
4971 }
4972 else
4973 {
4974 if (TARGET_MACHO)
4975 return false;
4976
4977 /* Assuming that all other types are naturally aligned. CHECKME! */
4978 return true;
4979 }
4980 }
4981
4982 /* Return true if the vector misalignment factor is supported by the
4983 target. */
4984 static bool
4985 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4986 const_tree type,
4987 int misalignment,
4988 bool is_packed)
4989 {
4990 if (TARGET_VSX)
4991 {
4992 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4993 return true;
4994
4995 /* Return if movmisalign pattern is not supported for this mode. */
4996 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4997 return false;
4998
4999 if (misalignment == -1)
5000 {
5001 /* Misalignment factor is unknown at compile time but we know
5002 it's word aligned. */
5003 if (rs6000_vector_alignment_reachable (type, is_packed))
5004 {
5005 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5006
5007 if (element_size == 64 || element_size == 32)
5008 return true;
5009 }
5010
5011 return false;
5012 }
5013
5014 /* VSX supports word-aligned vectors. */
5015 if (misalignment % 4 == 0)
5016 return true;
5017 }
5018 return false;
5019 }
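/* Worked example: with VSX but without TARGET_EFFICIENT_UNALIGNED_VSX, and
   assuming a movmisalign handler exists for the mode, a known misalignment
   of 8 bytes is supported (8 % 4 == 0) while a misalignment of 3 is not.  */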
5020
5021 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5022 static int
5023 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5024 tree vectype, int misalign)
5025 {
5026 unsigned elements;
5027 tree elem_type;
5028
5029 switch (type_of_cost)
5030 {
5031 case scalar_stmt:
5032 case scalar_store:
5033 case vector_stmt:
5034 case vector_store:
5035 case vec_to_scalar:
5036 case scalar_to_vec:
5037 case cond_branch_not_taken:
5038 return 1;
5039 case scalar_load:
5040 case vector_load:
5041 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5042 return 2;
5043
5044 case vec_perm:
5045 /* Power7 has only one permute unit, make it a bit expensive. */
5046 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5047 return 3;
5048 else
5049 return 1;
5050
5051 case vec_promote_demote:
5052 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5053 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5054 return 4;
5055 else
5056 return 1;
5057
5058 case cond_branch_taken:
5059 return 3;
5060
5061 case unaligned_load:
5062 case vector_gather_load:
5063 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5064 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5065 return 2;
5066
5067 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5068 {
5069 elements = TYPE_VECTOR_SUBPARTS (vectype);
5070 if (elements == 2)
5071 /* Double word aligned. */
5072 return 4;
5073
5074 if (elements == 4)
5075 {
5076 switch (misalign)
5077 {
5078 case 8:
5079 /* Double word aligned. */
5080 return 4;
5081
5082 case -1:
5083 /* Unknown misalignment. */
5084 case 4:
5085 case 12:
5086 /* Word aligned. */
5087 return 33;
5088
5089 default:
5090 gcc_unreachable ();
5091 }
5092 }
5093 }
5094
5095 if (TARGET_ALTIVEC)
5096 /* Misaligned loads are not supported. */
5097 gcc_unreachable ();
5098
5099 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5100 return 4;
5101
5102 case unaligned_store:
5103 case vector_scatter_store:
5104 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5105 return 1;
5106
5107 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5108 {
5109 elements = TYPE_VECTOR_SUBPARTS (vectype);
5110 if (elements == 2)
5111 /* Double word aligned. */
5112 return 2;
5113
5114 if (elements == 4)
5115 {
5116 switch (misalign)
5117 {
5118 case 8:
5119 /* Double word aligned. */
5120 return 2;
5121
5122 case -1:
5123 /* Unknown misalignment. */
5124 case 4:
5125 case 12:
5126 /* Word aligned. */
5127 return 23;
5128
5129 default:
5130 gcc_unreachable ();
5131 }
5132 }
5133 }
5134
5135 if (TARGET_ALTIVEC)
5136 /* Misaligned stores are not supported. */
5137 gcc_unreachable ();
5138
5139 return 2;
5140
5141 case vec_construct:
5142 /* This is a rough approximation assuming non-constant elements
5143 constructed into a vector via element insertion. FIXME:
5144 vec_construct is not granular enough for uniformly good
5145 decisions. If the initialization is a splat, this is
5146 cheaper than we estimate. Improve this someday. */
5147 elem_type = TREE_TYPE (vectype);
5148 /* 32-bit vectors loaded into registers are stored as double
5149 precision, so we need 2 permutes, 2 converts, and 1 merge
5150 to construct a vector of short floats from them. */
5151 if (SCALAR_FLOAT_TYPE_P (elem_type)
5152 && TYPE_PRECISION (elem_type) == 32)
5153 return 5;
5154 /* On POWER9, integer vector types are built up in GPRs and then
5155 use a direct move (2 cycles). For POWER8 this is even worse,
5156 as we need two direct moves and a merge, and the direct moves
5157 are five cycles. */
5158 else if (INTEGRAL_TYPE_P (elem_type))
5159 {
5160 if (TARGET_P9_VECTOR)
5161 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5162 else
5163 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5164 }
5165 else
5166 /* V2DFmode doesn't need a direct move. */
5167 return 2;
5168
5169 default:
5170 gcc_unreachable ();
5171 }
5172 }
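/* Worked example: under TARGET_VSX with TARGET_ALLOW_MOVMISALIGN, an
   unaligned_load of a 4-element vector with misalign 4 (only word aligned)
   costs 33 above, strongly discouraging vectorization, whereas
   TARGET_EFFICIENT_UNALIGNED_VSX reduces that cost to 2.  */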
5173
5174 /* Implement targetm.vectorize.preferred_simd_mode. */
5175
5176 static machine_mode
5177 rs6000_preferred_simd_mode (scalar_mode mode)
5178 {
5179 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5180
5181 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5182 return vmode.require ();
5183
5184 return word_mode;
5185 }
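/* Worked example: for SImode (4 bytes) the call above asks for
   mode_for_vector (SImode, 4), i.e. V4SImode; for DFmode it asks for
   V2DFmode.  If that vector mode is not enabled, word_mode is returned.  */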
5186
5187 typedef struct _rs6000_cost_data
5188 {
5189 struct loop *loop_info;
5190 unsigned cost[3];
5191 } rs6000_cost_data;
5192
5193 /* Test for likely overcommitment of vector hardware resources. If a
5194 loop iteration is relatively large, and too large a percentage of
5195 instructions in the loop are vectorized, the cost model may not
5196 adequately reflect delays from unavailable vector resources.
5197 Penalize the loop body cost for this case. */
5198
5199 static void
5200 rs6000_density_test (rs6000_cost_data *data)
5201 {
5202 const int DENSITY_PCT_THRESHOLD = 85;
5203 const int DENSITY_SIZE_THRESHOLD = 70;
5204 const int DENSITY_PENALTY = 10;
5205 struct loop *loop = data->loop_info;
5206 basic_block *bbs = get_loop_body (loop);
5207 int nbbs = loop->num_nodes;
5208 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5209 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5210 int i, density_pct;
5211
5212 for (i = 0; i < nbbs; i++)
5213 {
5214 basic_block bb = bbs[i];
5215 gimple_stmt_iterator gsi;
5216
5217 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5218 {
5219 gimple *stmt = gsi_stmt (gsi);
5220 if (is_gimple_debug (stmt))
5221 continue;
5222
5223 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5224
5225 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5226 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5227 not_vec_cost++;
5228 }
5229 }
5230
5231 free (bbs);
5232 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5233
5234 if (density_pct > DENSITY_PCT_THRESHOLD
5235 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5236 {
5237 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5238 if (dump_enabled_p ())
5239 dump_printf_loc (MSG_NOTE, vect_location,
5240 "density %d%%, cost %d exceeds threshold, penalizing "
5241 "loop body cost by %d%%", density_pct,
5242 vec_cost + not_vec_cost, DENSITY_PENALTY);
5243 }
5244 }
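/* Illustrative numbers: with vec_cost == 95 and not_vec_cost == 5, the
   density is 95% and the loop size is 100, so both thresholds are exceeded
   and the body cost is scaled to 95 * 110 / 100 == 104.  */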
5245
5246 /* Implement targetm.vectorize.init_cost. */
5247
5248 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5249 instruction is needed by the vectorization. */
5250 static bool rs6000_vect_nonmem;
5251
5252 static void *
5253 rs6000_init_cost (struct loop *loop_info)
5254 {
5255 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5256 data->loop_info = loop_info;
5257 data->cost[vect_prologue] = 0;
5258 data->cost[vect_body] = 0;
5259 data->cost[vect_epilogue] = 0;
5260 rs6000_vect_nonmem = false;
5261 return data;
5262 }
5263
5264 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5265 For some statements, we would like to further fine-tune the cost on top of
5266 the rs6000_builtin_vectorization_cost handling, which doesn't have any
5267 information on statement operation codes etc. One typical case here is
5268 COND_EXPR: it has the same cost as a simple FXU instruction when evaluated
5269 for the scalar cost, but it should be priced higher since it is transformed
5270 into either compare + branch or compare + isel instructions. */
5271
5272 static unsigned
5273 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5274 struct _stmt_vec_info *stmt_info)
5275 {
5276 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5277 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5278 {
5279 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5280 if (subcode == COND_EXPR)
5281 return 2;
5282 }
5283
5284 return 0;
5285 }
5286
5287 /* Implement targetm.vectorize.add_stmt_cost. */
5288
5289 static unsigned
5290 rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
5291 enum vect_cost_for_stmt kind,
5292 struct _stmt_vec_info *stmt_info, tree vectype,
5293 int misalign, enum vect_cost_model_location where)
5294 {
5295 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5296 unsigned retval = 0;
5297
5298 if (flag_vect_cost_model)
5299 {
5300 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5301 misalign);
5302 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5303 /* Statements in an inner loop relative to the loop being
5304 vectorized are weighted more heavily. The value here is
5305 arbitrary and could potentially be improved with analysis. */
5306 if (where == vect_body && stmt_info
5307 && stmt_in_inner_loop_p (vinfo, stmt_info))
5308 count *= 50; /* FIXME. */
5309
5310 retval = (unsigned) (count * stmt_cost);
5311 cost_data->cost[where] += retval;
5312
5313 /* Check whether we're doing something other than just a copy loop.
5314 Not all such loops may be profitably vectorized; see
5315 rs6000_finish_cost. */
5316 if ((kind == vec_to_scalar || kind == vec_perm
5317 || kind == vec_promote_demote || kind == vec_construct
5318 || kind == scalar_to_vec)
5319 || (where == vect_body && kind == vector_stmt))
5320 rs6000_vect_nonmem = true;
5321 }
5322
5323 return retval;
5324 }
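/* Example: a single vector_stmt (stmt_cost 1) in an inner loop relative to
   the loop being vectorized has its count scaled by 50 above, so it
   contributes 50 to the body cost.  */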
5325
5326 /* For some target-specific vectorization costs which can't be handled per
5327 stmt, we check the requisite conditions and adjust the vectorization cost
5328 accordingly if they are satisfied. One typical example is to model the
5329 shift cost for vectors with length by counting the number of required
5330 lengths under the condition LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5331
5332 static void
5333 rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
5334 {
5335 struct loop *loop = data->loop_info;
5336 gcc_assert (loop);
5337 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
5338
5339 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5340 {
5341 rgroup_controls *rgc;
5342 unsigned int num_vectors_m1;
5343 unsigned int shift_cnt = 0;
5344 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5345 if (rgc->type)
5346 /* Each length needs one shift to fill into bits 0-7. */
5347 shift_cnt += num_vectors_m1 + 1;
5348
5349 rs6000_add_stmt_cost (loop_vinfo, (void *) data, shift_cnt, scalar_stmt,
5350 NULL, NULL_TREE, 0, vect_body);
5351 }
5352 }
5353
5354 /* Implement targetm.vectorize.finish_cost. */
5355
5356 static void
5357 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5358 unsigned *body_cost, unsigned *epilogue_cost)
5359 {
5360 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5361
5362 if (cost_data->loop_info)
5363 {
5364 rs6000_adjust_vect_cost_per_loop (cost_data);
5365 rs6000_density_test (cost_data);
5366 }
5367
5368 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5369 that require versioning for any reason. The vectorization is at
5370 best a wash inside the loop, and the versioning checks make
5371 profitability highly unlikely and potentially quite harmful. */
5372 if (cost_data->loop_info)
5373 {
5374 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5375 if (!rs6000_vect_nonmem
5376 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5377 && LOOP_REQUIRES_VERSIONING (vec_info))
5378 cost_data->cost[vect_body] += 10000;
5379 }
5380
5381 *prologue_cost = cost_data->cost[vect_prologue];
5382 *body_cost = cost_data->cost[vect_body];
5383 *epilogue_cost = cost_data->cost[vect_epilogue];
5384 }
5385
5386 /* Implement targetm.vectorize.destroy_cost_data. */
5387
5388 static void
5389 rs6000_destroy_cost_data (void *data)
5390 {
5391 free (data);
5392 }
5393
5394 /* Implement targetm.loop_unroll_adjust. */
5395
5396 static unsigned
5397 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5398 {
5399 if (unroll_only_small_loops)
5400 {
5401 /* TODO: These are hardcoded values right now. We probably should use
5402 a PARAM here. */
5403 if (loop->ninsns <= 6)
5404 return MIN (4, nunroll);
5405 if (loop->ninsns <= 10)
5406 return MIN (2, nunroll);
5407
5408 return 0;
5409 }
5410
5411 return nunroll;
5412 }
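/* Example: with unroll_only_small_loops, a 5-insn loop may be unrolled up to
   4 times, an 8-insn loop at most twice, and an 11-insn loop not at all.  */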
5413
5414 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5415 library with vectorized intrinsics. */
5416
5417 static tree
5418 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5419 tree type_in)
5420 {
5421 char name[32];
5422 const char *suffix = NULL;
5423 tree fntype, new_fndecl, bdecl = NULL_TREE;
5424 int n_args = 1;
5425 const char *bname;
5426 machine_mode el_mode, in_mode;
5427 int n, in_n;
5428
5429 /* Libmass is suitable for unsafe math only, as it does not correctly support
5430 parts of IEEE (such as denormals) with the required precision. Only support
5431 it if we have VSX to use the simd d2 or f4 functions.
5432 XXX: Add variable length support. */
5433 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5434 return NULL_TREE;
5435
5436 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5437 n = TYPE_VECTOR_SUBPARTS (type_out);
5438 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5439 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5440 if (el_mode != in_mode
5441 || n != in_n)
5442 return NULL_TREE;
5443
5444 switch (fn)
5445 {
5446 CASE_CFN_ATAN2:
5447 CASE_CFN_HYPOT:
5448 CASE_CFN_POW:
5449 n_args = 2;
5450 gcc_fallthrough ();
5451
5452 CASE_CFN_ACOS:
5453 CASE_CFN_ACOSH:
5454 CASE_CFN_ASIN:
5455 CASE_CFN_ASINH:
5456 CASE_CFN_ATAN:
5457 CASE_CFN_ATANH:
5458 CASE_CFN_CBRT:
5459 CASE_CFN_COS:
5460 CASE_CFN_COSH:
5461 CASE_CFN_ERF:
5462 CASE_CFN_ERFC:
5463 CASE_CFN_EXP2:
5464 CASE_CFN_EXP:
5465 CASE_CFN_EXPM1:
5466 CASE_CFN_LGAMMA:
5467 CASE_CFN_LOG10:
5468 CASE_CFN_LOG1P:
5469 CASE_CFN_LOG2:
5470 CASE_CFN_LOG:
5471 CASE_CFN_SIN:
5472 CASE_CFN_SINH:
5473 CASE_CFN_SQRT:
5474 CASE_CFN_TAN:
5475 CASE_CFN_TANH:
5476 if (el_mode == DFmode && n == 2)
5477 {
5478 bdecl = mathfn_built_in (double_type_node, fn);
5479 suffix = "d2"; /* pow -> powd2 */
5480 }
5481 else if (el_mode == SFmode && n == 4)
5482 {
5483 bdecl = mathfn_built_in (float_type_node, fn);
5484 suffix = "4"; /* powf -> powf4 */
5485 }
5486 else
5487 return NULL_TREE;
5488 if (!bdecl)
5489 return NULL_TREE;
5490 break;
5491
5492 default:
5493 return NULL_TREE;
5494 }
5495
5496 gcc_assert (suffix != NULL);
5497 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5498 if (!bname)
5499 return NULL_TREE;
5500
5501 strcpy (name, bname + strlen ("__builtin_"));
5502 strcat (name, suffix);
5503
5504 if (n_args == 1)
5505 fntype = build_function_type_list (type_out, type_in, NULL);
5506 else if (n_args == 2)
5507 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5508 else
5509 gcc_unreachable ();
5510
5511 /* Build a function declaration for the vectorized function. */
5512 new_fndecl = build_decl (BUILTINS_LOCATION,
5513 FUNCTION_DECL, get_identifier (name), fntype);
5514 TREE_PUBLIC (new_fndecl) = 1;
5515 DECL_EXTERNAL (new_fndecl) = 1;
5516 DECL_IS_NOVOPS (new_fndecl) = 1;
5517 TREE_READONLY (new_fndecl) = 1;
5518
5519 return new_fndecl;
5520 }
5521
5522 /* Returns a function decl for a vectorized version of the builtin function
5523 with builtin function code FN and result vector type TYPE_OUT, or NULL_TREE
5524 if it is not available. */
5525
5526 static tree
5527 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5528 tree type_in)
5529 {
5530 machine_mode in_mode, out_mode;
5531 int in_n, out_n;
5532
5533 if (TARGET_DEBUG_BUILTIN)
5534 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5535 combined_fn_name (combined_fn (fn)),
5536 GET_MODE_NAME (TYPE_MODE (type_out)),
5537 GET_MODE_NAME (TYPE_MODE (type_in)));
5538
5539 if (TREE_CODE (type_out) != VECTOR_TYPE
5540 || TREE_CODE (type_in) != VECTOR_TYPE)
5541 return NULL_TREE;
5542
5543 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5544 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5545 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5546 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5547
5548 switch (fn)
5549 {
5550 CASE_CFN_COPYSIGN:
5551 if (VECTOR_UNIT_VSX_P (V2DFmode)
5552 && out_mode == DFmode && out_n == 2
5553 && in_mode == DFmode && in_n == 2)
5554 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5555 if (VECTOR_UNIT_VSX_P (V4SFmode)
5556 && out_mode == SFmode && out_n == 4
5557 && in_mode == SFmode && in_n == 4)
5558 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5559 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5560 && out_mode == SFmode && out_n == 4
5561 && in_mode == SFmode && in_n == 4)
5562 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5563 break;
5564 CASE_CFN_CEIL:
5565 if (VECTOR_UNIT_VSX_P (V2DFmode)
5566 && out_mode == DFmode && out_n == 2
5567 && in_mode == DFmode && in_n == 2)
5568 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5569 if (VECTOR_UNIT_VSX_P (V4SFmode)
5570 && out_mode == SFmode && out_n == 4
5571 && in_mode == SFmode && in_n == 4)
5572 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5573 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5574 && out_mode == SFmode && out_n == 4
5575 && in_mode == SFmode && in_n == 4)
5576 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5577 break;
5578 CASE_CFN_FLOOR:
5579 if (VECTOR_UNIT_VSX_P (V2DFmode)
5580 && out_mode == DFmode && out_n == 2
5581 && in_mode == DFmode && in_n == 2)
5582 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5583 if (VECTOR_UNIT_VSX_P (V4SFmode)
5584 && out_mode == SFmode && out_n == 4
5585 && in_mode == SFmode && in_n == 4)
5586 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5587 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5588 && out_mode == SFmode && out_n == 4
5589 && in_mode == SFmode && in_n == 4)
5590 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5591 break;
5592 CASE_CFN_FMA:
5593 if (VECTOR_UNIT_VSX_P (V2DFmode)
5594 && out_mode == DFmode && out_n == 2
5595 && in_mode == DFmode && in_n == 2)
5596 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5597 if (VECTOR_UNIT_VSX_P (V4SFmode)
5598 && out_mode == SFmode && out_n == 4
5599 && in_mode == SFmode && in_n == 4)
5600 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5601 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5602 && out_mode == SFmode && out_n == 4
5603 && in_mode == SFmode && in_n == 4)
5604 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5605 break;
5606 CASE_CFN_TRUNC:
5607 if (VECTOR_UNIT_VSX_P (V2DFmode)
5608 && out_mode == DFmode && out_n == 2
5609 && in_mode == DFmode && in_n == 2)
5610 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5611 if (VECTOR_UNIT_VSX_P (V4SFmode)
5612 && out_mode == SFmode && out_n == 4
5613 && in_mode == SFmode && in_n == 4)
5614 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5615 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5616 && out_mode == SFmode && out_n == 4
5617 && in_mode == SFmode && in_n == 4)
5618 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5619 break;
5620 CASE_CFN_NEARBYINT:
5621 if (VECTOR_UNIT_VSX_P (V2DFmode)
5622 && flag_unsafe_math_optimizations
5623 && out_mode == DFmode && out_n == 2
5624 && in_mode == DFmode && in_n == 2)
5625 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5626 if (VECTOR_UNIT_VSX_P (V4SFmode)
5627 && flag_unsafe_math_optimizations
5628 && out_mode == SFmode && out_n == 4
5629 && in_mode == SFmode && in_n == 4)
5630 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5631 break;
5632 CASE_CFN_RINT:
5633 if (VECTOR_UNIT_VSX_P (V2DFmode)
5634 && !flag_trapping_math
5635 && out_mode == DFmode && out_n == 2
5636 && in_mode == DFmode && in_n == 2)
5637 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5638 if (VECTOR_UNIT_VSX_P (V4SFmode)
5639 && !flag_trapping_math
5640 && out_mode == SFmode && out_n == 4
5641 && in_mode == SFmode && in_n == 4)
5642 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5643 break;
5644 default:
5645 break;
5646 }
5647
5648 /* Generate calls to libmass if appropriate. */
5649 if (rs6000_veclib_handler)
5650 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5651
5652 return NULL_TREE;
5653 }
5654
5655 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5656
5657 static tree
5658 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5659 tree type_in)
5660 {
5661 machine_mode in_mode, out_mode;
5662 int in_n, out_n;
5663
5664 if (TARGET_DEBUG_BUILTIN)
5665 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5666 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5667 GET_MODE_NAME (TYPE_MODE (type_out)),
5668 GET_MODE_NAME (TYPE_MODE (type_in)));
5669
5670 if (TREE_CODE (type_out) != VECTOR_TYPE
5671 || TREE_CODE (type_in) != VECTOR_TYPE)
5672 return NULL_TREE;
5673
5674 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5675 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5676 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5677 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5678
5679 enum rs6000_builtins fn
5680 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5681 switch (fn)
5682 {
5683 case RS6000_BUILTIN_RSQRTF:
5684 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5685 && out_mode == SFmode && out_n == 4
5686 && in_mode == SFmode && in_n == 4)
5687 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5688 break;
5689 case RS6000_BUILTIN_RSQRT:
5690 if (VECTOR_UNIT_VSX_P (V2DFmode)
5691 && out_mode == DFmode && out_n == 2
5692 && in_mode == DFmode && in_n == 2)
5693 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5694 break;
5695 case RS6000_BUILTIN_RECIPF:
5696 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5697 && out_mode == SFmode && out_n == 4
5698 && in_mode == SFmode && in_n == 4)
5699 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5700 break;
5701 case RS6000_BUILTIN_RECIP:
5702 if (VECTOR_UNIT_VSX_P (V2DFmode)
5703 && out_mode == DFmode && out_n == 2
5704 && in_mode == DFmode && in_n == 2)
5705 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5706 break;
5707 default:
5708 break;
5709 }
5710 return NULL_TREE;
5711 }
5712 \f
5713 /* Default CPU string for rs6000*_file_start functions. */
5714 static const char *rs6000_default_cpu;
5715
5716 #ifdef USING_ELFOS_H
5717 const char *rs6000_machine;
5718
5719 const char *
5720 rs6000_machine_from_flags (void)
5721 {
5722 HOST_WIDE_INT flags = rs6000_isa_flags;
5723
5724 /* Disable the flags that should never influence the .machine selection. */
5725 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5726
5727 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5728 return "power10";
5729 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5730 return "power9";
5731 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5732 return "power8";
5733 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5734 return "power7";
5735 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5736 return "power6";
5737 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5738 return "power5";
5739 if ((flags & ISA_2_1_MASKS) != 0)
5740 return "power4";
5741 if ((flags & OPTION_MASK_POWERPC64) != 0)
5742 return "ppc64";
5743 return "ppc";
5744 }
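/* For instance, a flag set that enables something from ISA_3_0_MASKS_SERVER
   beyond ISA_2_7_MASKS_SERVER (as -mcpu=power9 typically does) selects
   ".machine power9".  */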
5745
5746 void
5747 emit_asm_machine (void)
5748 {
5749 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5750 }
5751 #endif
5752
5753 /* Do anything needed at the start of the asm file. */
5754
5755 static void
5756 rs6000_file_start (void)
5757 {
5758 char buffer[80];
5759 const char *start = buffer;
5760 FILE *file = asm_out_file;
5761
5762 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5763
5764 default_file_start ();
5765
5766 if (flag_verbose_asm)
5767 {
5768 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5769
5770 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5771 {
5772 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5773 start = "";
5774 }
5775
5776 if (global_options_set.x_rs6000_cpu_index)
5777 {
5778 fprintf (file, "%s -mcpu=%s", start,
5779 processor_target_table[rs6000_cpu_index].name);
5780 start = "";
5781 }
5782
5783 if (global_options_set.x_rs6000_tune_index)
5784 {
5785 fprintf (file, "%s -mtune=%s", start,
5786 processor_target_table[rs6000_tune_index].name);
5787 start = "";
5788 }
5789
5790 if (PPC405_ERRATUM77)
5791 {
5792 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5793 start = "";
5794 }
5795
5796 #ifdef USING_ELFOS_H
5797 switch (rs6000_sdata)
5798 {
5799 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5800 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5801 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5802 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5803 }
5804
5805 if (rs6000_sdata && g_switch_value)
5806 {
5807 fprintf (file, "%s -G %d", start,
5808 g_switch_value);
5809 start = "";
5810 }
5811 #endif
5812
5813 if (*start == '\0')
5814 putc ('\n', file);
5815 }
5816
5817 #ifdef USING_ELFOS_H
5818 rs6000_machine = rs6000_machine_from_flags ();
5819 emit_asm_machine ();
5820 #endif
5821
5822 if (DEFAULT_ABI == ABI_ELFv2)
5823 fprintf (file, "\t.abiversion 2\n");
5824 }
5825
5826 \f
5827 /* Return nonzero if this function is known to have a null epilogue. */
5828
5829 int
5830 direct_return (void)
5831 {
5832 if (reload_completed)
5833 {
5834 rs6000_stack_t *info = rs6000_stack_info ();
5835
5836 if (info->first_gp_reg_save == 32
5837 && info->first_fp_reg_save == 64
5838 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5839 && ! info->lr_save_p
5840 && ! info->cr_save_p
5841 && info->vrsave_size == 0
5842 && ! info->push_p)
5843 return 1;
5844 }
5845
5846 return 0;
5847 }
5848
5849 /* Helper for num_insns_constant. Calculate number of instructions to
5850 load VALUE to a single gpr using combinations of addi, addis, ori,
5851 oris, sldi and rldimi instructions. */
5852
5853 static int
5854 num_insns_constant_gpr (HOST_WIDE_INT value)
5855 {
5856 /* signed constant loadable with addi */
5857 if (SIGNED_INTEGER_16BIT_P (value))
5858 return 1;
5859
5860 /* constant loadable with addis */
5861 else if ((value & 0xffff) == 0
5862 && (value >> 31 == -1 || value >> 31 == 0))
5863 return 1;
5864
5865 /* PADDI can support up to 34-bit signed integers. */
5866 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5867 return 1;
5868
5869 else if (TARGET_POWERPC64)
5870 {
5871 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5872 HOST_WIDE_INT high = value >> 31;
5873
5874 if (high == 0 || high == -1)
5875 return 2;
5876
5877 high >>= 1;
5878
5879 if (low == 0 || low == high)
5880 return num_insns_constant_gpr (high) + 1;
5881 else if (high == 0)
5882 return num_insns_constant_gpr (low) + 1;
5883 else
5884 return (num_insns_constant_gpr (high)
5885 + num_insns_constant_gpr (low) + 1);
5886 }
5887
5888 else
5889 return 2;
5890 }
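/* Worked example: 0x12345678 fits in neither addi nor addis alone, so
   without prefixed insns it takes 2 instructions (e.g. lis with 0x1234
   followed by ori with 0x5678).  */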
5891
5892 /* Helper for num_insns_constant. Allow constants formed by the
5893 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5894 and handle modes that require multiple gprs. */
5895
5896 static int
5897 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5898 {
5899 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5900 int total = 0;
5901 while (nregs-- > 0)
5902 {
5903 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5904 int insns = num_insns_constant_gpr (low);
5905 if (insns > 2
5906 /* We won't get more than 2 from num_insns_constant_gpr
5907 except when TARGET_POWERPC64 and mode is DImode or
5908 wider, so the register mode must be DImode. */
5909 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5910 insns = 2;
5911 total += insns;
5912 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5913 it all at once would be UB. */
5914 value >>= (BITS_PER_WORD - 1);
5915 value >>= 1;
5916 }
5917 return total;
5918 }
5919
5920 /* Return the number of instructions it takes to form a constant in as
5921 many gprs as are needed for MODE. */
5922
5923 int
5924 num_insns_constant (rtx op, machine_mode mode)
5925 {
5926 HOST_WIDE_INT val;
5927
5928 switch (GET_CODE (op))
5929 {
5930 case CONST_INT:
5931 val = INTVAL (op);
5932 break;
5933
5934 case CONST_WIDE_INT:
5935 {
5936 int insns = 0;
5937 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5938 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5939 DImode);
5940 return insns;
5941 }
5942
5943 case CONST_DOUBLE:
5944 {
5945 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5946
5947 if (mode == SFmode || mode == SDmode)
5948 {
5949 long l;
5950
5951 if (mode == SDmode)
5952 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5953 else
5954 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5955 /* See the first define_split in rs6000.md handling a
5956 const_double_operand. */
5957 val = l;
5958 mode = SImode;
5959 }
5960 else if (mode == DFmode || mode == DDmode)
5961 {
5962 long l[2];
5963
5964 if (mode == DDmode)
5965 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5966 else
5967 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5968
5969 /* See the second (32-bit) and third (64-bit) define_split
5970 in rs6000.md handling a const_double_operand. */
5971 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5972 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5973 mode = DImode;
5974 }
5975 else if (mode == TFmode || mode == TDmode
5976 || mode == KFmode || mode == IFmode)
5977 {
5978 long l[4];
5979 int insns;
5980
5981 if (mode == TDmode)
5982 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5983 else
5984 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5985
5986 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5987 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5988 insns = num_insns_constant_multi (val, DImode);
5989 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5990 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5991 insns += num_insns_constant_multi (val, DImode);
5992 return insns;
5993 }
5994 else
5995 gcc_unreachable ();
5996 }
5997 break;
5998
5999 default:
6000 gcc_unreachable ();
6001 }
6002
6003 return num_insns_constant_multi (val, mode);
6004 }
6005
6006 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6007 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6008 corresponding element of the vector, but for V4SFmode, the
6009 corresponding "float" is interpreted as an SImode integer. */
6010
6011 HOST_WIDE_INT
6012 const_vector_elt_as_int (rtx op, unsigned int elt)
6013 {
6014 rtx tmp;
6015
6016 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6017 gcc_assert (GET_MODE (op) != V2DImode
6018 && GET_MODE (op) != V2DFmode);
6019
6020 tmp = CONST_VECTOR_ELT (op, elt);
6021 if (GET_MODE (op) == V4SFmode)
6022 tmp = gen_lowpart (SImode, tmp);
6023 return INTVAL (tmp);
6024 }
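/* Example: for a V4SFmode constant whose element is 1.0f, the value returned
   is the IEEE single-precision bit pattern 0x3f800000.  */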
6025
6026 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6027 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6028 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6029 all items are set to the same value and contain COPIES replicas of the
6030 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6031 operand and the others are set to the value of the operand's msb. */
6032
6033 static bool
6034 vspltis_constant (rtx op, unsigned step, unsigned copies)
6035 {
6036 machine_mode mode = GET_MODE (op);
6037 machine_mode inner = GET_MODE_INNER (mode);
6038
6039 unsigned i;
6040 unsigned nunits;
6041 unsigned bitsize;
6042 unsigned mask;
6043
6044 HOST_WIDE_INT val;
6045 HOST_WIDE_INT splat_val;
6046 HOST_WIDE_INT msb_val;
6047
6048 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6049 return false;
6050
6051 nunits = GET_MODE_NUNITS (mode);
6052 bitsize = GET_MODE_BITSIZE (inner);
6053 mask = GET_MODE_MASK (inner);
6054
6055 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6056 splat_val = val;
6057 msb_val = val >= 0 ? 0 : -1;
6058
6059 /* Construct the value to be splatted, if possible. If not, return false. */
6060 for (i = 2; i <= copies; i *= 2)
6061 {
6062 HOST_WIDE_INT small_val;
6063 bitsize /= 2;
6064 small_val = splat_val >> bitsize;
6065 mask >>= bitsize;
6066 if (splat_val != ((HOST_WIDE_INT)
6067 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6068 | (small_val & mask)))
6069 return false;
6070 splat_val = small_val;
6071 }
6072
6073 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6074 if (EASY_VECTOR_15 (splat_val))
6075 ;
6076
6077 /* Also check if we can splat, and then add the result to itself. Do so if
6078 the value is positive, or if the splat instruction is using OP's mode;
6079 for splat_val < 0, the splat and the add should use the same mode. */
6080 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6081 && (splat_val >= 0 || (step == 1 && copies == 1)))
6082 ;
6083
6084 /* Also check if we are loading up the most significant bit which can be done by
6085 loading up -1 and shifting the value left by -1. */
6086 else if (EASY_VECTOR_MSB (splat_val, inner))
6087 ;
6088
6089 else
6090 return false;
6091
6092 /* Check if VAL is present in every STEP-th element, and the
6093 other elements are filled with its most significant bit. */
6094 for (i = 1; i < nunits; ++i)
6095 {
6096 HOST_WIDE_INT desired_val;
6097 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6098 if ((i & (step - 1)) == 0)
6099 desired_val = val;
6100 else
6101 desired_val = msb_val;
6102
6103 if (desired_val != const_vector_elt_as_int (op, elt))
6104 return false;
6105 }
6106
6107 return true;
6108 }
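/* Example: the V4SImode constant with all elements 0x00050005 is accepted
   with STEP == 1, COPIES == 2: vspltish of 5 replicates the halfword 5,
   which read back as V4SImode yields 0x00050005 in each element.  */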
6109
6110 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6111 instruction, filling in the bottom elements with 0 or -1.
6112
6113 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6114 for the number of zeroes to shift in, or negative for the number of 0xff
6115 bytes to shift in.
6116
6117 OP is a CONST_VECTOR. */
6118
6119 int
6120 vspltis_shifted (rtx op)
6121 {
6122 machine_mode mode = GET_MODE (op);
6123 machine_mode inner = GET_MODE_INNER (mode);
6124
6125 unsigned i, j;
6126 unsigned nunits;
6127 unsigned mask;
6128
6129 HOST_WIDE_INT val;
6130
6131 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6132 return false;
6133
6134 /* We need to create pseudo registers to do the shift, so don't recognize
6135 shift vector constants after reload. */
6136 if (!can_create_pseudo_p ())
6137 return false;
6138
6139 nunits = GET_MODE_NUNITS (mode);
6140 mask = GET_MODE_MASK (inner);
6141
6142 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6143
6144 /* Check if the value can really be the operand of a vspltis[bhw]. */
6145 if (EASY_VECTOR_15 (val))
6146 ;
6147
6148 /* Also check if we are loading up the most significant bit which can be done
6149 by loading up -1 and shifting the value left by -1. */
6150 else if (EASY_VECTOR_MSB (val, inner))
6151 ;
6152
6153 else
6154 return 0;
6155
6156 /* Check if VAL is present in every element until we find elements
6157 that are 0 or all 1 bits. */
6158 for (i = 1; i < nunits; ++i)
6159 {
6160 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6161 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6162
6163 /* If the value isn't the splat value, check for the remaining elements
6164 being 0/-1. */
6165 if (val != elt_val)
6166 {
6167 if (elt_val == 0)
6168 {
6169 for (j = i+1; j < nunits; ++j)
6170 {
6171 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6172 if (const_vector_elt_as_int (op, elt2) != 0)
6173 return 0;
6174 }
6175
6176 return (nunits - i) * GET_MODE_SIZE (inner);
6177 }
6178
6179 else if ((elt_val & mask) == mask)
6180 {
6181 for (j = i+1; j < nunits; ++j)
6182 {
6183 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6184 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6185 return 0;
6186 }
6187
6188 return -((nunits - i) * GET_MODE_SIZE (inner));
6189 }
6190
6191 else
6192 return 0;
6193 }
6194 }
6195
6196 /* If all elements are equal, we don't need to do VSLDOI. */
6197 return 0;
6198 }
6199
6200
6201 /* Return true if OP is of the given MODE and can be synthesized
6202 with a vspltisb, vspltish or vspltisw. */
6203
6204 bool
6205 easy_altivec_constant (rtx op, machine_mode mode)
6206 {
6207 unsigned step, copies;
6208
6209 if (mode == VOIDmode)
6210 mode = GET_MODE (op);
6211 else if (mode != GET_MODE (op))
6212 return false;
6213
6214 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6215 constants. */
6216 if (mode == V2DFmode)
6217 return zero_constant (op, mode);
6218
6219 else if (mode == V2DImode)
6220 {
6221 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6222 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6223 return false;
6224
6225 if (zero_constant (op, mode))
6226 return true;
6227
6228 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6229 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6230 return true;
6231
6232 return false;
6233 }
6234
6235 /* V1TImode is a special container for TImode. Ignore for now. */
6236 else if (mode == V1TImode)
6237 return false;
6238
6239 /* Start with a vspltisw. */
6240 step = GET_MODE_NUNITS (mode) / 4;
6241 copies = 1;
6242
6243 if (vspltis_constant (op, step, copies))
6244 return true;
6245
6246 /* Then try with a vspltish. */
6247 if (step == 1)
6248 copies <<= 1;
6249 else
6250 step >>= 1;
6251
6252 if (vspltis_constant (op, step, copies))
6253 return true;
6254
6255 /* And finally a vspltisb. */
6256 if (step == 1)
6257 copies <<= 1;
6258 else
6259 step >>= 1;
6260
6261 if (vspltis_constant (op, step, copies))
6262 return true;
6263
6264 if (vspltis_shifted (op) != 0)
6265 return true;
6266
6267 return false;
6268 }
6269
6270 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6271 result is OP. Abort if it is not possible. */
6272
6273 rtx
6274 gen_easy_altivec_constant (rtx op)
6275 {
6276 machine_mode mode = GET_MODE (op);
6277 int nunits = GET_MODE_NUNITS (mode);
6278 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6279 unsigned step = nunits / 4;
6280 unsigned copies = 1;
6281
6282 /* Start with a vspltisw. */
6283 if (vspltis_constant (op, step, copies))
6284 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6285
6286 /* Then try with a vspltish. */
6287 if (step == 1)
6288 copies <<= 1;
6289 else
6290 step >>= 1;
6291
6292 if (vspltis_constant (op, step, copies))
6293 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6294
6295 /* And finally a vspltisb. */
6296 if (step == 1)
6297 copies <<= 1;
6298 else
6299 step >>= 1;
6300
6301 if (vspltis_constant (op, step, copies))
6302 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6303
6304 gcc_unreachable ();
6305 }
6306
6307 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6308 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6309
6310 Return the number of instructions needed (1 or 2) via the address pointed
6311 to by NUM_INSNS_PTR.
6312
6313 Return the constant that is being split via CONSTANT_PTR. */
6314
6315 bool
6316 xxspltib_constant_p (rtx op,
6317 machine_mode mode,
6318 int *num_insns_ptr,
6319 int *constant_ptr)
6320 {
6321 size_t nunits = GET_MODE_NUNITS (mode);
6322 size_t i;
6323 HOST_WIDE_INT value;
6324 rtx element;
6325
6326 /* Set the returned values to out-of-bounds values. */
6327 *num_insns_ptr = -1;
6328 *constant_ptr = 256;
6329
6330 if (!TARGET_P9_VECTOR)
6331 return false;
6332
6333 if (mode == VOIDmode)
6334 mode = GET_MODE (op);
6335
6336 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6337 return false;
6338
6339 /* Handle (vec_duplicate <constant>). */
6340 if (GET_CODE (op) == VEC_DUPLICATE)
6341 {
6342 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6343 && mode != V2DImode)
6344 return false;
6345
6346 element = XEXP (op, 0);
6347 if (!CONST_INT_P (element))
6348 return false;
6349
6350 value = INTVAL (element);
6351 if (!IN_RANGE (value, -128, 127))
6352 return false;
6353 }
6354
6355 /* Handle (const_vector [...]). */
6356 else if (GET_CODE (op) == CONST_VECTOR)
6357 {
6358 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6359 && mode != V2DImode)
6360 return false;
6361
6362 element = CONST_VECTOR_ELT (op, 0);
6363 if (!CONST_INT_P (element))
6364 return false;
6365
6366 value = INTVAL (element);
6367 if (!IN_RANGE (value, -128, 127))
6368 return false;
6369
6370 for (i = 1; i < nunits; i++)
6371 {
6372 element = CONST_VECTOR_ELT (op, i);
6373 if (!CONST_INT_P (element))
6374 return false;
6375
6376 if (value != INTVAL (element))
6377 return false;
6378 }
6379 }
6380
6381 /* Handle integer constants being loaded into the upper part of the VSX
6382 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6383 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6384 else if (CONST_INT_P (op))
6385 {
6386 if (!SCALAR_INT_MODE_P (mode))
6387 return false;
6388
6389 value = INTVAL (op);
6390 if (!IN_RANGE (value, -128, 127))
6391 return false;
6392
6393 if (!IN_RANGE (value, -1, 0))
6394 {
6395 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6396 return false;
6397
6398 if (EASY_VECTOR_15 (value))
6399 return false;
6400 }
6401 }
6402
6403 else
6404 return false;
6405
6406 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6407 sign extend. Special case 0/-1 to allow getting any VSX register instead
6408 of an Altivec register. */
6409 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6410 && EASY_VECTOR_15 (value))
6411 return false;
6412
6413 /* Return # of instructions and the constant byte for XXSPLTIB. */
6414 if (mode == V16QImode)
6415 *num_insns_ptr = 1;
6416
6417 else if (IN_RANGE (value, -1, 0))
6418 *num_insns_ptr = 1;
6419
6420 else
6421 *num_insns_ptr = 2;
6422
6423 *constant_ptr = (int) value;
6424 return true;
6425 }
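/* Examples: a V4SImode splat of 5 returns false (vspltisw is preferred),
   while a V4SImode splat of 100 returns true with *NUM_INSNS_PTR == 2,
   i.e. xxspltib plus a sign extend such as vextsb2w.  */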
6426
6427 const char *
6428 output_vec_const_move (rtx *operands)
6429 {
6430 int shift;
6431 machine_mode mode;
6432 rtx dest, vec;
6433
6434 dest = operands[0];
6435 vec = operands[1];
6436 mode = GET_MODE (dest);
6437
6438 if (TARGET_VSX)
6439 {
6440 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6441 int xxspltib_value = 256;
6442 int num_insns = -1;
6443
6444 if (zero_constant (vec, mode))
6445 {
6446 if (TARGET_P9_VECTOR)
6447 return "xxspltib %x0,0";
6448
6449 else if (dest_vmx_p)
6450 return "vspltisw %0,0";
6451
6452 else
6453 return "xxlxor %x0,%x0,%x0";
6454 }
6455
6456 if (all_ones_constant (vec, mode))
6457 {
6458 if (TARGET_P9_VECTOR)
6459 return "xxspltib %x0,255";
6460
6461 else if (dest_vmx_p)
6462 return "vspltisw %0,-1";
6463
6464 else if (TARGET_P8_VECTOR)
6465 return "xxlorc %x0,%x0,%x0";
6466
6467 else
6468 gcc_unreachable ();
6469 }
6470
6471 if (TARGET_P9_VECTOR
6472 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6473 {
6474 if (num_insns == 1)
6475 {
6476 operands[2] = GEN_INT (xxspltib_value & 0xff);
6477 return "xxspltib %x0,%2";
6478 }
6479
6480 return "#";
6481 }
6482 }
6483
6484 if (TARGET_ALTIVEC)
6485 {
6486 rtx splat_vec;
6487
6488 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6489 if (zero_constant (vec, mode))
6490 return "vspltisw %0,0";
6491
6492 if (all_ones_constant (vec, mode))
6493 return "vspltisw %0,-1";
6494
6495 /* Do we need to construct a value using VSLDOI? */
6496 shift = vspltis_shifted (vec);
6497 if (shift != 0)
6498 return "#";
6499
6500 splat_vec = gen_easy_altivec_constant (vec);
6501 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6502 operands[1] = XEXP (splat_vec, 0);
6503 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6504 return "#";
6505
6506 switch (GET_MODE (splat_vec))
6507 {
6508 case E_V4SImode:
6509 return "vspltisw %0,%1";
6510
6511 case E_V8HImode:
6512 return "vspltish %0,%1";
6513
6514 case E_V16QImode:
6515 return "vspltisb %0,%1";
6516
6517 default:
6518 gcc_unreachable ();
6519 }
6520 }
6521
6522 gcc_unreachable ();
6523 }
6524
6525 /* Initialize vector TARGET to VALS. */
6526
6527 void
6528 rs6000_expand_vector_init (rtx target, rtx vals)
6529 {
6530 machine_mode mode = GET_MODE (target);
6531 machine_mode inner_mode = GET_MODE_INNER (mode);
6532 unsigned int n_elts = GET_MODE_NUNITS (mode);
6533 int n_var = 0, one_var = -1;
6534 bool all_same = true, all_const_zero = true;
6535 rtx x, mem;
6536 unsigned int i;
6537
6538 for (i = 0; i < n_elts; ++i)
6539 {
6540 x = XVECEXP (vals, 0, i);
6541 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6542 ++n_var, one_var = i;
6543 else if (x != CONST0_RTX (inner_mode))
6544 all_const_zero = false;
6545
6546 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6547 all_same = false;
6548 }
6549
6550 if (n_var == 0)
6551 {
6552 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6553 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6554 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6555 {
6556 /* Zero register. */
6557 emit_move_insn (target, CONST0_RTX (mode));
6558 return;
6559 }
6560 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6561 {
6562 /* Splat immediate. */
6563 emit_insn (gen_rtx_SET (target, const_vec));
6564 return;
6565 }
6566 else
6567 {
6568 /* Load from constant pool. */
6569 emit_move_insn (target, const_vec);
6570 return;
6571 }
6572 }
6573
6574 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6575 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6576 {
6577 rtx op[2];
6578 size_t i;
6579 size_t num_elements = all_same ? 1 : 2;
6580 for (i = 0; i < num_elements; i++)
6581 {
6582 op[i] = XVECEXP (vals, 0, i);
6583 /* Just in case there is a SUBREG with a smaller mode, do a
6584 conversion. */
6585 if (GET_MODE (op[i]) != inner_mode)
6586 {
6587 rtx tmp = gen_reg_rtx (inner_mode);
6588 convert_move (tmp, op[i], 0);
6589 op[i] = tmp;
6590 }
6591 /* Allow load with splat double word. */
6592 else if (MEM_P (op[i]))
6593 {
6594 if (!all_same)
6595 op[i] = force_reg (inner_mode, op[i]);
6596 }
6597 else if (!REG_P (op[i]))
6598 op[i] = force_reg (inner_mode, op[i]);
6599 }
6600
6601 if (all_same)
6602 {
6603 if (mode == V2DFmode)
6604 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6605 else
6606 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6607 }
6608 else
6609 {
6610 if (mode == V2DFmode)
6611 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6612 else
6613 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6614 }
6615 return;
6616 }
6617
6618 /* Special case initializing vector int if we are on 64-bit systems with
6619 direct move or we have the ISA 3.0 instructions. */
6620 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6621 && TARGET_DIRECT_MOVE_64BIT)
6622 {
6623 if (all_same)
6624 {
6625 rtx element0 = XVECEXP (vals, 0, 0);
6626 if (MEM_P (element0))
6627 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6628 else
6629 element0 = force_reg (SImode, element0);
6630
6631 if (TARGET_P9_VECTOR)
6632 emit_insn (gen_vsx_splat_v4si (target, element0));
6633 else
6634 {
6635 rtx tmp = gen_reg_rtx (DImode);
6636 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6637 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6638 }
6639 return;
6640 }
6641 else
6642 {
6643 rtx elements[4];
6644 size_t i;
6645
6646 for (i = 0; i < 4; i++)
6647 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6648
6649 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6650 elements[2], elements[3]));
6651 return;
6652 }
6653 }
6654
6655 /* With single precision floating point on VSX, we know that internally
6656 single precision is actually represented as a double, so either make two
6657 V2DF vectors and convert those vectors to single precision, or do one
6658 conversion and splat the result to the other elements. */
6659 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6660 {
6661 if (all_same)
6662 {
6663 rtx element0 = XVECEXP (vals, 0, 0);
6664
6665 if (TARGET_P9_VECTOR)
6666 {
6667 if (MEM_P (element0))
6668 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6669
6670 emit_insn (gen_vsx_splat_v4sf (target, element0));
6671 }
6672
6673 else
6674 {
6675 rtx freg = gen_reg_rtx (V4SFmode);
6676 rtx sreg = force_reg (SFmode, element0);
6677 rtx cvt = (TARGET_XSCVDPSPN
6678 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6679 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6680
6681 emit_insn (cvt);
6682 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6683 const0_rtx));
6684 }
6685 }
6686 else
6687 {
6688 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6689 {
6690 rtx tmp_sf[4];
6691 rtx tmp_si[4];
6692 rtx tmp_di[4];
6693 rtx mrg_di[4];
6694 for (i = 0; i < 4; i++)
6695 {
6696 tmp_si[i] = gen_reg_rtx (SImode);
6697 tmp_di[i] = gen_reg_rtx (DImode);
6698 mrg_di[i] = gen_reg_rtx (DImode);
6699 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6700 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6701 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6702 }
6703
6704 if (!BYTES_BIG_ENDIAN)
6705 {
6706 std::swap (tmp_di[0], tmp_di[1]);
6707 std::swap (tmp_di[2], tmp_di[3]);
6708 }
6709
6710 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6711 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6712 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6713 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6714
6715 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6716 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6717 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6718 }
6719 else
6720 {
6721 rtx dbl_even = gen_reg_rtx (V2DFmode);
6722 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6723 rtx flt_even = gen_reg_rtx (V4SFmode);
6724 rtx flt_odd = gen_reg_rtx (V4SFmode);
6725 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6726 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6727 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6728 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6729
6730 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6731 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6732 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6733 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6734 rs6000_expand_extract_even (target, flt_even, flt_odd);
6735 }
6736 }
6737 return;
6738 }
6739
6740 /* Special case initializing vector short/char that are splats if we are on
6741 64-bit systems with direct move. */
6742 if (all_same && TARGET_DIRECT_MOVE_64BIT
6743 && (mode == V16QImode || mode == V8HImode))
6744 {
6745 rtx op0 = XVECEXP (vals, 0, 0);
6746 rtx di_tmp = gen_reg_rtx (DImode);
6747
6748 if (!REG_P (op0))
6749 op0 = force_reg (GET_MODE_INNER (mode), op0);
6750
6751 if (mode == V16QImode)
6752 {
6753 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6754 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6755 return;
6756 }
6757
6758 if (mode == V8HImode)
6759 {
6760 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6761 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6762 return;
6763 }
6764 }
6765
6766 /* Store value to stack temp. Load vector element. Splat. However, splat
6767 of 64-bit items is not supported on Altivec. */
6768 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6769 {
6770 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6771 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6772 XVECEXP (vals, 0, 0));
6773 x = gen_rtx_UNSPEC (VOIDmode,
6774 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6775 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6776 gen_rtvec (2,
6777 gen_rtx_SET (target, mem),
6778 x)));
6779 x = gen_rtx_VEC_SELECT (inner_mode, target,
6780 gen_rtx_PARALLEL (VOIDmode,
6781 gen_rtvec (1, const0_rtx)));
6782 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6783 return;
6784 }
6785
6786 /* One field is non-constant. Load constant then overwrite
6787 varying field. */
6788 if (n_var == 1)
6789 {
6790 rtx copy = copy_rtx (vals);
6791
6792 /* Load constant part of vector, substitute neighboring value for
6793 varying element. */
6794 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6795 rs6000_expand_vector_init (target, copy);
6796
6797 /* Insert variable. */
6798 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6799 GEN_INT (one_var));
6800 return;
6801 }
6802
6803 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6804 {
6805 rtx op[16];
6806 /* Force the values into word_mode registers. */
6807 for (i = 0; i < n_elts; i++)
6808 {
6809 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
6810 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
6811 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
6812 }
6813
6816 6814 /* Take unsigned char on 64-bit big endian as an example for the
6817 6815 construction below; the input values are: A, B, C, D, ..., O, P. */
6816
6817 if (TARGET_DIRECT_MOVE_128)
6818 {
6819 /* Move to VSX register with vec_concat, each has 2 values.
6820 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6821 vr1[1] = { xxxxxxxC, xxxxxxxD };
6822 ...
6823 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6824 rtx vr1[8];
6825 for (i = 0; i < n_elts / 2; i++)
6826 {
6827 vr1[i] = gen_reg_rtx (V2DImode);
6828 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6829 op[i * 2 + 1]));
6830 }
6831
6832 /* Pack vectors with 2 values into vectors with 4 values.
6833 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6834 vr2[1] = { xxxExxxF, xxxGxxxH };
6837 6835 vr2[2] = { xxxIxxxJ, xxxKxxxL };
6836 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6837 rtx vr2[4];
6838 for (i = 0; i < n_elts / 4; i++)
6839 {
6840 vr2[i] = gen_reg_rtx (V4SImode);
6841 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
6842 vr1[i * 2 + 1]));
6843 }
6844
6845 /* Pack vectors with 4 values into vectors with 8 values.
6846 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
6847 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
6848 rtx vr3[2];
6849 for (i = 0; i < n_elts / 8; i++)
6850 {
6851 vr3[i] = gen_reg_rtx (V8HImode);
6852 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
6853 vr2[i * 2 + 1]));
6854 }
6855
6858 6856 /* If the mode is V8HImode, the result is complete; set TARGET and return. */
6857 if (mode == V8HImode)
6858 {
6859 emit_insn (gen_rtx_SET (target, vr3[0]));
6860 return;
6861 }
6862
6863 /* Pack vectors with 8 values into 16 values. */
6864 rtx res = gen_reg_rtx (V16QImode);
6865 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
6866 emit_insn (gen_rtx_SET (target, res));
6867 }
6868 else
6869 {
6870 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
6871 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
6872 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
6873 rtx perm_idx;
6874
6875 /* Set up some common gen routines and values. */
6876 if (BYTES_BIG_ENDIAN)
6877 {
6878 if (mode == V16QImode)
6879 {
6880 merge_v16qi = gen_altivec_vmrghb;
6881 merge_v8hi = gen_altivec_vmrglh;
6882 }
6883 else
6884 merge_v8hi = gen_altivec_vmrghh;
6885
6886 merge_v4si = gen_altivec_vmrglw;
6887 perm_idx = GEN_INT (3);
6888 }
6889 else
6890 {
6891 if (mode == V16QImode)
6892 {
6893 merge_v16qi = gen_altivec_vmrglb;
6894 merge_v8hi = gen_altivec_vmrghh;
6895 }
6896 else
6897 merge_v8hi = gen_altivec_vmrglh;
6898
6899 merge_v4si = gen_altivec_vmrghw;
6900 perm_idx = GEN_INT (0);
6901 }
6902
6903 /* Move to VSX register with direct move.
6904 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
6905 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
6906 ...
6907 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
6908 rtx vr_qi[16];
6909 for (i = 0; i < n_elts; i++)
6910 {
6911 vr_qi[i] = gen_reg_rtx (V16QImode);
6912 if (TARGET_POWERPC64)
6913 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
6914 else
6915 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
6916 }
6917
6918 /* Merge/move to vector short.
6919 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
6920 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
6921 ...
6922 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
6923 rtx vr_hi[8];
6924 for (i = 0; i < 8; i++)
6925 {
6926 rtx tmp = vr_qi[i];
6927 if (mode == V16QImode)
6928 {
6929 tmp = gen_reg_rtx (V16QImode);
6930 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
6931 }
6932 vr_hi[i] = gen_reg_rtx (V8HImode);
6933 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
6934 }
6935
6936 /* Merge vector short to vector int.
6937 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
6938 vr_si[1] = { xxxxxxxx, xxxxEFGH };
6939 ...
6940 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
6941 rtx vr_si[4];
6942 for (i = 0; i < 4; i++)
6943 {
6944 rtx tmp = gen_reg_rtx (V8HImode);
6945 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
6946 vr_si[i] = gen_reg_rtx (V4SImode);
6947 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
6948 }
6949
6950 /* Merge vector int to vector long.
6951 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
6952 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
6953 rtx vr_di[2];
6954 for (i = 0; i < 2; i++)
6955 {
6956 rtx tmp = gen_reg_rtx (V4SImode);
6957 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
6958 vr_di[i] = gen_reg_rtx (V2DImode);
6959 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
6960 }
6961
6962 rtx res = gen_reg_rtx (V2DImode);
6963 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
6964 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
6965 }
6966
6967 return;
6968 }
6969
6970 /* Construct the vector in memory one field at a time
6971 and load the whole vector. */
6972 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6973 for (i = 0; i < n_elts; i++)
6974 emit_move_insn (adjust_address_nv (mem, inner_mode,
6975 i * GET_MODE_SIZE (inner_mode)),
6976 XVECEXP (vals, 0, i));
6977 emit_move_insn (target, mem);
6978 }
6979
6980 /* Set field ELT_RTX of TARGET to VAL. */
6981
6982 void
6983 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
6984 {
6985 machine_mode mode = GET_MODE (target);
6986 machine_mode inner_mode = GET_MODE_INNER (mode);
6987 rtx reg = gen_reg_rtx (mode);
6988 rtx mask, mem, x;
6989 int width = GET_MODE_SIZE (inner_mode);
6990 int i;
6991
6992 val = force_reg (GET_MODE (val), val);
6993
6994 if (VECTOR_MEM_VSX_P (mode))
6995 {
6996 rtx insn = NULL_RTX;
6997
6998 if (mode == V2DFmode)
6999 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7000
7001 else if (mode == V2DImode)
7002 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7003
7004 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7005 {
7006 if (mode == V4SImode)
7007 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7008 else if (mode == V8HImode)
7009 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7010 else if (mode == V16QImode)
7011 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7012 else if (mode == V4SFmode)
7013 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7014 }
7015
7016 if (insn)
7017 {
7018 emit_insn (insn);
7019 return;
7020 }
7021 }
7022
7023 gcc_assert (CONST_INT_P (elt_rtx));
7024
7025 /* Simplify setting single element vectors like V1TImode. */
7026 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7027 && INTVAL (elt_rtx) == 0)
7028 {
7029 emit_move_insn (target, gen_lowpart (mode, val));
7030 return;
7031 }
7032
7033 /* Load single variable value. */
7034 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7035 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7036 x = gen_rtx_UNSPEC (VOIDmode,
7037 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7038 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7039 gen_rtvec (2,
7040 gen_rtx_SET (reg, mem),
7041 x)));
7042
7043 /* Linear sequence. */
7044 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7045 for (i = 0; i < 16; ++i)
7046 XVECEXP (mask, 0, i) = GEN_INT (i);
7047
7048 /* Set permute mask to insert element into target. */
7049 for (i = 0; i < width; ++i)
7050 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
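/* eg: inserting a 4-byte element at position 1 of a V4SI gives the
   byte selector { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 };
   selector values 0x10..0x1f pick bytes from the register holding
   VAL, the rest from TARGET.  */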
7051 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7052
7053 if (BYTES_BIG_ENDIAN)
7054 x = gen_rtx_UNSPEC (mode,
7055 gen_rtvec (3, target, reg,
7056 force_reg (V16QImode, x)),
7057 UNSPEC_VPERM);
7058 else
7059 {
7060 if (TARGET_P9_VECTOR)
7061 x = gen_rtx_UNSPEC (mode,
7062 gen_rtvec (3, reg, target,
7063 force_reg (V16QImode, x)),
7064 UNSPEC_VPERMR);
7065 else
7066 {
7067 /* Invert selector. We prefer to generate VNAND on P8 so
7068 that future fusion opportunities can kick in, but must
7069 generate VNOR elsewhere. */
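/* With identical operands, (ior (not m) (not m)) and
   (and (not m) (not m)) both compute ~m; the IOR form matches the
   vnand pattern, the AND form matches vnor.  */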
7070 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7071 rtx iorx = (TARGET_P8_VECTOR
7072 ? gen_rtx_IOR (V16QImode, notx, notx)
7073 : gen_rtx_AND (V16QImode, notx, notx));
7074 rtx tmp = gen_reg_rtx (V16QImode);
7075 emit_insn (gen_rtx_SET (tmp, iorx));
7076
7077 /* Permute with operands reversed and adjusted selector. */
7078 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7079 UNSPEC_VPERM);
7080 }
7081 }
7082
7083 emit_insn (gen_rtx_SET (target, x));
7084 }
7085
7086 /* Extract field ELT from VEC into TARGET. */
7087
7088 void
7089 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7090 {
7091 machine_mode mode = GET_MODE (vec);
7092 machine_mode inner_mode = GET_MODE_INNER (mode);
7093 rtx mem;
7094
7095 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7096 {
7097 switch (mode)
7098 {
7099 default:
7100 break;
7101 case E_V1TImode:
7102 emit_move_insn (target, gen_lowpart (TImode, vec));
7103 break;
7104 case E_V2DFmode:
7105 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7106 return;
7107 case E_V2DImode:
7108 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7109 return;
7110 case E_V4SFmode:
7111 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7112 return;
7113 case E_V16QImode:
7114 if (TARGET_DIRECT_MOVE_64BIT)
7115 {
7116 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7117 return;
7118 }
7119 else
7120 break;
7121 case E_V8HImode:
7122 if (TARGET_DIRECT_MOVE_64BIT)
7123 {
7124 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7125 return;
7126 }
7127 else
7128 break;
7129 case E_V4SImode:
7130 if (TARGET_DIRECT_MOVE_64BIT)
7131 {
7132 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7133 return;
7134 }
7135 break;
7136 }
7137 }
7138 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7139 && TARGET_DIRECT_MOVE_64BIT)
7140 {
7141 if (GET_MODE (elt) != DImode)
7142 {
7143 rtx tmp = gen_reg_rtx (DImode);
7144 convert_move (tmp, elt, 0);
7145 elt = tmp;
7146 }
7147 else if (!REG_P (elt))
7148 elt = force_reg (DImode, elt);
7149
7150 switch (mode)
7151 {
7152 case E_V1TImode:
7153 emit_move_insn (target, gen_lowpart (TImode, vec));
7154 return;
7155
7156 case E_V2DFmode:
7157 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7158 return;
7159
7160 case E_V2DImode:
7161 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7162 return;
7163
7164 case E_V4SFmode:
7165 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7166 return;
7167
7168 case E_V4SImode:
7169 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7170 return;
7171
7172 case E_V8HImode:
7173 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7174 return;
7175
7176 case E_V16QImode:
7177 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7178 return;
7179
7180 default:
7181 gcc_unreachable ();
7182 }
7183 }
7184
7185 /* Allocate mode-sized buffer. */
7186 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7187
7188 emit_move_insn (mem, vec);
7189 if (CONST_INT_P (elt))
7190 {
7191 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7192
7193 /* Add offset to field within buffer matching vector element. */
7194 mem = adjust_address_nv (mem, inner_mode,
7195 modulo_elt * GET_MODE_SIZE (inner_mode));
7196 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7197 }
7198 else
7199 {
7200 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7201 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7202 rtx new_addr = gen_reg_rtx (Pmode);
7203
7204 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7205 if (ele_size > 1)
7206 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7207 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7208 new_addr = change_address (mem, inner_mode, new_addr);
7209 emit_move_insn (target, new_addr);
7210 }
7211 }
7212
7213 /* Return the offset within a memory object (MEM) of a vector type to a given
7214 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7215 the element is constant, we return a constant integer.
7216
7217 Otherwise, we use a base register temporary to calculate the offset after
7218 masking it to fit within the bounds of the vector and scaling it. The
7219 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7220 built-in function. */
7221
7222 static rtx
7223 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7224 {
7225 if (CONST_INT_P (element))
7226 return GEN_INT (INTVAL (element) * scalar_size);
7227
7228 /* All insns should use the 'Q' constraint (address is a single register) if
7229 the element number is not a constant. */
7230 gcc_assert (satisfies_constraint_Q (mem));
7231
7232 /* Mask the element to make sure the element number is between 0 and the
7233 maximum number of elements - 1 so that we don't generate an address
7234 outside the vector. */
7235 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7236 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7237 emit_insn (gen_rtx_SET (base_tmp, and_op));
7238
7239 /* Shift the element to get the byte offset from the element number. */
7240 int shift = exact_log2 (scalar_size);
7241 gcc_assert (shift >= 0);
7242
7243 if (shift > 0)
7244 {
7245 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7246 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7247 }
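/* eg: for a V8HImode vector (8 two-byte elements) with a variable
   ELEMENT, the sequence above computes base_tmp = (element & 7) << 1.  */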
7248
7249 return base_tmp;
7250 }
7251
7254 7252 /* Helper function to update a PC-relative address when we are adjusting a
7255 7253 memory address (ADDR) of a vector to point to a scalar field within the
7256 7254 vector with a constant offset (ELEMENT_OFFSET). If the address is not
7257 7255 valid, we can use the base register temporary (BASE_TMP) to form the address. */
7256
7257 static rtx
7258 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7259 {
7260 rtx new_addr = NULL;
7261
7262 gcc_assert (CONST_INT_P (element_offset));
7263
7264 if (GET_CODE (addr) == CONST)
7265 addr = XEXP (addr, 0);
7266
7267 if (GET_CODE (addr) == PLUS)
7268 {
7269 rtx op0 = XEXP (addr, 0);
7270 rtx op1 = XEXP (addr, 1);
7271
7272 if (CONST_INT_P (op1))
7273 {
7274 HOST_WIDE_INT offset
7275 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7276
7277 if (offset == 0)
7278 new_addr = op0;
7279
7280 else
7281 {
7282 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7283 new_addr = gen_rtx_CONST (Pmode, plus);
7284 }
7285 }
7286
7287 else
7288 {
7289 emit_move_insn (base_tmp, addr);
7290 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7291 }
7292 }
7293
7294 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7295 {
7296 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7297 new_addr = gen_rtx_CONST (Pmode, plus);
7298 }
7299
7300 else
7301 gcc_unreachable ();
7302
7303 return new_addr;
7304 }
7305
7306 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7307 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7308 temporary (BASE_TMP) to fixup the address. Return the new memory address
7309 that is valid for reads or writes to a given register (SCALAR_REG).
7310
7311 This function is expected to be called after reload is completed when we are
7312 splitting insns. The temporary BASE_TMP might be set multiple times with
7313 this code. */
7314
7315 rtx
7316 rs6000_adjust_vec_address (rtx scalar_reg,
7317 rtx mem,
7318 rtx element,
7319 rtx base_tmp,
7320 machine_mode scalar_mode)
7321 {
7322 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7323 rtx addr = XEXP (mem, 0);
7324 rtx new_addr;
7325
7326 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7327 gcc_assert (!reg_mentioned_p (base_tmp, element));
7328
7329 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7330 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7331
7332 /* Calculate what we need to add to the address to get the element
7333 address. */
7334 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7335
7336 /* Create the new address pointing to the element within the vector. If we
7337 are adding 0, we don't have to change the address. */
7338 if (element_offset == const0_rtx)
7339 new_addr = addr;
7340
7341 /* A simple indirect address can be converted into a reg + offset
7342 address. */
7343 else if (REG_P (addr) || SUBREG_P (addr))
7344 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7345
7346 /* For references to local static variables, fold a constant offset into the
7347 address. */
7348 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7349 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7350
7353 7351 /* Optimize D-FORM addresses with a constant offset and a constant element
7354 7352 number, folding the element offset into the address directly. */
7353 else if (GET_CODE (addr) == PLUS)
7354 {
7355 rtx op0 = XEXP (addr, 0);
7356 rtx op1 = XEXP (addr, 1);
7357
7358 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7359 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7360 {
7361 /* op0 should never be r0, because r0+offset is not valid. But it
7362 doesn't hurt to make sure it is not r0. */
7363 gcc_assert (reg_or_subregno (op0) != 0);
7364
7365 /* D-FORM address with constant element number. */
7366 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7367 rtx offset_rtx = GEN_INT (offset);
7368 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7369 }
7370 else
7371 {
7372 /* If we don't have a D-FORM address with a constant element number,
7373 add the two elements in the current address. Then add the offset.
7374
7375 Previously, we tried to add the offset to OP1 and change the
7376 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7377 complicated because we had to verify that op1 was not GPR0 and we
7378 had a constant element offset (due to the way ADDI is defined).
7379 By doing the add of OP0 and OP1 first, and then adding in the
7380 offset, it has the benefit that if D-FORM instructions are
7381 allowed, the offset is part of the memory access to the vector
7382 element. */
7383 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7384 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7385 }
7386 }
7387
7388 else
7389 {
7390 emit_move_insn (base_tmp, addr);
7391 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7392 }
7393
7394 /* If the address isn't valid, move the address into the temporary base
7395 register. Some reasons it could not be valid include:
7396
7397 The address offset overflowed the 16 or 34 bit offset size;
7398 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7399 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7402 7400 Only X-FORM loads can be done, and the address is D-FORM. */
7401
7402 enum insn_form iform
7403 = address_to_insn_form (new_addr, scalar_mode,
7404 reg_to_non_prefixed (scalar_reg, scalar_mode));
7405
7406 if (iform == INSN_FORM_BAD)
7407 {
7408 emit_move_insn (base_tmp, new_addr);
7409 new_addr = base_tmp;
7410 }
7411
7412 return change_address (mem, scalar_mode, new_addr);
7413 }
7414
7415 /* Split a variable vec_extract operation into the component instructions. */
7416
7417 void
7418 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7419 rtx tmp_altivec)
7420 {
7421 machine_mode mode = GET_MODE (src);
7422 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7423 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7424 int byte_shift = exact_log2 (scalar_size);
7425
7426 gcc_assert (byte_shift >= 0);
7427
7428 /* If we are given a memory address, optimize to load just the element. We
7429 don't have to adjust the vector element number on little endian
7430 systems. */
7431 if (MEM_P (src))
7432 {
7433 emit_move_insn (dest,
7434 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7435 scalar_mode));
7436 return;
7437 }
7438
7439 else if (REG_P (src) || SUBREG_P (src))
7440 {
7441 int num_elements = GET_MODE_NUNITS (mode);
7442 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7443 int bit_shift = 7 - exact_log2 (num_elements);
7444 rtx element2;
7445 unsigned int dest_regno = reg_or_subregno (dest);
7446 unsigned int src_regno = reg_or_subregno (src);
7447 unsigned int element_regno = reg_or_subregno (element);
7448
7449 gcc_assert (REG_P (tmp_gpr));
7450
7451 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7452 a general purpose register. */
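/* eg: for V8HImode the element number is doubled below to form a byte
   index before vextuhlx/vextuhrx extracts the halfword directly into
   a GPR.  */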
7453 if (TARGET_P9_VECTOR
7454 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7455 && INT_REGNO_P (dest_regno)
7456 && ALTIVEC_REGNO_P (src_regno)
7457 && INT_REGNO_P (element_regno))
7458 {
7459 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7460 rtx element_si = gen_rtx_REG (SImode, element_regno);
7461
7462 if (mode == V16QImode)
7463 emit_insn (BYTES_BIG_ENDIAN
7464 ? gen_vextublx (dest_si, element_si, src)
7465 : gen_vextubrx (dest_si, element_si, src));
7466
7467 else if (mode == V8HImode)
7468 {
7469 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7470 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7471 emit_insn (BYTES_BIG_ENDIAN
7472 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7473 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7474 }
7475
7476
7477 else
7478 {
7479 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7480 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7481 emit_insn (BYTES_BIG_ENDIAN
7482 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7483 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7484 }
7485
7486 return;
7487 }
7488
7489
7490 gcc_assert (REG_P (tmp_altivec));
7491
7494 7492 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7495 7493 an XOR, otherwise we need to subtract. The shift amount is chosen so
7496 7494 VSLO will shift the element into the upper position (adding 3 to convert
7497 7495 a byte shift into a bit shift). */
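/* eg: with two 8-byte elements, LE element 1 is BE element 0, so
   element ^ 1 remaps it; for smaller elements the code computes
   (nelts - 1) - element instead.  */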
7496 if (scalar_size == 8)
7497 {
7498 if (!BYTES_BIG_ENDIAN)
7499 {
7500 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7501 element2 = tmp_gpr;
7502 }
7503 else
7504 element2 = element;
7505
7506 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7507 bit. */
7508 emit_insn (gen_rtx_SET (tmp_gpr,
7509 gen_rtx_AND (DImode,
7510 gen_rtx_ASHIFT (DImode,
7511 element2,
7512 GEN_INT (6)),
7513 GEN_INT (64))));
7514 }
7515 else
7516 {
7517 if (!BYTES_BIG_ENDIAN)
7518 {
7519 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7520
7521 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7522 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7523 element2 = tmp_gpr;
7524 }
7525 else
7526 element2 = element;
7527
7528 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7529 }
7530
7531 /* Get the value into the lower byte of the Altivec register where VSLO
7532 expects it. */
7533 if (TARGET_P9_VECTOR)
7534 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7535 else if (can_create_pseudo_p ())
7536 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7537 else
7538 {
7539 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7540 emit_move_insn (tmp_di, tmp_gpr);
7541 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7542 }
7543
7544 /* Do the VSLO to get the value into the final location. */
7545 switch (mode)
7546 {
7547 case E_V2DFmode:
7548 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7549 return;
7550
7551 case E_V2DImode:
7552 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7553 return;
7554
7555 case E_V4SFmode:
7556 {
7557 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7558 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7559 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7560 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7561 tmp_altivec));
7562
7563 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7564 return;
7565 }
7566
7567 case E_V4SImode:
7568 case E_V8HImode:
7569 case E_V16QImode:
7570 {
7571 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7572 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7573 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7574 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7575 tmp_altivec));
7576 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7577 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7578 GEN_INT (64 - bits_in_element)));
7579 return;
7580 }
7581
7582 default:
7583 gcc_unreachable ();
7584 }
7585
7586 return;
7587 }
7588 else
7589 gcc_unreachable ();
7590 }
7591
7594 7592 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7595 7593 selects whether the alignment is ABI-mandated, optional, or
7596 7594 both ABI-mandated and optional alignment. */
7595
7596 unsigned int
7597 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7598 {
7599 if (how != align_opt)
7600 {
7601 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7602 align = 128;
7603 }
7604
7605 if (how != align_abi)
7606 {
7607 if (TREE_CODE (type) == ARRAY_TYPE
7608 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7609 {
7610 if (align < BITS_PER_WORD)
7611 align = BITS_PER_WORD;
7612 }
7613 }
7614
7615 return align;
7616 }
7617
7618 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7619 instructions simply ignore the low bits; VSX memory instructions
7620 are aligned to 4 or 8 bytes. */
7621
7622 static bool
7623 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7624 {
7625 return (STRICT_ALIGNMENT
7626 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7627 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7628 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
7629 && (int) align < VECTOR_ALIGN (mode)))));
7630 }
7631
7632 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7633
7634 bool
7635 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7636 {
7637 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7638 {
7639 if (computed != 128)
7640 {
7641 static bool warned;
7642 if (!warned && warn_psabi)
7643 {
7644 warned = true;
7645 inform (input_location,
7646 "the layout of aggregates containing vectors with"
7647 " %d-byte alignment has changed in GCC 5",
7648 computed / BITS_PER_UNIT);
7649 }
7650 }
7651 /* In current GCC there is no special case. */
7652 return false;
7653 }
7654
7655 return false;
7656 }
7657
7658 /* AIX increases natural record alignment to doubleword if the first
7659 field is an FP double while the FP fields remain word aligned. */
7660
7661 unsigned int
7662 rs6000_special_round_type_align (tree type, unsigned int computed,
7663 unsigned int specified)
7664 {
7665 unsigned int align = MAX (computed, specified);
7666 tree field = TYPE_FIELDS (type);
7667
7670 7668 /* Skip all non-field decls. */
7669 while (field != NULL
7670 && (TREE_CODE (field) != FIELD_DECL
7671 || DECL_FIELD_ABI_IGNORED (field)))
7672 field = DECL_CHAIN (field);
7673
7674 if (field != NULL && field != type)
7675 {
7676 type = TREE_TYPE (field);
7677 while (TREE_CODE (type) == ARRAY_TYPE)
7678 type = TREE_TYPE (type);
7679
7680 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7681 align = MAX (align, 64);
7682 }
7683
7684 return align;
7685 }
7686
7687 /* Darwin increases record alignment to the natural alignment of
7688 the first field. */
7689
7690 unsigned int
7691 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7692 unsigned int specified)
7693 {
7694 unsigned int align = MAX (computed, specified);
7695
7696 if (TYPE_PACKED (type))
7697 return align;
7698
7699 /* Find the first field, looking down into aggregates. */
7700 do {
7701 tree field = TYPE_FIELDS (type);
7704 7702 /* Skip all non-field decls. */
7703 while (field != NULL
7704 && (TREE_CODE (field) != FIELD_DECL
7705 || DECL_FIELD_ABI_IGNORED (field)))
7706 field = DECL_CHAIN (field);
7707 if (! field)
7708 break;
7709 /* A packed field does not contribute any extra alignment. */
7710 if (DECL_PACKED (field))
7711 return align;
7712 type = TREE_TYPE (field);
7713 while (TREE_CODE (type) == ARRAY_TYPE)
7714 type = TREE_TYPE (type);
7715 } while (AGGREGATE_TYPE_P (type));
7716
7717 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7718 align = MAX (align, TYPE_ALIGN (type));
7719
7720 return align;
7721 }
7722
7723 /* Return 1 for an operand in small memory on V.4/eabi. */
7724
7725 int
7726 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7727 machine_mode mode ATTRIBUTE_UNUSED)
7728 {
7729 #if TARGET_ELF
7730 rtx sym_ref;
7731
7732 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7733 return 0;
7734
7735 if (DEFAULT_ABI != ABI_V4)
7736 return 0;
7737
7738 if (SYMBOL_REF_P (op))
7739 sym_ref = op;
7740
7741 else if (GET_CODE (op) != CONST
7742 || GET_CODE (XEXP (op, 0)) != PLUS
7743 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7744 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7745 return 0;
7746
7747 else
7748 {
7749 rtx sum = XEXP (op, 0);
7750 HOST_WIDE_INT summand;
7751
7752 /* We have to be careful here, because it is the referenced address
7753 that must be 32k from _SDA_BASE_, not just the symbol. */
7754 summand = INTVAL (XEXP (sum, 1));
7755 if (summand < 0 || summand > g_switch_value)
7756 return 0;
7757
7758 sym_ref = XEXP (sum, 0);
7759 }
7760
7761 return SYMBOL_REF_SMALL_P (sym_ref);
7762 #else
7763 return 0;
7764 #endif
7765 }
7766
7767 /* Return true if either operand is a general purpose register. */
7768
7769 bool
7770 gpr_or_gpr_p (rtx op0, rtx op1)
7771 {
7772 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7773 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7774 }
7775
7776 /* Return true if this is a move direct operation between GPR registers and
7777 floating point/VSX registers. */
7778
7779 bool
7780 direct_move_p (rtx op0, rtx op1)
7781 {
7782 if (!REG_P (op0) || !REG_P (op1))
7783 return false;
7784
7785 if (!TARGET_DIRECT_MOVE)
7786 return false;
7787
7788 int regno0 = REGNO (op0);
7789 int regno1 = REGNO (op1);
7790 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7791 return false;
7792
7793 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7794 return true;
7795
7796 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7797 return true;
7798
7799 return false;
7800 }
7801
7804 7802 /* Return true if the ADDR is an acceptable address for a quad memory
7805 7803 operation of mode MODE (either LQ/STQ for general purpose registers, or
7806 7804 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
7807 7805 address must pass strict register checking for the base register
7808 7806 (i.e. pseudo registers are not accepted as a base). */
7807
7808 bool
7809 quad_address_p (rtx addr, machine_mode mode, bool strict)
7810 {
7811 rtx op0, op1;
7812
7813 if (GET_MODE_SIZE (mode) < 16)
7814 return false;
7815
7816 if (legitimate_indirect_address_p (addr, strict))
7817 return true;
7818
7819 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7820 return false;
7821
7822 /* Is this a valid prefixed address? If the bottom four bits of the offset
7823 are non-zero, we could use a prefixed instruction (which does not have the
7824 DQ-form constraint that the traditional instruction had) instead of
7825 forcing the unaligned offset to a GPR. */
7826 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7827 return true;
7828
7829 if (GET_CODE (addr) != PLUS)
7830 return false;
7831
7832 op0 = XEXP (addr, 0);
7833 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7834 return false;
7835
7836 op1 = XEXP (addr, 1);
7837 if (!CONST_INT_P (op1))
7838 return false;
7839
7840 return quad_address_offset_p (INTVAL (op1));
7841 }
7842
7843 /* Return true if this is a load or store quad operation. This function does
7844 not handle the atomic quad memory instructions. */
7845
7846 bool
7847 quad_load_store_p (rtx op0, rtx op1)
7848 {
7849 bool ret;
7850
7851 if (!TARGET_QUAD_MEMORY)
7852 ret = false;
7853
7854 else if (REG_P (op0) && MEM_P (op1))
7855 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7856 && quad_memory_operand (op1, GET_MODE (op1))
7857 && !reg_overlap_mentioned_p (op0, op1));
7858
7859 else if (MEM_P (op0) && REG_P (op1))
7860 ret = (quad_memory_operand (op0, GET_MODE (op0))
7861 && quad_int_reg_operand (op1, GET_MODE (op1)));
7862
7863 else
7864 ret = false;
7865
7866 if (TARGET_DEBUG_ADDR)
7867 {
7868 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7869 ret ? "true" : "false");
7870 debug_rtx (gen_rtx_SET (op0, op1));
7871 }
7872
7873 return ret;
7874 }
7875
7876 /* Given an address, return a constant offset term if one exists. */
7877
7878 static rtx
7879 address_offset (rtx op)
7880 {
7881 if (GET_CODE (op) == PRE_INC
7882 || GET_CODE (op) == PRE_DEC)
7883 op = XEXP (op, 0);
7884 else if (GET_CODE (op) == PRE_MODIFY
7885 || GET_CODE (op) == LO_SUM)
7886 op = XEXP (op, 1);
7887
7888 if (GET_CODE (op) == CONST)
7889 op = XEXP (op, 0);
7890
7891 if (GET_CODE (op) == PLUS)
7892 op = XEXP (op, 1);
7893
7894 if (CONST_INT_P (op))
7895 return op;
7896
7897 return NULL_RTX;
7898 }
7899
7900 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7901 the mode. If we can't find (or don't know) the alignment of the symbol
7902 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7903 should be pessimistic]. Offsets are validated in the same way as for
7904 reg + offset. */
7905 static bool
7906 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7907 {
7908 /* We should not get here with this. */
7909 gcc_checking_assert (! mode_supports_dq_form (mode));
7910
7911 if (GET_CODE (x) == CONST)
7912 x = XEXP (x, 0);
7913
7914 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7915 x = XVECEXP (x, 0, 0);
7916
7917 rtx sym = NULL_RTX;
7918 unsigned HOST_WIDE_INT offset = 0;
7919
7920 if (GET_CODE (x) == PLUS)
7921 {
7922 sym = XEXP (x, 0);
7923 if (! SYMBOL_REF_P (sym))
7924 return false;
7925 if (!CONST_INT_P (XEXP (x, 1)))
7926 return false;
7927 offset = INTVAL (XEXP (x, 1));
7928 }
7929 else if (SYMBOL_REF_P (x))
7930 sym = x;
7931 else if (CONST_INT_P (x))
7932 offset = INTVAL (x);
7933 else if (GET_CODE (x) == LABEL_REF)
7934 offset = 0; // We assume code labels are Pmode aligned
7935 else
7936 return false; // not sure what we have here.
7937
7938 /* If we don't know the alignment of the thing to which the symbol refers,
7939 we assume optimistically it is "enough".
7940 ??? maybe we should be pessimistic instead. */
7941 unsigned align = 0;
7942
7943 if (sym)
7944 {
7945 tree decl = SYMBOL_REF_DECL (sym);
7946 #if TARGET_MACHO
7947 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7948 /* The decl in an indirection symbol is the original one, which might
7949 be less aligned than the indirection. Our indirections are always
7950 pointer-aligned. */
7951 ;
7952 else
7953 #endif
7954 if (decl && DECL_ALIGN (decl))
7955 align = DECL_ALIGN_UNIT (decl);
7956 }
7957
7958 unsigned int extra = 0;
7959 switch (mode)
7960 {
7961 case E_DFmode:
7962 case E_DDmode:
7963 case E_DImode:
7964 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7965 addressing. */
7966 if (VECTOR_MEM_VSX_P (mode))
7967 return false;
7968
7969 if (!TARGET_POWERPC64)
7970 extra = 4;
7971 else if ((offset & 3) || (align & 3))
7972 return false;
7973 break;
7974
7975 case E_TFmode:
7976 case E_IFmode:
7977 case E_KFmode:
7978 case E_TDmode:
7979 case E_TImode:
7980 case E_PTImode:
7981 extra = 8;
7982 if (!TARGET_POWERPC64)
7983 extra = 12;
7984 else if ((offset & 3) || (align & 3))
7985 return false;
7986 break;
7987
7988 default:
7989 break;
7990 }
7991
7992 /* We only care if the access(es) would cause a change to the high part. */
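/* ((x & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits:
   eg: 0x9000 becomes -0x7000, while 0x7fff stays 0x7fff.  */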
7993 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7994 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7995 }
7996
7997 /* Return true if the MEM operand is a memory operand suitable for use
7998 with a (full width, possibly multiple) gpr load/store. On
7999 powerpc64 this means the offset must be divisible by 4.
8000 Implements 'Y' constraint.
8001
8002 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8003 a constraint function we know the operand has satisfied a suitable
8004 memory predicate.
8005
8006 Offsetting a lo_sum should not be allowed, except where we know by
8007 alignment that a 32k boundary is not crossed. Note that by
8008 "offsetting" here we mean a further offset to access parts of the
8009 MEM. It's fine to have a lo_sum where the inner address is offset
8010 from a sym, since the same sym+offset will appear in the high part
8011 of the address calculation. */
8012
8013 bool
8014 mem_operand_gpr (rtx op, machine_mode mode)
8015 {
8016 unsigned HOST_WIDE_INT offset;
8017 int extra;
8018 rtx addr = XEXP (op, 0);
8019
8020 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8021 if (TARGET_UPDATE
8022 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8023 && mode_supports_pre_incdec_p (mode)
8024 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8025 return true;
8026
8027 /* Allow prefixed instructions if supported. If the bottom two bits of the
8028 offset are non-zero, we could use a prefixed instruction (which does not
8029 have the DS-form constraint that the traditional instruction had) instead
8030 of forcing the unaligned offset to a GPR. */
8031 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8032 return true;
8033
8034 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8035 really OK. Doing this early avoids teaching all the other machinery
8036 about them. */
8037 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8038 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8039
8040 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8041 if (!rs6000_offsettable_memref_p (op, mode, false))
8042 return false;
8043
8044 op = address_offset (addr);
8045 if (op == NULL_RTX)
8046 return true;
8047
8048 offset = INTVAL (op);
8049 if (TARGET_POWERPC64 && (offset & 3) != 0)
8050 return false;
8051
8052 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8053 if (extra < 0)
8054 extra = 0;
8055
8056 if (GET_CODE (addr) == LO_SUM)
8057 /* For lo_sum addresses, we must allow any offset except one that
8058 causes a wrap, so test only the low 16 bits. */
8059 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8060
8061 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8062 }
8063
8064 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8065 enforce an offset divisible by 4 even for 32-bit. */
8066
8067 bool
8068 mem_operand_ds_form (rtx op, machine_mode mode)
8069 {
8070 unsigned HOST_WIDE_INT offset;
8071 int extra;
8072 rtx addr = XEXP (op, 0);
8073
8074 /* Allow prefixed instructions if supported. If the bottom two bits of the
8075 offset are non-zero, we could use a prefixed instruction (which does not
8076 have the DS-form constraint that the traditional instruction had) instead
8077 of forcing the unaligned offset to a GPR. */
8078 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8079 return true;
8080
8081 if (!offsettable_address_p (false, mode, addr))
8082 return false;
8083
8084 op = address_offset (addr);
8085 if (op == NULL_RTX)
8086 return true;
8087
8088 offset = INTVAL (op);
8089 if ((offset & 3) != 0)
8090 return false;
8091
8092 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8093 if (extra < 0)
8094 extra = 0;
8095
8096 if (GET_CODE (addr) == LO_SUM)
8097 /* For lo_sum addresses, we must allow any offset except one that
8098 causes a wrap, so test only the low 16 bits. */
8099 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8100
8101 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8102 }
8103 \f
8104 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8105
8106 static bool
8107 reg_offset_addressing_ok_p (machine_mode mode)
8108 {
8109 switch (mode)
8110 {
8111 case E_V16QImode:
8112 case E_V8HImode:
8113 case E_V4SFmode:
8114 case E_V4SImode:
8115 case E_V2DFmode:
8116 case E_V2DImode:
8117 case E_V1TImode:
8118 case E_TImode:
8119 case E_TFmode:
8120 case E_KFmode:
8121 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8122 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8123 a vector mode, if we want to use the VSX registers to move it around,
8124 we need to restrict ourselves to reg+reg addressing. Similarly for
8125 IEEE 128-bit floating point that is passed in a single vector
8126 register. */
8127 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8128 return mode_supports_dq_form (mode);
8129 break;
8130
8131 /* The vector pair/quad types support offset addressing if the
8132 underlying vectors support offset addressing. */
8133 case E_OOmode:
8134 case E_XOmode:
8135 return TARGET_MMA;
8136
8137 case E_SDmode:
8138 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8139 addressing for the LFIWZX and STFIWX instructions. */
8140 if (TARGET_NO_SDMODE_STACK)
8141 return false;
8142 break;
8143
8144 default:
8145 break;
8146 }
8147
8148 return true;
8149 }
8150
8151 static bool
8152 virtual_stack_registers_memory_p (rtx op)
8153 {
8154 int regnum;
8155
8156 if (REG_P (op))
8157 regnum = REGNO (op);
8158
8159 else if (GET_CODE (op) == PLUS
8160 && REG_P (XEXP (op, 0))
8161 && CONST_INT_P (XEXP (op, 1)))
8162 regnum = REGNO (XEXP (op, 0));
8163
8164 else
8165 return false;
8166
8167 return (regnum >= FIRST_VIRTUAL_REGISTER
8168 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8169 }
8170
8171 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8172 is known to not straddle a 32k boundary. This function is used
8173 to determine whether -mcmodel=medium code can use TOC pointer
8174 relative addressing for OP. This means the alignment of the TOC
8175 pointer must also be taken into account, and unfortunately that is
8176 only 8 bytes. */
8177
8178 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8179 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8180 #endif
8181
8182 static bool
8183 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8184 machine_mode mode)
8185 {
8186 tree decl;
8187 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8188
8189 if (!SYMBOL_REF_P (op))
8190 return false;
8191
8192 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8193 SYMBOL_REF. */
8194 if (mode_supports_dq_form (mode))
8195 return false;
8196
8197 dsize = GET_MODE_SIZE (mode);
8198 decl = SYMBOL_REF_DECL (op);
8199 if (!decl)
8200 {
8201 if (dsize == 0)
8202 return false;
8203
8204 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8205 replacing memory addresses with an anchor plus offset. We
8206 could find the decl by rummaging around in the block->objects
8207 VEC for the given offset but that seems like too much work. */
8208 dalign = BITS_PER_UNIT;
8209 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8210 && SYMBOL_REF_ANCHOR_P (op)
8211 && SYMBOL_REF_BLOCK (op) != NULL)
8212 {
8213 struct object_block *block = SYMBOL_REF_BLOCK (op);
8214
8215 dalign = block->alignment;
8216 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8217 }
8218 else if (CONSTANT_POOL_ADDRESS_P (op))
8219 {
8222 8220 /* It would be nice to have get_pool_align()... */
8221 machine_mode cmode = get_pool_mode (op);
8222
8223 dalign = GET_MODE_ALIGNMENT (cmode);
8224 }
8225 }
8226 else if (DECL_P (decl))
8227 {
8228 dalign = DECL_ALIGN (decl);
8229
8230 if (dsize == 0)
8231 {
8232 /* Allow BLKmode when the entire object is known to not
8233 cross a 32k boundary. */
8234 if (!DECL_SIZE_UNIT (decl))
8235 return false;
8236
8237 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8238 return false;
8239
8240 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8241 if (dsize > 32768)
8242 return false;
8243
8244 dalign /= BITS_PER_UNIT;
8245 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8246 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8247 return dalign >= dsize;
8248 }
8249 }
8250 else
8251 gcc_unreachable ();
8252
8253 /* Find how many bits of the alignment we know for this access. */
8254 dalign /= BITS_PER_UNIT;
8255 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8256 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8257 mask = dalign - 1;
8258 lsb = offset & -offset;
8259 mask &= lsb - 1;
8260 dalign = mask + 1;
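/* eg: with dalign = 8 and offset = 20: mask = 7, lsb = 4,
   mask &= 3, so the access is only known to be 4-byte aligned.  */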
8261
8262 return dalign >= dsize;
8263 }
8264
8265 static bool
8266 constant_pool_expr_p (rtx op)
8267 {
8268 rtx base, offset;
8269
8270 split_const (op, &base, &offset);
8271 return (SYMBOL_REF_P (base)
8272 && CONSTANT_POOL_ADDRESS_P (base)
8273 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8274 }
8275
8276 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8277 use that as the register to put the HIGH value into if register allocation
8278 is already done. */
8279
8280 rtx
8281 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8282 {
8283 rtx tocrel, tocreg, hi;
8284
8285 gcc_assert (TARGET_TOC);
8286
8287 if (TARGET_DEBUG_ADDR)
8288 {
8289 if (SYMBOL_REF_P (symbol))
8290 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8291 XSTR (symbol, 0));
8292 else
8293 {
8294 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8295 GET_RTX_NAME (GET_CODE (symbol)));
8296 debug_rtx (symbol);
8297 }
8298 }
8299
8300 if (!can_create_pseudo_p ())
8301 df_set_regs_ever_live (TOC_REGISTER, true);
8302
8303 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8304 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8305 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8306 return tocrel;
8307
8308 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8309 if (largetoc_reg != NULL)
8310 {
8311 emit_move_insn (largetoc_reg, hi);
8312 hi = largetoc_reg;
8313 }
8314 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8315 }
8316
8317 /* These are only used to pass through from print_operand/print_operand_address
8318 to rs6000_output_addr_const_extra over the intervening function
8319 output_addr_const which is not target code. */
8320 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8321
8322 /* Return true if OP is a toc pointer relative address (the output
8323 of create_TOC_reference). If STRICT, do not match non-split
8324 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8325 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8326 TOCREL_OFFSET_RET respectively. */
8327
8328 bool
8329 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8330 const_rtx *tocrel_offset_ret)
8331 {
8332 if (!TARGET_TOC)
8333 return false;
8334
8335 if (TARGET_CMODEL != CMODEL_SMALL)
8336 {
8339 8337 /* When strict, ensure we have everything tidy. */
8338 if (strict
8339 && !(GET_CODE (op) == LO_SUM
8340 && REG_P (XEXP (op, 0))
8341 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8342 return false;
8343
8344 /* When not strict, allow non-split TOC addresses and also allow
8345 (lo_sum (high ..)) TOC addresses created during reload. */
8346 if (GET_CODE (op) == LO_SUM)
8347 op = XEXP (op, 1);
8348 }
8349
8350 const_rtx tocrel_base = op;
8351 const_rtx tocrel_offset = const0_rtx;
8352
8353 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8354 {
8355 tocrel_base = XEXP (op, 0);
8356 tocrel_offset = XEXP (op, 1);
8357 }
8358
8359 if (tocrel_base_ret)
8360 *tocrel_base_ret = tocrel_base;
8361 if (tocrel_offset_ret)
8362 *tocrel_offset_ret = tocrel_offset;
8363
8364 return (GET_CODE (tocrel_base) == UNSPEC
8365 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8366 && REG_P (XVECEXP (tocrel_base, 0, 1))
8367 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8368 }
8369
8370 /* Return true if X is a constant pool address, and also for cmodel=medium
8371 if X is a toc-relative address known to be offsettable within MODE. */
8372
8373 bool
8374 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8375 bool strict)
8376 {
8377 const_rtx tocrel_base, tocrel_offset;
8378 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8379 && (TARGET_CMODEL != CMODEL_MEDIUM
8380 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8381 || mode == QImode
8382 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8383 INTVAL (tocrel_offset), mode)));
8384 }
8385
8386 static bool
8387 legitimate_small_data_p (machine_mode mode, rtx x)
8388 {
8389 return (DEFAULT_ABI == ABI_V4
8390 && !flag_pic && !TARGET_TOC
8391 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8392 && small_data_operand (x, mode));
8393 }
8394
8395 bool
8396 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8397 bool strict, bool worst_case)
8398 {
8399 unsigned HOST_WIDE_INT offset;
8400 unsigned int extra;
8401
8402 if (GET_CODE (x) != PLUS)
8403 return false;
8404 if (!REG_P (XEXP (x, 0)))
8405 return false;
8406 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8407 return false;
8408 if (mode_supports_dq_form (mode))
8409 return quad_address_p (x, mode, strict);
8410 if (!reg_offset_addressing_ok_p (mode))
8411 return virtual_stack_registers_memory_p (x);
8412 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8413 return true;
8414 if (!CONST_INT_P (XEXP (x, 1)))
8415 return false;
8416
8417 offset = INTVAL (XEXP (x, 1));
8418 extra = 0;
8419 switch (mode)
8420 {
8421 case E_DFmode:
8422 case E_DDmode:
8423 case E_DImode:
8424 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8425 addressing. */
8426 if (VECTOR_MEM_VSX_P (mode))
8427 return false;
8428
8429 if (!worst_case)
8430 break;
8431 if (!TARGET_POWERPC64)
8432 extra = 4;
8433 else if (offset & 3)
8434 return false;
8435 break;
8436
8437 case E_TFmode:
8438 case E_IFmode:
8439 case E_KFmode:
8440 case E_TDmode:
8441 case E_TImode:
8442 case E_PTImode:
8443 extra = 8;
8444 if (!worst_case)
8445 break;
8446 if (!TARGET_POWERPC64)
8447 extra = 12;
8448 else if (offset & 3)
8449 return false;
8450 break;
8451
8452 default:
8453 break;
8454 }
8455
8456 if (TARGET_PREFIXED)
8457 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8458 else
8459 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8460 }
8461
8462 bool
8463 legitimate_indexed_address_p (rtx x, int strict)
8464 {
8465 rtx op0, op1;
8466
8467 if (GET_CODE (x) != PLUS)
8468 return false;
8469
8470 op0 = XEXP (x, 0);
8471 op1 = XEXP (x, 1);
8472
8473 return (REG_P (op0) && REG_P (op1)
8474 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8475 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8476 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8477 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8478 }
8479
8480 bool
8481 avoiding_indexed_address_p (machine_mode mode)
8482 {
8483 unsigned int msize = GET_MODE_SIZE (mode);
8484
8485 /* Avoid indexed addressing for modes that have non-indexed load/store
8486 instruction forms. On power10, vector pairs have an indexed
8487 form, but vector quads don't. */
8488 if (msize > 16)
8489 return msize != 32;
8490
8491 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8492 }
8493
8494 bool
8495 legitimate_indirect_address_p (rtx x, int strict)
8496 {
8497 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8498 }
8499
8500 bool
8501 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8502 {
8503 if (!TARGET_MACHO || !flag_pic
8504 || mode != SImode || !MEM_P (x))
8505 return false;
8506 x = XEXP (x, 0);
8507
8508 if (GET_CODE (x) != LO_SUM)
8509 return false;
8510 if (!REG_P (XEXP (x, 0)))
8511 return false;
8512 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8513 return false;
8514 x = XEXP (x, 1);
8515
8516 return CONSTANT_P (x);
8517 }
8518
8519 static bool
8520 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8521 {
8522 if (GET_CODE (x) != LO_SUM)
8523 return false;
8524 if (!REG_P (XEXP (x, 0)))
8525 return false;
8526 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8527 return false;
8528 /* quad word addresses are restricted, and we can't use LO_SUM. */
8529 if (mode_supports_dq_form (mode))
8530 return false;
8531 x = XEXP (x, 1);
8532
8533 if (TARGET_ELF || TARGET_MACHO)
8534 {
8535 bool large_toc_ok;
8536
8537 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8538 return false;
8539 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8540 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8541 recognizes some LO_SUM addresses as valid although this
8542 function says opposite. In most cases, LRA through different
8543 transformations can generate correct code for address reloads.
8544 It cannot manage only some LO_SUM cases. So we need to add
8545 code here saying that some addresses are still valid. */
8546 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8547 && small_toc_ref (x, VOIDmode));
8548 if (TARGET_TOC && ! large_toc_ok)
8549 return false;
8550 if (GET_MODE_NUNITS (mode) != 1)
8551 return false;
8552 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8553 && !(/* ??? Assume floating point reg based on mode? */
8554 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8555 return false;
8556
8557 return CONSTANT_P (x) || large_toc_ok;
8558 }
8559
8560 return false;
8561 }
8562
8563
8564 /* Try machine-dependent ways of modifying an illegitimate address
8565 to be legitimate. If we find one, return the new, valid address.
8566 This is used from only one place: `memory_address' in explow.c.
8567
8568 OLDX is the address as it was before break_out_memory_refs was
8569 called. In some cases it is useful to look at this to decide what
8570 needs to be done.
8571
8572 It is always safe for this function to do nothing. It exists to
8573 recognize opportunities to optimize the output.
8574
8575 On RS/6000, first check for the sum of a register with a constant
8576 integer that is out of range. If so, generate code to add the
8577 constant with the low-order 16 bits masked to the register and force
8578 this result into another register (this can be done with `cau').
8579 Then generate an address of REG+(CONST&0xffff), allowing for the
8580 possibility of bit 16 being a one.
8581
8584 8582 Then check for the sum of a register and something not constant; try to
8585 8583 load the non-constant part into a register and return the sum. */
8584
8585 static rtx
8586 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8587 machine_mode mode)
8588 {
8589 unsigned int extra;
8590
8591 if (!reg_offset_addressing_ok_p (mode)
8592 || mode_supports_dq_form (mode))
8593 {
8594 if (virtual_stack_registers_memory_p (x))
8595 return x;
8596
8597 /* In theory we should not be seeing addresses of the form reg+0,
8598 but just in case it is generated, optimize it away. */
8599 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8600 return force_reg (Pmode, XEXP (x, 0));
8601
8602 /* For TImode with load/store quad, restrict addresses to just a single
8603 pointer, so it works with both GPRs and VSX registers. */
8604 /* Make sure both operands are registers. */
8605 else if (GET_CODE (x) == PLUS
8606 && (mode != TImode || !TARGET_VSX))
8607 return gen_rtx_PLUS (Pmode,
8608 force_reg (Pmode, XEXP (x, 0)),
8609 force_reg (Pmode, XEXP (x, 1)));
8610 else
8611 return force_reg (Pmode, x);
8612 }
8613 if (SYMBOL_REF_P (x))
8614 {
8615 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8616 if (model != 0)
8617 return rs6000_legitimize_tls_address (x, model);
8618 }
8619
8620 extra = 0;
8621 switch (mode)
8622 {
8623 case E_TFmode:
8624 case E_TDmode:
8625 case E_TImode:
8626 case E_PTImode:
8627 case E_IFmode:
8628 case E_KFmode:
8629 /* As in legitimate_offset_address_p we do not assume
8630 worst-case. The mode here is just a hint as to the registers
8631 used. A TImode is usually in gprs, but may actually be in
8632 fprs. Leave worst-case scenario for reload to handle via
8633 insn constraints. PTImode is only GPRs. */
8634 extra = 8;
8635 break;
8636 default:
8637 break;
8638 }
8639
8640 if (GET_CODE (x) == PLUS
8641 && REG_P (XEXP (x, 0))
8642 && CONST_INT_P (XEXP (x, 1))
8643 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8644 >= 0x10000 - extra))
8645 {
8646 HOST_WIDE_INT high_int, low_int;
8647 rtx sum;
8648 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8649 if (low_int >= 0x8000 - extra)
8650 low_int = 0;
8651 high_int = INTVAL (XEXP (x, 1)) - low_int;
8652 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8653 gen_int_mode (high_int, Pmode)), 0);
8654 return plus_constant (Pmode, sum, low_int);
8655 }
8656 else if (GET_CODE (x) == PLUS
8657 && REG_P (XEXP (x, 0))
8658 && !CONST_INT_P (XEXP (x, 1))
8659 && GET_MODE_NUNITS (mode) == 1
8660 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8661 || (/* ??? Assume floating point reg based on mode? */
8662 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8663 && !avoiding_indexed_address_p (mode))
8664 {
8665 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8666 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8667 }
8668 else if ((TARGET_ELF
8669 #if TARGET_MACHO
8670 || !MACHO_DYNAMIC_NO_PIC_P
8671 #endif
8672 )
8673 && TARGET_32BIT
8674 && TARGET_NO_TOC_OR_PCREL
8675 && !flag_pic
8676 && !CONST_INT_P (x)
8677 && !CONST_WIDE_INT_P (x)
8678 && !CONST_DOUBLE_P (x)
8679 && CONSTANT_P (x)
8680 && GET_MODE_NUNITS (mode) == 1
8681 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8682 || (/* ??? Assume floating point reg based on mode? */
8683 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8684 {
8685 rtx reg = gen_reg_rtx (Pmode);
8686 if (TARGET_ELF)
8687 emit_insn (gen_elf_high (reg, x));
8688 else
8689 emit_insn (gen_macho_high (Pmode, reg, x));
8690 return gen_rtx_LO_SUM (Pmode, reg, x);
8691 }
8692 else if (TARGET_TOC
8693 && SYMBOL_REF_P (x)
8694 && constant_pool_expr_p (x)
8695 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8696 return create_TOC_reference (x, NULL_RTX);
8697 else
8698 return x;
8699 }
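/* For example, legitimizing (plus r5 0x12348000) splits the offset into
   high_int = 0x12350000 and low_int = -0x8000 above, so we emit roughly
   "addis r0,r5,0x1235" and return (plus r0 -0x8000), which is a valid
   D-form address.  (Register numbers here are illustrative.)  */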
8700
8701 /* Debug version of rs6000_legitimize_address. */
8702 static rtx
8703 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8704 {
8705 rtx ret;
8706 rtx_insn *insns;
8707
8708 start_sequence ();
8709 ret = rs6000_legitimize_address (x, oldx, mode);
8710 insns = get_insns ();
8711 end_sequence ();
8712
8713 if (ret != x)
8714 {
8715 fprintf (stderr,
8716 "\nrs6000_legitimize_address: mode %s, old code %s, "
8717 "new code %s, modified\n",
8718 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8719 GET_RTX_NAME (GET_CODE (ret)));
8720
8721 fprintf (stderr, "Original address:\n");
8722 debug_rtx (x);
8723
8724 fprintf (stderr, "oldx:\n");
8725 debug_rtx (oldx);
8726
8727 fprintf (stderr, "New address:\n");
8728 debug_rtx (ret);
8729
8730 if (insns)
8731 {
8732 fprintf (stderr, "Insns added:\n");
8733 debug_rtx_list (insns, 20);
8734 }
8735 }
8736 else
8737 {
8738 fprintf (stderr,
8739 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8740 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8741
8742 debug_rtx (x);
8743 }
8744
8745 if (insns)
8746 emit_insn (insns);
8747
8748 return ret;
8749 }
8750
8751 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8752 We need to emit DTP-relative relocations. */
8753
8754 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8755 static void
8756 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8757 {
8758 switch (size)
8759 {
8760 case 4:
8761 fputs ("\t.long\t", file);
8762 break;
8763 case 8:
8764 fputs (DOUBLE_INT_ASM_OP, file);
8765 break;
8766 default:
8767 gcc_unreachable ();
8768 }
8769 output_addr_const (file, x);
8770 if (TARGET_ELF)
8771 fputs ("@dtprel+0x8000", file);
8772 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8773 {
8774 switch (SYMBOL_REF_TLS_MODEL (x))
8775 {
8776 case 0:
8777 break;
8778 case TLS_MODEL_LOCAL_EXEC:
8779 fputs ("@le", file);
8780 break;
8781 case TLS_MODEL_INITIAL_EXEC:
8782 fputs ("@ie", file);
8783 break;
8784 case TLS_MODEL_GLOBAL_DYNAMIC:
8785 case TLS_MODEL_LOCAL_DYNAMIC:
8786 fputs ("@m", file);
8787 break;
8788 default:
8789 gcc_unreachable ();
8790 }
8791 }
8792 }
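/* For instance, with TARGET_ELF and SIZE == 4 this emits something like
   "\t.long\tfoo@dtprel+0x8000" for a symbol foo, which the assembler
   turns into the DTP-relative relocation dwarf2out.c asked for.  */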
8793
8794 /* Return true if X is a symbol that refers to real (rather than emulated)
8795 TLS. */
8796
8797 static bool
8798 rs6000_real_tls_symbol_ref_p (rtx x)
8799 {
8800 return (SYMBOL_REF_P (x)
8801 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8802 }
8803
8804 /* In the name of slightly smaller debug output, and to cater to
8805 general assembler lossage, recognize various UNSPEC sequences
8806 and turn them back into a direct symbol reference. */
8807
8808 static rtx
8809 rs6000_delegitimize_address (rtx orig_x)
8810 {
8811 rtx x, y, offset;
8812
8813 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8814 orig_x = XVECEXP (orig_x, 0, 0);
8815
8816 orig_x = delegitimize_mem_from_attrs (orig_x);
8817
8818 x = orig_x;
8819 if (MEM_P (x))
8820 x = XEXP (x, 0);
8821
8822 y = x;
8823 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8824 y = XEXP (y, 1);
8825
8826 offset = NULL_RTX;
8827 if (GET_CODE (y) == PLUS
8828 && GET_MODE (y) == Pmode
8829 && CONST_INT_P (XEXP (y, 1)))
8830 {
8831 offset = XEXP (y, 1);
8832 y = XEXP (y, 0);
8833 }
8834
8835 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8836 {
8837 y = XVECEXP (y, 0, 0);
8838
8839 #ifdef HAVE_AS_TLS
8840 /* Do not associate thread-local symbols with the original
8841 constant pool symbol. */
8842 if (TARGET_XCOFF
8843 && SYMBOL_REF_P (y)
8844 && CONSTANT_POOL_ADDRESS_P (y)
8845 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8846 return orig_x;
8847 #endif
8848
8849 if (offset != NULL_RTX)
8850 y = gen_rtx_PLUS (Pmode, y, offset);
8851 if (!MEM_P (orig_x))
8852 return y;
8853 else
8854 return replace_equiv_address_nv (orig_x, y);
8855 }
8856
8857 if (TARGET_MACHO
8858 && GET_CODE (orig_x) == LO_SUM
8859 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8860 {
8861 y = XEXP (XEXP (orig_x, 1), 0);
8862 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8863 return XVECEXP (y, 0, 0);
8864 }
8865
8866 return orig_x;
8867 }
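/* For example, a TOC-relative reference such as
   (unspec [(symbol_ref "x") (reg 2)] UNSPEC_TOCREL) is turned back into
   plain (symbol_ref "x") here (the exact RTL shape is illustrative).  */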
8868
8869 /* Return true if X shouldn't be emitted into the debug info.
8870 The linker doesn't like .toc section references from
8871 .debug_* sections, so reject .toc section symbols. */
8872
8873 static bool
8874 rs6000_const_not_ok_for_debug_p (rtx x)
8875 {
8876 if (GET_CODE (x) == UNSPEC)
8877 return true;
8878 if (SYMBOL_REF_P (x)
8879 && CONSTANT_POOL_ADDRESS_P (x))
8880 {
8881 rtx c = get_pool_constant (x);
8882 machine_mode cmode = get_pool_mode (x);
8883 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8884 return true;
8885 }
8886
8887 return false;
8888 }
8889
8890 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8891
8892 static bool
8893 rs6000_legitimate_combined_insn (rtx_insn *insn)
8894 {
8895 int icode = INSN_CODE (insn);
8896
8897 /* Reject creating doloop insns. Combine should not be allowed
8898 to create these for a number of reasons:
8899 1) In a nested loop, if combine creates one of these in an
8900 outer loop and the register allocator happens to allocate ctr
8901 to the outer loop insn, then the inner loop can't use ctr.
8902 Inner loops ought to be more highly optimized.
8903 2) Combine often wants to create one of these from what was
8904 originally a three insn sequence, first combining the three
8905 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8906 allocated ctr, the splitter takes us back to the three insn
8907 sequence. It's better to stop combine at the two insn
8908 sequence.
8909 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8910 insns, the register allocator sometimes uses floating point
8911 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8912 jump insn and output reloads are not implemented for jumps,
8913 the ctrsi/ctrdi splitters need to handle all possible cases.
8914 That's a pain, and it gets to be seriously difficult when a
8915 splitter that runs after reload needs memory to transfer from
8916 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8917 for the difficult case. It's better to not create problems
8918 in the first place. */
8919 if (icode != CODE_FOR_nothing
8920 && (icode == CODE_FOR_bdz_si
8921 || icode == CODE_FOR_bdz_di
8922 || icode == CODE_FOR_bdnz_si
8923 || icode == CODE_FOR_bdnz_di
8924 || icode == CODE_FOR_bdztf_si
8925 || icode == CODE_FOR_bdztf_di
8926 || icode == CODE_FOR_bdnztf_si
8927 || icode == CODE_FOR_bdnztf_di))
8928 return false;
8929
8930 return true;
8931 }
8932
8933 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8934
8935 static GTY(()) rtx rs6000_tls_symbol;
8936 static rtx
8937 rs6000_tls_get_addr (void)
8938 {
8939 if (!rs6000_tls_symbol)
8940 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8941
8942 return rs6000_tls_symbol;
8943 }
8944
8945 /* Construct the SYMBOL_REF for TLS GOT references. */
8946
8947 static GTY(()) rtx rs6000_got_symbol;
8948 rtx
8949 rs6000_got_sym (void)
8950 {
8951 if (!rs6000_got_symbol)
8952 {
8953 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8954 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8955 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8956 }
8957
8958 return rs6000_got_symbol;
8959 }
8960
8961 /* AIX Thread-Local Address support. */
8962
8963 static rtx
8964 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8965 {
8966 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8967 const char *name;
8968 char *tlsname;
8969
8970 name = XSTR (addr, 0);
8971 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8972 or the symbol will be in TLS private data section. */
8973 if (name[strlen (name) - 1] != ']'
8974 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8975 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8976 {
8977 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8978 strcpy (tlsname, name);
8979 strcat (tlsname,
8980 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8981 tlsaddr = copy_rtx (addr);
8982 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8983 }
8984 else
8985 tlsaddr = addr;
8986
8987 /* Place addr into TOC constant pool. */
8988 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8989
8990 /* Output the TOC entry and create the MEM referencing the value. */
8991 if (constant_pool_expr_p (XEXP (sym, 0))
8992 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8993 {
8994 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8995 mem = gen_const_mem (Pmode, tocref);
8996 set_mem_alias_set (mem, get_TOC_alias_set ());
8997 }
8998 else
8999 return sym;
9000
9001 /* Use global-dynamic for local-dynamic. */
9002 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9003 || model == TLS_MODEL_LOCAL_DYNAMIC)
9004 {
9005 /* Create new TOC reference for @m symbol. */
9006 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9007 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9008 strcpy (tlsname, "*LCM");
9009 strcat (tlsname, name + 3);
9010 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9011 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9012 tocref = create_TOC_reference (modaddr, NULL_RTX);
9013 rtx modmem = gen_const_mem (Pmode, tocref);
9014 set_mem_alias_set (modmem, get_TOC_alias_set ());
9015
9016 rtx modreg = gen_reg_rtx (Pmode);
9017 emit_insn (gen_rtx_SET (modreg, modmem));
9018
9019 tmpreg = gen_reg_rtx (Pmode);
9020 emit_insn (gen_rtx_SET (tmpreg, mem));
9021
9022 dest = gen_reg_rtx (Pmode);
9023 if (TARGET_32BIT)
9024 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9025 else
9026 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9027 return dest;
9028 }
9029 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9030 else if (TARGET_32BIT)
9031 {
9032 tlsreg = gen_reg_rtx (SImode);
9033 emit_insn (gen_tls_get_tpointer (tlsreg));
9034 }
9035 else
9036 tlsreg = gen_rtx_REG (DImode, 13);
9037
9038 /* Load the TOC value into temporary register. */
9039 tmpreg = gen_reg_rtx (Pmode);
9040 emit_insn (gen_rtx_SET (tmpreg, mem));
9041 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9042 gen_rtx_MINUS (Pmode, addr, tlsreg));
9043
9044 /* Add TOC symbol value to TLS pointer. */
9045 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9046
9047 return dest;
9048 }
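/* Illustrative 64-bit sequence for the exec-model path above: the
   symbol's address is placed in the TOC, so we emit roughly
   "ld r9,LC..0(r2); add r10,r9,r13", where r13 holds the thread pointer.
   (Register numbers and the LC..0 label are illustrative.)  */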
9049
9050 /* Passes the tls arg value for global dynamic and local dynamic
9051 emit_library_call_value in rs6000_legitimize_tls_address to
9052 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9053 marker relocs put on __tls_get_addr calls. */
9054 static rtx global_tlsarg;
9055
9056 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9057 this (thread-local) address. */
9058
9059 static rtx
9060 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9061 {
9062 rtx dest, insn;
9063
9064 if (TARGET_XCOFF)
9065 return rs6000_legitimize_tls_address_aix (addr, model);
9066
9067 dest = gen_reg_rtx (Pmode);
9068 if (model == TLS_MODEL_LOCAL_EXEC
9069 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9070 {
9071 rtx tlsreg;
9072
9073 if (TARGET_64BIT)
9074 {
9075 tlsreg = gen_rtx_REG (Pmode, 13);
9076 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9077 }
9078 else
9079 {
9080 tlsreg = gen_rtx_REG (Pmode, 2);
9081 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9082 }
9083 emit_insn (insn);
9084 }
9085 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9086 {
9087 rtx tlsreg, tmp;
9088
9089 tmp = gen_reg_rtx (Pmode);
9090 if (TARGET_64BIT)
9091 {
9092 tlsreg = gen_rtx_REG (Pmode, 13);
9093 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9094 }
9095 else
9096 {
9097 tlsreg = gen_rtx_REG (Pmode, 2);
9098 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9099 }
9100 emit_insn (insn);
9101 if (TARGET_64BIT)
9102 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9103 else
9104 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9105 emit_insn (insn);
9106 }
9107 else
9108 {
9109 rtx got, tga, tmp1, tmp2;
9110
9111 /* We currently use relocations like @got@tlsgd for tls, which
9112 means the linker will handle allocation of tls entries, placing
9113 them in the .got section. So use a pointer to the .got section,
9114 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9115 or to secondary GOT sections used by 32-bit -fPIC. */
9116 if (rs6000_pcrel_p ())
9117 got = const0_rtx;
9118 else if (TARGET_64BIT)
9119 got = gen_rtx_REG (Pmode, 2);
9120 else
9121 {
9122 if (flag_pic == 1)
9123 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9124 else
9125 {
9126 rtx gsym = rs6000_got_sym ();
9127 got = gen_reg_rtx (Pmode);
9128 if (flag_pic == 0)
9129 rs6000_emit_move (got, gsym, Pmode);
9130 else
9131 {
9132 rtx mem, lab;
9133
9134 tmp1 = gen_reg_rtx (Pmode);
9135 tmp2 = gen_reg_rtx (Pmode);
9136 mem = gen_const_mem (Pmode, tmp1);
9137 lab = gen_label_rtx ();
9138 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9139 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9140 if (TARGET_LINK_STACK)
9141 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9142 emit_move_insn (tmp2, mem);
9143 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9144 set_unique_reg_note (last, REG_EQUAL, gsym);
9145 }
9146 }
9147 }
9148
9149 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9150 {
9151 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9152 UNSPEC_TLSGD);
9153 tga = rs6000_tls_get_addr ();
9154 rtx argreg = gen_rtx_REG (Pmode, 3);
9155 emit_insn (gen_rtx_SET (argreg, arg));
9156 global_tlsarg = arg;
9157 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9158 global_tlsarg = NULL_RTX;
9159
9160 /* Make a note so that the result of this call can be CSEd. */
9161 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9162 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9163 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9164 }
9165 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9166 {
9167 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9168 tga = rs6000_tls_get_addr ();
9169 tmp1 = gen_reg_rtx (Pmode);
9170 rtx argreg = gen_rtx_REG (Pmode, 3);
9171 emit_insn (gen_rtx_SET (argreg, arg));
9172 global_tlsarg = arg;
9173 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9174 global_tlsarg = NULL_RTX;
9175
9176 /* Make a note so that the result of this call can be CSEd. */
9177 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9178 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9179 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9180
9181 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9182 {
9183 if (TARGET_64BIT)
9184 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9185 else
9186 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9187 }
9188 else if (rs6000_tls_size == 32)
9189 {
9190 tmp2 = gen_reg_rtx (Pmode);
9191 if (TARGET_64BIT)
9192 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9193 else
9194 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9195 emit_insn (insn);
9196 if (TARGET_64BIT)
9197 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9198 else
9199 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9200 }
9201 else
9202 {
9203 tmp2 = gen_reg_rtx (Pmode);
9204 if (TARGET_64BIT)
9205 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9206 else
9207 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9208 emit_insn (insn);
9209 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9210 }
9211 emit_insn (insn);
9212 }
9213 else
9214 {
9215 /* IE, or 64-bit offset LE. */
9216 tmp2 = gen_reg_rtx (Pmode);
9217 if (TARGET_64BIT)
9218 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9219 else
9220 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9221 emit_insn (insn);
9222 if (rs6000_pcrel_p ())
9223 {
9224 if (TARGET_64BIT)
9225 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9226 else
9227 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9228 }
9229 else if (TARGET_64BIT)
9230 insn = gen_tls_tls_64 (dest, tmp2, addr);
9231 else
9232 insn = gen_tls_tls_32 (dest, tmp2, addr);
9233 emit_insn (insn);
9234 }
9235 }
9236
9237 return dest;
9238 }
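/* As an illustration of the ELF paths above: local-exec with the default
   16-bit offset emits roughly "addi rD,r13,sym@tprel" (64-bit), the
   32-bit-offset form emits "addis rT,r13,sym@tprel@ha" followed by
   "addi rD,rT,sym@tprel@l", and initial-exec loads the offset from the
   GOT before adding the thread pointer.  (Register names are
   illustrative.)  */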
9239
9240 /* Only create the global variable for the stack protect guard if we are using
9241 the global flavor of that guard. */
9242 static tree
9243 rs6000_init_stack_protect_guard (void)
9244 {
9245 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9246 return default_stack_protect_guard ();
9247
9248 return NULL_TREE;
9249 }
9250
9251 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9252
9253 static bool
9254 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9255 {
9256 if (GET_CODE (x) == HIGH
9257 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9258 return true;
9259
9260 /* A TLS symbol in the TOC cannot contain a sum. */
9261 if (GET_CODE (x) == CONST
9262 && GET_CODE (XEXP (x, 0)) == PLUS
9263 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9264 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9265 return true;
9266
9267 /* Do not place an ELF TLS symbol in the constant pool. */
9268 return TARGET_ELF && tls_referenced_p (x);
9269 }
9270
9271 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9272 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9273 can be addressed relative to the toc pointer. */
9274
9275 static bool
9276 use_toc_relative_ref (rtx sym, machine_mode mode)
9277 {
9278 return ((constant_pool_expr_p (sym)
9279 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9280 get_pool_mode (sym)))
9281 || (TARGET_CMODEL == CMODEL_MEDIUM
9282 && SYMBOL_REF_LOCAL_P (sym)
9283 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9284 }
9285
9286 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9287 that is a valid memory address for an instruction.
9288 The MODE argument is the machine mode for the MEM expression
9289 that wants to use this address.
9290
9291 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9292 refers to a constant pool entry of an address (or the sum of it
9293 plus a constant), a short (16-bit signed) constant plus a register,
9294 the sum of two registers, or a register indirect, possibly with an
9295 auto-increment. For DFmode, DDmode and DImode with a constant plus
9296 register, we must ensure that both words are addressable, or that
9297 PowerPC64 is used with a word-aligned offset.
9298
9299 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9300 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9301 because adjacent memory cells are accessed by adding word-sized offsets
9302 during assembly output. */
9303 static bool
9304 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9305 {
9306 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9307 bool quad_offset_p = mode_supports_dq_form (mode);
9308
9309 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9310 return 0;
9311
9312 /* Handle unaligned altivec lvx/stvx type addresses. */
9313 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9314 && GET_CODE (x) == AND
9315 && CONST_INT_P (XEXP (x, 1))
9316 && INTVAL (XEXP (x, 1)) == -16)
9317 {
9318 x = XEXP (x, 0);
9319 return (legitimate_indirect_address_p (x, reg_ok_strict)
9320 || legitimate_indexed_address_p (x, reg_ok_strict)
9321 || virtual_stack_registers_memory_p (x));
9322 }
9323
9324 if (legitimate_indirect_address_p (x, reg_ok_strict))
9325 return 1;
9326 if (TARGET_UPDATE
9327 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9328 && mode_supports_pre_incdec_p (mode)
9329 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9330 return 1;
9331
9332 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9333 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9334 return 1;
9335
9336 /* Handle restricted vector d-form offsets in ISA 3.0. */
9337 if (quad_offset_p)
9338 {
9339 if (quad_address_p (x, mode, reg_ok_strict))
9340 return 1;
9341 }
9342 else if (virtual_stack_registers_memory_p (x))
9343 return 1;
9344
9345 else if (reg_offset_p)
9346 {
9347 if (legitimate_small_data_p (mode, x))
9348 return 1;
9349 if (legitimate_constant_pool_address_p (x, mode,
9350 reg_ok_strict || lra_in_progress))
9351 return 1;
9352 }
9353
9354 /* For TImode, if we have TImode in VSX registers, only allow register
9355 indirect addresses. This will allow the values to go in either GPRs
9356 or VSX registers without reloading. The vector types would tend to
9357 go into VSX registers, so we allow REG+REG, while TImode seems
9358 somewhat split, in that some uses are GPR based, and some VSX based. */
9359 /* FIXME: We could loosen this by changing the following to
9360 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9361 but currently we cannot allow REG+REG addressing for TImode. See
9362 PR72827 for complete details on how this ends up hoodwinking DSE. */
9363 if (mode == TImode && TARGET_VSX)
9364 return 0;
9365 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9366 if (! reg_ok_strict
9367 && reg_offset_p
9368 && GET_CODE (x) == PLUS
9369 && REG_P (XEXP (x, 0))
9370 && (XEXP (x, 0) == virtual_stack_vars_rtx
9371 || XEXP (x, 0) == arg_pointer_rtx)
9372 && CONST_INT_P (XEXP (x, 1)))
9373 return 1;
9374 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9375 return 1;
9376 if (!FLOAT128_2REG_P (mode)
9377 && (TARGET_HARD_FLOAT
9378 || TARGET_POWERPC64
9379 || (mode != DFmode && mode != DDmode))
9380 && (TARGET_POWERPC64 || mode != DImode)
9381 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9382 && mode != PTImode
9383 && !avoiding_indexed_address_p (mode)
9384 && legitimate_indexed_address_p (x, reg_ok_strict))
9385 return 1;
9386 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9387 && mode_supports_pre_modify_p (mode)
9388 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9389 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9390 reg_ok_strict, false)
9391 || (!avoiding_indexed_address_p (mode)
9392 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9393 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9394 {
9395 /* There is no prefixed version of the load/store with update. */
9396 rtx addr = XEXP (x, 1);
9397 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9398 }
9399 if (reg_offset_p && !quad_offset_p
9400 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9401 return 1;
9402 return 0;
9403 }
9404
9405 /* Debug version of rs6000_legitimate_address_p. */
9406 static bool
9407 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9408 bool reg_ok_strict)
9409 {
9410 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9411 fprintf (stderr,
9412 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9413 "strict = %d, reload = %s, code = %s\n",
9414 ret ? "true" : "false",
9415 GET_MODE_NAME (mode),
9416 reg_ok_strict,
9417 (reload_completed ? "after" : "before"),
9418 GET_RTX_NAME (GET_CODE (x)));
9419 debug_rtx (x);
9420
9421 return ret;
9422 }
9423
9424 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9425
9426 static bool
9427 rs6000_mode_dependent_address_p (const_rtx addr,
9428 addr_space_t as ATTRIBUTE_UNUSED)
9429 {
9430 return rs6000_mode_dependent_address_ptr (addr);
9431 }
9432
9433 /* Return true if ADDR (a legitimate address expression)
9434 has an effect that depends on the machine mode it is used for.
9435
9436 On the RS/6000 this is true of all integral offsets (since AltiVec
9437 and VSX modes don't allow them) and of pre-increment or decrement addresses.
9438
9439 ??? Except that due to conceptual problems in offsettable_address_p
9440 we can't really report the problems of integral offsets. So leave
9441 this assuming that the adjustable offset must be valid for the
9442 sub-words of a TFmode operand, which is what we had before. */
9443
9444 static bool
9445 rs6000_mode_dependent_address (const_rtx addr)
9446 {
9447 switch (GET_CODE (addr))
9448 {
9449 case PLUS:
9450 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9451 is considered a legitimate address before reload, so there
9452 are no offset restrictions in that case. Note that this
9453 condition is safe in strict mode because any address involving
9454 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9455 been rejected as illegitimate. */
9456 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9457 && XEXP (addr, 0) != arg_pointer_rtx
9458 && CONST_INT_P (XEXP (addr, 1)))
9459 {
9460 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9461 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9462 if (TARGET_PREFIXED)
9463 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9464 else
9465 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9466 }
9467 break;
9468
9469 case LO_SUM:
9470 /* Anything in the constant pool is sufficiently aligned that
9471 all bytes have the same high part address. */
9472 return !legitimate_constant_pool_address_p (addr, QImode, false);
9473
9474 /* Auto-increment cases are now treated generically in recog.c. */
9475 case PRE_MODIFY:
9476 return TARGET_UPDATE;
9477
9478 /* AND is only allowed in Altivec loads. */
9479 case AND:
9480 return true;
9481
9482 default:
9483 break;
9484 }
9485
9486 return false;
9487 }
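/* For example, in 32-bit mode (plus r31 32760) is flagged as
   mode-dependent: the worst-case access (a 16-byte TFmode value) would
   touch offset 32760 + 12 = 32772, which no longer fits in a signed
   16-bit D field, even though a DImode access at 32760 is fine.  */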
9488
9489 /* Debug version of rs6000_mode_dependent_address. */
9490 static bool
9491 rs6000_debug_mode_dependent_address (const_rtx addr)
9492 {
9493 bool ret = rs6000_mode_dependent_address (addr);
9494
9495 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9496 ret ? "true" : "false");
9497 debug_rtx (addr);
9498
9499 return ret;
9500 }
9501
9502 /* Implement FIND_BASE_TERM. */
9503
9504 rtx
9505 rs6000_find_base_term (rtx op)
9506 {
9507 rtx base;
9508
9509 base = op;
9510 if (GET_CODE (base) == CONST)
9511 base = XEXP (base, 0);
9512 if (GET_CODE (base) == PLUS)
9513 base = XEXP (base, 0);
9514 if (GET_CODE (base) == UNSPEC)
9515 switch (XINT (base, 1))
9516 {
9517 case UNSPEC_TOCREL:
9518 case UNSPEC_MACHOPIC_OFFSET:
9519 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9520 for aliasing purposes. */
9521 return XVECEXP (base, 0, 0);
9522 }
9523
9524 return op;
9525 }
9526
9527 /* More elaborate version of recog's offsettable_memref_p predicate
9528 that works around the ??? note of rs6000_mode_dependent_address.
9529 In particular it accepts
9530
9531 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9532
9533 in 32-bit mode, which the recog predicate rejects. */
9534
9535 static bool
9536 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9537 {
9538 bool worst_case;
9539
9540 if (!MEM_P (op))
9541 return false;
9542
9543 /* First mimic offsettable_memref_p. */
9544 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9545 return true;
9546
9547 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9548 the latter predicate knows nothing about the mode of the memory
9549 reference and, therefore, assumes that it is the largest supported
9550 mode (TFmode). As a consequence, legitimate offsettable memory
9551 references are rejected. rs6000_legitimate_offset_address_p contains
9552 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9553 at least with a little bit of help here given that we know the
9554 actual registers used. */
9555 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9556 || GET_MODE_SIZE (reg_mode) == 4);
9557 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9558 strict, worst_case);
9559 }
9560
9561 /* Determine the reassociation width to be used in reassociate_bb.
9562 This takes into account how many parallel operations we
9563 can actually do of a given type, and also the latency.
9564 P8:
9565 int add/sub 6/cycle
9566 mul 2/cycle
9567 vect add/sub/mul 2/cycle
9568 fp add/sub/mul 2/cycle
9569 dfp 1/cycle
9570 */
9571
9572 static int
9573 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9574 machine_mode mode)
9575 {
9576 switch (rs6000_tune)
9577 {
9578 case PROCESSOR_POWER8:
9579 case PROCESSOR_POWER9:
9580 case PROCESSOR_POWER10:
9581 if (DECIMAL_FLOAT_MODE_P (mode))
9582 return 1;
9583 if (VECTOR_MODE_P (mode))
9584 return 4;
9585 if (INTEGRAL_MODE_P (mode))
9586 return 1;
9587 if (FLOAT_MODE_P (mode))
9588 return 4;
9589 break;
9590 default:
9591 break;
9592 }
9593 return 1;
9594 }
9595
9596 /* Change register usage conditional on target flags. */
9597 static void
9598 rs6000_conditional_register_usage (void)
9599 {
9600 int i;
9601
9602 if (TARGET_DEBUG_TARGET)
9603 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9604
9605 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9606 if (TARGET_64BIT)
9607 fixed_regs[13] = call_used_regs[13] = 1;
9608
9609 /* Conditionally disable FPRs. */
9610 if (TARGET_SOFT_FLOAT)
9611 for (i = 32; i < 64; i++)
9612 fixed_regs[i] = call_used_regs[i] = 1;
9613
9614 /* The TOC register is not killed across calls in a way that is
9615 visible to the compiler. */
9616 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9617 call_used_regs[2] = 0;
9618
9619 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9620 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9621
9622 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9623 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9624 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9625
9626 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9627 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9628 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9629
9630 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9631 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9632
9633 if (!TARGET_ALTIVEC && !TARGET_VSX)
9634 {
9635 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9636 fixed_regs[i] = call_used_regs[i] = 1;
9637 call_used_regs[VRSAVE_REGNO] = 1;
9638 }
9639
9640 if (TARGET_ALTIVEC || TARGET_VSX)
9641 global_regs[VSCR_REGNO] = 1;
9642
9643 if (TARGET_ALTIVEC_ABI)
9644 {
9645 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9646 call_used_regs[i] = 1;
9647
9648 /* AIX reserves VR20:31 in non-extended ABI mode. */
9649 if (TARGET_XCOFF)
9650 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9651 fixed_regs[i] = call_used_regs[i] = 1;
9652 }
9653 }
9654
9655 \f
9656 /* Output insns to set DEST equal to the constant SOURCE as a series of
9657 lis, ori and shl instructions and return TRUE. */
9658
9659 bool
9660 rs6000_emit_set_const (rtx dest, rtx source)
9661 {
9662 machine_mode mode = GET_MODE (dest);
9663 rtx temp, set;
9664 rtx_insn *insn;
9665 HOST_WIDE_INT c;
9666
9667 gcc_checking_assert (CONST_INT_P (source));
9668 c = INTVAL (source);
9669 switch (mode)
9670 {
9671 case E_QImode:
9672 case E_HImode:
9673 emit_insn (gen_rtx_SET (dest, source));
9674 return true;
9675
9676 case E_SImode:
9677 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9678
9679 emit_insn (gen_rtx_SET (copy_rtx (temp),
9680 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9681 emit_insn (gen_rtx_SET (dest,
9682 gen_rtx_IOR (SImode, copy_rtx (temp),
9683 GEN_INT (c & 0xffff))));
9684 break;
9685
9686 case E_DImode:
9687 if (!TARGET_POWERPC64)
9688 {
9689 rtx hi, lo;
9690
9691 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9692 DImode);
9693 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9694 DImode);
9695 emit_move_insn (hi, GEN_INT (c >> 32));
9696 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9697 emit_move_insn (lo, GEN_INT (c));
9698 }
9699 else
9700 rs6000_emit_set_long_const (dest, c);
9701 break;
9702
9703 default:
9704 gcc_unreachable ();
9705 }
9706
9707 insn = get_last_insn ();
9708 set = single_set (insn);
9709 if (! CONSTANT_P (SET_SRC (set)))
9710 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9711
9712 return true;
9713 }
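/* For example, SImode c = 0x12345678 is loaded by the code above as
   roughly "lis rD,0x1234" followed by "ori rD,rD,0x5678".  */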
9714
9715 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9716 Output insns to set DEST equal to the constant C as a series of
9717 lis, ori and shl instructions. */
9718
9719 static void
9720 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9721 {
9722 rtx temp;
9723 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9724
9725 ud1 = c & 0xffff;
9726 c = c >> 16;
9727 ud2 = c & 0xffff;
9728 c = c >> 16;
9729 ud3 = c & 0xffff;
9730 c = c >> 16;
9731 ud4 = c & 0xffff;
9732
9733 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9734 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9735 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9736
9737 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9738 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9739 {
9740 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9741
9742 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9743 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9744 if (ud1 != 0)
9745 emit_move_insn (dest,
9746 gen_rtx_IOR (DImode, copy_rtx (temp),
9747 GEN_INT (ud1)));
9748 }
9749 else if (ud3 == 0 && ud4 == 0)
9750 {
9751 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9752
9753 gcc_assert (ud2 & 0x8000);
9754 emit_move_insn (copy_rtx (temp),
9755 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9756 if (ud1 != 0)
9757 emit_move_insn (copy_rtx (temp),
9758 gen_rtx_IOR (DImode, copy_rtx (temp),
9759 GEN_INT (ud1)));
9760 emit_move_insn (dest,
9761 gen_rtx_ZERO_EXTEND (DImode,
9762 gen_lowpart (SImode,
9763 copy_rtx (temp))));
9764 }
9765 else if (ud1 == ud3 && ud2 == ud4)
9766 {
9767 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9768 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9769 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9770 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9771 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9772 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9773 }
9774 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9775 || (ud4 == 0 && ! (ud3 & 0x8000)))
9776 {
9777 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9778
9779 emit_move_insn (copy_rtx (temp),
9780 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9781 if (ud2 != 0)
9782 emit_move_insn (copy_rtx (temp),
9783 gen_rtx_IOR (DImode, copy_rtx (temp),
9784 GEN_INT (ud2)));
9785 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9786 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9787 GEN_INT (16)));
9788 if (ud1 != 0)
9789 emit_move_insn (dest,
9790 gen_rtx_IOR (DImode, copy_rtx (temp),
9791 GEN_INT (ud1)));
9792 }
9793 else
9794 {
9795 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9796
9797 emit_move_insn (copy_rtx (temp),
9798 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9799 if (ud3 != 0)
9800 emit_move_insn (copy_rtx (temp),
9801 gen_rtx_IOR (DImode, copy_rtx (temp),
9802 GEN_INT (ud3)));
9803
9804 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9805 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9806 GEN_INT (32)));
9807 if (ud2 != 0)
9808 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9809 gen_rtx_IOR (DImode, copy_rtx (temp),
9810 GEN_INT (ud2 << 16)));
9811 if (ud1 != 0)
9812 emit_move_insn (dest,
9813 gen_rtx_IOR (DImode, copy_rtx (temp),
9814 GEN_INT (ud1)));
9815 }
9816 }
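/* As a worked example of the general case above, c = 0x123456789abcdef0
   (ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc, ud1 = 0xdef0) becomes the
   classic five-instruction sequence
     lis rT,0x1234; ori rT,rT,0x5678; sldi rT,rT,32;
     oris rT,rT,0x9abc; ori rD,rT,0xdef0
   (register names illustrative).  */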
9817
9818 /* Helper for the following. Get rid of [r+r] memory refs
9819 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9820
9821 static void
9822 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9823 {
9824 if (MEM_P (operands[0])
9825 && !REG_P (XEXP (operands[0], 0))
9826 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9827 GET_MODE (operands[0]), false))
9828 operands[0]
9829 = replace_equiv_address (operands[0],
9830 copy_addr_to_reg (XEXP (operands[0], 0)));
9831
9832 if (MEM_P (operands[1])
9833 && !REG_P (XEXP (operands[1], 0))
9834 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9835 GET_MODE (operands[1]), false))
9836 operands[1]
9837 = replace_equiv_address (operands[1],
9838 copy_addr_to_reg (XEXP (operands[1], 0)));
9839 }
9840
9841 /* Generate a vector of constants to permute MODE for a little-endian
9842 storage operation by swapping the two halves of a vector. */
9843 static rtvec
9844 rs6000_const_vec (machine_mode mode)
9845 {
9846 int i, subparts;
9847 rtvec v;
9848
9849 switch (mode)
9850 {
9851 case E_V1TImode:
9852 subparts = 1;
9853 break;
9854 case E_V2DFmode:
9855 case E_V2DImode:
9856 subparts = 2;
9857 break;
9858 case E_V4SFmode:
9859 case E_V4SImode:
9860 subparts = 4;
9861 break;
9862 case E_V8HImode:
9863 subparts = 8;
9864 break;
9865 case E_V16QImode:
9866 subparts = 16;
9867 break;
9868 default:
9869 gcc_unreachable();
9870 }
9871
9872 v = rtvec_alloc (subparts);
9873
9874 for (i = 0; i < subparts / 2; ++i)
9875 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9876 for (i = subparts / 2; i < subparts; ++i)
9877 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9878
9879 return v;
9880 }
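/* For V4SImode the result is the permutation { 2, 3, 0, 1 }, i.e. the
   two 64-bit halves of the vector swapped; V2DImode yields { 1, 0 }.  */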
9881
9882 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9883 store operation. */
9884 void
9885 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9886 {
9887 /* Scalar permutations are easier to express in integer modes rather than
9888 floating-point modes, so cast them here. We use V1TImode instead
9889 of TImode to ensure that the values don't go through GPRs. */
9890 if (FLOAT128_VECTOR_P (mode))
9891 {
9892 dest = gen_lowpart (V1TImode, dest);
9893 source = gen_lowpart (V1TImode, source);
9894 mode = V1TImode;
9895 }
9896
9897 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9898 scalar. */
9899 if (mode == TImode || mode == V1TImode)
9900 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9901 GEN_INT (64))));
9902 else
9903 {
9904 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9905 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9906 }
9907 }
9908
9909 /* Emit a little-endian load from vector memory location SOURCE to VSX
9910 register DEST in mode MODE. The load is done with two permuting
9911 insns that represent an lxvd2x and an xxpermdi. */
9912 void
9913 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9914 {
9915 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9916 V1TImode). */
9917 if (mode == TImode || mode == V1TImode)
9918 {
9919 mode = V2DImode;
9920 dest = gen_lowpart (V2DImode, dest);
9921 source = adjust_address (source, V2DImode, 0);
9922 }
9923
9924 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9925 rs6000_emit_le_vsx_permute (tmp, source, mode);
9926 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9927 }
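/* On little-endian without ISA 3.0, a V4SImode load thus expands to
   roughly "lxvd2x vsD,0,rA; xxpermdi vsD,vsD,vsD,2": the lxvd2x loads
   the doublewords swapped with respect to LE element order, and the
   xxpermdi swaps them back.  (Operand names are illustrative.)  */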
9928
9929 /* Emit a little-endian store to vector memory location DEST from VSX
9930 register SOURCE in mode MODE. The store is done with two permuting
9931 insns that represent an xxpermdi and an stxvd2x. */
9932 void
9933 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9934 {
9935 /* This should never be called after LRA. */
9936 gcc_assert (can_create_pseudo_p ());
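  /* The permute below needs a fresh pseudo to hold the swapped source
     (see PR98549), hence the assert above.  */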
9937
9938 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9939 V1TImode). */
9940 if (mode == TImode || mode == V1TImode)
9941 {
9942 mode = V2DImode;
9943 dest = adjust_address (dest, V2DImode, 0);
9944 source = gen_lowpart (V2DImode, source);
9945 }
9946
9947 rtx tmp = gen_reg_rtx_and_attrs (source);
9948 rs6000_emit_le_vsx_permute (tmp, source, mode);
9949 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9950 }
9951
9952 /* Emit a sequence representing a little-endian VSX load or store,
9953 moving data from SOURCE to DEST in mode MODE. This is done
9954 separately from rs6000_emit_move to ensure it is called only
9955 during expand. LE VSX loads and stores introduced later are
9956 handled with a split. The expand-time RTL generation allows
9957 us to optimize away redundant pairs of register-permutes. */
9958 void
9959 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9960 {
9961 gcc_assert (!BYTES_BIG_ENDIAN
9962 && VECTOR_MEM_VSX_P (mode)
9963 && !TARGET_P9_VECTOR
9964 && !gpr_or_gpr_p (dest, source)
9965 && (MEM_P (source) ^ MEM_P (dest)));
9966
9967 if (MEM_P (source))
9968 {
9969 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9970 rs6000_emit_le_vsx_load (dest, source, mode);
9971 }
9972 else
9973 {
9974 if (!REG_P (source))
9975 source = force_reg (mode, source);
9976 rs6000_emit_le_vsx_store (dest, source, mode);
9977 }
9978 }
9979
9980 /* Return whether an SFmode or SImode move can be done without converting
9981 one mode to another. This arises when we have:
9982
9983 (SUBREG:SF (REG:SI ...))
9984 (SUBREG:SI (REG:SF ...))
9985
9986 and one of the values is in a floating point/vector register, where SFmode
9987 scalars are stored in DFmode format. */
9988
9989 bool
9990 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9991 {
9992 if (TARGET_ALLOW_SF_SUBREG)
9993 return true;
9994
9995 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9996 return true;
9997
9998 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9999 return true;
10000
10001 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10002 if (SUBREG_P (dest))
10003 {
10004 rtx dest_subreg = SUBREG_REG (dest);
10005 rtx src_subreg = SUBREG_REG (src);
10006 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10007 }
10008
10009 return false;
10010 }
10011
10012
10013 /* Helper function to change moves with:
10014
10015 (SUBREG:SF (REG:SI)) and
10016 (SUBREG:SI (REG:SF))
10017
10018 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10019 values are stored as DFmode values in the VSX registers. We need to convert
10020 the bits before we can use a direct move or operate on the bits in the
10021 vector register as an integer type.
10022
10023 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10024
10025 static bool
10026 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10027 {
10028 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10029 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10030 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10031 {
10032 rtx inner_source = SUBREG_REG (source);
10033 machine_mode inner_mode = GET_MODE (inner_source);
10034
10035 if (mode == SImode && inner_mode == SFmode)
10036 {
10037 emit_insn (gen_movsi_from_sf (dest, inner_source));
10038 return true;
10039 }
10040
10041 if (mode == SFmode && inner_mode == SImode)
10042 {
10043 emit_insn (gen_movsf_from_si (dest, inner_source));
10044 return true;
10045 }
10046 }
10047
10048 return false;
10049 }
10050
10051 /* Emit a move from SOURCE to DEST in mode MODE. */
10052 void
10053 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10054 {
10055 rtx operands[2];
10056 operands[0] = dest;
10057 operands[1] = source;
10058
10059 if (TARGET_DEBUG_ADDR)
10060 {
10061 fprintf (stderr,
10062 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10063 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10064 GET_MODE_NAME (mode),
10065 lra_in_progress,
10066 reload_completed,
10067 can_create_pseudo_p ());
10068 debug_rtx (dest);
10069 fprintf (stderr, "source:\n");
10070 debug_rtx (source);
10071 }
10072
10073 /* Check that we get CONST_WIDE_INT only when we should. */
10074 if (CONST_WIDE_INT_P (operands[1])
10075 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10076 gcc_unreachable ();
10077
10078 #ifdef HAVE_AS_GNU_ATTRIBUTE
10079 /* If we use a long double type, set the flags in .gnu_attribute that say
10080 what the long double type is. This is to allow the linker's warning
10081 message for the wrong long double to be useful, even if the function does
10082 not do a call (for example, doing a 128-bit add on power9 if the long
10083 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10084 are used and they aren't the default long double type. */
10085 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10086 {
10087 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10088 rs6000_passes_float = rs6000_passes_long_double = true;
10089
10090 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10091 rs6000_passes_float = rs6000_passes_long_double = true;
10092 }
10093 #endif
10094
10095 /* See if we need to special case SImode/SFmode SUBREG moves. */
10096 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10097 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10098 return;
10099
10100 /* Check if GCC is setting up a block move that will end up using FP
10101 registers as temporaries. We must make sure this is acceptable. */
10102 if (MEM_P (operands[0])
10103 && MEM_P (operands[1])
10104 && mode == DImode
10105 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10106 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10107 && ! (rs6000_slow_unaligned_access (SImode,
10108 (MEM_ALIGN (operands[0]) > 32
10109 ? 32 : MEM_ALIGN (operands[0])))
10110 || rs6000_slow_unaligned_access (SImode,
10111 (MEM_ALIGN (operands[1]) > 32
10112 ? 32 : MEM_ALIGN (operands[1]))))
10113 && ! MEM_VOLATILE_P (operands [0])
10114 && ! MEM_VOLATILE_P (operands [1]))
10115 {
10116 emit_move_insn (adjust_address (operands[0], SImode, 0),
10117 adjust_address (operands[1], SImode, 0));
10118 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10119 adjust_address (copy_rtx (operands[1]), SImode, 4));
10120 return;
10121 }
10122
10123 if (can_create_pseudo_p () && MEM_P (operands[0])
10124 && !gpc_reg_operand (operands[1], mode))
10125 operands[1] = force_reg (mode, operands[1]);
10126
10127 /* Recognize the case where operand[1] is a reference to thread-local
10128 data and load its address to a register. */
10129 if (tls_referenced_p (operands[1]))
10130 {
10131 enum tls_model model;
10132 rtx tmp = operands[1];
10133 rtx addend = NULL;
10134
10135 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10136 {
10137 addend = XEXP (XEXP (tmp, 0), 1);
10138 tmp = XEXP (XEXP (tmp, 0), 0);
10139 }
10140
10141 gcc_assert (SYMBOL_REF_P (tmp));
10142 model = SYMBOL_REF_TLS_MODEL (tmp);
10143 gcc_assert (model != 0);
10144
10145 tmp = rs6000_legitimize_tls_address (tmp, model);
10146 if (addend)
10147 {
10148 tmp = gen_rtx_PLUS (mode, tmp, addend);
10149 tmp = force_operand (tmp, operands[0]);
10150 }
10151 operands[1] = tmp;
10152 }
10153
10154 /* 128-bit constant floating-point values on Darwin should really be loaded
10155 as two parts. However, this premature splitting is a problem when DFmode
10156 values can go into Altivec registers. */
10157 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10158 && !reg_addr[DFmode].scalar_in_vmx_p)
10159 {
10160 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10161 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10162 DFmode);
10163 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10164 GET_MODE_SIZE (DFmode)),
10165 simplify_gen_subreg (DFmode, operands[1], mode,
10166 GET_MODE_SIZE (DFmode)),
10167 DFmode);
10168 return;
10169 }
10170
10171 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10172 p1:SD) if p1 is not of floating point class and p0 is spilled as
10173 we can have no analogous movsd_store for this. */
10174 if (lra_in_progress && mode == DDmode
10175 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10176 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10177 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10178 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10179 {
10180 enum reg_class cl;
10181 int regno = REGNO (SUBREG_REG (operands[1]));
10182
10183 if (!HARD_REGISTER_NUM_P (regno))
10184 {
10185 cl = reg_preferred_class (regno);
10186 regno = reg_renumber[regno];
10187 if (regno < 0)
10188 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10189 }
10190 if (regno >= 0 && ! FP_REGNO_P (regno))
10191 {
10192 mode = SDmode;
10193 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10194 operands[1] = SUBREG_REG (operands[1]);
10195 }
10196 }
10197 if (lra_in_progress
10198 && mode == SDmode
10199 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10200 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10201 && (REG_P (operands[1])
10202 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10203 {
10204 int regno = reg_or_subregno (operands[1]);
10205 enum reg_class cl;
10206
10207 if (!HARD_REGISTER_NUM_P (regno))
10208 {
10209 cl = reg_preferred_class (regno);
10210 gcc_assert (cl != NO_REGS);
10211 regno = reg_renumber[regno];
10212 if (regno < 0)
10213 regno = ira_class_hard_regs[cl][0];
10214 }
10215 if (FP_REGNO_P (regno))
10216 {
10217 if (GET_MODE (operands[0]) != DDmode)
10218 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10219 emit_insn (gen_movsd_store (operands[0], operands[1]));
10220 }
10221 else if (INT_REGNO_P (regno))
10222 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10223 else
10224 gcc_unreachable();
10225 return;
10226 }
10227 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10228 p1:DD)) if p0 is not of floating point class and p1 is spilled as
10229 we can have no analogous movsd_load for this. */
10230 if (lra_in_progress && mode == DDmode
10231 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10232 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10233 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10234 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10235 {
10236 enum reg_class cl;
10237 int regno = REGNO (SUBREG_REG (operands[0]));
10238
10239 if (!HARD_REGISTER_NUM_P (regno))
10240 {
10241 cl = reg_preferred_class (regno);
10242 regno = reg_renumber[regno];
10243 if (regno < 0)
10244 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10245 }
10246 if (regno >= 0 && ! FP_REGNO_P (regno))
10247 {
10248 mode = SDmode;
10249 operands[0] = SUBREG_REG (operands[0]);
10250 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10251 }
10252 }
10253 if (lra_in_progress
10254 && mode == SDmode
10255 && (REG_P (operands[0])
10256 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10257 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10258 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10259 {
10260 int regno = reg_or_subregno (operands[0]);
10261 enum reg_class cl;
10262
10263 if (!HARD_REGISTER_NUM_P (regno))
10264 {
10265 cl = reg_preferred_class (regno);
10266 gcc_assert (cl != NO_REGS);
10267 regno = reg_renumber[regno];
10268 if (regno < 0)
10269 regno = ira_class_hard_regs[cl][0];
10270 }
10271 if (FP_REGNO_P (regno))
10272 {
10273 if (GET_MODE (operands[1]) != DDmode)
10274 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10275 emit_insn (gen_movsd_load (operands[0], operands[1]));
10276 }
10277 else if (INT_REGNO_P (regno))
10278 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10279 else
10280 gcc_unreachable();
10281 return;
10282 }
10283
10284 /* FIXME: In the long term, this switch statement should go away
10285 and be replaced by a sequence of tests based on things like
10286 mode == Pmode. */
10287 switch (mode)
10288 {
10289 case E_HImode:
10290 case E_QImode:
10291 if (CONSTANT_P (operands[1])
10292 && !CONST_INT_P (operands[1]))
10293 operands[1] = force_const_mem (mode, operands[1]);
10294 break;
10295
10296 case E_TFmode:
10297 case E_TDmode:
10298 case E_IFmode:
10299 case E_KFmode:
10300 if (FLOAT128_2REG_P (mode))
10301 rs6000_eliminate_indexed_memrefs (operands);
10302 /* fall through */
10303
10304 case E_DFmode:
10305 case E_DDmode:
10306 case E_SFmode:
10307 case E_SDmode:
10308 if (CONSTANT_P (operands[1])
10309 && ! easy_fp_constant (operands[1], mode))
10310 operands[1] = force_const_mem (mode, operands[1]);
10311 break;
10312
10313 case E_V16QImode:
10314 case E_V8HImode:
10315 case E_V4SFmode:
10316 case E_V4SImode:
10317 case E_V2DFmode:
10318 case E_V2DImode:
10319 case E_V1TImode:
10320 if (CONSTANT_P (operands[1])
10321 && !easy_vector_constant (operands[1], mode))
10322 operands[1] = force_const_mem (mode, operands[1]);
10323 break;
10324
10325 case E_OOmode:
10326 case E_XOmode:
10327 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10328 error ("%qs is an opaque type, and you cannot set it to other values",
10329 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10330 break;
10331
10332 case E_SImode:
10333 case E_DImode:
10334 /* Use the default pattern for the address of ELF small data. */
10335 if (TARGET_ELF
10336 && mode == Pmode
10337 && DEFAULT_ABI == ABI_V4
10338 && (SYMBOL_REF_P (operands[1])
10339 || GET_CODE (operands[1]) == CONST)
10340 && small_data_operand (operands[1], mode))
10341 {
10342 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10343 return;
10344 }
10345
10346 /* Use the default pattern for loading up PC-relative addresses. */
10347 if (TARGET_PCREL && mode == Pmode
10348 && pcrel_local_or_external_address (operands[1], Pmode))
10349 {
10350 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10351 return;
10352 }
10353
10354 if (DEFAULT_ABI == ABI_V4
10355 && mode == Pmode && mode == SImode
10356 && flag_pic == 1 && got_operand (operands[1], mode))
10357 {
10358 emit_insn (gen_movsi_got (operands[0], operands[1]));
10359 return;
10360 }
10361
10362 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10363 && TARGET_NO_TOC_OR_PCREL
10364 && ! flag_pic
10365 && mode == Pmode
10366 && CONSTANT_P (operands[1])
10367 && GET_CODE (operands[1]) != HIGH
10368 && !CONST_INT_P (operands[1]))
10369 {
10370 rtx target = (!can_create_pseudo_p ()
10371 ? operands[0]
10372 : gen_reg_rtx (mode));
10373
10374 /* If this is a function address on -mcall-aixdesc,
10375 convert it to the address of the descriptor. */
10376 if (DEFAULT_ABI == ABI_AIX
10377 && SYMBOL_REF_P (operands[1])
10378 && XSTR (operands[1], 0)[0] == '.')
10379 {
10380 const char *name = XSTR (operands[1], 0);
10381 rtx new_ref;
10382 while (*name == '.')
10383 name++;
10384 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10385 CONSTANT_POOL_ADDRESS_P (new_ref)
10386 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10387 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10388 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10389 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10390 operands[1] = new_ref;
10391 }
10392
10393 if (DEFAULT_ABI == ABI_DARWIN)
10394 {
10395 #if TARGET_MACHO
10396 /* This is not PIC code, but could require the subset of
10397 indirections used by mdynamic-no-pic. */
10398 if (MACHO_DYNAMIC_NO_PIC_P)
10399 {
10400 /* Take care of any required data indirection. */
10401 operands[1] = rs6000_machopic_legitimize_pic_address (
10402 operands[1], mode, operands[0]);
10403 if (operands[0] != operands[1])
10404 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10405 return;
10406 }
10407 #endif
10408 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10409 emit_insn (gen_macho_low (Pmode, operands[0],
10410 target, operands[1]));
10411 return;
10412 }
10413
10414 emit_insn (gen_elf_high (target, operands[1]));
10415 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10416 return;
10417 }
10418
10419 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10420 and we have put it in the TOC, we just need to make a TOC-relative
10421 reference to it. */
10422 if (TARGET_TOC
10423 && SYMBOL_REF_P (operands[1])
10424 && use_toc_relative_ref (operands[1], mode))
10425 operands[1] = create_TOC_reference (operands[1], operands[0]);
10426 else if (mode == Pmode
10427 && CONSTANT_P (operands[1])
10428 && GET_CODE (operands[1]) != HIGH
10429 && ((REG_P (operands[0])
10430 && FP_REGNO_P (REGNO (operands[0])))
10431 || !CONST_INT_P (operands[1])
10432 || (num_insns_constant (operands[1], mode)
10433 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10434 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10435 && (TARGET_CMODEL == CMODEL_SMALL
10436 || can_create_pseudo_p ()
10437 || (REG_P (operands[0])
10438 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10439 {
10440
10441 #if TARGET_MACHO
10442 /* Darwin uses a special PIC legitimizer. */
10443 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10444 {
10445 operands[1] =
10446 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10447 operands[0]);
10448 if (operands[0] != operands[1])
10449 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10450 return;
10451 }
10452 #endif
10453
10454 /* If we are to limit the number of things we put in the TOC and
10455 this is a symbol plus a constant we can add in one insn,
10456 just put the symbol in the TOC and add the constant. */
10457 if (GET_CODE (operands[1]) == CONST
10458 && TARGET_NO_SUM_IN_TOC
10459 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10460 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10461 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10462 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10463 && ! side_effects_p (operands[0]))
10464 {
10465 rtx sym =
10466 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10467 rtx other = XEXP (XEXP (operands[1], 0), 1);
10468
10469 sym = force_reg (mode, sym);
10470 emit_insn (gen_add3_insn (operands[0], sym, other));
10471 return;
10472 }
10473
10474 operands[1] = force_const_mem (mode, operands[1]);
10475
10476 if (TARGET_TOC
10477 && SYMBOL_REF_P (XEXP (operands[1], 0))
10478 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10479 {
10480 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10481 operands[0]);
10482 operands[1] = gen_const_mem (mode, tocref);
10483 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10484 }
10485 }
10486 break;
10487
10488 case E_TImode:
10489 if (!VECTOR_MEM_VSX_P (TImode))
10490 rs6000_eliminate_indexed_memrefs (operands);
10491 break;
10492
10493 case E_PTImode:
10494 rs6000_eliminate_indexed_memrefs (operands);
10495 break;
10496
10497 default:
10498 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10499 }
10500
10501 /* Above, we may have called force_const_mem which may have returned
10502 an invalid address. If we can, fix this up; otherwise, reload will
10503 have to deal with it. */
10504 if (MEM_P (operands[1]))
10505 operands[1] = validize_mem (operands[1]);
10506
10507 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10508 }
10509 \f
10510
10511 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10512 static void
10513 init_float128_ibm (machine_mode mode)
10514 {
10515 if (!TARGET_XL_COMPAT)
10516 {
10517 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10518 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10519 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10520 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10521
10522 if (!TARGET_HARD_FLOAT)
10523 {
10524 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10525 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10526 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10527 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10528 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10529 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10530 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10531 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10532
10533 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10534 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10535 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10536 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10537 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10538 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10539 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10540 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10541 }
10542 }
10543 else
10544 {
10545 set_optab_libfunc (add_optab, mode, "_xlqadd");
10546 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10547 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10548 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10549 }
10550
10551 /* Add various conversions for IFmode to use the traditional TFmode
10552 names. */
10553 if (mode == IFmode)
10554 {
10555 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10556 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10557 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10558 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10559 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10560 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10561
10562 if (TARGET_POWERPC64)
10563 {
10564 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10565 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10566 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10567 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10568 }
10569 }
10570 }
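
/* For illustration (a sketch, assuming IBM extended double without
   -mxl-compat): with the mappings above, user code such as

     long double a, b;
     long double c = a * b;   // expands to a call to __gcc_qmul

   calls the __gcc_q* routines, and with software floating point a
   comparison such as a < b becomes a call to __gcc_qlt. */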
10571
10572 /* Create a decl for either complex long double multiply or complex long double
10573 divide when long double is IEEE 128-bit floating point. We can't use
10574 __multc3 and __divtc3 because the original long double using IBM extended
10575 double used those names. The complex multiply/divide functions are encoded
10576 as builtin functions with a complex result and 4 scalar inputs. */
10577
10578 static void
10579 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10580 {
10581 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10582 name, NULL_TREE);
10583
10584 set_builtin_decl (fncode, fndecl, true);
10585
10586 if (TARGET_DEBUG_BUILTIN)
10587 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10588
10589 return;
10590 }
10591
10592 /* Set up IEEE 128-bit floating point routines. Use different names if the
10593 arguments can be passed in a vector register. The historical PowerPC
10594 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10595 continue to use that if we aren't using vector registers to pass IEEE
10596 128-bit floating point. */
10597
10598 static void
10599 init_float128_ieee (machine_mode mode)
10600 {
10601 if (FLOAT128_VECTOR_P (mode))
10602 {
10603 static bool complex_muldiv_init_p = false;
10604
10605 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10606 we have clone or target attributes, this will be called a second
10607 time. We want to create the built-in function only once. */
10608 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10609 {
10610 complex_muldiv_init_p = true;
10611 built_in_function fncode_mul =
10612 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10613 - MIN_MODE_COMPLEX_FLOAT);
10614 built_in_function fncode_div =
10615 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10616 - MIN_MODE_COMPLEX_FLOAT);
10617
10618 tree fntype = build_function_type_list (complex_long_double_type_node,
10619 long_double_type_node,
10620 long_double_type_node,
10621 long_double_type_node,
10622 long_double_type_node,
10623 NULL_TREE);
10624
10625 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10626 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10627 }
10628
10629 set_optab_libfunc (add_optab, mode, "__addkf3");
10630 set_optab_libfunc (sub_optab, mode, "__subkf3");
10631 set_optab_libfunc (neg_optab, mode, "__negkf2");
10632 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10633 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10634 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10635 set_optab_libfunc (abs_optab, mode, "__abskf2");
10636 set_optab_libfunc (powi_optab, mode, "__powikf2");
10637
10638 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10639 set_optab_libfunc (ne_optab, mode, "__nekf2");
10640 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10641 set_optab_libfunc (ge_optab, mode, "__gekf2");
10642 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10643 set_optab_libfunc (le_optab, mode, "__lekf2");
10644 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10645
10646 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10647 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10648 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10649 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10650
10651 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10652 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10653 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10654
10655 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10656 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10657 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10658
10659 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10660 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10661 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10662 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10663 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10664 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10665
10666 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10667 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10668 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10669 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10670
10671 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10672 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10673 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10674 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10675
10676 if (TARGET_POWERPC64)
10677 {
10678 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10679 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10680 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10681 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10682 }
10683 }
10684
10685 else
10686 {
10687 set_optab_libfunc (add_optab, mode, "_q_add");
10688 set_optab_libfunc (sub_optab, mode, "_q_sub");
10689 set_optab_libfunc (neg_optab, mode, "_q_neg");
10690 set_optab_libfunc (smul_optab, mode, "_q_mul");
10691 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10692 if (TARGET_PPC_GPOPT)
10693 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10694
10695 set_optab_libfunc (eq_optab, mode, "_q_feq");
10696 set_optab_libfunc (ne_optab, mode, "_q_fne");
10697 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10698 set_optab_libfunc (ge_optab, mode, "_q_fge");
10699 set_optab_libfunc (lt_optab, mode, "_q_flt");
10700 set_optab_libfunc (le_optab, mode, "_q_fle");
10701
10702 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10703 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10704 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10705 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10706 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10707 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10708 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10709 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10710 }
10711 }
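
/* For illustration (hypothetical user code, assuming -mfloat128 so KFmode
   is available):

     __float128 x, y;
     __float128 z = x / y;   // expands to a call to __divkf3

   uses the __*kf* names registered above; the historical _q_* names are
   only used when IEEE 128-bit values are not passed in vector
   registers. */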
10712
10713 static void
10714 rs6000_init_libfuncs (void)
10715 {
10716 /* __float128 support. */
10717 if (TARGET_FLOAT128_TYPE)
10718 {
10719 init_float128_ibm (IFmode);
10720 init_float128_ieee (KFmode);
10721 }
10722
10723 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10724 if (TARGET_LONG_DOUBLE_128)
10725 {
10726 if (!TARGET_IEEEQUAD)
10727 init_float128_ibm (TFmode);
10728
10729 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10730 else
10731 init_float128_ieee (TFmode);
10732 }
10733 }
10734
10735 /* Emit a potentially record-form instruction, setting DST from SRC.
10736 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10737 signed comparison of DST with zero. If DOT is 1, the generated RTL
10738 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10739 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10740 a separate COMPARE. */
10741
10742 void
10743 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10744 {
10745 if (dot == 0)
10746 {
10747 emit_move_insn (dst, src);
10748 return;
10749 }
10750
10751 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10752 {
10753 emit_move_insn (dst, src);
10754 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10755 return;
10756 }
10757
10758 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10759 if (dot == 1)
10760 {
10761 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10762 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10763 }
10764 else
10765 {
10766 rtx set = gen_rtx_SET (dst, src);
10767 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10768 }
10769 }
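
/* For example (illustrative RTL, not from the original source): with
   DOT == 1 and CCREG being CR0, the PARALLEL built above matches a single
   record-form instruction such as "and.", setting CR0 and clobbering the
   unused GPR result:

     (parallel [(set (reg:CC cr0) (compare:CC SRC (const_int 0)))
                (clobber DST)])

   With a CC register other than CR0, the move and the compare are emitted
   as two separate insns instead. */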
10770
10771 \f
10772 /* A validation routine: say whether CODE, a condition code, and MODE
10773 match. The other alternatives either don't make sense or should
10774 never be generated. */
10775
10776 void
10777 validate_condition_mode (enum rtx_code code, machine_mode mode)
10778 {
10779 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10780 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10781 && GET_MODE_CLASS (mode) == MODE_CC);
10782
10783 /* These don't make sense. */
10784 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10785 || mode != CCUNSmode);
10786
10787 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10788 || mode == CCUNSmode);
10789
10790 gcc_assert (mode == CCFPmode
10791 || (code != ORDERED && code != UNORDERED
10792 && code != UNEQ && code != LTGT
10793 && code != UNGT && code != UNLT
10794 && code != UNGE && code != UNLE));
10795
10796 /* These are invalid; the information is not there. */
10797 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10798 }
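
/* For example: GTU requires CCUNSmode while GT must not use it, the
   unordered comparisons require CCFPmode, and CCEQmode records only
   equality, so it is valid solely with EQ and NE. */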
10799
10800 \f
10801 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10802 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10803 not zero, store there the bit offset (counted from the right) where
10804 the single stretch of 1 bits begins; and similarly for B, the bit
10805 offset where it ends. */
10806
10807 bool
10808 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10809 {
10810 unsigned HOST_WIDE_INT val = INTVAL (mask);
10811 unsigned HOST_WIDE_INT bit;
10812 int nb, ne;
10813 int n = GET_MODE_PRECISION (mode);
10814
10815 if (mode != DImode && mode != SImode)
10816 return false;
10817
10818 if (INTVAL (mask) >= 0)
10819 {
10820 bit = val & -val;
10821 ne = exact_log2 (bit);
10822 nb = exact_log2 (val + bit);
10823 }
10824 else if (val + 1 == 0)
10825 {
10826 nb = n;
10827 ne = 0;
10828 }
10829 else if (val & 1)
10830 {
10831 val = ~val;
10832 bit = val & -val;
10833 nb = exact_log2 (bit);
10834 ne = exact_log2 (val + bit);
10835 }
10836 else
10837 {
10838 bit = val & -val;
10839 ne = exact_log2 (bit);
10840 if (val + bit == 0)
10841 nb = n;
10842 else
10843 nb = 0;
10844 }
10845
10846 nb--;
10847
10848 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10849 return false;
10850
10851 if (b)
10852 *b = nb;
10853 if (e)
10854 *e = ne;
10855
10856 return true;
10857 }
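
/* A worked example of the bit tricks above: for MASK = 0x0ff0 (DImode),
   val & -val isolates the lowest set bit, 0x010, so ne = 4; val + bit
   = 0x1000, whose log2 is 12, and after the nb-- adjustment nb = 11.
   The mask is thus the single stretch of ones from bit 4 through bit 11,
   and *E and *B are set to 4 and 11 respectively. */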
10858
10859 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10860 or rldicr instruction, to implement an AND with it in mode MODE. */
10861
10862 bool
10863 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10864 {
10865 int nb, ne;
10866
10867 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10868 return false;
10869
10870 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10871 does not wrap. */
10872 if (mode == DImode)
10873 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10874
10875 /* For SImode, rlwinm can do everything. */
10876 if (mode == SImode)
10877 return (nb < 32 && ne < 32);
10878
10879 return false;
10880 }
10881
10882 /* Return the instruction template for an AND with mask in mode MODE, with
10883 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10884
10885 const char *
10886 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10887 {
10888 int nb, ne;
10889
10890 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10891 gcc_unreachable ();
10892
10893 if (mode == DImode && ne == 0)
10894 {
10895 operands[3] = GEN_INT (63 - nb);
10896 if (dot)
10897 return "rldicl. %0,%1,0,%3";
10898 return "rldicl %0,%1,0,%3";
10899 }
10900
10901 if (mode == DImode && nb == 63)
10902 {
10903 operands[3] = GEN_INT (63 - ne);
10904 if (dot)
10905 return "rldicr. %0,%1,0,%3";
10906 return "rldicr %0,%1,0,%3";
10907 }
10908
10909 if (nb < 32 && ne < 32)
10910 {
10911 operands[3] = GEN_INT (31 - nb);
10912 operands[4] = GEN_INT (31 - ne);
10913 if (dot)
10914 return "rlwinm. %0,%1,0,%3,%4";
10915 return "rlwinm %0,%1,0,%3,%4";
10916 }
10917
10918 gcc_unreachable ();
10919 }
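
/* For example (hypothetical operands): an AND with 0xff in DImode has
   ne == 0 and nb == 7, so operands[3] becomes 63 - 7 = 56 and the
   template "rldicl %0,%1,0,56" is returned: clear the upper 56 bits,
   keeping only the low byte. */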
10920
10921 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10922 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10923 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10924
10925 bool
10926 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10927 {
10928 int nb, ne;
10929
10930 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10931 return false;
10932
10933 int n = GET_MODE_PRECISION (mode);
10934 int sh = -1;
10935
10936 if (CONST_INT_P (XEXP (shift, 1)))
10937 {
10938 sh = INTVAL (XEXP (shift, 1));
10939 if (sh < 0 || sh >= n)
10940 return false;
10941 }
10942
10943 rtx_code code = GET_CODE (shift);
10944
10945 /* Convert any shift by 0 to a rotate, to simplify below code. */
10946 if (sh == 0)
10947 code = ROTATE;
10948
10949 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10950 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10951 code = ASHIFT;
10952 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10953 {
10954 code = LSHIFTRT;
10955 sh = n - sh;
10956 }
10957
10958 /* DImode rotates need rld*. */
10959 if (mode == DImode && code == ROTATE)
10960 return (nb == 63 || ne == 0 || ne == sh);
10961
10962 /* SImode rotates need rlw*. */
10963 if (mode == SImode && code == ROTATE)
10964 return (nb < 32 && ne < 32 && sh < 32);
10965
10966 /* Wrap-around masks are only okay for rotates. */
10967 if (ne > nb)
10968 return false;
10969
10970 /* Variable shifts are only okay for rotates. */
10971 if (sh < 0)
10972 return false;
10973
10974 /* Don't allow ASHIFT if the mask is wrong for that. */
10975 if (code == ASHIFT && ne < sh)
10976 return false;
10977
10978 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10979 if the mask is wrong for that. */
10980 if (nb < 32 && ne < 32 && sh < 32
10981 && !(code == LSHIFTRT && nb >= 32 - sh))
10982 return true;
10983
10984 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10985 if the mask is wrong for that. */
10986 if (code == LSHIFTRT)
10987 sh = 64 - sh;
10988 if (nb == 63 || ne == 0 || ne == sh)
10989 return !(code == LSHIFTRT && nb >= sh);
10990
10991 return false;
10992 }
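
/* For example: (ashift:SI X 3) with mask 0xfffffff8 gives sh = 3,
   nb = 31, ne = 3. The mask does not wrap (nb >= ne), ASHIFT is
   acceptable because ne >= sh, and nb, ne, and sh are all below 32, so
   the shift-and-mask can be done with a single rlwinm. */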
10993
10994 /* Return the instruction template for a shift with mask in mode MODE, with
10995 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10996
10997 const char *
10998 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10999 {
11000 int nb, ne;
11001
11002 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11003 gcc_unreachable ();
11004
11005 if (mode == DImode && ne == 0)
11006 {
11007 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11008 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11009 operands[3] = GEN_INT (63 - nb);
11010 if (dot)
11011 return "rld%I2cl. %0,%1,%2,%3";
11012 return "rld%I2cl %0,%1,%2,%3";
11013 }
11014
11015 if (mode == DImode && nb == 63)
11016 {
11017 operands[3] = GEN_INT (63 - ne);
11018 if (dot)
11019 return "rld%I2cr. %0,%1,%2,%3";
11020 return "rld%I2cr %0,%1,%2,%3";
11021 }
11022
11023 if (mode == DImode
11024 && GET_CODE (operands[4]) != LSHIFTRT
11025 && CONST_INT_P (operands[2])
11026 && ne == INTVAL (operands[2]))
11027 {
11028 operands[3] = GEN_INT (63 - nb);
11029 if (dot)
11030 return "rld%I2c. %0,%1,%2,%3";
11031 return "rld%I2c %0,%1,%2,%3";
11032 }
11033
11034 if (nb < 32 && ne < 32)
11035 {
11036 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11037 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11038 operands[3] = GEN_INT (31 - nb);
11039 operands[4] = GEN_INT (31 - ne);
11040 /* This insn can also be a 64-bit rotate with mask that really makes
11041 it just a shift right (with mask); the %h below are to adjust for
11042 that situation (shift count is >= 32 in that case). */
11043 if (dot)
11044 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11045 return "rlw%I2nm %0,%1,%h2,%3,%4";
11046 }
11047
11048 gcc_unreachable ();
11049 }
11050
11051 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11052 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11053 ASHIFT, or LSHIFTRT) in mode MODE. */
11054
11055 bool
11056 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11057 {
11058 int nb, ne;
11059
11060 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11061 return false;
11062
11063 int n = GET_MODE_PRECISION (mode);
11064
11065 int sh = INTVAL (XEXP (shift, 1));
11066 if (sh < 0 || sh >= n)
11067 return false;
11068
11069 rtx_code code = GET_CODE (shift);
11070
11071 /* Convert any shift by 0 to a rotate, to simplify below code. */
11072 if (sh == 0)
11073 code = ROTATE;
11074
11075 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11076 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11077 code = ASHIFT;
11078 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11079 {
11080 code = LSHIFTRT;
11081 sh = n - sh;
11082 }
11083
11084 /* DImode rotates need rldimi. */
11085 if (mode == DImode && code == ROTATE)
11086 return (ne == sh);
11087
11088 /* SImode rotates need rlwimi. */
11089 if (mode == SImode && code == ROTATE)
11090 return (nb < 32 && ne < 32 && sh < 32);
11091
11092 /* Wrap-around masks are only okay for rotates. */
11093 if (ne > nb)
11094 return false;
11095
11096 /* Don't allow ASHIFT if the mask is wrong for that. */
11097 if (code == ASHIFT && ne < sh)
11098 return false;
11099
11100 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11101 if the mask is wrong for that. */
11102 if (nb < 32 && ne < 32 && sh < 32
11103 && !(code == LSHIFTRT && nb >= 32 - sh))
11104 return true;
11105
11106 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11107 if the mask is wrong for that. */
11108 if (code == LSHIFTRT)
11109 sh = 64 - sh;
11110 if (ne == sh)
11111 return !(code == LSHIFTRT && nb >= sh);
11112
11113 return false;
11114 }
11115
11116 /* Return the instruction template for an insert with mask in mode MODE, with
11117 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11118
11119 const char *
11120 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11121 {
11122 int nb, ne;
11123
11124 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11125 gcc_unreachable ();
11126
11127 /* Prefer rldimi because rlwimi is cracked. */
11128 if (TARGET_POWERPC64
11129 && (!dot || mode == DImode)
11130 && GET_CODE (operands[4]) != LSHIFTRT
11131 && ne == INTVAL (operands[2]))
11132 {
11133 operands[3] = GEN_INT (63 - nb);
11134 if (dot)
11135 return "rldimi. %0,%1,%2,%3";
11136 return "rldimi %0,%1,%2,%3";
11137 }
11138
11139 if (nb < 32 && ne < 32)
11140 {
11141 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11142 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11143 operands[3] = GEN_INT (31 - nb);
11144 operands[4] = GEN_INT (31 - ne);
11145 if (dot)
11146 return "rlwimi. %0,%1,%2,%3,%4";
11147 return "rlwimi %0,%1,%2,%3,%4";
11148 }
11149
11150 gcc_unreachable ();
11151 }
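
/* For example (hypothetical operands, 32-bit target): an insert with
   (ashift:SI X 8) and mask 0x0000ff00 has nb == 15 and ne == 8, so
   operands[3] and operands[4] become 16 and 23 and the template
   "rlwimi %0,%1,8,16,23" is returned. */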
11152
11153 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11154 using two machine instructions. */
11155
11156 bool
11157 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11158 {
11159 /* There are two kinds of AND we can handle with two insns:
11160 1) those we can do with two rl* insns;
11161 2) ori[s];xori[s].
11162
11163 We do not handle that last case yet. */
11164
11165 /* If there is just one stretch of ones, we can do it. */
11166 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11167 return true;
11168
11169 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11170 one insn, we can do the whole thing with two. */
11171 unsigned HOST_WIDE_INT val = INTVAL (c);
11172 unsigned HOST_WIDE_INT bit1 = val & -val;
11173 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11174 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11175 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11176 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11177 }
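
/* A worked example of the hole filling above: for C = 0x00ff00ff,
   bit1 = 0x1, bit2 = 0x100 (the bottom of the lowest hole),
   val1 = 0x00ff0000, and bit3 = 0x10000 (just above that hole), so
   val + bit3 - bit2 = 0x00ffffff. That is a single stretch of ones,
   hence a valid AND mask, and the original AND takes two insns. */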
11178
11179 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11180 If EXPAND is true, split rotate-and-mask instructions we generate to
11181 their constituent parts as well (this is used during expand); if DOT
11182 is 1, make the last insn a record-form instruction clobbering the
11183 destination GPR and setting the CC reg (from operands[3]); if 2, set
11184 that GPR as well as the CC reg. */
11185
11186 void
11187 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11188 {
11189 gcc_assert (!(expand && dot));
11190
11191 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11192
11193 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11194 shift right. This generates better code than doing the masks without
11195 shifts, or shifting first right and then left. */
11196 int nb, ne;
11197 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11198 {
11199 gcc_assert (mode == DImode);
11200
11201 int shift = 63 - nb;
11202 if (expand)
11203 {
11204 rtx tmp1 = gen_reg_rtx (DImode);
11205 rtx tmp2 = gen_reg_rtx (DImode);
11206 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11207 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11208 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11209 }
11210 else
11211 {
11212 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11213 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11214 emit_move_insn (operands[0], tmp);
11215 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11216 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11217 }
11218 return;
11219 }
11220
11221 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11222 that does the rest. */
11223 unsigned HOST_WIDE_INT bit1 = val & -val;
11224 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11225 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11226 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11227
11228 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11229 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11230
11231 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11232
11233 /* Two "no-rotate"-and-mask instructions, for SImode. */
11234 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11235 {
11236 gcc_assert (mode == SImode);
11237
11238 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11239 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11240 emit_move_insn (reg, tmp);
11241 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11242 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11243 return;
11244 }
11245
11246 gcc_assert (mode == DImode);
11247
11248 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11249 insns; we have to do the first in SImode, because it wraps. */
11250 if (mask2 <= 0xffffffff
11251 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11252 {
11253 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11254 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11255 GEN_INT (mask1));
11256 rtx reg_low = gen_lowpart (SImode, reg);
11257 emit_move_insn (reg_low, tmp);
11258 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11259 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11260 return;
11261 }
11262
11263 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11264 at the top end), rotate back and clear the other hole. */
11265 int right = exact_log2 (bit3);
11266 int left = 64 - right;
11267
11268 /* Rotate the mask too. */
11269 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11270
11271 if (expand)
11272 {
11273 rtx tmp1 = gen_reg_rtx (DImode);
11274 rtx tmp2 = gen_reg_rtx (DImode);
11275 rtx tmp3 = gen_reg_rtx (DImode);
11276 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11277 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11278 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11279 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11280 }
11281 else
11282 {
11283 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11284 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11285 emit_move_insn (operands[0], tmp);
11286 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11287 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11288 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11289 }
11290 }
11291 \f
11292 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
11293 for lfq and stfq insns iff the registers are hard registers. */
11294
11295 int
11296 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11297 {
11298 /* We might have been passed a SUBREG. */
11299 if (!REG_P (reg1) || !REG_P (reg2))
11300 return 0;
11301
11302 /* We might have been passed non-floating-point registers. */
11303 if (!FP_REGNO_P (REGNO (reg1))
11304 || !FP_REGNO_P (REGNO (reg2)))
11305 return 0;
11306
11307 return (REGNO (reg1) == REGNO (reg2) - 1);
11308 }
11309
11310 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11311 addr1 and addr2 must be in consecutive memory locations
11312 (addr2 == addr1 + 8). */
11313
11314 int
11315 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11316 {
11317 rtx addr1, addr2;
11318 unsigned int reg1, reg2;
11319 int offset1, offset2;
11320
11321 /* The mems cannot be volatile. */
11322 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11323 return 0;
11324
11325 addr1 = XEXP (mem1, 0);
11326 addr2 = XEXP (mem2, 0);
11327
11328 /* Extract an offset (if used) from the first addr. */
11329 if (GET_CODE (addr1) == PLUS)
11330 {
11331 /* If not a REG, return zero. */
11332 if (!REG_P (XEXP (addr1, 0)))
11333 return 0;
11334 else
11335 {
11336 reg1 = REGNO (XEXP (addr1, 0));
11337 /* The offset must be constant! */
11338 if (!CONST_INT_P (XEXP (addr1, 1)))
11339 return 0;
11340 offset1 = INTVAL (XEXP (addr1, 1));
11341 }
11342 }
11343 else if (!REG_P (addr1))
11344 return 0;
11345 else
11346 {
11347 reg1 = REGNO (addr1);
11348 /* This was a simple (mem (reg)) expression. Offset is 0. */
11349 offset1 = 0;
11350 }
11351
11352 /* And now for the second addr. */
11353 if (GET_CODE (addr2) == PLUS)
11354 {
11355 /* If not a REG, return zero. */
11356 if (!REG_P (XEXP (addr2, 0)))
11357 return 0;
11358 else
11359 {
11360 reg2 = REGNO (XEXP (addr2, 0));
11361 /* The offset must be constant. */
11362 if (!CONST_INT_P (XEXP (addr2, 1)))
11363 return 0;
11364 offset2 = INTVAL (XEXP (addr2, 1));
11365 }
11366 }
11367 else if (!REG_P (addr2))
11368 return 0;
11369 else
11370 {
11371 reg2 = REGNO (addr2);
11372 /* This was a simple (mem (reg)) expression. Offset is 0. */
11373 offset2 = 0;
11374 }
11375
11376 /* Both of these must have the same base register. */
11377 if (reg1 != reg2)
11378 return 0;
11379
11380 /* The offset for the second addr must be 8 more than the first addr. */
11381 if (offset2 != offset1 + 8)
11382 return 0;
11383
11384 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11385 instructions. */
11386 return 1;
11387 }
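
/* For example (hypothetical RTL): (mem (plus (reg 9) (const_int 8))) and
   (mem (plus (reg 9) (const_int 16))) share base register 9 and their
   offsets differ by exactly 8, so the pair qualifies for lfq/stfq. */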
11388 \f
11389 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
11390 need to use DDmode, in all other cases we can use the same mode. */
11391 static machine_mode
11392 rs6000_secondary_memory_needed_mode (machine_mode mode)
11393 {
11394 if (lra_in_progress && mode == SDmode)
11395 return DDmode;
11396 return mode;
11397 }
11398
11399 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11400 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11401 only work on the traditional altivec registers, note if an altivec register
11402 was chosen. */
11403
11404 static enum rs6000_reg_type
11405 register_to_reg_type (rtx reg, bool *is_altivec)
11406 {
11407 HOST_WIDE_INT regno;
11408 enum reg_class rclass;
11409
11410 if (SUBREG_P (reg))
11411 reg = SUBREG_REG (reg);
11412
11413 if (!REG_P (reg))
11414 return NO_REG_TYPE;
11415
11416 regno = REGNO (reg);
11417 if (!HARD_REGISTER_NUM_P (regno))
11418 {
11419 if (!lra_in_progress && !reload_completed)
11420 return PSEUDO_REG_TYPE;
11421
11422 regno = true_regnum (reg);
11423 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11424 return PSEUDO_REG_TYPE;
11425 }
11426
11427 gcc_assert (regno >= 0);
11428
11429 if (is_altivec && ALTIVEC_REGNO_P (regno))
11430 *is_altivec = true;
11431
11432 rclass = rs6000_regno_regclass[regno];
11433 return reg_class_to_reg_type[(int)rclass];
11434 }
11435
11436 /* Helper function to return the cost of adding a TOC entry address. */
11437
11438 static inline int
11439 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
11440 {
11441 int ret;
11442
11443 if (TARGET_CMODEL != CMODEL_SMALL)
11444 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
11445
11446 else
11447 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
11448
11449 return ret;
11450 }
11451
11452 /* Helper function for rs6000_secondary_reload to determine whether the memory
11453 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11454 needs reloading. Return negative if the memory is not handled by the memory
11455 helper functions (so a different reload method should be tried), 0 if no
11456 additional instructions are needed, and positive to give the extra cost for the
11457 memory. */
11458
11459 static int
11460 rs6000_secondary_reload_memory (rtx addr,
11461 enum reg_class rclass,
11462 machine_mode mode)
11463 {
11464 int extra_cost = 0;
11465 rtx reg, and_arg, plus_arg0, plus_arg1;
11466 addr_mask_type addr_mask;
11467 const char *type = NULL;
11468 const char *fail_msg = NULL;
11469
11470 if (GPR_REG_CLASS_P (rclass))
11471 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11472
11473 else if (rclass == FLOAT_REGS)
11474 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11475
11476 else if (rclass == ALTIVEC_REGS)
11477 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11478
11479 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11480 else if (rclass == VSX_REGS)
11481 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11482 & ~RELOAD_REG_AND_M16);
11483
11484 /* If the register allocator hasn't made up its mind yet on the register
11485 class to use, settle on sensible defaults. */
11486 else if (rclass == NO_REGS)
11487 {
11488 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11489 & ~RELOAD_REG_AND_M16);
11490
11491 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11492 addr_mask &= ~(RELOAD_REG_INDEXED
11493 | RELOAD_REG_PRE_INCDEC
11494 | RELOAD_REG_PRE_MODIFY);
11495 }
11496
11497 else
11498 addr_mask = 0;
11499
11500 /* If the mode isn't valid in this register class, just return now. */
11501 if ((addr_mask & RELOAD_REG_VALID) == 0)
11502 {
11503 if (TARGET_DEBUG_ADDR)
11504 {
11505 fprintf (stderr,
11506 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11507 "not valid in class\n",
11508 GET_MODE_NAME (mode), reg_class_names[rclass]);
11509 debug_rtx (addr);
11510 }
11511
11512 return -1;
11513 }
11514
11515 switch (GET_CODE (addr))
11516 {
11517 /* Does the register class support auto update forms for this mode? We
11518 don't need a scratch register, since the powerpc only supports
11519 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11520 case PRE_INC:
11521 case PRE_DEC:
11522 reg = XEXP (addr, 0);
11523 if (!base_reg_operand (addr, GET_MODE (reg)))
11524 {
11525 fail_msg = "no base register #1";
11526 extra_cost = -1;
11527 }
11528
11529 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11530 {
11531 extra_cost = 1;
11532 type = "update";
11533 }
11534 break;
11535
11536 case PRE_MODIFY:
11537 reg = XEXP (addr, 0);
11538 plus_arg1 = XEXP (addr, 1);
11539 if (!base_reg_operand (reg, GET_MODE (reg))
11540 || GET_CODE (plus_arg1) != PLUS
11541 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11542 {
11543 fail_msg = "bad PRE_MODIFY";
11544 extra_cost = -1;
11545 }
11546
11547 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11548 {
11549 extra_cost = 1;
11550 type = "update";
11551 }
11552 break;
11553
11554 /* Do we need to simulate AND -16 to clear the bottom address bits used
11555 in VMX load/stores? Only allow the AND for vector sizes. */
11556 case AND:
11557 and_arg = XEXP (addr, 0);
11558 if (GET_MODE_SIZE (mode) != 16
11559 || !CONST_INT_P (XEXP (addr, 1))
11560 || INTVAL (XEXP (addr, 1)) != -16)
11561 {
11562 fail_msg = "bad Altivec AND #1";
11563 extra_cost = -1;
11564 }
11565
11566 if (rclass != ALTIVEC_REGS)
11567 {
11568 if (legitimate_indirect_address_p (and_arg, false))
11569 extra_cost = 1;
11570
11571 else if (legitimate_indexed_address_p (and_arg, false))
11572 extra_cost = 2;
11573
11574 else
11575 {
11576 fail_msg = "bad Altivec AND #2";
11577 extra_cost = -1;
11578 }
11579
11580 type = "and";
11581 }
11582 break;
11583
11584 /* If this is an indirect address, make sure it is a base register. */
11585 case REG:
11586 case SUBREG:
11587 if (!legitimate_indirect_address_p (addr, false))
11588 {
11589 extra_cost = 1;
11590 type = "move";
11591 }
11592 break;
11593
11594 /* If this is an indexed address, make sure the register class can handle
11595 indexed addresses for this mode. */
11596 case PLUS:
11597 plus_arg0 = XEXP (addr, 0);
11598 plus_arg1 = XEXP (addr, 1);
11599
11600 /* (plus (plus (reg) (constant)) (constant)) is generated during
11601 push_reload processing, so handle it now. */
11602 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11603 {
11604 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11605 {
11606 extra_cost = 1;
11607 type = "offset";
11608 }
11609 }
11610
11611 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11612 push_reload processing, so handle it now. */
11613 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11614 {
11615 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11616 {
11617 extra_cost = 1;
11618 type = "indexed #2";
11619 }
11620 }
11621
11622 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11623 {
11624 fail_msg = "no base register #2";
11625 extra_cost = -1;
11626 }
11627
11628 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11629 {
11630 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11631 || !legitimate_indexed_address_p (addr, false))
11632 {
11633 extra_cost = 1;
11634 type = "indexed";
11635 }
11636 }
11637
11638 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11639 && CONST_INT_P (plus_arg1))
11640 {
11641 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11642 {
11643 extra_cost = 1;
11644 type = "vector d-form offset";
11645 }
11646 }
11647
11648 /* Make sure the register class can handle offset addresses. */
11649 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11650 {
11651 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11652 {
11653 extra_cost = 1;
11654 type = "offset #2";
11655 }
11656 }
11657
11658 else
11659 {
11660 fail_msg = "bad PLUS";
11661 extra_cost = -1;
11662 }
11663
11664 break;
11665
11666 case LO_SUM:
11667 /* Quad offsets are restricted and can't handle normal addresses. */
11668 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11669 {
11670 extra_cost = -1;
11671 type = "vector d-form lo_sum";
11672 }
11673
11674 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11675 {
11676 fail_msg = "bad LO_SUM";
11677 extra_cost = -1;
11678 }
11679
11680 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11681 {
11682 extra_cost = 1;
11683 type = "lo_sum";
11684 }
11685 break;
11686
11687 /* Static addresses need to create a TOC entry. */
11688 case CONST:
11689 case SYMBOL_REF:
11690 case LABEL_REF:
11691 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11692 {
11693 extra_cost = -1;
11694 type = "vector d-form lo_sum #2";
11695 }
11696
11697 else
11698 {
11699 type = "address";
11700 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11701 }
11702 break;
11703
11704 /* TOC references look like offsetable memory. */
11705 case UNSPEC:
11706 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11707 {
11708 fail_msg = "bad UNSPEC";
11709 extra_cost = -1;
11710 }
11711
11712 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11713 {
11714 extra_cost = -1;
11715 type = "vector d-form lo_sum #3";
11716 }
11717
11718 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11719 {
11720 extra_cost = 1;
11721 type = "toc reference";
11722 }
11723 break;
11724
11725 default:
11726 {
11727 fail_msg = "bad address";
11728 extra_cost = -1;
11729 }
11730 }
11731
11732 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11733 {
11734 if (extra_cost < 0)
11735 fprintf (stderr,
11736 "rs6000_secondary_reload_memory error: mode = %s, "
11737 "class = %s, addr_mask = '%s', %s\n",
11738 GET_MODE_NAME (mode),
11739 reg_class_names[rclass],
11740 rs6000_debug_addr_mask (addr_mask, false),
11741 (fail_msg != NULL) ? fail_msg : "<bad address>");
11742
11743 else
11744 fprintf (stderr,
11745 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11746 "addr_mask = '%s', extra cost = %d, %s\n",
11747 GET_MODE_NAME (mode),
11748 reg_class_names[rclass],
11749 rs6000_debug_addr_mask (addr_mask, false),
11750 extra_cost,
11751 (type) ? type : "<none>");
11752
11753 debug_rtx (addr);
11754 }
11755
11756 return extra_cost;
11757 }
11758
11759 /* Helper function for rs6000_secondary_reload to return true if a move to a
11760 different register class is really a simple move. */
11761
11762 static bool
11763 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11764 enum rs6000_reg_type from_type,
11765 machine_mode mode)
11766 {
11767 int size = GET_MODE_SIZE (mode);
11768
11769 /* Add support for various direct moves available. In this function, we only
11770 look at cases where we don't need any extra registers, and one or more
11771 simple move insns are issued. Originally small integers are not allowed
11772 in FPR/VSX registers. Single precision binary floating is not a simple
11773 move because we need to convert to the single precision memory layout.
11774 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11775 need special direct move handling, which we do not support yet. */
11776 if (TARGET_DIRECT_MOVE
11777 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11778 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11779 {
11780 if (TARGET_POWERPC64)
11781 {
11782 /* ISA 2.07: MTVSRD or MFVSRD. */
11783 if (size == 8)
11784 return true;
11785
11786 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11787 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11788 return true;
11789 }
11790
11791 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11792 if (TARGET_P8_VECTOR)
11793 {
11794 if (mode == SImode)
11795 return true;
11796
11797 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11798 return true;
11799 }
11800
11801 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11802 if (mode == SDmode)
11803 return true;
11804 }
11805
11806 /* Move to/from SPR. */
11807 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11808 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11809 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11810 return true;
11811
11812 return false;
11813 }
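
/* For example: a DImode move between a GPR and a VSX register on a 64-bit
   ISA 2.07 target is a single mtvsrd or mfvsrd, so it is "simple", while
   an SFmode move between the same register files is not, since the value
   must also be converted between register and memory formats. */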
11814
11815 /* Direct move helper function for rs6000_secondary_reload, handle all of the
11816 special direct moves that involve allocating an extra register. Return true
11817 if such a helper function exists, recording its insn code and extra cost in
11818 SRI; return false if not. */
11819
11820 static bool
11821 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11822 enum rs6000_reg_type from_type,
11823 machine_mode mode,
11824 secondary_reload_info *sri,
11825 bool altivec_p)
11826 {
11827 bool ret = false;
11828 enum insn_code icode = CODE_FOR_nothing;
11829 int cost = 0;
11830 int size = GET_MODE_SIZE (mode);
11831
11832 if (TARGET_POWERPC64 && size == 16)
11833 {
11834 /* Handle moving 128-bit values from GPRs to VSX registers on
11835 ISA 2.07 (power8, power9) when running in 64-bit mode using
11836 XXPERMDI to glue the two 64-bit values back together. */
11837 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11838 {
11839 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11840 icode = reg_addr[mode].reload_vsx_gpr;
11841 }
11842
11843 /* Handle moving 128-bit values from VSX registers to GPRs on
11844 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11845 bottom 64-bit value. */
11846 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11847 {
11848 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11849 icode = reg_addr[mode].reload_gpr_vsx;
11850 }
11851 }
11852
11853 else if (TARGET_POWERPC64 && mode == SFmode)
11854 {
11855 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11856 {
11857 cost = 3; /* xscvdpspn, mfvsrd, and. */
11858 icode = reg_addr[mode].reload_gpr_vsx;
11859 }
11860
11861 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11862 {
11863 cost = 2; /* mtvsrwz, xscvspdpn. */
11864 icode = reg_addr[mode].reload_vsx_gpr;
11865 }
11866 }
11867
11868 else if (!TARGET_POWERPC64 && size == 8)
11869 {
11870 /* Handle moving 64-bit values from GPRs to floating point registers on
11871 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11872 32-bit values back together. Altivec register classes must be handled
11873 specially since a different instruction is used, and the secondary
11874 reload support requires a single instruction class in the scratch
11875 register constraint. However, right now TFmode is not allowed in
11876 Altivec registers, so the pattern will never match. */
11877 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11878 {
11879 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11880 icode = reg_addr[mode].reload_fpr_gpr;
11881 }
11882 }
11883
11884 if (icode != CODE_FOR_nothing)
11885 {
11886 ret = true;
11887 if (sri)
11888 {
11889 sri->icode = icode;
11890 sri->extra_cost = cost;
11891 }
11892 }
11893
11894 return ret;
11895 }
11896
11897 /* Return whether a move between two register classes can be done either
11898 directly (simple move) or via a pattern that uses a single extra temporary
11899 (using ISA 2.07's direct move in this case). */
11900
11901 static bool
11902 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11903 enum rs6000_reg_type from_type,
11904 machine_mode mode,
11905 secondary_reload_info *sri,
11906 bool altivec_p)
11907 {
11908 /* Fall back to load/store reloads if either type is not a register. */
11909 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11910 return false;
11911
11912 /* If we haven't allocated registers yet, assume the move can be done for the
11913 standard register types. */
11914 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11915 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11916 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11917 return true;
11918
11919 /* A move within the same set of registers is a simple move for non-specialized
11920 registers. */
11921 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11922 return true;
11923
11924 /* Check whether a simple move can be done directly. */
11925 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11926 {
11927 if (sri)
11928 {
11929 sri->icode = CODE_FOR_nothing;
11930 sri->extra_cost = 0;
11931 }
11932 return true;
11933 }
11934
11935 /* Now check if we can do it in a few steps. */
11936 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11937 altivec_p);
11938 }
11939
11940 /* Inform reload about cases where moving X with a mode MODE to a register in
11941 RCLASS requires an extra scratch or immediate register. Return the class
11942 needed for the immediate register.
11943
11944 For VSX and Altivec, we may need a register to convert sp+offset into
11945 reg+sp.
11946
11947 For misaligned 64-bit gpr loads and stores we need a register to
11948 convert an offset address to indirect. */
11949
11950 static reg_class_t
11951 rs6000_secondary_reload (bool in_p,
11952 rtx x,
11953 reg_class_t rclass_i,
11954 machine_mode mode,
11955 secondary_reload_info *sri)
11956 {
11957 enum reg_class rclass = (enum reg_class) rclass_i;
11958 reg_class_t ret = ALL_REGS;
11959 enum insn_code icode;
11960 bool default_p = false;
11961 bool done_p = false;
11962
11963 /* Allow subreg of memory before/during reload. */
11964 bool memory_p = (MEM_P (x)
11965 || (!reload_completed && SUBREG_P (x)
11966 && MEM_P (SUBREG_REG (x))));
11967
11968 sri->icode = CODE_FOR_nothing;
11969 sri->t_icode = CODE_FOR_nothing;
11970 sri->extra_cost = 0;
11971 icode = ((in_p)
11972 ? reg_addr[mode].reload_load
11973 : reg_addr[mode].reload_store);
11974
11975 if (REG_P (x) || register_operand (x, mode))
11976 {
11977 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11978 bool altivec_p = (rclass == ALTIVEC_REGS);
11979 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11980
11981 if (!in_p)
11982 std::swap (to_type, from_type);
11983
11984 /* Can we do a direct move of some sort? */
11985 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11986 altivec_p))
11987 {
11988 icode = (enum insn_code)sri->icode;
11989 default_p = false;
11990 done_p = true;
11991 ret = NO_REGS;
11992 }
11993 }
11994
11995 /* Make sure 0.0 is not reloaded or forced into memory. */
11996 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11997 {
11998 ret = NO_REGS;
11999 default_p = false;
12000 done_p = true;
12001 }
12002
12003 /* If this is a scalar floating point value and we want to load it into the
12004 traditional Altivec registers, do it via a move via a traditional floating
12005 point register, unless we have D-form addressing. Also make sure that
12006 non-zero constants use a FPR. */
12007 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12008 && !mode_supports_vmx_dform (mode)
12009 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12010 && (memory_p || CONST_DOUBLE_P (x)))
12011 {
12012 ret = FLOAT_REGS;
12013 default_p = false;
12014 done_p = true;
12015 }
12016
12017 /* Handle reload of load/stores if we have reload helper functions. */
12018 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12019 {
12020 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12021 mode);
12022
12023 if (extra_cost >= 0)
12024 {
12025 done_p = true;
12026 ret = NO_REGS;
12027 if (extra_cost > 0)
12028 {
12029 sri->extra_cost = extra_cost;
12030 sri->icode = icode;
12031 }
12032 }
12033 }
12034
12035 /* Handle unaligned loads and stores of integer registers. */
12036 if (!done_p && TARGET_POWERPC64
12037 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12038 && memory_p
12039 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12040 {
12041 rtx addr = XEXP (x, 0);
12042 rtx off = address_offset (addr);
12043
12044 if (off != NULL_RTX)
12045 {
12046 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12047 unsigned HOST_WIDE_INT offset = INTVAL (off);
12048
12049 /* We need a secondary reload when our legitimate_address_p
12050 says the address is good (as otherwise the entire address
12051 will be reloaded), and the offset is not a multiple of
12052 four or we have an address wrap. Address wrap will only
12053 occur for LO_SUMs since legitimate_offset_address_p
12054 rejects addresses for 16-byte mems that will wrap. */
12055 if (GET_CODE (addr) == LO_SUM
12056 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12057 && ((offset & 3) != 0
12058 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12059 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12060 && (offset & 3) != 0))
12061 {
12062 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12063 if (in_p)
12064 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12065 : CODE_FOR_reload_di_load);
12066 else
12067 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12068 : CODE_FOR_reload_di_store);
12069 sri->extra_cost = 2;
12070 ret = NO_REGS;
12071 done_p = true;
12072 }
12073 else
12074 default_p = true;
12075 }
12076 else
12077 default_p = true;
12078 }
12079
12080 if (!done_p && !TARGET_POWERPC64
12081 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12082 && memory_p
12083 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12084 {
12085 rtx addr = XEXP (x, 0);
12086 rtx off = address_offset (addr);
12087
12088 if (off != NULL_RTX)
12089 {
12090 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12091 unsigned HOST_WIDE_INT offset = INTVAL (off);
12092
12093 /* We need a secondary reload when our legitimate_address_p
12094 says the address is good (as otherwise the entire address
12095 will be reloaded), and we have a wrap.
12096
12097 legitimate_lo_sum_address_p allows LO_SUM addresses to
12098 have any offset so test for wrap in the low 16 bits.
12099
12100 legitimate_offset_address_p checks for the range
12101 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12102 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12103 [0x7ff4,0x7fff] respectively, so test for the
12104 intersection of these ranges, [0x7ffc,0x7fff] and
12105 [0x7ff4,0x7ff7] respectively.
12106
12107 Note that the address we see here may have been
12108 manipulated by legitimize_reload_address. */
12109 if (GET_CODE (addr) == LO_SUM
12110 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12111 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12112 {
12113 if (in_p)
12114 sri->icode = CODE_FOR_reload_si_load;
12115 else
12116 sri->icode = CODE_FOR_reload_si_store;
12117 sri->extra_cost = 2;
12118 ret = NO_REGS;
12119 done_p = true;
12120 }
12121 else
12122 default_p = true;
12123 }
12124 else
12125 default_p = true;
12126 }
12127
12128 if (!done_p)
12129 default_p = true;
12130
12131 if (default_p)
12132 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12133
12134 gcc_assert (ret != ALL_REGS);
12135
12136 if (TARGET_DEBUG_ADDR)
12137 {
12138 fprintf (stderr,
12139 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12140 "mode = %s",
12141 reg_class_names[ret],
12142 in_p ? "true" : "false",
12143 reg_class_names[rclass],
12144 GET_MODE_NAME (mode));
12145
12146 if (reload_completed)
12147 fputs (", after reload", stderr);
12148
12149 if (!done_p)
12150 fputs (", done_p not set", stderr);
12151
12152 if (default_p)
12153 fputs (", default secondary reload", stderr);
12154
12155 if (sri->icode != CODE_FOR_nothing)
12156 fprintf (stderr, ", reload func = %s, extra cost = %d",
12157 insn_data[sri->icode].name, sri->extra_cost);
12158
12159 else if (sri->extra_cost > 0)
12160 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12161
12162 fputs ("\n", stderr);
12163 debug_rtx (x);
12164 }
12165
12166 return ret;
12167 }
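
/* For illustration (hypothetical address, assuming no mode-specific reload
   helper claimed it first): loading a DImode GPR from
   (mem:DI (plus (reg) (const_int 6))) on a 64-bit target has
   (offset & 3) != 0, so the code above picks CODE_FOR_reload_di_load
   with extra_cost = 2 and returns NO_REGS; reload then supplies the
   scratch register that the reload_di_load pattern needs. */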
12168
12169 /* Better tracing for rs6000_secondary_reload_inner. */
12170
12171 static void
12172 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12173 bool store_p)
12174 {
12175 rtx set, clobber;
12176
12177 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12178
12179 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12180 store_p ? "store" : "load");
12181
12182 if (store_p)
12183 set = gen_rtx_SET (mem, reg);
12184 else
12185 set = gen_rtx_SET (reg, mem);
12186
12187 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12188 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12189 }
12190
12191 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12192 ATTRIBUTE_NORETURN;
12193
12194 static void
12195 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12196 bool store_p)
12197 {
12198 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12199 gcc_unreachable ();
12200 }
12201
12202 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12203 reload helper functions. These were identified in
12204 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12205 reload, it calls the insns:
12206 reload_<RELOAD:mode>_<P:mptrsize>_store
12207 reload_<RELOAD:mode>_<P:mptrsize>_load
12208
12209 which in turn calls this function, to do whatever is necessary to create
12210 valid addresses. */
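/* For example (illustrative; which patterns exist depends on the
configured target), a V2DF reload with a DImode pointer would go
through reload_v2df_di_store and reload_v2df_di_load, both of which
expand to a call of rs6000_secondary_reload_inner.  */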
12211
12212 void
12213 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12214 {
12215 int regno = true_regnum (reg);
12216 machine_mode mode = GET_MODE (reg);
12217 addr_mask_type addr_mask;
12218 rtx addr;
12219 rtx new_addr;
12220 rtx op_reg, op0, op1;
12221 rtx and_op;
12222 rtx cc_clobber;
12223 rtvec rv;
12224
12225 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12226 || !base_reg_operand (scratch, GET_MODE (scratch)))
12227 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12228
12229 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12230 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12231
12232 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12233 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12234
12235 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12236 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12237
12238 else
12239 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12240
12241 /* Make sure the mode is valid in this register class. */
12242 if ((addr_mask & RELOAD_REG_VALID) == 0)
12243 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12244
12245 if (TARGET_DEBUG_ADDR)
12246 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12247
12248 new_addr = addr = XEXP (mem, 0);
12249 switch (GET_CODE (addr))
12250 {
12251 /* Does the register class support auto update forms for this mode? If
12252 not, do the update now. We don't need a scratch register, since the
12253 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
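/* A sketch of the rewrite (illustrative): a load from
(mem:DF (pre_inc (reg r9))) in a register class without update forms
becomes r9 = r9 + 8 followed by a load from (mem:DF (reg r9)).  */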
12254 case PRE_INC:
12255 case PRE_DEC:
12256 op_reg = XEXP (addr, 0);
12257 if (!base_reg_operand (op_reg, Pmode))
12258 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12259
12260 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12261 {
12262 int delta = GET_MODE_SIZE (mode);
12263 if (GET_CODE (addr) == PRE_DEC)
12264 delta = -delta;
12265 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12266 new_addr = op_reg;
12267 }
12268 break;
12269
12270 case PRE_MODIFY:
12271 op0 = XEXP (addr, 0);
12272 op1 = XEXP (addr, 1);
12273 if (!base_reg_operand (op0, Pmode)
12274 || GET_CODE (op1) != PLUS
12275 || !rtx_equal_p (op0, XEXP (op1, 0)))
12276 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12277
12278 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12279 {
12280 emit_insn (gen_rtx_SET (op0, op1));
12281 new_addr = reg;
12282 }
12283 break;
12284
12285 /* Do we need to simulate AND -16 to clear the bottom address bits used
12286 in VMX load/stores? */
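/* A sketch of the rewrite (illustrative): for
(mem:V4SI (and (reg r9) (const_int -16))) we emit scratch = r9 & -16
as a PARALLEL with a condition-code scratch clobber, to match the AND
patterns, and then address the memory through the scratch register.  */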
12287 case AND:
12288 op0 = XEXP (addr, 0);
12289 op1 = XEXP (addr, 1);
12290 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12291 {
12292 if (REG_P (op0) || SUBREG_P (op0))
12293 op_reg = op0;
12294
12295 else if (GET_CODE (op1) == PLUS)
12296 {
12297 emit_insn (gen_rtx_SET (scratch, op1));
12298 op_reg = scratch;
12299 }
12300
12301 else
12302 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12303
12304 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12305 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12306 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12307 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12308 new_addr = scratch;
12309 }
12310 break;
12311
12312 /* If this is an indirect address, make sure it is a base register. */
12313 case REG:
12314 case SUBREG:
12315 if (!base_reg_operand (addr, GET_MODE (addr)))
12316 {
12317 emit_insn (gen_rtx_SET (scratch, addr));
12318 new_addr = scratch;
12319 }
12320 break;
12321
12322 /* If this is an indexed address, make sure the register class can handle
12323 indexed addresses for this mode. */
12324 case PLUS:
12325 op0 = XEXP (addr, 0);
12326 op1 = XEXP (addr, 1);
12327 if (!base_reg_operand (op0, Pmode))
12328 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12329
12330 else if (int_reg_operand (op1, Pmode))
12331 {
12332 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12333 {
12334 emit_insn (gen_rtx_SET (scratch, addr));
12335 new_addr = scratch;
12336 }
12337 }
12338
12339 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12340 {
12341 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12342 || !quad_address_p (addr, mode, false))
12343 {
12344 emit_insn (gen_rtx_SET (scratch, addr));
12345 new_addr = scratch;
12346 }
12347 }
12348
12349 /* Make sure the register class can handle offset addresses. */
12350 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12351 {
12352 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12353 {
12354 emit_insn (gen_rtx_SET (scratch, addr));
12355 new_addr = scratch;
12356 }
12357 }
12358
12359 else
12360 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12361
12362 break;
12363
12364 case LO_SUM:
12365 op0 = XEXP (addr, 0);
12366 op1 = XEXP (addr, 1);
12367 if (!base_reg_operand (op0, Pmode))
12368 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12369
12370 else if (int_reg_operand (op1, Pmode))
12371 {
12372 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12373 {
12374 emit_insn (gen_rtx_SET (scratch, addr));
12375 new_addr = scratch;
12376 }
12377 }
12378
12379 /* Quad offsets are restricted and can't handle normal addresses. */
12380 else if (mode_supports_dq_form (mode))
12381 {
12382 emit_insn (gen_rtx_SET (scratch, addr));
12383 new_addr = scratch;
12384 }
12385
12386 /* Make sure the register class can handle offset addresses. */
12387 else if (legitimate_lo_sum_address_p (mode, addr, false))
12388 {
12389 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12390 {
12391 emit_insn (gen_rtx_SET (scratch, addr));
12392 new_addr = scratch;
12393 }
12394 }
12395
12396 else
12397 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12398
12399 break;
12400
12401 case SYMBOL_REF:
12402 case CONST:
12403 case LABEL_REF:
12404 rs6000_emit_move (scratch, addr, Pmode);
12405 new_addr = scratch;
12406 break;
12407
12408 default:
12409 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12410 }
12411
12412 /* Adjust the address if it changed. */
12413 if (addr != new_addr)
12414 {
12415 mem = replace_equiv_address_nv (mem, new_addr);
12416 if (TARGET_DEBUG_ADDR)
12417 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12418 }
12419
12420 /* Now create the move. */
12421 if (store_p)
12422 emit_insn (gen_rtx_SET (mem, reg));
12423 else
12424 emit_insn (gen_rtx_SET (reg, mem));
12425
12426 return;
12427 }
12428
12429 /* Convert reloads involving 64-bit gprs and misaligned offset
12430 addressing, or multiple 32-bit gprs and offsets that are too large,
12431 to use indirect addressing. */
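/* For example (illustrative, 64-bit): "ld 5,6(9)" is not encodable,
because the DS instruction format used by 64-bit loads and stores
requires the low two bits of the offset to be zero, so the address
r9+6 is computed into the scratch register and the access becomes
"ld 5,0(scratch)".  */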
12432
12433 void
12434 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
12435 {
12436 int regno = true_regnum (reg);
12437 enum reg_class rclass;
12438 rtx addr;
12439 rtx scratch_or_premodify = scratch;
12440
12441 if (TARGET_DEBUG_ADDR)
12442 {
12443 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
12444 store_p ? "store" : "load");
12445 fprintf (stderr, "reg:\n");
12446 debug_rtx (reg);
12447 fprintf (stderr, "mem:\n");
12448 debug_rtx (mem);
12449 fprintf (stderr, "scratch:\n");
12450 debug_rtx (scratch);
12451 }
12452
12453 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
12454 gcc_assert (MEM_P (mem));
12455 rclass = REGNO_REG_CLASS (regno);
12456 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
12457 addr = XEXP (mem, 0);
12458
12459 if (GET_CODE (addr) == PRE_MODIFY)
12460 {
12461 gcc_assert (REG_P (XEXP (addr, 0))
12462 && GET_CODE (XEXP (addr, 1)) == PLUS
12463 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
12464 scratch_or_premodify = XEXP (addr, 0);
12465 addr = XEXP (addr, 1);
12466 }
12467 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
12468
12469 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
12470
12471 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
12472
12473 /* Now create the move. */
12474 if (store_p)
12475 emit_insn (gen_rtx_SET (mem, reg));
12476 else
12477 emit_insn (gen_rtx_SET (reg, mem));
12478
12479 return;
12480 }
12481
12482 /* Given an rtx X being reloaded into a reg required to be
12483 in class CLASS, return the class of reg to actually use.
12484 In general this is just CLASS; but on some machines
12485 in some cases it is preferable to use a more restrictive class.
12486
12487 On the RS/6000, we have to return NO_REGS when we want to reload a
12488 floating-point CONST_DOUBLE to force it to be copied to memory.
12489
12490 We also don't want to reload integer values into floating-point
12491 registers if we can at all help it. In fact, this can
12492 cause reload to die, if it tries to generate a reload of CTR
12493 into a FP register and discovers it doesn't have the memory location
12494 required.
12495
12496 ??? Would it be a good idea to have reload do the converse, that is
12497 try to reload floating modes into FP registers if possible?
12498 */
12499
12500 static enum reg_class
12501 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12502 {
12503 machine_mode mode = GET_MODE (x);
12504 bool is_constant = CONSTANT_P (x);
12505
12506 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12507 reload class for it. */
12508 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12509 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12510 return NO_REGS;
12511
12512 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12513 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12514 return NO_REGS;
12515
12516 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12517 the reloading of address expressions using PLUS into floating point
12518 registers. */
12519 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12520 {
12521 if (is_constant)
12522 {
12523 /* Zero is always allowed in all VSX registers. */
12524 if (x == CONST0_RTX (mode))
12525 return rclass;
12526
12527 /* If this is a vector constant that can be formed with a few Altivec
12528 instructions, we want altivec registers. */
12529 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12530 return ALTIVEC_REGS;
12531
12532 /* If this is an integer constant that can easily be loaded into
12533 vector registers, allow it. */
12534 if (CONST_INT_P (x))
12535 {
12536 HOST_WIDE_INT value = INTVAL (x);
12537
12538 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12539 2.06 can generate it in the Altivec registers with
12540 VSPLTI<x>. */
12541 if (value == -1)
12542 {
12543 if (TARGET_P8_VECTOR)
12544 return rclass;
12545 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12546 return ALTIVEC_REGS;
12547 else
12548 return NO_REGS;
12549 }
12550
12551 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12552 a sign extend in the Altivec registers. */
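/* For example (illustrative): a DImode 100 can be produced with
xxspltib to splat the byte value across the register followed by a
sign-extending vextsb2d, avoiding a load from the constant pool.  */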
12553 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12554 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12555 return ALTIVEC_REGS;
12556 }
12557
12558 /* Force constant to memory. */
12559 return NO_REGS;
12560 }
12561
12562 /* D-form addressing can easily reload the value. */
12563 if (mode_supports_vmx_dform (mode)
12564 || mode_supports_dq_form (mode))
12565 return rclass;
12566
12567 /* If this is a scalar floating point value and we don't have D-form
12568 addressing, prefer the traditional floating point registers so that we
12569 can use D-form (register+offset) addressing. */
12570 if (rclass == VSX_REGS
12571 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12572 return FLOAT_REGS;
12573
12574 /* Prefer the Altivec registers if Altivec is handling the vector
12575 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12576 loads. */
12577 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12578 || mode == V1TImode)
12579 return ALTIVEC_REGS;
12580
12581 return rclass;
12582 }
12583
12584 if (is_constant || GET_CODE (x) == PLUS)
12585 {
12586 if (reg_class_subset_p (GENERAL_REGS, rclass))
12587 return GENERAL_REGS;
12588 if (reg_class_subset_p (BASE_REGS, rclass))
12589 return BASE_REGS;
12590 return NO_REGS;
12591 }
12592
12593 /* For the vector pair and vector quad modes, prefer their natural register
12594 (VSX or FPR) rather than GPR registers. For other integer types, prefer
12595 the GPR registers. */
12596 if (rclass == GEN_OR_FLOAT_REGS)
12597 {
12598 if (mode == OOmode)
12599 return VSX_REGS;
12600
12601 if (mode == XOmode)
12602 return FLOAT_REGS;
12603
12604 if (GET_MODE_CLASS (mode) == MODE_INT)
12605 return GENERAL_REGS;
12606 }
12607
12608 return rclass;
12609 }
12610
12611 /* Debug version of rs6000_preferred_reload_class. */
12612 static enum reg_class
12613 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12614 {
12615 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12616
12617 fprintf (stderr,
12618 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12619 "mode = %s, x:\n",
12620 reg_class_names[ret], reg_class_names[rclass],
12621 GET_MODE_NAME (GET_MODE (x)));
12622 debug_rtx (x);
12623
12624 return ret;
12625 }
12626
12627 /* If we are copying between FP or AltiVec registers and anything else, we need
12628 a memory location. The exception is when we are targeting ppc64 and the
12629 direct move instructions between FPRs and GPRs are available.  Also, under VSX, you
12630 can copy vector registers from the FP register set to the Altivec register
12631 set and vice versa. */
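/* A sketch of what needing secondary memory implies (illustrative):
an SImode copy from an FPR to a GPR without direct moves is done as a
store of the FPR to a stack slot followed by a load of that slot into
the GPR.  */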
12632
12633 static bool
12634 rs6000_secondary_memory_needed (machine_mode mode,
12635 reg_class_t from_class,
12636 reg_class_t to_class)
12637 {
12638 enum rs6000_reg_type from_type, to_type;
12639 bool altivec_p = ((from_class == ALTIVEC_REGS)
12640 || (to_class == ALTIVEC_REGS));
12641
12642 /* If a simple/direct move is available, we don't need secondary memory.  */
12643 from_type = reg_class_to_reg_type[(int)from_class];
12644 to_type = reg_class_to_reg_type[(int)to_class];
12645
12646 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12647 (secondary_reload_info *)0, altivec_p))
12648 return false;
12649
12650 /* If we have a floating point or vector register class, we need to use
12651 memory to transfer the data. */
12652 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12653 return true;
12654
12655 return false;
12656 }
12657
12658 /* Debug version of rs6000_secondary_memory_needed. */
12659 static bool
12660 rs6000_debug_secondary_memory_needed (machine_mode mode,
12661 reg_class_t from_class,
12662 reg_class_t to_class)
12663 {
12664 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12665
12666 fprintf (stderr,
12667 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12668 "to_class = %s, mode = %s\n",
12669 ret ? "true" : "false",
12670 reg_class_names[from_class],
12671 reg_class_names[to_class],
12672 GET_MODE_NAME (mode));
12673
12674 return ret;
12675 }
12676
12677 /* Return the register class of a scratch register needed to copy IN into
12678 or out of a register in RCLASS in MODE. If it can be done directly,
12679 NO_REGS is returned. */
12680
12681 static enum reg_class
12682 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12683 rtx in)
12684 {
12685 int regno;
12686
12687 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12688 #if TARGET_MACHO
12689 && MACHOPIC_INDIRECT
12690 #endif
12691 ))
12692 {
12693 /* We cannot copy a symbolic operand directly into anything
12694 other than BASE_REGS for TARGET_ELF. So indicate that a
12695 register from BASE_REGS is needed as an intermediate
12696 register.
12697
12698 On Darwin, pic addresses require a load from memory, which
12699 needs a base register. */
12700 if (rclass != BASE_REGS
12701 && (SYMBOL_REF_P (in)
12702 || GET_CODE (in) == HIGH
12703 || GET_CODE (in) == LABEL_REF
12704 || GET_CODE (in) == CONST))
12705 return BASE_REGS;
12706 }
12707
12708 if (REG_P (in))
12709 {
12710 regno = REGNO (in);
12711 if (!HARD_REGISTER_NUM_P (regno))
12712 {
12713 regno = true_regnum (in);
12714 if (!HARD_REGISTER_NUM_P (regno))
12715 regno = -1;
12716 }
12717 }
12718 else if (SUBREG_P (in))
12719 {
12720 regno = true_regnum (in);
12721 if (!HARD_REGISTER_NUM_P (regno))
12722 regno = -1;
12723 }
12724 else
12725 regno = -1;
12726
12727 /* If we have VSX register moves, prefer moving scalar values between
12728 Altivec registers and GPR by going via an FPR (and then via memory)
12729 instead of reloading the secondary memory address for Altivec moves. */
12730 if (TARGET_VSX
12731 && GET_MODE_SIZE (mode) < 16
12732 && !mode_supports_vmx_dform (mode)
12733 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12734 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12735 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12736 && (regno >= 0 && INT_REGNO_P (regno)))))
12737 return FLOAT_REGS;
12738
12739 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12740 into anything. */
12741 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12742 || (regno >= 0 && INT_REGNO_P (regno)))
12743 return NO_REGS;
12744
12745 /* Constants, memory, and VSX registers can go into VSX registers (both the
12746 traditional floating point and the altivec registers). */
12747 if (rclass == VSX_REGS
12748 && (regno == -1 || VSX_REGNO_P (regno)))
12749 return NO_REGS;
12750
12751 /* Constants, memory, and FP registers can go into FP registers. */
12752 if ((regno == -1 || FP_REGNO_P (regno))
12753 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12754 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12755
12756 /* Memory and AltiVec registers can go into AltiVec registers.  */
12757 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12758 && rclass == ALTIVEC_REGS)
12759 return NO_REGS;
12760
12761 /* We can copy among the CR registers. */
12762 if ((rclass == CR_REGS || rclass == CR0_REGS)
12763 && regno >= 0 && CR_REGNO_P (regno))
12764 return NO_REGS;
12765
12766 /* Otherwise, we need GENERAL_REGS. */
12767 return GENERAL_REGS;
12768 }
12769
12770 /* Debug version of rs6000_secondary_reload_class. */
12771 static enum reg_class
12772 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12773 machine_mode mode, rtx in)
12774 {
12775 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12776 fprintf (stderr,
12777 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12778 "mode = %s, input rtx:\n",
12779 reg_class_names[ret], reg_class_names[rclass],
12780 GET_MODE_NAME (mode));
12781 debug_rtx (in);
12782
12783 return ret;
12784 }
12785
12786 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12787
12788 static bool
12789 rs6000_can_change_mode_class (machine_mode from,
12790 machine_mode to,
12791 reg_class_t rclass)
12792 {
12793 unsigned from_size = GET_MODE_SIZE (from);
12794 unsigned to_size = GET_MODE_SIZE (to);
12795
12796 if (from_size != to_size)
12797 {
12798 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12799
12800 if (reg_classes_intersect_p (xclass, rclass))
12801 {
12802 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12803 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12804 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12805 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12806
12807 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12808 single register under VSX because the scalar part of the register
12809 is in the upper 64-bits, and not the lower 64-bits. Types like
12810 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12811 IEEE floating point can't overlap, and neither can small
12812 values. */
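/* For example (illustrative): (subreg:DI (reg:KF vs0) 0) is rejected
here, because the 64-bit scalar lives in the upper half of the
128-bit VSX register, so the subreg would not name the bytes that
subreg semantics promise.  */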
12813
12814 if (to_float128_vector_p && from_float128_vector_p)
12815 return true;
12816
12817 else if (to_float128_vector_p || from_float128_vector_p)
12818 return false;
12819
12820 /* TDmode in floating-mode registers must always go into a register
12821 pair with the most significant word in the even-numbered register
12822 to match ISA requirements. In little-endian mode, this does not
12823 match subreg numbering, so we cannot allow subregs. */
12824 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12825 return false;
12826
12827 /* Allow SD<->DD changes, since SDmode values are stored in
12828 the low half of the DDmode, just like target-independent
12829 code expects. We need to allow at least SD->DD since
12830 rs6000_secondary_memory_needed_mode asks for that change
12831 to be made for SD reloads. */
12832 if ((to == DDmode && from == SDmode)
12833 || (to == SDmode && from == DDmode))
12834 return true;
12835
12836 if (from_size < 8 || to_size < 8)
12837 return false;
12838
12839 if (from_size == 8 && (8 * to_nregs) != to_size)
12840 return false;
12841
12842 if (to_size == 8 && (8 * from_nregs) != from_size)
12843 return false;
12844
12845 return true;
12846 }
12847 else
12848 return true;
12849 }
12850
12851 /* Since the VSX register set includes traditional floating point registers
12852 and altivec registers, just check for the size being different instead of
12853 trying to check whether the modes are vector modes. Otherwise it won't
12854 allow say DF and DI to change classes. For types like TFmode and TDmode
12855 that take 2 64-bit registers, rather than a single 128-bit register, don't
12856 allow subregs of those types to other 128-bit types. */
12857 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12858 {
12859 unsigned num_regs = (from_size + 15) / 16;
12860 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12861 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12862 return false;
12863
12864 return (from_size == 8 || from_size == 16);
12865 }
12866
12867 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12868 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12869 return false;
12870
12871 return true;
12872 }
12873
12874 /* Debug version of rs6000_can_change_mode_class. */
12875 static bool
12876 rs6000_debug_can_change_mode_class (machine_mode from,
12877 machine_mode to,
12878 reg_class_t rclass)
12879 {
12880 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12881
12882 fprintf (stderr,
12883 "rs6000_can_change_mode_class, return %s, from = %s, "
12884 "to = %s, rclass = %s\n",
12885 ret ? "true" : "false",
12886 GET_MODE_NAME (from), GET_MODE_NAME (to),
12887 reg_class_names[rclass]);
12888
12889 return ret;
12890 }
12891 \f
12892 /* Return a string to do a move operation of 128 bits of data. */
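/* For example (illustrative): a VSX-to-VSX register copy returns
"xxlor %x0,%x1,%x1", while a GPR-to-GPR copy returns "#" so that the
move is split into word-sized pieces after reload.  */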
12893
12894 const char *
12895 rs6000_output_move_128bit (rtx operands[])
12896 {
12897 rtx dest = operands[0];
12898 rtx src = operands[1];
12899 machine_mode mode = GET_MODE (dest);
12900 int dest_regno;
12901 int src_regno;
12902 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12903 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12904
12905 if (REG_P (dest))
12906 {
12907 dest_regno = REGNO (dest);
12908 dest_gpr_p = INT_REGNO_P (dest_regno);
12909 dest_fp_p = FP_REGNO_P (dest_regno);
12910 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12911 dest_vsx_p = dest_fp_p | dest_vmx_p;
12912 }
12913 else
12914 {
12915 dest_regno = -1;
12916 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12917 }
12918
12919 if (REG_P (src))
12920 {
12921 src_regno = REGNO (src);
12922 src_gpr_p = INT_REGNO_P (src_regno);
12923 src_fp_p = FP_REGNO_P (src_regno);
12924 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12925 src_vsx_p = src_fp_p | src_vmx_p;
12926 }
12927 else
12928 {
12929 src_regno = -1;
12930 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12931 }
12932
12933 /* Register moves. */
12934 if (dest_regno >= 0 && src_regno >= 0)
12935 {
12936 if (dest_gpr_p)
12937 {
12938 if (src_gpr_p)
12939 return "#";
12940
12941 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12942 return (WORDS_BIG_ENDIAN
12943 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12944 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12945
12946 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12947 return "#";
12948 }
12949
12950 else if (TARGET_VSX && dest_vsx_p)
12951 {
12952 if (src_vsx_p)
12953 return "xxlor %x0,%x1,%x1";
12954
12955 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12956 return (WORDS_BIG_ENDIAN
12957 ? "mtvsrdd %x0,%1,%L1"
12958 : "mtvsrdd %x0,%L1,%1");
12959
12960 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12961 return "#";
12962 }
12963
12964 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12965 return "vor %0,%1,%1";
12966
12967 else if (dest_fp_p && src_fp_p)
12968 return "#";
12969 }
12970
12971 /* Loads. */
12972 else if (dest_regno >= 0 && MEM_P (src))
12973 {
12974 if (dest_gpr_p)
12975 {
12976 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12977 return "lq %0,%1";
12978 else
12979 return "#";
12980 }
12981
12982 else if (TARGET_ALTIVEC && dest_vmx_p
12983 && altivec_indexed_or_indirect_operand (src, mode))
12984 return "lvx %0,%y1";
12985
12986 else if (TARGET_VSX && dest_vsx_p)
12987 {
12988 if (mode_supports_dq_form (mode)
12989 && quad_address_p (XEXP (src, 0), mode, true))
12990 return "lxv %x0,%1";
12991
12992 else if (TARGET_P9_VECTOR)
12993 return "lxvx %x0,%y1";
12994
12995 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12996 return "lxvw4x %x0,%y1";
12997
12998 else
12999 return "lxvd2x %x0,%y1";
13000 }
13001
13002 else if (TARGET_ALTIVEC && dest_vmx_p)
13003 return "lvx %0,%y1";
13004
13005 else if (dest_fp_p)
13006 return "#";
13007 }
13008
13009 /* Stores. */
13010 else if (src_regno >= 0 && MEM_P (dest))
13011 {
13012 if (src_gpr_p)
13013 {
13014 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13015 return "stq %1,%0";
13016 else
13017 return "#";
13018 }
13019
13020 else if (TARGET_ALTIVEC && src_vmx_p
13021 && altivec_indexed_or_indirect_operand (dest, mode))
13022 return "stvx %1,%y0";
13023
13024 else if (TARGET_VSX && src_vsx_p)
13025 {
13026 if (mode_supports_dq_form (mode)
13027 && quad_address_p (XEXP (dest, 0), mode, true))
13028 return "stxv %x1,%0";
13029
13030 else if (TARGET_P9_VECTOR)
13031 return "stxvx %x1,%y0";
13032
13033 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13034 return "stxvw4x %x1,%y0";
13035
13036 else
13037 return "stxvd2x %x1,%y0";
13038 }
13039
13040 else if (TARGET_ALTIVEC && src_vmx_p)
13041 return "stvx %1,%y0";
13042
13043 else if (src_fp_p)
13044 return "#";
13045 }
13046
13047 /* Constants. */
13048 else if (dest_regno >= 0
13049 && (CONST_INT_P (src)
13050 || CONST_WIDE_INT_P (src)
13051 || CONST_DOUBLE_P (src)
13052 || GET_CODE (src) == CONST_VECTOR))
13053 {
13054 if (dest_gpr_p)
13055 return "#";
13056
13057 else if ((dest_vmx_p && TARGET_ALTIVEC)
13058 || (dest_vsx_p && TARGET_VSX))
13059 return output_vec_const_move (operands);
13060 }
13061
13062 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13063 }
13064
13065 /* Validate a 128-bit move. */
13066 bool
13067 rs6000_move_128bit_ok_p (rtx operands[])
13068 {
13069 machine_mode mode = GET_MODE (operands[0]);
13070 return (gpc_reg_operand (operands[0], mode)
13071 || gpc_reg_operand (operands[1], mode));
13072 }
13073
13074 /* Return true if a 128-bit move needs to be split. */
13075 bool
13076 rs6000_split_128bit_ok_p (rtx operands[])
13077 {
13078 if (!reload_completed)
13079 return false;
13080
13081 if (!gpr_or_gpr_p (operands[0], operands[1]))
13082 return false;
13083
13084 if (quad_load_store_p (operands[0], operands[1]))
13085 return false;
13086
13087 return true;
13088 }
13089
13090 \f
13091 /* Given a comparison operation, return the bit number in CCR to test. We
13092 know this is a valid comparison.
13093
13094 SCC_P is 1 if this is for an scc. That means that %D will have been
13095 used instead of %C, so the bits will be in different places.
13096
13097 Return -1 if OP isn't a valid comparison for some reason. */
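/* For example (illustrative): for a GT test against CR1, base_bit is
4 * 1 = 4 and the bit to test is base_bit + 1 = 5; with SCC_P set, a
GE test instead selects the unordered position, base_bit + 3 = 7.  */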
13098
13099 int
13100 ccr_bit (rtx op, int scc_p)
13101 {
13102 enum rtx_code code = GET_CODE (op);
13103 machine_mode cc_mode;
13104 int cc_regnum;
13105 int base_bit;
13106 rtx reg;
13107
13108 if (!COMPARISON_P (op))
13109 return -1;
13110
13111 reg = XEXP (op, 0);
13112
13113 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13114 return -1;
13115
13116 cc_mode = GET_MODE (reg);
13117 cc_regnum = REGNO (reg);
13118 base_bit = 4 * (cc_regnum - CR0_REGNO);
13119
13120 validate_condition_mode (code, cc_mode);
13121
13122 /* When generating a sCOND operation, only positive conditions are
13123 allowed. */
13124 if (scc_p)
13125 switch (code)
13126 {
13127 case EQ:
13128 case GT:
13129 case LT:
13130 case UNORDERED:
13131 case GTU:
13132 case LTU:
13133 break;
13134 default:
13135 return -1;
13136 }
13137
13138 switch (code)
13139 {
13140 case NE:
13141 return scc_p ? base_bit + 3 : base_bit + 2;
13142 case EQ:
13143 return base_bit + 2;
13144 case GT: case GTU: case UNLE:
13145 return base_bit + 1;
13146 case LT: case LTU: case UNGE:
13147 return base_bit;
13148 case ORDERED: case UNORDERED:
13149 return base_bit + 3;
13150
13151 case GE: case GEU:
13152 /* If scc, we will have done a cror to put the bit in the
13153 unordered position. So test that bit. For integer, this is ! LT
13154 unless this is an scc insn. */
13155 return scc_p ? base_bit + 3 : base_bit;
13156
13157 case LE: case LEU:
13158 return scc_p ? base_bit + 3 : base_bit + 1;
13159
13160 default:
13161 return -1;
13162 }
13163 }
13164 \f
13165 /* Return the GOT register. */
13166
13167 rtx
13168 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13169 {
13170 /* The second flow pass currently (June 1999) can't update
13171 regs_ever_live without disturbing other parts of the compiler, so
13172 update it here to make the prolog/epilogue code happy. */
13173 if (!can_create_pseudo_p ()
13174 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13175 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13176
13177 crtl->uses_pic_offset_table = 1;
13178
13179 return pic_offset_table_rtx;
13180 }
13181 \f
13182 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13183
13184 /* Write out a function code label. */
13185
13186 void
13187 rs6000_output_function_entry (FILE *file, const char *fname)
13188 {
13189 if (fname[0] != '.')
13190 {
13191 switch (DEFAULT_ABI)
13192 {
13193 default:
13194 gcc_unreachable ();
13195
13196 case ABI_AIX:
13197 if (DOT_SYMBOLS)
13198 putc ('.', file);
13199 else
13200 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13201 break;
13202
13203 case ABI_ELFv2:
13204 case ABI_V4:
13205 case ABI_DARWIN:
13206 break;
13207 }
13208 }
13209
13210 RS6000_OUTPUT_BASENAME (file, fname);
13211 }
13212
13213 /* Print an operand. Recognize special options, documented below. */
13214
13215 #if TARGET_ELF
13216 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13217 only introduced by the linker, when applying the sda21
13218 relocation. */
13219 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13220 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13221 #else
13222 #define SMALL_DATA_RELOC "sda21"
13223 #define SMALL_DATA_REG 0
13224 #endif
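/* With these definitions a small-data reference prints as, e.g.,
"foo@sdarel(13)", or "foo@sda21(0)" for -msdata=eabi (illustrative);
the sda21 relocation itself is applied by the linker as noted above.  */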
13225
13226 void
13227 print_operand (FILE *file, rtx x, int code)
13228 {
13229 int i;
13230 unsigned HOST_WIDE_INT uval;
13231
13232 switch (code)
13233 {
13234 /* %a is output_address. */
13235
13236 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13237 output_operand. */
13238
13239 case 'A':
13240 /* Write the MMA accumulator number associated with VSX register X. */
13241 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13242 output_operand_lossage ("invalid %%A value");
13243 else
13244 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13245 return;
13246
13247 case 'D':
13248 /* Like 'J' but get to the GT bit only. */
13249 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13250 {
13251 output_operand_lossage ("invalid %%D value");
13252 return;
13253 }
13254
13255 /* Bit 1 is GT bit. */
13256 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13257
13258 /* Add one for shift count in rlwinm for scc. */
13259 fprintf (file, "%d", i + 1);
13260 return;
13261
13262 case 'e':
13263 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13264 if (! INT_P (x))
13265 {
13266 output_operand_lossage ("invalid %%e value");
13267 return;
13268 }
13269
13270 uval = INTVAL (x);
13271 if ((uval & 0xffff) == 0 && uval != 0)
13272 putc ('s', file);
13273 return;
13274
13275 case 'E':
13276 /* X is a CR register. Print the number of the EQ bit of the CR. */
13277 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13278 output_operand_lossage ("invalid %%E value");
13279 else
13280 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13281 return;
13282
13283 case 'f':
13284 /* X is a CR register. Print the shift count needed to move it
13285 to the high-order four bits. */
13286 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13287 output_operand_lossage ("invalid %%f value");
13288 else
13289 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13290 return;
13291
13292 case 'F':
13293 /* Similar, but print the count for the rotate in the opposite
13294 direction. */
13295 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13296 output_operand_lossage ("invalid %%F value");
13297 else
13298 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13299 return;
13300
13301 case 'G':
13302 /* X is a constant integer. If it is negative, print "m",
13303 otherwise print "z". This is to make an aze or ame insn. */
13304 if (!CONST_INT_P (x))
13305 output_operand_lossage ("invalid %%G value");
13306 else if (INTVAL (x) >= 0)
13307 putc ('z', file);
13308 else
13309 putc ('m', file);
13310 return;
13311
13312 case 'h':
13313 /* If constant, output low-order five bits. Otherwise, write
13314 normally. */
13315 if (INT_P (x))
13316 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13317 else
13318 print_operand (file, x, 0);
13319 return;
13320
13321 case 'H':
13322 /* If constant, output low-order six bits. Otherwise, write
13323 normally. */
13324 if (INT_P (x))
13325 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13326 else
13327 print_operand (file, x, 0);
13328 return;
13329
13330 case 'I':
13331 /* Print `i' if this is a constant, else nothing. */
13332 if (INT_P (x))
13333 putc ('i', file);
13334 return;
13335
13336 case 'j':
13337 /* Write the bit number in CCR for jump. */
13338 i = ccr_bit (x, 0);
13339 if (i == -1)
13340 output_operand_lossage ("invalid %%j code");
13341 else
13342 fprintf (file, "%d", i);
13343 return;
13344
13345 case 'J':
13346 /* Similar, but add one for shift count in rlwinm for scc and pass
13347 scc flag to `ccr_bit'. */
13348 i = ccr_bit (x, 1);
13349 if (i == -1)
13350 output_operand_lossage ("invalid %%J code");
13351 else
13352 /* If we want bit 31, write a shift count of zero, not 32. */
13353 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13354 return;
13355
13356 case 'k':
13357 /* X must be a constant. Write the 1's complement of the
13358 constant. */
13359 if (! INT_P (x))
13360 output_operand_lossage ("invalid %%k value");
13361 else
13362 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13363 return;
13364
13365 case 'K':
13366 /* X must be a symbolic constant on ELF. Write an
13367 expression suitable for an 'addi' that adds in the low 16
13368 bits of the MEM. */
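/* For example (illustrative): for
(const (plus (symbol_ref "x") (const_int 4))) this prints "x+4@l",
suitable as the immediate of an addi.  */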
13369 if (GET_CODE (x) == CONST)
13370 {
13371 if (GET_CODE (XEXP (x, 0)) != PLUS
13372 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13373 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13374 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13375 output_operand_lossage ("invalid %%K value");
13376 }
13377 print_operand_address (file, x);
13378 fputs ("@l", file);
13379 return;
13380
13381 /* %l is output_asm_label. */
13382
13383 case 'L':
13384 /* Write second word of DImode or DFmode reference. Works on register
13385 or non-indexed memory only. */
13386 if (REG_P (x))
13387 fputs (reg_names[REGNO (x) + 1], file);
13388 else if (MEM_P (x))
13389 {
13390 machine_mode mode = GET_MODE (x);
13391 /* Handle possible auto-increment. Since it is pre-increment and
13392 we have already done it, we can just use an offset of word. */
13393 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13394 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13395 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13396 UNITS_PER_WORD));
13397 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13398 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13399 UNITS_PER_WORD));
13400 else
13401 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13402 UNITS_PER_WORD),
13403 0));
13404
13405 if (small_data_operand (x, GET_MODE (x)))
13406 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13407 reg_names[SMALL_DATA_REG]);
13408 }
13409 return;
13410
13411 case 'N': /* Unused */
13412 /* Write the number of elements in the vector times 4. */
13413 if (GET_CODE (x) != PARALLEL)
13414 output_operand_lossage ("invalid %%N value");
13415 else
13416 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13417 return;
13418
13419 case 'O': /* Unused */
13420 /* Similar, but subtract 1 first. */
13421 if (GET_CODE (x) != PARALLEL)
13422 output_operand_lossage ("invalid %%O value");
13423 else
13424 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13425 return;
13426
13427 case 'p':
13428 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13429 if (! INT_P (x)
13430 || INTVAL (x) < 0
13431 || (i = exact_log2 (INTVAL (x))) < 0)
13432 output_operand_lossage ("invalid %%p value");
13433 else
13434 fprintf (file, "%d", i);
13435 return;
13436
13437 case 'P':
13438 /* The operand must be an indirect memory reference. The result
13439 is the register name. */
13440 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13441 || REGNO (XEXP (x, 0)) >= 32)
13442 output_operand_lossage ("invalid %%P value");
13443 else
13444 fputs (reg_names[REGNO (XEXP (x, 0))], file);
13445 return;
13446
13447 case 'q':
13448 /* This outputs the logical code corresponding to a boolean
13449 expression. The expression may have one or both operands
13450 negated (if one, only the first one). For condition register
13451 logical operations, it will also treat the negated
13452 CR codes as NOTs, but not handle NOTs of them. */
13453 {
13454 const char *const *t = 0;
13455 const char *s;
13456 enum rtx_code code = GET_CODE (x);
13457 static const char * const tbl[3][3] = {
13458 { "and", "andc", "nor" },
13459 { "or", "orc", "nand" },
13460 { "xor", "eqv", "xor" } };
13461
13462 if (code == AND)
13463 t = tbl[0];
13464 else if (code == IOR)
13465 t = tbl[1];
13466 else if (code == XOR)
13467 t = tbl[2];
13468 else
13469 output_operand_lossage ("invalid %%q value");
13470
13471 if (GET_CODE (XEXP (x, 0)) != NOT)
13472 s = t[0];
13473 else
13474 {
13475 if (GET_CODE (XEXP (x, 1)) == NOT)
13476 s = t[2];
13477 else
13478 s = t[1];
13479 }
13480
13481 fputs (s, file);
13482 }
13483 return;
13484
13485 case 'Q':
13486 if (! TARGET_MFCRF)
13487 return;
13488 fputc (',', file);
13489 /* FALLTHRU */
13490
13491 case 'R':
13492 /* X is a CR register. Print the mask for `mtcrf'. */
13493 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13494 output_operand_lossage ("invalid %%R value");
13495 else
13496 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13497 return;
13498
13499 case 's':
13500 /* Low 5 bits of 32 - value. */
13501 if (! INT_P (x))
13502 output_operand_lossage ("invalid %%s value");
13503 else
13504 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13505 return;
13506
13507 case 't':
13508 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13509 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13510 {
13511 output_operand_lossage ("invalid %%t value");
13512 return;
13513 }
13514
13515 /* Bit 3 is OV bit. */
13516 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13517
13518 /* If we want bit 31, write a shift count of zero, not 32. */
13519 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13520 return;
13521
13522 case 'T':
13523 /* Print the symbolic name of a branch target register. */
13524 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13525 x = XVECEXP (x, 0, 0);
13526 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13527 && REGNO (x) != CTR_REGNO))
13528 output_operand_lossage ("invalid %%T value");
13529 else if (REGNO (x) == LR_REGNO)
13530 fputs ("lr", file);
13531 else
13532 fputs ("ctr", file);
13533 return;
13534
13535 case 'u':
13536 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13537 for use in unsigned operand. */
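/* For example (illustrative): 0x12340000 prints as 0x1234 (the low
half is zero, so the value is shifted down), while 0x1234 prints as
0x1234 unchanged.  */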
13538 if (! INT_P (x))
13539 {
13540 output_operand_lossage ("invalid %%u value");
13541 return;
13542 }
13543
13544 uval = INTVAL (x);
13545 if ((uval & 0xffff) == 0)
13546 uval >>= 16;
13547
13548 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13549 return;
13550
13551 case 'v':
13552 /* High-order 16 bits of constant for use in signed operand. */
13553 if (! INT_P (x))
13554 output_operand_lossage ("invalid %%v value");
13555 else
13556 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13557 (INTVAL (x) >> 16) & 0xffff);
13558 return;
13559
13560 case 'U':
13561 /* Print `u' if this has an auto-increment or auto-decrement. */
13562 if (MEM_P (x)
13563 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13564 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13565 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13566 putc ('u', file);
13567 return;
13568
13569 case 'V':
13570 /* Print the trap code for this operand. */
13571 switch (GET_CODE (x))
13572 {
13573 case EQ:
13574 fputs ("eq", file); /* 4 */
13575 break;
13576 case NE:
13577 fputs ("ne", file); /* 24 */
13578 break;
13579 case LT:
13580 fputs ("lt", file); /* 16 */
13581 break;
13582 case LE:
13583 fputs ("le", file); /* 20 */
13584 break;
13585 case GT:
13586 fputs ("gt", file); /* 8 */
13587 break;
13588 case GE:
13589 fputs ("ge", file); /* 12 */
13590 break;
13591 case LTU:
13592 fputs ("llt", file); /* 2 */
13593 break;
13594 case LEU:
13595 fputs ("lle", file); /* 6 */
13596 break;
13597 case GTU:
13598 fputs ("lgt", file); /* 1 */
13599 break;
13600 case GEU:
13601 fputs ("lge", file); /* 5 */
13602 break;
13603 default:
13604 output_operand_lossage ("invalid %%V value");
13605 }
13606 break;
13607
13608 case 'w':
13609 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13610 normally. */
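/* The xor/subtract below sign-extends the low 16 bits; for example
(illustrative), 0xabcd gives (0xabcd ^ 0x8000) - 0x8000 = -21555,
i.e. 0xabcd read as a signed 16-bit field.  */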
13611 if (INT_P (x))
13612 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13613 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13614 else
13615 print_operand (file, x, 0);
13616 return;
13617
13618 case 'x':
13619 /* X is a FPR or Altivec register used in a VSX context. */
13620 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13621 output_operand_lossage ("invalid %%x value");
13622 else
13623 {
13624 int reg = REGNO (x);
13625 int vsx_reg = (FP_REGNO_P (reg)
13626 ? reg - 32
13627 : reg - FIRST_ALTIVEC_REGNO + 32);
13628
13629 #ifdef TARGET_REGNAMES
13630 if (TARGET_REGNAMES)
13631 fprintf (file, "%%vs%d", vsx_reg);
13632 else
13633 #endif
13634 fprintf (file, "%d", vsx_reg);
13635 }
13636 return;
13637
13638 case 'X':
13639 if (MEM_P (x)
13640 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13641 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13642 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13643 putc ('x', file);
13644 return;
13645
13646 case 'Y':
13647 /* Like 'L', for third word of TImode/PTImode. */
13648 if (REG_P (x))
13649 fputs (reg_names[REGNO (x) + 2], file);
13650 else if (MEM_P (x))
13651 {
13652 machine_mode mode = GET_MODE (x);
13653 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13654 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13655 output_address (mode, plus_constant (Pmode,
13656 XEXP (XEXP (x, 0), 0), 8));
13657 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13658 output_address (mode, plus_constant (Pmode,
13659 XEXP (XEXP (x, 0), 0), 8));
13660 else
13661 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13662 if (small_data_operand (x, GET_MODE (x)))
13663 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13664 reg_names[SMALL_DATA_REG]);
13665 }
13666 return;
13667
13668 case 'z':
13669 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13670 x = XVECEXP (x, 0, 1);
13671 /* X is a SYMBOL_REF. Write out the name preceded by a
13672 period and without any trailing data in brackets. Used for function
13673 names. If we are configured for System V (or the embedded ABI) on
13674 the PowerPC, do not emit the period, since those systems do not use
13675 TOCs and the like. */
13676 if (!SYMBOL_REF_P (x))
13677 {
13678 output_operand_lossage ("invalid %%z value");
13679 return;
13680 }
13681
13682 /* For macho, check to see if we need a stub. */
13683 if (TARGET_MACHO)
13684 {
13685 const char *name = XSTR (x, 0);
13686 #if TARGET_MACHO
13687 if (darwin_symbol_stubs
13688 && MACHOPIC_INDIRECT
13689 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13690 name = machopic_indirection_name (x, /*stub_p=*/true);
13691 #endif
13692 assemble_name (file, name);
13693 }
13694 else if (!DOT_SYMBOLS)
13695 assemble_name (file, XSTR (x, 0));
13696 else
13697 rs6000_output_function_entry (file, XSTR (x, 0));
13698 return;
13699
13700 case 'Z':
13701 /* Like 'L', for last word of TImode/PTImode. */
13702 if (REG_P (x))
13703 fputs (reg_names[REGNO (x) + 3], file);
13704 else if (MEM_P (x))
13705 {
13706 machine_mode mode = GET_MODE (x);
13707 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13708 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13709 output_address (mode, plus_constant (Pmode,
13710 XEXP (XEXP (x, 0), 0), 12));
13711 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13712 output_address (mode, plus_constant (Pmode,
13713 XEXP (XEXP (x, 0), 0), 12));
13714 else
13715 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13716 if (small_data_operand (x, GET_MODE (x)))
13717 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13718 reg_names[SMALL_DATA_REG]);
13719 }
13720 return;
13721
13722 /* Print AltiVec memory operand. */
13723 case 'y':
13724 {
13725 rtx tmp;
13726
13727 gcc_assert (MEM_P (x));
13728
13729 tmp = XEXP (x, 0);
13730
13731 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13732 && GET_CODE (tmp) == AND
13733 && CONST_INT_P (XEXP (tmp, 1))
13734 && INTVAL (XEXP (tmp, 1)) == -16)
13735 tmp = XEXP (tmp, 0);
13736 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13737 && GET_CODE (tmp) == PRE_MODIFY)
13738 tmp = XEXP (tmp, 1);
13739 if (REG_P (tmp))
13740 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13741 else
13742 {
13743 if (GET_CODE (tmp) != PLUS
13744 || !REG_P (XEXP (tmp, 0))
13745 || !REG_P (XEXP (tmp, 1)))
13746 {
13747 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13748 break;
13749 }
13750
13751 if (REGNO (XEXP (tmp, 0)) == 0)
13752 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13753 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13754 else
13755 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13756 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13757 }
13758 break;
13759 }
13760
13761 case 0:
13762 if (REG_P (x))
13763 fprintf (file, "%s", reg_names[REGNO (x)]);
13764 else if (MEM_P (x))
13765 {
13766 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13767 know the width from the mode. */
13768 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13769 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13770 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13771 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13772 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13773 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13774 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13775 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13776 else
13777 output_address (GET_MODE (x), XEXP (x, 0));
13778 }
13779 else if (toc_relative_expr_p (x, false,
13780 &tocrel_base_oac, &tocrel_offset_oac))
13781 /* This hack along with a corresponding hack in
13782 rs6000_output_addr_const_extra arranges to output addends
13783 where the assembler expects to find them. eg.
13784 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13785 without this hack would be output as "x@toc+4". We
13786 want "x+4@toc". */
13787 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13788 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13789 output_addr_const (file, XVECEXP (x, 0, 0));
13790 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13791 output_addr_const (file, XVECEXP (x, 0, 1));
13792 else
13793 output_addr_const (file, x);
13794 return;
13795
13796 case '&':
13797 if (const char *name = get_some_local_dynamic_name ())
13798 assemble_name (file, name);
13799 else
13800 output_operand_lossage ("'%%&' used without any "
13801 "local dynamic TLS references");
13802 return;
13803
13804 default:
13805 output_operand_lossage ("invalid %%xn code");
13806 }
13807 }
13808 \f
13809 /* Print the address of an operand. */
13810
13811 void
13812 print_operand_address (FILE *file, rtx x)
13813 {
13814 if (REG_P (x))
13815 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13816
13817 /* Is it a PC-relative address? */
13818 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13819 {
13820 HOST_WIDE_INT offset;
13821
13822 if (GET_CODE (x) == CONST)
13823 x = XEXP (x, 0);
13824
13825 if (GET_CODE (x) == PLUS)
13826 {
13827 offset = INTVAL (XEXP (x, 1));
13828 x = XEXP (x, 0);
13829 }
13830 else
13831 offset = 0;
13832
13833 output_addr_const (file, x);
13834
13835 if (offset)
13836 fprintf (file, "%+" PRId64, offset);
13837
13838 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13839 fprintf (file, "@got");
13840
13841 fprintf (file, "@pcrel");
13842 }
13843 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13844 || GET_CODE (x) == LABEL_REF)
13845 {
13846 output_addr_const (file, x);
13847 if (small_data_operand (x, GET_MODE (x)))
13848 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13849 reg_names[SMALL_DATA_REG]);
13850 else
13851 gcc_assert (!TARGET_TOC);
13852 }
13853 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13854 && REG_P (XEXP (x, 1)))
13855 {
13856 if (REGNO (XEXP (x, 0)) == 0)
13857 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13858 reg_names[ REGNO (XEXP (x, 0)) ]);
13859 else
13860 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13861 reg_names[ REGNO (XEXP (x, 1)) ]);
13862 }
13863 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13864 && CONST_INT_P (XEXP (x, 1)))
13865 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13866 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13867 #if TARGET_MACHO
13868 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13869 && CONSTANT_P (XEXP (x, 1)))
13870 {
13871 fprintf (file, "lo16(");
13872 output_addr_const (file, XEXP (x, 1));
13873 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13874 }
13875 #endif
13876 #if TARGET_ELF
13877 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13878 && CONSTANT_P (XEXP (x, 1)))
13879 {
13880 output_addr_const (file, XEXP (x, 1));
13881 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13882 }
13883 #endif
13884 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13885 {
13886 /* This hack along with a corresponding hack in
13887 rs6000_output_addr_const_extra arranges to output addends
13888 where the assembler expects to find them. eg.
13889 (lo_sum (reg 9)
13890 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13891 without this hack would be output as "x@toc+8@l(9)". We
13892 want "x+8@toc@l(9)". */
13893 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13894 if (GET_CODE (x) == LO_SUM)
13895 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13896 else
13897 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13898 }
13899 else
13900 output_addr_const (file, x);
13901 }
13902 \f
13903 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13904
13905 bool
13906 rs6000_output_addr_const_extra (FILE *file, rtx x)
13907 {
13908 if (GET_CODE (x) == UNSPEC)
13909 switch (XINT (x, 1))
13910 {
13911 case UNSPEC_TOCREL:
13912 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13913 && REG_P (XVECEXP (x, 0, 1))
13914 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13915 output_addr_const (file, XVECEXP (x, 0, 0));
13916 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13917 {
13918 if (INTVAL (tocrel_offset_oac) >= 0)
13919 fprintf (file, "+");
13920 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13921 }
13922 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13923 {
13924 putc ('-', file);
13925 assemble_name (file, toc_label_name);
13926 need_toc_init = 1;
13927 }
13928 else if (TARGET_ELF)
13929 fputs ("@toc", file);
13930 return true;
13931
13932 #if TARGET_MACHO
13933 case UNSPEC_MACHOPIC_OFFSET:
13934 output_addr_const (file, XVECEXP (x, 0, 0));
13935 putc ('-', file);
13936 machopic_output_function_base_name (file);
13937 return true;
13938 #endif
13939 }
13940 return false;
13941 }
13942 \f
13943 /* Target hook for assembling integer objects. The PowerPC version has
13944 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13945 is defined. It also needs to handle DI-mode objects on 64-bit
13946 targets. */
13947
13948 static bool
13949 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13950 {
13951 #ifdef RELOCATABLE_NEEDS_FIXUP
13952 /* Special handling for SI values. */
13953 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13954 {
13955 static int recurse = 0;
13956
13957 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13958 the .fixup section. Since the TOC section is already relocated, we
13959 don't need to mark it here. We used to skip the text section, but it
13960 should never be valid for relocated addresses to be placed in the text
13961 section. */
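/* The emitted assembly looks like (illustrative):
.LCP0:
.long (expr)@fixup
.section ".fixup","aw"
.align 2
.long .LCP0
.previous
so .fixup collects the addresses of all words that need runtime
relocation.  */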
13962 if (DEFAULT_ABI == ABI_V4
13963 && (TARGET_RELOCATABLE || flag_pic > 1)
13964 && in_section != toc_section
13965 && !recurse
13966 && !CONST_SCALAR_INT_P (x)
13967 && CONSTANT_P (x))
13968 {
13969 char buf[256];
13970
13971 recurse = 1;
13972 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13973 fixuplabelno++;
13974 ASM_OUTPUT_LABEL (asm_out_file, buf);
13975 fprintf (asm_out_file, "\t.long\t(");
13976 output_addr_const (asm_out_file, x);
13977 fprintf (asm_out_file, ")@fixup\n");
13978 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13979 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13980 fprintf (asm_out_file, "\t.long\t");
13981 assemble_name (asm_out_file, buf);
13982 fprintf (asm_out_file, "\n\t.previous\n");
13983 recurse = 0;
13984 return true;
13985 }
13986 /* Remove initial .'s to turn a -mcall-aixdesc function
13987 address into the address of the descriptor, not the function
13988 itself. */
13989 else if (SYMBOL_REF_P (x)
13990 && XSTR (x, 0)[0] == '.'
13991 && DEFAULT_ABI == ABI_AIX)
13992 {
13993 const char *name = XSTR (x, 0);
13994 while (*name == '.')
13995 name++;
13996
13997 fprintf (asm_out_file, "\t.long\t%s\n", name);
13998 return true;
13999 }
14000 }
14001 #endif /* RELOCATABLE_NEEDS_FIXUP */
14002 return default_assemble_integer (x, size, aligned_p);
14003 }
14004
14005 /* Return a template string for assembly to emit when making an
14006 external call. FUNOP is the call mem argument operand number. */
14007
14008 static const char *
14009 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14010 {
14011 /* -Wformat-overflow workaround, without which gcc thinks that %u
14012 might produce 10 digits. */
14013 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14014
14015 char arg[12];
14016 arg[0] = 0;
14017 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14018 {
14019 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14020 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14021 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14022 sprintf (arg, "(%%&@tlsld)");
14023 }
14024
14025 /* The magic 32768 offset here corresponds to the offset of
14026 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14027 char z[11];
14028 sprintf (z, "%%z%u%s", funop,
14029 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14030 ? "+32768" : ""));
14031
14032 static char str[32]; /* 1 spare */
14033 if (rs6000_pcrel_p ())
14034 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14035 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14036 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14037 sibcall ? "" : "\n\tnop");
14038 else if (DEFAULT_ABI == ABI_V4)
14039 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14040 flag_pic ? "@plt" : "");
14041 #if TARGET_MACHO
14042 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14043 else if (DEFAULT_ABI == ABI_DARWIN)
14044 {
14045 /* The cookie is in operand func+2. */
14046 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14047 int cookie = INTVAL (operands[funop + 2]);
14048 if (cookie & CALL_LONG)
14049 {
14050 tree funname = get_identifier (XSTR (operands[funop], 0));
14051 tree labelname = get_prev_label (funname);
14052 gcc_checking_assert (labelname && !sibcall);
14053
14054 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14055 instruction will reach 'foo', otherwise link as 'bl L42'".
14056 "L42" should be a 'branch island', that will do a far jump to
14057 'foo'. Branch islands are generated in
14058 macho_branch_islands(). */
14059 sprintf (str, "jbsr %%z%u,%.10s", funop,
14060 IDENTIFIER_POINTER (labelname));
14061 }
14062 else
14063 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14064 after the call. */
14065 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14066 }
14067 #endif
14068 else
14069 gcc_unreachable ();
14070 return str;
14071 }
14072
14073 const char *
14074 rs6000_call_template (rtx *operands, unsigned int funop)
14075 {
14076 return rs6000_call_template_1 (operands, funop, false);
14077 }
14078
14079 const char *
14080 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14081 {
14082 return rs6000_call_template_1 (operands, funop, true);
14083 }
14084
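/* Illustrative expansions of the template above for a normal (non-sib)
   call with FUNOP == 0, before the %z operand is substituted:

     pcrel:		bl %z0@notoc
     AIX/ELFv2:		bl %z0
			nop
     SysV4, PIC:	bl %z0@plt

   The nop after the AIX/ELFv2 call is the slot the linker may rewrite
   into the TOC restore (e.g. "ld 2,24(1)" on ELFv2) when the callee
   uses a different TOC.  */
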
14085 /* As above, for indirect calls. */
14086
14087 static const char *
14088 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14089 bool sibcall)
14090 {
14091 /* -Wformat-overflow workaround, without which gcc thinks that %u
14092 might produce 10 digits. Note that -Wformat-overflow will not
14093 currently warn here for str[], so do not rely on a warning to
14094 ensure str[] is correctly sized. */
14095 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14096
14097 /* Currently, funop is either 0 or 1. The maximum string is always
14098 a !speculate 64-bit __tls_get_addr call.
14099
14100 ABI_ELFv2, pcrel:
14101 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14102 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14103 . 9 crset 2\n\t
14104 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14105 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14106 . 8 beq%T1l-
14107 .---
14108 .142
14109
14110 ABI_AIX:
14111 . 9 ld 2,%3\n\t
14112 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14113 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14114 . 9 crset 2\n\t
14115 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14116 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14117 . 10 beq%T1l-\n\t
14118 . 10 ld 2,%4(1)
14119 .---
14120 .151
14121
14122 ABI_ELFv2:
14123 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14124 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14125 . 9 crset 2\n\t
14126 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14127 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14128 . 10 beq%T1l-\n\t
14129 . 10 ld 2,%3(1)
14130 .---
14131 .142
14132
14133 ABI_V4:
14134 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14135 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14136 . 9 crset 2\n\t
14137 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14138 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14139 . 8 beq%T1l-
14140 .---
14141 .141 */
14142 static char str[160]; /* 8 spare */
14143 char *s = str;
14144 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14145
14146 if (DEFAULT_ABI == ABI_AIX)
14147 s += sprintf (s,
14148 "l%s 2,%%%u\n\t",
14149 ptrload, funop + 3);
14150
14151 /* We don't need the extra code to stop indirect call speculation if
14152 calling via LR. */
14153 bool speculate = (TARGET_MACHO
14154 || rs6000_speculate_indirect_jumps
14155 || (REG_P (operands[funop])
14156 && REGNO (operands[funop]) == LR_REGNO));
14157
14158 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14159 {
14160 const char *rel64 = TARGET_64BIT ? "64" : "";
14161 char tls[29];
14162 tls[0] = 0;
14163 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14164 {
14165 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14166 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14167 rel64, funop + 1);
14168 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14169 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14170 rel64);
14171 }
14172
14173 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14174 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14175 && flag_pic == 2 ? "+32768" : "");
14176 if (!speculate)
14177 {
14178 s += sprintf (s,
14179 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14180 tls, rel64, notoc, funop, addend);
14181 s += sprintf (s, "crset 2\n\t");
14182 }
14183 s += sprintf (s,
14184 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14185 tls, rel64, notoc, funop, addend);
14186 }
14187 else if (!speculate)
14188 s += sprintf (s, "crset 2\n\t");
14189
14190 if (rs6000_pcrel_p ())
14191 {
14192 if (speculate)
14193 sprintf (s, "b%%T%ul", funop);
14194 else
14195 sprintf (s, "beq%%T%ul-", funop);
14196 }
14197 else if (DEFAULT_ABI == ABI_AIX)
14198 {
14199 if (speculate)
14200 sprintf (s,
14201 "b%%T%ul\n\t"
14202 "l%s 2,%%%u(1)",
14203 funop, ptrload, funop + 4);
14204 else
14205 sprintf (s,
14206 "beq%%T%ul-\n\t"
14207 "l%s 2,%%%u(1)",
14208 funop, ptrload, funop + 4);
14209 }
14210 else if (DEFAULT_ABI == ABI_ELFv2)
14211 {
14212 if (speculate)
14213 sprintf (s,
14214 "b%%T%ul\n\t"
14215 "l%s 2,%%%u(1)",
14216 funop, ptrload, funop + 3);
14217 else
14218 sprintf (s,
14219 "beq%%T%ul-\n\t"
14220 "l%s 2,%%%u(1)",
14221 funop, ptrload, funop + 3);
14222 }
14223 else
14224 {
14225 if (speculate)
14226 sprintf (s,
14227 "b%%T%u%s",
14228 funop, sibcall ? "" : "l");
14229 else
14230 sprintf (s,
14231 "beq%%T%u%s-%s",
14232 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14233 }
14234 return str;
14235 }
14236
14237 const char *
14238 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14239 {
14240 return rs6000_indirect_call_template_1 (operands, funop, false);
14241 }
14242
14243 const char *
14244 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14245 {
14246 return rs6000_indirect_call_template_1 (operands, funop, true);
14247 }
14248
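/* Illustrative only: on ELFv2 with speculation barriers enabled
   (-mno-speculate-indirect-jumps), an indirect call through CTR with
   FUNOP == 0 expands along the lines of

     crset 2
     beqctrl-
     ld 2,24(1)

   i.e. a conditional branch-and-link forced by the always-true CR bit,
   followed by the TOC restore; the speculative form is simply "bctrl"
   plus the same TOC load.  */
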
14249 #if HAVE_AS_PLTSEQ
14250 /* Output indirect call insns. WHICH identifies the type of sequence. */
14251 const char *
14252 rs6000_pltseq_template (rtx *operands, int which)
14253 {
14254 const char *rel64 = TARGET_64BIT ? "64" : "";
14255 char tls[30];
14256 tls[0] = 0;
14257 if (GET_CODE (operands[3]) == UNSPEC)
14258 {
14259 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14260 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14261 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14262 off, rel64);
14263 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14264 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14265 off, rel64);
14266 }
14267
14268 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14269 static char str[96]; /* 10 spare */
14270 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14271 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14272 && flag_pic == 2 ? "+32768" : "");
14273 switch (which)
14274 {
14275 case RS6000_PLTSEQ_TOCSAVE:
14276 sprintf (str,
14277 "st%s\n\t"
14278 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14279 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14280 tls, rel64);
14281 break;
14282 case RS6000_PLTSEQ_PLT16_HA:
14283 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14284 sprintf (str,
14285 "lis %%0,0\n\t"
14286 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14287 tls, off, rel64);
14288 else
14289 sprintf (str,
14290 "addis %%0,%%1,0\n\t"
14291 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14292 tls, off, rel64, addend);
14293 break;
14294 case RS6000_PLTSEQ_PLT16_LO:
14295 sprintf (str,
14296 "l%s %%0,0(%%1)\n\t"
14297 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14298 TARGET_64BIT ? "d" : "wz",
14299 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14300 break;
14301 case RS6000_PLTSEQ_MTCTR:
14302 sprintf (str,
14303 "mtctr %%1\n\t"
14304 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14305 tls, rel64, addend);
14306 break;
14307 case RS6000_PLTSEQ_PLT_PCREL34:
14308 sprintf (str,
14309 "pl%s %%0,0(0),1\n\t"
14310 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14311 TARGET_64BIT ? "d" : "wz",
14312 tls, rel64);
14313 break;
14314 default:
14315 gcc_unreachable ();
14316 }
14317 return str;
14318 }
14319 #endif
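
/* Illustrative only (register numbers and the symbol "foo" are made
   up): with a PLTSEQ-capable assembler, the ELFv2 inline PLT call
   sequence built from the cases above looks roughly like

     addis 9,2,0
	.reloc .-4,R_PPC64_PLT16_HA,foo
     ld 9,0(9)
	.reloc .-4,R_PPC64_PLT16_LO_DS,foo
     mtctr 9
	.reloc .-4,R_PPC64_PLTSEQ,foo

   followed by the call itself (bctrl); the linker either resolves the
   relocations against the PLT or relaxes the sequence to a direct
   call.  */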
14320 \f
14321 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14322 /* Emit an assembler directive to set symbol visibility for DECL to
14323 VISIBILITY_TYPE. */
14324
14325 static void
14326 rs6000_assemble_visibility (tree decl, int vis)
14327 {
14328 if (TARGET_XCOFF)
14329 return;
14330
14331 /* Functions need to have their entry point symbol visibility set as
14332 well as their descriptor symbol visibility. */
14333 if (DEFAULT_ABI == ABI_AIX
14334 && DOT_SYMBOLS
14335 && TREE_CODE (decl) == FUNCTION_DECL)
14336 {
14337 static const char * const visibility_types[] = {
14338 NULL, "protected", "hidden", "internal"
14339 };
14340
14341 const char *name, *type;
14342
14343 name = ((* targetm.strip_name_encoding)
14344 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14345 type = visibility_types[vis];
14346
14347 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14348 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14349 }
14350 else
14351 default_assemble_visibility (decl, vis);
14352 }
14353 #endif
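
/* For example (hypothetical symbol): a hidden function foo compiled
   for the AIX ABI with dot-symbols gets two directives,

     .hidden foo
     .hidden .foo

   covering the function descriptor and its code entry point; all other
   cases fall through to the generic single-directive handling.  */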
14354 \f
14355 enum rtx_code
14356 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14357 {
14358 /* Reversal of FP compares needs care -- an ordered compare
14359 becomes an unordered compare and vice versa. */
14360 if (mode == CCFPmode
14361 && (!flag_finite_math_only
14362 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14363 || code == UNEQ || code == LTGT))
14364 return reverse_condition_maybe_unordered (code);
14365 else
14366 return reverse_condition (code);
14367 }
14368
14369 /* Generate a compare for CODE. Return a brand-new rtx that
14370 represents the result of the compare. */
14371
14372 static rtx
14373 rs6000_generate_compare (rtx cmp, machine_mode mode)
14374 {
14375 machine_mode comp_mode;
14376 rtx compare_result;
14377 enum rtx_code code = GET_CODE (cmp);
14378 rtx op0 = XEXP (cmp, 0);
14379 rtx op1 = XEXP (cmp, 1);
14380
14381 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14382 comp_mode = CCmode;
14383 else if (FLOAT_MODE_P (mode))
14384 comp_mode = CCFPmode;
14385 else if (code == GTU || code == LTU
14386 || code == GEU || code == LEU)
14387 comp_mode = CCUNSmode;
14388 else if ((code == EQ || code == NE)
14389 && unsigned_reg_p (op0)
14390 && (unsigned_reg_p (op1)
14391 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14392 /* These are unsigned values; perhaps there will be a later
14393 ordering compare that can be shared with this one. */
14394 comp_mode = CCUNSmode;
14395 else
14396 comp_mode = CCmode;
14397
14398 /* If we have an unsigned compare, make sure we don't have a signed value as
14399 an immediate. */
14400 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14401 && INTVAL (op1) < 0)
14402 {
14403 op0 = copy_rtx_if_shared (op0);
14404 op1 = force_reg (GET_MODE (op0), op1);
14405 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14406 }
14407
14408 /* First, the compare. */
14409 compare_result = gen_reg_rtx (comp_mode);
14410
14411 /* IEEE 128-bit support in VSX registers when we do not have hardware
14412 support. */
14413 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14414 {
14415 rtx libfunc = NULL_RTX;
14416 bool check_nan = false;
14417 rtx dest;
14418
14419 switch (code)
14420 {
14421 case EQ:
14422 case NE:
14423 libfunc = optab_libfunc (eq_optab, mode);
14424 break;
14425
14426 case GT:
14427 case GE:
14428 libfunc = optab_libfunc (ge_optab, mode);
14429 break;
14430
14431 case LT:
14432 case LE:
14433 libfunc = optab_libfunc (le_optab, mode);
14434 break;
14435
14436 case UNORDERED:
14437 case ORDERED:
14438 libfunc = optab_libfunc (unord_optab, mode);
14439 code = (code == UNORDERED) ? NE : EQ;
14440 break;
14441
14442 case UNGE:
14443 case UNGT:
14444 check_nan = true;
14445 libfunc = optab_libfunc (ge_optab, mode);
14446 code = (code == UNGE) ? GE : GT;
14447 break;
14448
14449 case UNLE:
14450 case UNLT:
14451 check_nan = true;
14452 libfunc = optab_libfunc (le_optab, mode);
14453 code = (code == UNLE) ? LE : LT;
14454 break;
14455
14456 case UNEQ:
14457 case LTGT:
14458 check_nan = true;
14459 libfunc = optab_libfunc (eq_optab, mode);
14460 code = (code == UNEQ) ? EQ : NE;
14461 break;
14462
14463 default:
14464 gcc_unreachable ();
14465 }
14466
14467 gcc_assert (libfunc);
14468
14469 if (!check_nan)
14470 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14471 SImode, op0, mode, op1, mode);
14472
14473 /* The library signals an exception for signalling NaNs, so we need to
14474 handle isgreater, etc. by first checking isordered. */
14475 else
14476 {
14477 rtx ne_rtx, normal_dest, unord_dest;
14478 rtx unord_func = optab_libfunc (unord_optab, mode);
14479 rtx join_label = gen_label_rtx ();
14480 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14481 rtx unord_cmp = gen_reg_rtx (comp_mode);
14482
14483
14484 /* Test for either value being a NaN. */
14485 gcc_assert (unord_func);
14486 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14487 SImode, op0, mode, op1, mode);
14488
14489 /* Set value (1) if either value is a NaN, and jump to the join
14490 label. */
14491 dest = gen_reg_rtx (SImode);
14492 emit_move_insn (dest, const1_rtx);
14493 emit_insn (gen_rtx_SET (unord_cmp,
14494 gen_rtx_COMPARE (comp_mode, unord_dest,
14495 const0_rtx)));
14496
14497 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14498 emit_jump_insn (gen_rtx_SET (pc_rtx,
14499 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14500 join_ref,
14501 pc_rtx)));
14502
14503 /* Do the normal comparison, knowing that the values are not
14504 NaNs. */
14505 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14506 SImode, op0, mode, op1, mode);
14507
14508 emit_insn (gen_cstoresi4 (dest,
14509 gen_rtx_fmt_ee (code, SImode, normal_dest,
14510 const0_rtx),
14511 normal_dest, const0_rtx));
14512
14513 /* Join NaN and non-NaN paths. Compare dest against 0. */
14514 emit_label (join_label);
14515 code = NE;
14516 }
14517
14518 emit_insn (gen_rtx_SET (compare_result,
14519 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14520 }
14521
14522 else
14523 {
14524 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14525 CLOBBERs to match cmptf_internal2 pattern. */
14526 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14527 && FLOAT128_IBM_P (GET_MODE (op0))
14528 && TARGET_HARD_FLOAT)
14529 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14530 gen_rtvec (10,
14531 gen_rtx_SET (compare_result,
14532 gen_rtx_COMPARE (comp_mode, op0, op1)),
14533 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14534 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14535 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14536 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14537 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14538 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14539 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14540 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14541 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14542 else if (GET_CODE (op1) == UNSPEC
14543 && XINT (op1, 1) == UNSPEC_SP_TEST)
14544 {
14545 rtx op1b = XVECEXP (op1, 0, 0);
14546 comp_mode = CCEQmode;
14547 compare_result = gen_reg_rtx (CCEQmode);
14548 if (TARGET_64BIT)
14549 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14550 else
14551 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14552 }
14553 else
14554 emit_insn (gen_rtx_SET (compare_result,
14555 gen_rtx_COMPARE (comp_mode, op0, op1)));
14556 }
14557
14558 validate_condition_mode (code, GET_MODE (compare_result));
14559
14560 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14561 }
14562
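/* As a concrete example of the soft-float path above: without
   FLOAT128 hardware, a KFmode "a > b" becomes a call to the ge_optab
   libfunc (__gekf2 in libgcc) followed by a signed SImode comparison
   of the result against zero, while the unordered variants such as
   UNGT first call __unordkf2 and only fall through to __gekf2 when
   neither operand is a NaN.  */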
14563 \f
14564 /* Return the diagnostic message string if the binary operation OP is
14565 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14566
14567 static const char*
14568 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14569 const_tree type1,
14570 const_tree type2)
14571 {
14572 machine_mode mode1 = TYPE_MODE (type1);
14573 machine_mode mode2 = TYPE_MODE (type2);
14574
14575 /* For complex modes, use the inner type. */
14576 if (COMPLEX_MODE_P (mode1))
14577 mode1 = GET_MODE_INNER (mode1);
14578
14579 if (COMPLEX_MODE_P (mode2))
14580 mode2 = GET_MODE_INNER (mode2);
14581
14582 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14583 double to intermix unless -mfloat128-convert. */
14584 if (mode1 == mode2)
14585 return NULL;
14586
14587 if (!TARGET_FLOAT128_CVT)
14588 {
14589 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
14590 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
14591 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
14592 "point types");
14593 }
14594
14595 return NULL;
14596 }
14597
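/* For instance, without -mfloat128-convert the check above rejects

     __float128 a;
     __ibm128 b;
     ... a + b ...

   since KFmode and IFmode use different 128-bit formats, while mixing
   two operands of the same mode is always accepted.  */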
14598 \f
14599 /* Expand floating point conversion to/from __float128 and __ibm128. */
14600
14601 void
14602 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14603 {
14604 machine_mode dest_mode = GET_MODE (dest);
14605 machine_mode src_mode = GET_MODE (src);
14606 convert_optab cvt = unknown_optab;
14607 bool do_move = false;
14608 rtx libfunc = NULL_RTX;
14609 rtx dest2;
14610 typedef rtx (*rtx_2func_t) (rtx, rtx);
14611 rtx_2func_t hw_convert = (rtx_2func_t)0;
14612 size_t kf_or_tf;
14613
14614 struct hw_conv_t {
14615 rtx_2func_t from_df;
14616 rtx_2func_t from_sf;
14617 rtx_2func_t from_si_sign;
14618 rtx_2func_t from_si_uns;
14619 rtx_2func_t from_di_sign;
14620 rtx_2func_t from_di_uns;
14621 rtx_2func_t to_df;
14622 rtx_2func_t to_sf;
14623 rtx_2func_t to_si_sign;
14624 rtx_2func_t to_si_uns;
14625 rtx_2func_t to_di_sign;
14626 rtx_2func_t to_di_uns;
14627 } hw_conversions[2] = {
14628 /* conversions to/from KFmode */
14629 {
14630 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14631 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14632 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14633 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14634 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14635 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14636 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14637 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14638 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14639 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14640 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14641 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14642 },
14643
14644 /* conversions to/from TFmode */
14645 {
14646 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14647 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14648 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14649 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14650 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14651 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14652 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14653 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14654 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14655 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14656 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14657 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14658 },
14659 };
14660
14661 if (dest_mode == src_mode)
14662 gcc_unreachable ();
14663
14664 /* Eliminate memory operations. */
14665 if (MEM_P (src))
14666 src = force_reg (src_mode, src);
14667
14668 if (MEM_P (dest))
14669 {
14670 rtx tmp = gen_reg_rtx (dest_mode);
14671 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14672 rs6000_emit_move (dest, tmp, dest_mode);
14673 return;
14674 }
14675
14676 /* Convert to IEEE 128-bit floating point. */
14677 if (FLOAT128_IEEE_P (dest_mode))
14678 {
14679 if (dest_mode == KFmode)
14680 kf_or_tf = 0;
14681 else if (dest_mode == TFmode)
14682 kf_or_tf = 1;
14683 else
14684 gcc_unreachable ();
14685
14686 switch (src_mode)
14687 {
14688 case E_DFmode:
14689 cvt = sext_optab;
14690 hw_convert = hw_conversions[kf_or_tf].from_df;
14691 break;
14692
14693 case E_SFmode:
14694 cvt = sext_optab;
14695 hw_convert = hw_conversions[kf_or_tf].from_sf;
14696 break;
14697
14698 case E_KFmode:
14699 case E_IFmode:
14700 case E_TFmode:
14701 if (FLOAT128_IBM_P (src_mode))
14702 cvt = sext_optab;
14703 else
14704 do_move = true;
14705 break;
14706
14707 case E_SImode:
14708 if (unsigned_p)
14709 {
14710 cvt = ufloat_optab;
14711 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14712 }
14713 else
14714 {
14715 cvt = sfloat_optab;
14716 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14717 }
14718 break;
14719
14720 case E_DImode:
14721 if (unsigned_p)
14722 {
14723 cvt = ufloat_optab;
14724 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14725 }
14726 else
14727 {
14728 cvt = sfloat_optab;
14729 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14730 }
14731 break;
14732
14733 default:
14734 gcc_unreachable ();
14735 }
14736 }
14737
14738 /* Convert from IEEE 128-bit floating point. */
14739 else if (FLOAT128_IEEE_P (src_mode))
14740 {
14741 if (src_mode == KFmode)
14742 kf_or_tf = 0;
14743 else if (src_mode == TFmode)
14744 kf_or_tf = 1;
14745 else
14746 gcc_unreachable ();
14747
14748 switch (dest_mode)
14749 {
14750 case E_DFmode:
14751 cvt = trunc_optab;
14752 hw_convert = hw_conversions[kf_or_tf].to_df;
14753 break;
14754
14755 case E_SFmode:
14756 cvt = trunc_optab;
14757 hw_convert = hw_conversions[kf_or_tf].to_sf;
14758 break;
14759
14760 case E_KFmode:
14761 case E_IFmode:
14762 case E_TFmode:
14763 if (FLOAT128_IBM_P (dest_mode))
14764 cvt = trunc_optab;
14765 else
14766 do_move = true;
14767 break;
14768
14769 case E_SImode:
14770 if (unsigned_p)
14771 {
14772 cvt = ufix_optab;
14773 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14774 }
14775 else
14776 {
14777 cvt = sfix_optab;
14778 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14779 }
14780 break;
14781
14782 case E_DImode:
14783 if (unsigned_p)
14784 {
14785 cvt = ufix_optab;
14786 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14787 }
14788 else
14789 {
14790 cvt = sfix_optab;
14791 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14792 }
14793 break;
14794
14795 default:
14796 gcc_unreachable ();
14797 }
14798 }
14799
14800 /* Both IBM format. */
14801 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14802 do_move = true;
14803
14804 else
14805 gcc_unreachable ();
14806
14807 /* Handle conversion between TFmode/KFmode/IFmode. */
14808 if (do_move)
14809 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14810
14811 /* Handle conversion if we have hardware support. */
14812 else if (TARGET_FLOAT128_HW && hw_convert)
14813 emit_insn ((hw_convert) (dest, src));
14814
14815 /* Call an external function to do the conversion. */
14816 else if (cvt != unknown_optab)
14817 {
14818 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14819 gcc_assert (libfunc != NULL_RTX);
14820
14821 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14822 src, src_mode);
14823
14824 gcc_assert (dest2 != NULL_RTX);
14825 if (!rtx_equal_p (dest, dest2))
14826 emit_move_insn (dest, dest2);
14827 }
14828
14829 else
14830 gcc_unreachable ();
14831
14832 return;
14833 }
14834
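/* As an illustration of the dispatch above: a signed DImode to KFmode
   conversion uses gen_float_kfdi2_hw when TARGET_FLOAT128_HW, and
   otherwise calls the sfloat_optab libfunc (__floatdikf in libgcc);
   the TFmode/KFmode/IFmode "conversions" between modes that share a
   format are emitted as plain FLOAT_EXTEND moves.  */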
14835 \f
14836 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14837 can be used as that dest register. Return the dest register. */
14838
14839 rtx
14840 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14841 {
14842 if (op2 == const0_rtx)
14843 return op1;
14844
14845 if (GET_CODE (scratch) == SCRATCH)
14846 scratch = gen_reg_rtx (mode);
14847
14848 if (logical_operand (op2, mode))
14849 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14850 else
14851 emit_insn (gen_rtx_SET (scratch,
14852 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14853
14854 return scratch;
14855 }
14856
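/* For example, comparing register R against 17 (which satisfies
   logical_operand) emits "xori SCRATCH,R,17", leaving SCRATCH zero
   exactly when R == 17; constants that do not fit a logical immediate
   are handled by adding the negated constant instead.  */
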
14857 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14858 requires this. The result is mode MODE. */
14859 rtx
14860 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14861 {
14862 rtx cond[2];
14863 int n = 0;
14864 if (code == LTGT || code == LE || code == UNLT)
14865 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14866 if (code == LTGT || code == GE || code == UNGT)
14867 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14868 if (code == LE || code == GE || code == UNEQ)
14869 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14870 if (code == UNLT || code == UNGT || code == UNEQ)
14871 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14872
14873 gcc_assert (n == 2);
14874
14875 rtx cc = gen_reg_rtx (CCEQmode);
14876 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14877 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14878
14879 return cc;
14880 }
14881
14882 void
14883 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14884 {
14885 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14886 rtx_code cond_code = GET_CODE (condition_rtx);
14887
14888 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14889 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14890 ;
14891 else if (cond_code == NE
14892 || cond_code == GE || cond_code == LE
14893 || cond_code == GEU || cond_code == LEU
14894 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14895 {
14896 rtx not_result = gen_reg_rtx (CCEQmode);
14897 rtx not_op, rev_cond_rtx;
14898 machine_mode cc_mode;
14899
14900 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14901
14902 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14903 SImode, XEXP (condition_rtx, 0), const0_rtx);
14904 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14905 emit_insn (gen_rtx_SET (not_result, not_op));
14906 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14907 }
14908
14909 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14910 if (op_mode == VOIDmode)
14911 op_mode = GET_MODE (XEXP (operands[1], 1));
14912
14913 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14914 {
14915 PUT_MODE (condition_rtx, DImode);
14916 convert_move (operands[0], condition_rtx, 0);
14917 }
14918 else
14919 {
14920 PUT_MODE (condition_rtx, SImode);
14921 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14922 }
14923 }
14924
14925 /* Emit a conditional branch; OPERANDS[0] is the comparison, OPERANDS[3] the label. */
14926
14927 void
14928 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14929 {
14930 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14931 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14932 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14933 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14934 }
14935
14936 /* Return the string to output a conditional branch to LABEL, which is
14937 the operand template of the label, or NULL if the branch is really a
14938 conditional return.
14939
14940 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14941 condition code register and its mode specifies what kind of
14942 comparison we made.
14943
14944 REVERSED is nonzero if we should reverse the sense of the comparison.
14945
14946 INSN is the insn. */
14947
14948 char *
14949 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14950 {
14951 static char string[64];
14952 enum rtx_code code = GET_CODE (op);
14953 rtx cc_reg = XEXP (op, 0);
14954 machine_mode mode = GET_MODE (cc_reg);
14955 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14956 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14957 int really_reversed = reversed ^ need_longbranch;
14958 char *s = string;
14959 const char *ccode;
14960 const char *pred;
14961 rtx note;
14962
14963 validate_condition_mode (code, mode);
14964
14965 /* Work out which way this really branches. We could use
14966 reverse_condition_maybe_unordered here always but this
14967 makes the resulting assembler clearer. */
14968 if (really_reversed)
14969 {
14970 /* Reversal of FP compares needs care -- an ordered compare
14971 becomes an unordered compare and vice versa. */
14972 if (mode == CCFPmode)
14973 code = reverse_condition_maybe_unordered (code);
14974 else
14975 code = reverse_condition (code);
14976 }
14977
14978 switch (code)
14979 {
14980 /* Not all of these are actually distinct opcodes, but
14981 we distinguish them for clarity of the resulting assembler. */
14982 case NE: case LTGT:
14983 ccode = "ne"; break;
14984 case EQ: case UNEQ:
14985 ccode = "eq"; break;
14986 case GE: case GEU:
14987 ccode = "ge"; break;
14988 case GT: case GTU: case UNGT:
14989 ccode = "gt"; break;
14990 case LE: case LEU:
14991 ccode = "le"; break;
14992 case LT: case LTU: case UNLT:
14993 ccode = "lt"; break;
14994 case UNORDERED: ccode = "un"; break;
14995 case ORDERED: ccode = "nu"; break;
14996 case UNGE: ccode = "nl"; break;
14997 case UNLE: ccode = "ng"; break;
14998 default:
14999 gcc_unreachable ();
15000 }
15001
15002 /* Maybe we have a guess as to how likely the branch is. */
15003 pred = "";
15004 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15005 if (note != NULL_RTX)
15006 {
15007 /* PROB is the difference from 50%. */
15008 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15009 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15010
15011 /* Only hint for highly probable/improbable branches on newer cpus when
15012 we have real profile data, as static prediction overrides processor
15013 dynamic prediction. For older cpus we may as well always hint, but
15014 assume not taken for branches that are very close to 50% as a
15015 mispredicted taken branch is more expensive than a
15016 mispredicted not-taken branch. */
15017 if (rs6000_always_hint
15018 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15019 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15020 && br_prob_note_reliable_p (note)))
15021 {
15022 if (abs (prob) > REG_BR_PROB_BASE / 20
15023 && ((prob > 0) ^ need_longbranch))
15024 pred = "+";
15025 else
15026 pred = "-";
15027 }
15028 }
15029
15030 if (label == NULL)
15031 s += sprintf (s, "b%slr%s ", ccode, pred);
15032 else
15033 s += sprintf (s, "b%s%s ", ccode, pred);
15034
15035 /* We need to escape any '%' characters in the reg_names string.
15036 Assume they'd only be the first character.... */
15037 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15038 *s++ = '%';
15039 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15040
15041 if (label != NULL)
15042 {
15043 /* If the branch distance was too far, we may have to use an
15044 unconditional branch to go the distance. */
15045 if (need_longbranch)
15046 s += sprintf (s, ",$+8\n\tb %s", label);
15047 else
15048 s += sprintf (s, ",%s", label);
15049 }
15050
15051 return string;
15052 }
15053
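/* Example outputs (label names are illustrative): an in-range "eq"
   branch on cr0 gives "beq 0,.L2", with "+" or "-" appended to the
   mnemonic as a static prediction hint when the probability note is
   reliable (e.g. "beq- 0,.L2"); an out-of-range branch is emitted
   with the condition reversed, as "bne 0,$+8" followed by "b .L2";
   a NULL LABEL produces the conditional-return form "beqlr 0".  */
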
15054 /* Return insn for VSX or Altivec comparisons. */
15055
15056 static rtx
15057 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15058 {
15059 rtx mask;
15060 machine_mode mode = GET_MODE (op0);
15061
15062 switch (code)
15063 {
15064 default:
15065 break;
15066
15067 case GE:
15068 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15069 return NULL_RTX;
15070 /* FALLTHRU */
15071
15072 case EQ:
15073 case GT:
15074 case GTU:
15075 case ORDERED:
15076 case UNORDERED:
15077 case UNEQ:
15078 case LTGT:
15079 mask = gen_reg_rtx (mode);
15080 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15081 return mask;
15082 }
15083
15084 return NULL_RTX;
15085 }
15086
15087 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15088 DMODE is expected destination mode. This is a recursive function. */
15089
15090 static rtx
15091 rs6000_emit_vector_compare (enum rtx_code rcode,
15092 rtx op0, rtx op1,
15093 machine_mode dmode)
15094 {
15095 rtx mask;
15096 bool swap_operands = false;
15097 bool try_again = false;
15098
15099 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15100 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15101
15102 /* See if the comparison works as is. */
15103 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15104 if (mask)
15105 return mask;
15106
15107 switch (rcode)
15108 {
15109 case LT:
15110 rcode = GT;
15111 swap_operands = true;
15112 try_again = true;
15113 break;
15114 case LTU:
15115 rcode = GTU;
15116 swap_operands = true;
15117 try_again = true;
15118 break;
15119 case NE:
15120 case UNLE:
15121 case UNLT:
15122 case UNGE:
15123 case UNGT:
15124 /* Invert condition and try again.
15125 e.g., A != B becomes ~(A==B). */
15126 {
15127 enum rtx_code rev_code;
15128 enum insn_code nor_code;
15129 rtx mask2;
15130
15131 rev_code = reverse_condition_maybe_unordered (rcode);
15132 if (rev_code == UNKNOWN)
15133 return NULL_RTX;
15134
15135 nor_code = optab_handler (one_cmpl_optab, dmode);
15136 if (nor_code == CODE_FOR_nothing)
15137 return NULL_RTX;
15138
15139 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15140 if (!mask2)
15141 return NULL_RTX;
15142
15143 mask = gen_reg_rtx (dmode);
15144 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15145 return mask;
15146 }
15147 break;
15148 case GE:
15149 case GEU:
15150 case LE:
15151 case LEU:
15152 /* Try GT/GTU/LT/LTU OR EQ */
15153 {
15154 rtx c_rtx, eq_rtx;
15155 enum insn_code ior_code;
15156 enum rtx_code new_code;
15157
15158 switch (rcode)
15159 {
15160 case GE:
15161 new_code = GT;
15162 break;
15163
15164 case GEU:
15165 new_code = GTU;
15166 break;
15167
15168 case LE:
15169 new_code = LT;
15170 break;
15171
15172 case LEU:
15173 new_code = LTU;
15174 break;
15175
15176 default:
15177 gcc_unreachable ();
15178 }
15179
15180 ior_code = optab_handler (ior_optab, dmode);
15181 if (ior_code == CODE_FOR_nothing)
15182 return NULL_RTX;
15183
15184 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15185 if (!c_rtx)
15186 return NULL_RTX;
15187
15188 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15189 if (!eq_rtx)
15190 return NULL_RTX;
15191
15192 mask = gen_reg_rtx (dmode);
15193 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15194 return mask;
15195 }
15196 break;
15197 default:
15198 return NULL_RTX;
15199 }
15200
15201 if (try_again)
15202 {
15203 if (swap_operands)
15204 std::swap (op0, op1);
15205
15206 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15207 if (mask)
15208 return mask;
15209 }
15210
15211 /* You only get two chances. */
15212 return NULL_RTX;
15213 }
15214
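/* For example, V4SImode "a <= b" has no single AltiVec/VSX compare, so
   the code above builds it as (a < b) | (a == b), and the LT half is
   in turn emitted by swapping the operands of a GT compare (vcmpgtsw
   with reversed operands); NE is handled as the complement of EQ.  */
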
15215 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15216 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15217 operands for the relation operation COND. */
15218
15219 int
15220 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15221 rtx cond, rtx cc_op0, rtx cc_op1)
15222 {
15223 machine_mode dest_mode = GET_MODE (dest);
15224 machine_mode mask_mode = GET_MODE (cc_op0);
15225 enum rtx_code rcode = GET_CODE (cond);
15226 machine_mode cc_mode = CCmode;
15227 rtx mask;
15228 rtx cond2;
15229 bool invert_move = false;
15230
15231 if (VECTOR_UNIT_NONE_P (dest_mode))
15232 return 0;
15233
15234 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15235 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15236
15237 switch (rcode)
15238 {
15239 /* Swap operands if we can, and fall back to doing the operation as
15240 specified, and doing a NOR to invert the test. */
15241 case NE:
15242 case UNLE:
15243 case UNLT:
15244 case UNGE:
15245 case UNGT:
15246 /* Invert condition and try again.
15247 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15248 invert_move = true;
15249 rcode = reverse_condition_maybe_unordered (rcode);
15250 if (rcode == UNKNOWN)
15251 return 0;
15252 break;
15253
15254 case GE:
15255 case LE:
15256 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15257 {
15258 /* Invert condition to avoid compound test. */
15259 invert_move = true;
15260 rcode = reverse_condition (rcode);
15261 }
15262 break;
15263
15264 case GTU:
15265 case GEU:
15266 case LTU:
15267 case LEU:
15268 /* Mark unsigned tests with CCUNSmode. */
15269 cc_mode = CCUNSmode;
15270
15271 /* Invert condition to avoid compound test if necessary. */
15272 if (rcode == GEU || rcode == LEU)
15273 {
15274 invert_move = true;
15275 rcode = reverse_condition (rcode);
15276 }
15277 break;
15278
15279 default:
15280 break;
15281 }
15282
15283 /* Get the vector mask for the given relational operations. */
15284 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15285
15286 if (!mask)
15287 return 0;
15288
15289 if (invert_move)
15290 std::swap (op_true, op_false);
15291
15292 /* The compare mask is -1/0 per element, so constant -1/0 selections can use it directly. */
15293 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15294 && (GET_CODE (op_true) == CONST_VECTOR
15295 || GET_CODE (op_false) == CONST_VECTOR))
15296 {
15297 rtx constant_0 = CONST0_RTX (dest_mode);
15298 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15299
15300 if (op_true == constant_m1 && op_false == constant_0)
15301 {
15302 emit_move_insn (dest, mask);
15303 return 1;
15304 }
15305
15306 else if (op_true == constant_0 && op_false == constant_m1)
15307 {
15308 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15309 return 1;
15310 }
15311
15312 /* If we can't use the vector comparison directly, perhaps we can use
15313 the mask for the true or false fields, instead of loading up a
15314 constant. */
15315 if (op_true == constant_m1)
15316 op_true = mask;
15317
15318 if (op_false == constant_0)
15319 op_false = mask;
15320 }
15321
15322 if (!REG_P (op_true) && !SUBREG_P (op_true))
15323 op_true = force_reg (dest_mode, op_true);
15324
15325 if (!REG_P (op_false) && !SUBREG_P (op_false))
15326 op_false = force_reg (dest_mode, op_false);
15327
15328 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
15329 CONST0_RTX (dest_mode));
15330 emit_insn (gen_rtx_SET (dest,
15331 gen_rtx_IF_THEN_ELSE (dest_mode,
15332 cond2,
15333 op_true,
15334 op_false)));
15335 return 1;
15336 }
15337
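/* For example, for V4SImode "dest = (a == b) ? -1 : 0" the vcmpequw
   mask is the result itself and is copied straight to DEST; the
   inverted selection "? 0 : -1" is a single NOT of the mask; all
   remaining cases end in the IF_THEN_ELSE form that matches the
   vsel/xxsel patterns.  */
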
15338 /* Possibly emit the xsmaxcdp and xsmincdp instructions to emit a maximum or
15339 minimum with "C" semantics.
15340
15341 Unless you use -ffast-math, you can't use these instructions to replace
15342 conditions that implicitly reverse the condition, because the comparison
15343 might generate a NaN or signed zero.
15344
15345 I.e. the following can be replaced all of the time
15346 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15347 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15348 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15349 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15350
15351 The following can be replaced only if -ffast-math is used:
15352 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15353 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15354 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15355 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15356
15357 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15358 nonzero/true, FALSE_COND if it is zero/false.
15359
15360 Return false if we can't generate the appropriate minimum or maximum, and
15361 true if we did generate the minimum or maximum. */
15362
15363 static bool
15364 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15365 {
15366 enum rtx_code code = GET_CODE (op);
15367 rtx op0 = XEXP (op, 0);
15368 rtx op1 = XEXP (op, 1);
15369 machine_mode compare_mode = GET_MODE (op0);
15370 machine_mode result_mode = GET_MODE (dest);
15371 bool max_p = false;
15372
15373 if (result_mode != compare_mode)
15374 return false;
15375
15376 if (code == GE || code == GT)
15377 max_p = true;
15378 else if (code == LE || code == LT)
15379 max_p = false;
15380 else
15381 return false;
15382
15383 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15384 ;
15385
15386 /* Only when NaNs and signed-zeros are not in effect, smax could be
15387 used for `op0 < op1 ? op1 : op0`, and smin could be used for
15388 `op0 > op1 ? op1 : op0`. */
15389 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15390 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15391 max_p = !max_p;
15392
15393 else
15394 return false;
15395
15396 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15397 return true;
15398 }
15399
15400 /* Possibly emit a floating point conditional move by generating a compare that
15401 sets a mask instruction and a XXSEL select instruction.
15402
15403 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15404 nonzero/true, FALSE_COND if it is zero/false.
15405
15406 Return false if the operation cannot be generated, and true if we could
15407 generate the instruction. */
15408
15409 static bool
15410 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15411 {
15412 enum rtx_code code = GET_CODE (op);
15413 rtx op0 = XEXP (op, 0);
15414 rtx op1 = XEXP (op, 1);
15415 machine_mode result_mode = GET_MODE (dest);
15416 rtx compare_rtx;
15417 rtx cmove_rtx;
15418 rtx clobber_rtx;
15419
15420 if (!can_create_pseudo_p ())
15421 return false;
15422
15423 switch (code)
15424 {
15425 case EQ:
15426 case GE:
15427 case GT:
15428 break;
15429
15430 case NE:
15431 case LT:
15432 case LE:
15433 code = swap_condition (code);
15434 std::swap (op0, op1);
15435 break;
15436
15437 default:
15438 return false;
15439 }
15440
15441 /* Generate: [(parallel [(set (dest)
15442 (if_then_else (op (cmp1) (cmp2))
15443 (true)
15444 (false)))
15445 (clobber (scratch))])]. */
15446
15447 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15448 cmove_rtx = gen_rtx_SET (dest,
15449 gen_rtx_IF_THEN_ELSE (result_mode,
15450 compare_rtx,
15451 true_cond,
15452 false_cond));
15453
15454 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15455 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15456 gen_rtvec (2, cmove_rtx, clobber_rtx)));
15457
15458 return true;
15459 }
15460
15461 /* Helper function to return true if the target has a compare-and-set-mask
15462 instruction that can be used with XXSEL to implement a conditional
15463 move. It is assumed that such a target also supports the "C" minimum
15464 and maximum instructions. */
15465
15466 static bool
15467 have_compare_and_set_mask (machine_mode mode)
15468 {
15469 switch (mode)
15470 {
15471 case E_SFmode:
15472 case E_DFmode:
15473 return TARGET_P9_MINMAX;
15474
15475 default:
15476 break;
15477 }
15478
15479 return false;
15480 }
15481
15482 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
15483 operands of the last comparison is nonzero/true, FALSE_COND if it
15484 is zero/false. Return 0 if the hardware has no such operation. */
15485
15486 bool
15487 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15488 {
15489 enum rtx_code code = GET_CODE (op);
15490 rtx op0 = XEXP (op, 0);
15491 rtx op1 = XEXP (op, 1);
15492 machine_mode compare_mode = GET_MODE (op0);
15493 machine_mode result_mode = GET_MODE (dest);
15494 rtx temp;
15495 bool is_against_zero;
15496
15497 /* These modes should always match. */
15498 if (GET_MODE (op1) != compare_mode
15499 /* In the isel case however, we can use a compare immediate, so
15500 op1 may be a small constant. */
15501 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
15502 return false;
15503 if (GET_MODE (true_cond) != result_mode)
15504 return false;
15505 if (GET_MODE (false_cond) != result_mode)
15506 return false;
15507
15508 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15509 instructions. */
15510 if (have_compare_and_set_mask (compare_mode)
15511 && have_compare_and_set_mask (result_mode))
15512 {
15513 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
15514 return true;
15515
15516 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
15517 return true;
15518 }
15519
15520 /* Don't allow using floating point comparisons for integer results for
15521 now. */
15522 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
15523 return false;
15524
15525 /* First, work out if the hardware can do this at all, or
15526 if it's too slow.... */
15527 if (!FLOAT_MODE_P (compare_mode))
15528 {
15529 if (TARGET_ISEL)
15530 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
15531 return false;
15532 }
15533
15534 is_against_zero = op1 == CONST0_RTX (compare_mode);
15535
15536 /* A floating-point subtract might overflow, underflow, or produce
15537 an inexact result, thus changing the floating-point flags, so it
15538 can't be generated if we care about that. It's safe if one side
15539 of the construct is zero, since then no subtract will be
15540 generated. */
15541 if (SCALAR_FLOAT_MODE_P (compare_mode)
15542 && flag_trapping_math && ! is_against_zero)
15543 return false;
15544
15545 /* Eliminate half of the comparisons by switching operands, this
15546 makes the remaining code simpler. */
15547 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15548 || code == LTGT || code == LT || code == UNLE)
15549 {
15550 code = reverse_condition_maybe_unordered (code);
15551 temp = true_cond;
15552 true_cond = false_cond;
15553 false_cond = temp;
15554 }
15555
15556 /* UNEQ and LTGT take four instructions for a comparison with zero,
15557 it'll probably be faster to use a branch here too. */
15558 if (code == UNEQ && HONOR_NANS (compare_mode))
15559 return false;
15560
15561 /* We're going to try to implement comparisons by performing
15562 a subtract, then comparing against zero. Unfortunately,
15563 Inf - Inf is NaN which is not zero, and so if we don't
15564 know that the operand is finite and the comparison
15565 would treat EQ different to UNORDERED, we can't do it. */
15566 if (HONOR_INFINITIES (compare_mode)
15567 && code != GT && code != UNGE
15568 && (!CONST_DOUBLE_P (op1)
15569 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15570 /* Constructs of the form (a OP b ? a : b) are safe. */
15571 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15572 || (! rtx_equal_p (op0, true_cond)
15573 && ! rtx_equal_p (op1, true_cond))))
15574 return false;
15575
15576 /* At this point we know we can use fsel. */
15577
15578 /* Don't allow compare_mode other than SFmode or DFmode, for others there
15579 is no fsel instruction. */
15580 if (compare_mode != SFmode && compare_mode != DFmode)
15581 return false;
15582
15583 /* Reduce the comparison to a comparison against zero. */
15584 if (! is_against_zero)
15585 {
15586 temp = gen_reg_rtx (compare_mode);
15587 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15588 op0 = temp;
15589 op1 = CONST0_RTX (compare_mode);
15590 }
15591
15592 /* If we don't care about NaNs we can reduce some of the comparisons
15593 down to faster ones. */
15594 if (! HONOR_NANS (compare_mode))
15595 switch (code)
15596 {
15597 case GT:
15598 code = LE;
15599 temp = true_cond;
15600 true_cond = false_cond;
15601 false_cond = temp;
15602 break;
15603 case UNGE:
15604 code = GE;
15605 break;
15606 case UNEQ:
15607 code = EQ;
15608 break;
15609 default:
15610 break;
15611 }
15612
15613 /* Now, reduce everything down to a GE. */
15614 switch (code)
15615 {
15616 case GE:
15617 break;
15618
15619 case LE:
15620 temp = gen_reg_rtx (compare_mode);
15621 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15622 op0 = temp;
15623 break;
15624
15625 case ORDERED:
15626 temp = gen_reg_rtx (compare_mode);
15627 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15628 op0 = temp;
15629 break;
15630
15631 case EQ:
15632 temp = gen_reg_rtx (compare_mode);
15633 emit_insn (gen_rtx_SET (temp,
15634 gen_rtx_NEG (compare_mode,
15635 gen_rtx_ABS (compare_mode, op0))));
15636 op0 = temp;
15637 break;
15638
15639 case UNGE:
15640 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15641 temp = gen_reg_rtx (result_mode);
15642 emit_insn (gen_rtx_SET (temp,
15643 gen_rtx_IF_THEN_ELSE (result_mode,
15644 gen_rtx_GE (VOIDmode,
15645 op0, op1),
15646 true_cond, false_cond)));
15647 false_cond = true_cond;
15648 true_cond = temp;
15649
15650 temp = gen_reg_rtx (compare_mode);
15651 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15652 op0 = temp;
15653 break;
15654
15655 case GT:
15656 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15657 temp = gen_reg_rtx (result_mode);
15658 emit_insn (gen_rtx_SET (temp,
15659 gen_rtx_IF_THEN_ELSE (result_mode,
15660 gen_rtx_GE (VOIDmode,
15661 op0, op1),
15662 true_cond, false_cond)));
15663 true_cond = false_cond;
15664 false_cond = temp;
15665
15666 temp = gen_reg_rtx (compare_mode);
15667 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15668 op0 = temp;
15669 break;
15670
15671 default:
15672 gcc_unreachable ();
15673 }
15674
15675 emit_insn (gen_rtx_SET (dest,
15676 gen_rtx_IF_THEN_ELSE (result_mode,
15677 gen_rtx_GE (VOIDmode,
15678 op0, op1),
15679 true_cond, false_cond)));
15680 return true;
15681 }
15682
15683 /* Same as above, but for ints (isel). */
15684
15685 bool
15686 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15687 {
15688 rtx condition_rtx, cr;
15689 machine_mode mode = GET_MODE (dest);
15690 enum rtx_code cond_code;
15691 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15692 bool signedp;
15693
15694 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15695 return false;
15696
15697 /* We still have to do the compare, because isel doesn't do a
15698 compare, it just looks at the CRx bits set by a previous compare
15699 instruction. */
15700 condition_rtx = rs6000_generate_compare (op, mode);
15701 cond_code = GET_CODE (condition_rtx);
15702 cr = XEXP (condition_rtx, 0);
15703 signedp = GET_MODE (cr) == CCmode;
15704
15705 isel_func = (mode == SImode
15706 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15707 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15708
15709 switch (cond_code)
15710 {
15711 case LT: case GT: case LTU: case GTU: case EQ:
15712 /* isel handles these directly. */
15713 break;
15714
15715 default:
15716 /* We need to swap the sense of the comparison. */
15717 {
15718 std::swap (false_cond, true_cond);
15719 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15720 }
15721 break;
15722 }
15723
15724 false_cond = force_reg (mode, false_cond);
15725 if (true_cond != const0_rtx)
15726 true_cond = force_reg (mode, true_cond);
15727
15728 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15729
15730 return true;
15731 }
15732
15733 void
15734 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15735 {
15736 machine_mode mode = GET_MODE (op0);
15737 enum rtx_code c;
15738 rtx target;
15739
15740 /* VSX/altivec have direct min/max insns. */
15741 if ((code == SMAX || code == SMIN)
15742 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15743 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15744 {
15745 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15746 return;
15747 }
15748
15749 if (code == SMAX || code == SMIN)
15750 c = GE;
15751 else
15752 c = GEU;
15753
15754 if (code == SMAX || code == UMAX)
15755 target = emit_conditional_move (dest, c, op0, op1, mode,
15756 op0, op1, mode, 0);
15757 else
15758 target = emit_conditional_move (dest, c, op0, op1, mode,
15759 op1, op0, mode, 0);
15760 gcc_assert (target);
15761 if (target != dest)
15762 emit_move_insn (dest, target);
15763 }
15764
15765 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15766 COND is true. Mark the jump as unlikely to be taken. */
15767
15768 static void
15769 emit_unlikely_jump (rtx cond, rtx label)
15770 {
15771 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15772 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15773 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15774 }
15775
15776 /* A subroutine of the atomic operation splitters. Emit a load-locked
15777 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15778 the zero_extend operation. */
15779
15780 static void
15781 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15782 {
15783 rtx (*fn) (rtx, rtx) = NULL;
15784
15785 switch (mode)
15786 {
15787 case E_QImode:
15788 fn = gen_load_lockedqi;
15789 break;
15790 case E_HImode:
15791 fn = gen_load_lockedhi;
15792 break;
15793 case E_SImode:
15794 if (GET_MODE (mem) == QImode)
15795 fn = gen_load_lockedqi_si;
15796 else if (GET_MODE (mem) == HImode)
15797 fn = gen_load_lockedhi_si;
15798 else
15799 fn = gen_load_lockedsi;
15800 break;
15801 case E_DImode:
15802 fn = gen_load_lockeddi;
15803 break;
15804 case E_TImode:
15805 fn = gen_load_lockedti;
15806 break;
15807 default:
15808 gcc_unreachable ();
15809 }
15810 emit_insn (fn (reg, mem));
15811 }
15812
15813 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15814 instruction in MODE. */
15815
15816 static void
15817 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15818 {
15819 rtx (*fn) (rtx, rtx, rtx) = NULL;
15820
15821 switch (mode)
15822 {
15823 case E_QImode:
15824 fn = gen_store_conditionalqi;
15825 break;
15826 case E_HImode:
15827 fn = gen_store_conditionalhi;
15828 break;
15829 case E_SImode:
15830 fn = gen_store_conditionalsi;
15831 break;
15832 case E_DImode:
15833 fn = gen_store_conditionaldi;
15834 break;
15835 case E_TImode:
15836 fn = gen_store_conditionalti;
15837 break;
15838 default:
15839 gcc_unreachable ();
15840 }
15841
15842 /* Emit sync before stwcx. to address PPC405 Erratum. */
15843 if (PPC405_ERRATUM77)
15844 emit_insn (gen_hwsync ());
15845
15846 emit_insn (fn (res, mem, val));
15847 }
15848
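/* Together these two helpers wrap the PowerPC reservation pair: the
   load-locked patterns map to lbarx/lharx/lwarx/ldarx/lqarx and the
   store-conditional patterns to stbcx./sthcx./stwcx./stdcx./stqcx.,
   where the QImode/HImode forms require TARGET_SYNC_HI_QI (power8).  */
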
15849 /* Expand barriers before and after a load_locked/store_cond sequence. */
15850
15851 static rtx
15852 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15853 {
15854 rtx addr = XEXP (mem, 0);
15855
15856 if (!legitimate_indirect_address_p (addr, reload_completed)
15857 && !legitimate_indexed_address_p (addr, reload_completed))
15858 {
15859 addr = force_reg (Pmode, addr);
15860 mem = replace_equiv_address_nv (mem, addr);
15861 }
15862
15863 switch (model)
15864 {
15865 case MEMMODEL_RELAXED:
15866 case MEMMODEL_CONSUME:
15867 case MEMMODEL_ACQUIRE:
15868 break;
15869 case MEMMODEL_RELEASE:
15870 case MEMMODEL_ACQ_REL:
15871 emit_insn (gen_lwsync ());
15872 break;
15873 case MEMMODEL_SEQ_CST:
15874 emit_insn (gen_hwsync ());
15875 break;
15876 default:
15877 gcc_unreachable ();
15878 }
15879 return mem;
15880 }
15881
15882 static void
15883 rs6000_post_atomic_barrier (enum memmodel model)
15884 {
15885 switch (model)
15886 {
15887 case MEMMODEL_RELAXED:
15888 case MEMMODEL_CONSUME:
15889 case MEMMODEL_RELEASE:
15890 break;
15891 case MEMMODEL_ACQUIRE:
15892 case MEMMODEL_ACQ_REL:
15893 case MEMMODEL_SEQ_CST:
15894 emit_insn (gen_isync ());
15895 break;
15896 default:
15897 gcc_unreachable ();
15898 }
15899 }
15900
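/* Summary of the barrier placement implemented by the two functions
   above:

     relaxed, consume:	no barriers
     acquire:		isync after
     release:		lwsync before
     acq_rel:		lwsync before, isync after
     seq_cst:		hwsync before, isync after  */
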
15901 /* A subroutine of the various atomic expanders. For sub-word operations,
15902 we must adjust things to operate on SImode. Given the original MEM,
15903 return a new aligned memory. Also build and return the quantities by
15904 which to shift and mask. */
15905
15906 static rtx
15907 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15908 {
15909 rtx addr, align, shift, mask, mem;
15910 HOST_WIDE_INT shift_mask;
15911 machine_mode mode = GET_MODE (orig_mem);
15912
15913 /* For smaller modes, we have to implement this via SImode. */
15914 shift_mask = (mode == QImode ? 0x18 : 0x10);
15915
15916 addr = XEXP (orig_mem, 0);
15917 addr = force_reg (GET_MODE (addr), addr);
15918
15919 /* Aligned memory containing subword. Generate a new memory. We
15920 do not want any of the existing MEM_ATTR data, as we're now
15921 accessing memory outside the original object. */
15922 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15923 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15924 mem = gen_rtx_MEM (SImode, align);
15925 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15926 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15927 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15928
15929 /* Shift amount for subword relative to aligned word. */
15930 shift = gen_reg_rtx (SImode);
15931 addr = gen_lowpart (SImode, addr);
15932 rtx tmp = gen_reg_rtx (SImode);
15933 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15934 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15935 if (BYTES_BIG_ENDIAN)
15936 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15937 shift, 1, OPTAB_LIB_WIDEN);
15938 *pshift = shift;
15939
15940 /* Mask for insertion. */
15941 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15942 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15943 *pmask = mask;
15944
15945 return mem;
15946 }
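/* For example (little-endian, hypothetical address): a QImode access at
   0x1003 yields the aligned SImode MEM at 0x1000, SHIFT = 0x18 (the byte
   offset 3 scaled to bits and masked with 0x18) and MASK = 0xff << 0x18.
   On big-endian the shift is XORed with 0x18, giving 0 instead.  */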
15947
15948 /* A subroutine of the various atomic expanders. For sub-word operands,
15949 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15950
15951 static rtx
15952 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15953 {
15954 rtx x;
15955
15956 x = gen_reg_rtx (SImode);
15957 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15958 gen_rtx_NOT (SImode, mask),
15959 oldval)));
15960
15961 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15962
15963 return x;
15964 }
15965
15966 /* A subroutine of the various atomic expanders. For sub-word operands,
15967 extract WIDE to NARROW via SHIFT. */
15968
15969 static void
15970 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15971 {
15972 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15973 wide, 1, OPTAB_LIB_WIDEN);
15974 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15975 }
15976
15977 /* Expand an atomic compare and swap operation. */
15978
15979 void
15980 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15981 {
15982 rtx boolval, retval, mem, oldval, newval, cond;
15983 rtx label1, label2, x, mask, shift;
15984 machine_mode mode, orig_mode;
15985 enum memmodel mod_s, mod_f;
15986 bool is_weak;
15987
15988 boolval = operands[0];
15989 retval = operands[1];
15990 mem = operands[2];
15991 oldval = operands[3];
15992 newval = operands[4];
15993 is_weak = (INTVAL (operands[5]) != 0);
15994 mod_s = memmodel_base (INTVAL (operands[6]));
15995 mod_f = memmodel_base (INTVAL (operands[7]));
15996 orig_mode = mode = GET_MODE (mem);
15997
15998 mask = shift = NULL_RTX;
15999 if (mode == QImode || mode == HImode)
16000 {
16001 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16002 lwarx and use shift/mask operations. With power8, we need to do the
16003 comparison in SImode, but the store is still done in QI/HImode. */
16004 oldval = convert_modes (SImode, mode, oldval, 1);
16005
16006 if (!TARGET_SYNC_HI_QI)
16007 {
16008 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16009
16010 /* Shift and mask OLDVAL into position within the word. */
16011 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16012 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16013
16014 /* Shift and mask NEWVAL into position within the word. */
16015 newval = convert_modes (SImode, mode, newval, 1);
16016 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16017 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16018 }
16019
16020 /* Prepare to adjust the return value. */
16021 retval = gen_reg_rtx (SImode);
16022 mode = SImode;
16023 }
16024 else if (reg_overlap_mentioned_p (retval, oldval))
16025 oldval = copy_to_reg (oldval);
16026
16027 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16028 oldval = copy_to_mode_reg (mode, oldval);
16029
16030 if (reg_overlap_mentioned_p (retval, newval))
16031 newval = copy_to_reg (newval);
16032
16033 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16034
16035 label1 = NULL_RTX;
16036 if (!is_weak)
16037 {
16038 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16039 emit_label (XEXP (label1, 0));
16040 }
16041 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16042
16043 emit_load_locked (mode, retval, mem);
16044
16045 x = retval;
16046 if (mask)
16047 x = expand_simple_binop (SImode, AND, retval, mask,
16048 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16049
16050 cond = gen_reg_rtx (CCmode);
16051 /* If we have TImode, synthesize a comparison. */
16052 if (mode != TImode)
16053 x = gen_rtx_COMPARE (CCmode, x, oldval);
16054 else
16055 {
16056 rtx xor1_result = gen_reg_rtx (DImode);
16057 rtx xor2_result = gen_reg_rtx (DImode);
16058 rtx or_result = gen_reg_rtx (DImode);
16059 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16060 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16061 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16062 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16063
16064 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16065 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16066 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16067 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16068 }
16069
16070 emit_insn (gen_rtx_SET (cond, x));
16071
16072 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16073 emit_unlikely_jump (x, label2);
16074
16075 x = newval;
16076 if (mask)
16077 x = rs6000_mask_atomic_subword (retval, newval, mask);
16078
16079 emit_store_conditional (orig_mode, cond, mem, x);
16080
16081 if (!is_weak)
16082 {
16083 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16084 emit_unlikely_jump (x, label1);
16085 }
16086
16087 if (!is_mm_relaxed (mod_f))
16088 emit_label (XEXP (label2, 0));
16089
16090 rs6000_post_atomic_barrier (mod_s);
16091
16092 if (is_mm_relaxed (mod_f))
16093 emit_label (XEXP (label2, 0));
16094
16095 if (shift)
16096 rs6000_finish_atomic_subword (operands[1], retval, shift);
16097 else if (mode != GET_MODE (operands[1]))
16098 convert_move (operands[1], retval, 1);
16099
16100 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16101 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16102 emit_insn (gen_rtx_SET (boolval, x));
16103 }
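/* For reference, a sketch (register numbers and labels are illustrative,
   not the literal emitted template) of the loop produced above for a
   strong SImode compare-and-swap with seq_cst ordering:

	hwsync
   1:	lwarx  9,0,3		# retval = *mem
	cmpw   0,9,4		# retval == oldval?
	bne-   0,2f
	stwcx. 10,0,3		# *mem = newval, sets CR0
	bne-   0,1b		# reservation lost: retry (strong only)
	isync
   2:				# CR0 has EQ on success, NE on failure  */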
16104
16105 /* Expand an atomic exchange operation. */
16106
16107 void
16108 rs6000_expand_atomic_exchange (rtx operands[])
16109 {
16110 rtx retval, mem, val, cond;
16111 machine_mode mode;
16112 enum memmodel model;
16113 rtx label, x, mask, shift;
16114
16115 retval = operands[0];
16116 mem = operands[1];
16117 val = operands[2];
16118 model = memmodel_base (INTVAL (operands[3]));
16119 mode = GET_MODE (mem);
16120
16121 mask = shift = NULL_RTX;
16122 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16123 {
16124 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16125
16126 /* Shift and mask VAL into position within the word. */
16127 val = convert_modes (SImode, mode, val, 1);
16128 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16129 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16130
16131 /* Prepare to adjust the return value. */
16132 retval = gen_reg_rtx (SImode);
16133 mode = SImode;
16134 }
16135
16136 mem = rs6000_pre_atomic_barrier (mem, model);
16137
16138 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16139 emit_label (XEXP (label, 0));
16140
16141 emit_load_locked (mode, retval, mem);
16142
16143 x = val;
16144 if (mask)
16145 x = rs6000_mask_atomic_subword (retval, val, mask);
16146
16147 cond = gen_reg_rtx (CCmode);
16148 emit_store_conditional (mode, cond, mem, x);
16149
16150 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16151 emit_unlikely_jump (x, label);
16152
16153 rs6000_post_atomic_barrier (model);
16154
16155 if (shift)
16156 rs6000_finish_atomic_subword (operands[0], retval, shift);
16157 }
16158
16159 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16160 to perform. MEM is the memory on which to operate. VAL is the second
16161 operand of the binary operator. BEFORE and AFTER are optional locations to
16162 return the value of MEM either before or after the operation. MODEL_RTX
16163 is a CONST_INT containing the memory model to use. */
16164
16165 void
16166 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16167 rtx orig_before, rtx orig_after, rtx model_rtx)
16168 {
16169 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16170 machine_mode mode = GET_MODE (mem);
16171 machine_mode store_mode = mode;
16172 rtx label, x, cond, mask, shift;
16173 rtx before = orig_before, after = orig_after;
16174
16175 mask = shift = NULL_RTX;
16176 /* On power8, we want to use SImode for the operation. On previous systems,
16177 do the operation on the full word and shift/mask to get the proper byte or
16178 halfword. */
16179 if (mode == QImode || mode == HImode)
16180 {
16181 if (TARGET_SYNC_HI_QI)
16182 {
16183 val = convert_modes (SImode, mode, val, 1);
16184
16185 /* Prepare to adjust the return value. */
16186 before = gen_reg_rtx (SImode);
16187 if (after)
16188 after = gen_reg_rtx (SImode);
16189 mode = SImode;
16190 }
16191 else
16192 {
16193 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16194
16195 /* Shift and mask VAL into position within the word. */
16196 val = convert_modes (SImode, mode, val, 1);
16197 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16198 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16199
16200 switch (code)
16201 {
16202 case IOR:
16203 case XOR:
16204 /* We've already zero-extended VAL. That is sufficient to
16205 make certain that it does not affect other bits. */
16206 mask = NULL;
16207 break;
16208
16209 case AND:
16210 /* If we make certain that all of the other bits in VAL are
16211 set, that will be sufficient to not affect other bits. */
16212 x = gen_rtx_NOT (SImode, mask);
16213 x = gen_rtx_IOR (SImode, x, val);
16214 emit_insn (gen_rtx_SET (val, x));
16215 mask = NULL;
16216 break;
16217
16218 case NOT:
16219 case PLUS:
16220 case MINUS:
16221 /* These will all affect bits outside the field and need
16222 adjustment via MASK within the loop. */
16223 break;
16224
16225 default:
16226 gcc_unreachable ();
16227 }
16228
16229 /* Prepare to adjust the return value. */
16230 before = gen_reg_rtx (SImode);
16231 if (after)
16232 after = gen_reg_rtx (SImode);
16233 store_mode = mode = SImode;
16234 }
16235 }
16236
16237 mem = rs6000_pre_atomic_barrier (mem, model);
16238
16239 label = gen_label_rtx ();
16240 emit_label (label);
16241 label = gen_rtx_LABEL_REF (VOIDmode, label);
16242
16243 if (before == NULL_RTX)
16244 before = gen_reg_rtx (mode);
16245
16246 emit_load_locked (mode, before, mem);
16247
16248 if (code == NOT)
16249 {
16250 x = expand_simple_binop (mode, AND, before, val,
16251 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16252 after = expand_simple_unop (mode, NOT, x, after, 1);
16253 }
16254 else
16255 {
16256 after = expand_simple_binop (mode, code, before, val,
16257 after, 1, OPTAB_LIB_WIDEN);
16258 }
16259
16260 x = after;
16261 if (mask)
16262 {
16263 x = expand_simple_binop (SImode, AND, after, mask,
16264 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16265 x = rs6000_mask_atomic_subword (before, x, mask);
16266 }
16267 else if (store_mode != mode)
16268 x = convert_modes (store_mode, mode, x, 1);
16269
16270 cond = gen_reg_rtx (CCmode);
16271 emit_store_conditional (store_mode, cond, mem, x);
16272
16273 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16274 emit_unlikely_jump (x, label);
16275
16276 rs6000_post_atomic_barrier (model);
16277
16278 if (shift)
16279 {
16280 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16281 then do the calculations in an SImode register. */
16282 if (orig_before)
16283 rs6000_finish_atomic_subword (orig_before, before, shift);
16284 if (orig_after)
16285 rs6000_finish_atomic_subword (orig_after, after, shift);
16286 }
16287 else if (store_mode != mode)
16288 {
16289 /* QImode/HImode on machines with lbarx/lharx where we do the native
16290 operation and then do the calculations in an SImode register. */
16291 if (orig_before)
16292 convert_move (orig_before, before, 1);
16293 if (orig_after)
16294 convert_move (orig_after, after, 1);
16295 }
16296 else if (orig_after && after != orig_after)
16297 emit_move_insn (orig_after, after);
16298 }
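/* As a concrete (hypothetical) subword example: a QImode fetch-and-AND on a
   pre-power8 target shifts VAL into its byte of the containing word and then
   ORs ~MASK into it, so every bit outside the target byte is 1. The
   full-word AND inside the lwarx/stwcx. loop therefore leaves the
   neighbouring bytes unchanged without any extra masking in the loop,
   which is why MASK is cleared for the AND case above.  */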
16299
16300 /* Emit instructions to move SRC to DST. Called by splitters for
16301 multi-register moves. It will emit at most one instruction for
16302 each register that is accessed; that is, it won't emit li/lis pairs
16303 (or equivalent for 64-bit code). One of SRC or DST must be a hard
16304 register. */
16305
16306 void
16307 rs6000_split_multireg_move (rtx dst, rtx src)
16308 {
16309 /* The register number of the first register being moved. */
16310 int reg;
16311 /* The mode that is to be moved. */
16312 machine_mode mode;
16313 /* The mode that the move is being done in, and its size. */
16314 machine_mode reg_mode;
16315 int reg_mode_size;
16316 /* The number of registers that will be moved. */
16317 int nregs;
16318
16319 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
16320 mode = GET_MODE (dst);
16321 nregs = hard_regno_nregs (reg, mode);
16322
16323 /* If we have a vector quad register for MMA, and this is a load or store,
16324 see if we can use vector paired load/stores. */
16325 if (mode == XOmode && TARGET_MMA
16326 && (MEM_P (dst) || MEM_P (src)))
16327 {
16328 reg_mode = OOmode;
16329 nregs /= 2;
16330 }
16331 /* If we have a vector pair/quad mode, split it into two/four separate
16332 vectors. */
16333 else if (mode == OOmode || mode == XOmode)
16334 reg_mode = V1TImode;
16335 else if (FP_REGNO_P (reg))
16336 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
16337 (TARGET_HARD_FLOAT ? DFmode : SFmode);
16338 else if (ALTIVEC_REGNO_P (reg))
16339 reg_mode = V16QImode;
16340 else
16341 reg_mode = word_mode;
16342 reg_mode_size = GET_MODE_SIZE (reg_mode);
16343
16344 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
16345
16346 /* TDmode residing in FP registers is special, since the ISA requires that
16347 the lower-numbered word of a register pair is always the most significant
16348 word, even in little-endian mode. This does not match the usual subreg
16349 semantics, so we cannot use simplify_gen_subreg in those cases. Access
16350 the appropriate constituent registers "by hand" in little-endian mode.
16351
16352 Note we do not need to check for destructive overlap here since TDmode
16353 can only reside in even/odd register pairs. */
16354 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
16355 {
16356 rtx p_src, p_dst;
16357 int i;
16358
16359 for (i = 0; i < nregs; i++)
16360 {
16361 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
16362 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
16363 else
16364 p_src = simplify_gen_subreg (reg_mode, src, mode,
16365 i * reg_mode_size);
16366
16367 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
16368 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
16369 else
16370 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
16371 i * reg_mode_size);
16372
16373 emit_insn (gen_rtx_SET (p_dst, p_src));
16374 }
16375
16376 return;
16377 }
16378
16379 /* The __vector_pair and __vector_quad modes are multi-register
16380 modes, so if we have to load or store the registers, we have to be
16381 careful to properly swap them if we're in little endian mode
16382 below. This means the last register gets the first memory
16383 location. We also need to be careful of using the right register
16384 numbers if we are splitting XO to OO. */
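/* For example (an illustrative case): storing an OOmode value held in VSX
   registers 34-35 on a little-endian target stores register 35 at offset 0
   and register 34 at offset 16; the last register gets the first memory
   location, as noted above.  */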
16385 if (mode == OOmode || mode == XOmode)
16386 {
16387 nregs = hard_regno_nregs (reg, mode);
16388 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
16389 if (MEM_P (dst))
16390 {
16391 unsigned offset = 0;
16392 unsigned size = GET_MODE_SIZE (reg_mode);
16393
16394 /* If we are reading an accumulator register, we have to
16395 deprime it before we can access it. */
16396 if (TARGET_MMA
16397 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16398 emit_insn (gen_mma_xxmfacc (src, src));
16399
16400 for (int i = 0; i < nregs; i += reg_mode_nregs)
16401 {
16402 unsigned subreg =
16403 (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
16404 rtx dst2 = adjust_address (dst, reg_mode, offset);
16405 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
16406 offset += size;
16407 emit_insn (gen_rtx_SET (dst2, src2));
16408 }
16409
16410 return;
16411 }
16412
16413 if (MEM_P (src))
16414 {
16415 unsigned offset = 0;
16416 unsigned size = GET_MODE_SIZE (reg_mode);
16417
16418 for (int i = 0; i < nregs; i += reg_mode_nregs)
16419 {
16420 unsigned subreg =
16421 (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
16422 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
16423 rtx src2 = adjust_address (src, reg_mode, offset);
16424 offset += size;
16425 emit_insn (gen_rtx_SET (dst2, src2));
16426 }
16427
16428 /* If we are writing an accumulator register, we have to
16429 prime it after we've written it. */
16430 if (TARGET_MMA
16431 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16432 emit_insn (gen_mma_xxmtacc (dst, dst));
16433
16434 return;
16435 }
16436
16437 if (GET_CODE (src) == UNSPEC)
16438 {
16439 gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
16440 gcc_assert (REG_P (dst));
16441 if (GET_MODE (src) == XOmode)
16442 gcc_assert (FP_REGNO_P (REGNO (dst)));
16443 if (GET_MODE (src) == OOmode)
16444 gcc_assert (VSX_REGNO_P (REGNO (dst)));
16445
16446 reg_mode = GET_MODE (XVECEXP (src, 0, 0));
16447 for (int i = 0; i < XVECLEN (src, 0); i++)
16448 {
16449 rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
16450 emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
16451 }
16452
16453 /* We are writing an accumulator register, so we have to
16454 prime it after we've written it. */
16455 if (GET_MODE (src) == XOmode)
16456 emit_insn (gen_mma_xxmtacc (dst, dst));
16457
16458 return;
16459 }
16460
16461 /* Register -> register moves can use common code. */
16462 }
16463
16464 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
16465 {
16466 /* If we are reading an accumulator register, we have to
16467 deprime it before we can access it. */
16468 if (TARGET_MMA
16469 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16470 emit_insn (gen_mma_xxmfacc (src, src));
16471
16472 /* Move register range backwards, if we might have destructive
16473 overlap. */
16474 int i;
16475 /* XO/OO are opaque so cannot use subregs. */
16476 if (mode == OOmode || mode == XOmode)
16477 {
16478 for (i = nregs - 1; i >= 0; i--)
16479 {
16480 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
16481 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
16482 emit_insn (gen_rtx_SET (dst_i, src_i));
16483 }
16484 }
16485 else
16486 {
16487 for (i = nregs - 1; i >= 0; i--)
16488 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
16489 i * reg_mode_size),
16490 simplify_gen_subreg (reg_mode, src, mode,
16491 i * reg_mode_size)));
16492 }
16493
16494 /* If we are writing an accumulator register, we have to
16495 prime it after we've written it. */
16496 if (TARGET_MMA
16497 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16498 emit_insn (gen_mma_xxmtacc (dst, dst));
16499 }
16500 else
16501 {
16502 int i;
16503 int j = -1;
16504 bool used_update = false;
16505 rtx restore_basereg = NULL_RTX;
16506
16507 if (MEM_P (src) && INT_REGNO_P (reg))
16508 {
16509 rtx breg;
16510
16511 if (GET_CODE (XEXP (src, 0)) == PRE_INC
16512 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
16513 {
16514 rtx delta_rtx;
16515 breg = XEXP (XEXP (src, 0), 0);
16516 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
16517 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
16518 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
16519 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
16520 src = replace_equiv_address (src, breg);
16521 }
16522 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
16523 {
16524 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
16525 {
16526 rtx basereg = XEXP (XEXP (src, 0), 0);
16527 if (TARGET_UPDATE)
16528 {
16529 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
16530 emit_insn (gen_rtx_SET (ndst,
16531 gen_rtx_MEM (reg_mode,
16532 XEXP (src, 0))));
16533 used_update = true;
16534 }
16535 else
16536 emit_insn (gen_rtx_SET (basereg,
16537 XEXP (XEXP (src, 0), 1)));
16538 src = replace_equiv_address (src, basereg);
16539 }
16540 else
16541 {
16542 rtx basereg = gen_rtx_REG (Pmode, reg);
16543 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
16544 src = replace_equiv_address (src, basereg);
16545 }
16546 }
16547
16548 breg = XEXP (src, 0);
16549 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
16550 breg = XEXP (breg, 0);
16551
16552 /* If the base register we are using to address memory is
16553 also a destination reg, then change that register last. */
16554 if (REG_P (breg)
16555 && REGNO (breg) >= REGNO (dst)
16556 && REGNO (breg) < REGNO (dst) + nregs)
16557 j = REGNO (breg) - REGNO (dst);
16558 }
16559 else if (MEM_P (dst) && INT_REGNO_P (reg))
16560 {
16561 rtx breg;
16562
16563 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
16564 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
16565 {
16566 rtx delta_rtx;
16567 breg = XEXP (XEXP (dst, 0), 0);
16568 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
16569 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
16570 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
16571
16572 /* We have to update the breg before doing the store.
16573 Use store with update, if available. */
16574
16575 if (TARGET_UPDATE)
16576 {
16577 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
16578 emit_insn (TARGET_32BIT
16579 ? (TARGET_POWERPC64
16580 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
16581 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
16582 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
16583 used_update = true;
16584 }
16585 else
16586 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
16587 dst = replace_equiv_address (dst, breg);
16588 }
16589 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
16590 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
16591 {
16592 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
16593 {
16594 rtx basereg = XEXP (XEXP (dst, 0), 0);
16595 if (TARGET_UPDATE)
16596 {
16597 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
16598 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
16599 XEXP (dst, 0)),
16600 nsrc));
16601 used_update = true;
16602 }
16603 else
16604 emit_insn (gen_rtx_SET (basereg,
16605 XEXP (XEXP (dst, 0), 1)));
16606 dst = replace_equiv_address (dst, basereg);
16607 }
16608 else
16609 {
16610 rtx basereg = XEXP (XEXP (dst, 0), 0);
16611 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
16612 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
16613 && REG_P (basereg)
16614 && REG_P (offsetreg)
16615 && REGNO (basereg) != REGNO (offsetreg));
16616 if (REGNO (basereg) == 0)
16617 {
16618 rtx tmp = offsetreg;
16619 offsetreg = basereg;
16620 basereg = tmp;
16621 }
16622 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
16623 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
16624 dst = replace_equiv_address (dst, basereg);
16625 }
16626 }
16627 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
16628 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
16629 }
16630
16631 /* If we are reading an accumulator register, we have to
16632 deprime it before we can access it. */
16633 if (TARGET_MMA && REG_P (src)
16634 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16635 emit_insn (gen_mma_xxmfacc (src, src));
16636
16637 for (i = 0; i < nregs; i++)
16638 {
16639 /* Calculate index to next subword. */
16640 ++j;
16641 if (j == nregs)
16642 j = 0;
16643
16644 /* If the compiler already emitted the move of the first word by
16645 store with update, there is no need to do anything. */
16646 if (j == 0 && used_update)
16647 continue;
16648
16649 /* XO/OO are opaque so cannot use subregs. */
16650 if (mode == OOmode || mode == XOmode)
16651 {
16652 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
16653 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
16654 emit_insn (gen_rtx_SET (dst_i, src_i));
16655 }
16656 else
16657 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
16658 j * reg_mode_size),
16659 simplify_gen_subreg (reg_mode, src, mode,
16660 j * reg_mode_size)));
16661 }
16662
16663 /* If we are writing an accumulator register, we have to
16664 prime it after we've written it. */
16665 if (TARGET_MMA && REG_P (dst)
16666 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16667 emit_insn (gen_mma_xxmtacc (dst, dst));
16668
16669 if (restore_basereg != NULL_RTX)
16670 emit_insn (restore_basereg);
16671 }
16672 }
16673
16674 static GTY(()) alias_set_type TOC_alias_set = -1;
16675
16676 alias_set_type
16677 get_TOC_alias_set (void)
16678 {
16679 if (TOC_alias_set == -1)
16680 TOC_alias_set = new_alias_set ();
16681 return TOC_alias_set;
16682 }
16683
16684 /* The mode the ABI uses for a word. This is not the same as word_mode
16685 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16686
16687 static scalar_int_mode
16688 rs6000_abi_word_mode (void)
16689 {
16690 return TARGET_32BIT ? SImode : DImode;
16691 }
16692
16693 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16694 static char *
16695 rs6000_offload_options (void)
16696 {
16697 if (TARGET_64BIT)
16698 return xstrdup ("-foffload-abi=lp64");
16699 else
16700 return xstrdup ("-foffload-abi=ilp32");
16701 }
16702
16703 \f
16704 /* A quick summary of the various types of 'constant-pool tables'
16705 under PowerPC:
16706
16707 Target Flags Name One table per
16708 AIX (none) AIX TOC object file
16709 AIX -mfull-toc AIX TOC object file
16710 AIX -mminimal-toc AIX minimal TOC translation unit
16711 SVR4/EABI (none) SVR4 SDATA object file
16712 SVR4/EABI -fpic SVR4 pic object file
16713 SVR4/EABI -fPIC SVR4 PIC translation unit
16714 SVR4/EABI -mrelocatable EABI TOC function
16715 SVR4/EABI -maix AIX TOC object file
16716 SVR4/EABI -maix -mminimal-toc
16717 AIX minimal TOC translation unit
16718
16719 Name Reg. Set by entries contains:
16720 made by addrs? fp? sum?
16721
16722 AIX TOC 2 crt0 as Y option option
16723 AIX minimal TOC 30 prolog gcc Y Y option
16724 SVR4 SDATA 13 crt0 gcc N Y N
16725 SVR4 pic 30 prolog ld Y not yet N
16726 SVR4 PIC 30 prolog gcc Y option option
16727 EABI TOC 30 prolog gcc Y option option
16728
16729 */
16730
16731 /* Hash functions for the hash table. */
16732
16733 static unsigned
16734 rs6000_hash_constant (rtx k)
16735 {
16736 enum rtx_code code = GET_CODE (k);
16737 machine_mode mode = GET_MODE (k);
16738 unsigned result = (code << 3) ^ mode;
16739 const char *format;
16740 int flen, fidx;
16741
16742 format = GET_RTX_FORMAT (code);
16743 flen = strlen (format);
16744 fidx = 0;
16745
16746 switch (code)
16747 {
16748 case LABEL_REF:
16749 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16750
16751 case CONST_WIDE_INT:
16752 {
16753 int i;
16754 flen = CONST_WIDE_INT_NUNITS (k);
16755 for (i = 0; i < flen; i++)
16756 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16757 return result;
16758 }
16759
16760 case CONST_DOUBLE:
16761 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16762
16763 case CODE_LABEL:
16764 fidx = 3;
16765 break;
16766
16767 default:
16768 break;
16769 }
16770
16771 for (; fidx < flen; fidx++)
16772 switch (format[fidx])
16773 {
16774 case 's':
16775 {
16776 unsigned i, len;
16777 const char *str = XSTR (k, fidx);
16778 len = strlen (str);
16779 result = result * 613 + len;
16780 for (i = 0; i < len; i++)
16781 result = result * 613 + (unsigned) str[i];
16782 break;
16783 }
16784 case 'u':
16785 case 'e':
16786 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16787 break;
16788 case 'i':
16789 case 'n':
16790 result = result * 613 + (unsigned) XINT (k, fidx);
16791 break;
16792 case 'w':
16793 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16794 result = result * 613 + (unsigned) XWINT (k, fidx);
16795 else
16796 {
16797 size_t i;
16798 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16799 result = result * 613 + (unsigned) (XWINT (k, fidx)
16800 >> CHAR_BIT * i);
16801 }
16802 break;
16803 case '0':
16804 break;
16805 default:
16806 gcc_unreachable ();
16807 }
16808
16809 return result;
16810 }
16811
16812 hashval_t
16813 toc_hasher::hash (toc_hash_struct *thc)
16814 {
16815 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16816 }
16817
16818 /* Compare H1 and H2 for equivalence. */
16819
16820 bool
16821 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16822 {
16823 rtx r1 = h1->key;
16824 rtx r2 = h2->key;
16825
16826 if (h1->key_mode != h2->key_mode)
16827 return 0;
16828
16829 return rtx_equal_p (r1, r2);
16830 }
16831
16832 /* These are the names given by the C++ front-end to vtables, and
16833 vtable-like objects. Ideally, this logic should not be here;
16834 instead, there should be some programmatic way of inquiring as
16835 to whether or not an object is a vtable. */
16836
16837 #define VTABLE_NAME_P(NAME) \
16838 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16839 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16840 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16841 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16842 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
16843
16844 #ifdef NO_DOLLAR_IN_LABEL
16845 /* Return a GGC-allocated character string translating dollar signs in
16846 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16847
16848 const char *
16849 rs6000_xcoff_strip_dollar (const char *name)
16850 {
16851 char *strip, *p;
16852 const char *q;
16853 size_t len;
16854
16855 q = (const char *) strchr (name, '$');
16856
16857 if (q == 0 || q == name)
16858 return name;
16859
16860 len = strlen (name);
16861 strip = XALLOCAVEC (char, len + 1);
16862 strcpy (strip, name);
16863 p = strip + (q - name);
16864 while (p)
16865 {
16866 *p = '_';
16867 p = strchr (p + 1, '$');
16868 }
16869
16870 return ggc_alloc_string (strip, len);
16871 }
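/* For example, rs6000_xcoff_strip_dollar ("foo$bar$baz") returns
   "foo_bar_baz"; a name without '$', or with only a leading '$'
   (e.g. "$foo"), is returned unchanged.  */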
16872 #endif
16873
16874 void
16875 rs6000_output_symbol_ref (FILE *file, rtx x)
16876 {
16877 const char *name = XSTR (x, 0);
16878
16879 /* Currently C++ toc references to vtables can be emitted before it
16880 is decided whether the vtable is public or private. If this is
16881 the case, then the linker will eventually complain that there is
16882 a reference to an unknown section. Thus, for vtables only,
16883 we emit the TOC reference to reference the identifier and not the
16884 symbol. */
16885 if (VTABLE_NAME_P (name))
16886 {
16887 RS6000_OUTPUT_BASENAME (file, name);
16888 }
16889 else
16890 assemble_name (file, name);
16891 }
16892
16893 /* Output a TOC entry. We derive the entry name from what is being
16894 written. */
16895
16896 void
16897 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16898 {
16899 char buf[256];
16900 const char *name = buf;
16901 rtx base = x;
16902 HOST_WIDE_INT offset = 0;
16903
16904 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16905
16906 /* When the linker won't eliminate them, don't output duplicate
16907 TOC entries (this happens on AIX if there is any kind of TOC,
16908 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16909 CODE_LABELs. */
16910 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16911 {
16912 struct toc_hash_struct *h;
16913
16914 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16915 time because GGC is not initialized at that point. */
16916 if (toc_hash_table == NULL)
16917 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16918
16919 h = ggc_alloc<toc_hash_struct> ();
16920 h->key = x;
16921 h->key_mode = mode;
16922 h->labelno = labelno;
16923
16924 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16925 if (*found == NULL)
16926 *found = h;
16927 else /* This is indeed a duplicate.
16928 Set this label equal to that label. */
16929 {
16930 fputs ("\t.set ", file);
16931 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16932 fprintf (file, "%d,", labelno);
16933 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16934 fprintf (file, "%d\n", ((*found)->labelno));
16935
16936 #ifdef HAVE_AS_TLS
16937 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16938 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16939 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16940 {
16941 fputs ("\t.set ", file);
16942 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16943 fprintf (file, "%d,", labelno);
16944 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16945 fprintf (file, "%d\n", ((*found)->labelno));
16946 }
16947 #endif
16948 return;
16949 }
16950 }
16951
16952 /* If we're going to put a double constant in the TOC, make sure it's
16953 aligned properly when strict alignment is on. */
16954 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16955 && STRICT_ALIGNMENT
16956 && GET_MODE_BITSIZE (mode) >= 64
16957 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16958 ASM_OUTPUT_ALIGN (file, 3);
16960
16961 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16962
16963 /* Handle FP constants specially. Note that if we have a minimal
16964 TOC, things we put here aren't actually in the TOC, so we can allow
16965 FP constants. */
16966 if (CONST_DOUBLE_P (x)
16967 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16968 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16969 {
16970 long k[4];
16971
16972 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16973 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16974 else
16975 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16976
16977 if (TARGET_64BIT)
16978 {
16979 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16980 fputs (DOUBLE_INT_ASM_OP, file);
16981 else
16982 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16983 k[0] & 0xffffffff, k[1] & 0xffffffff,
16984 k[2] & 0xffffffff, k[3] & 0xffffffff);
16985 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16986 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16987 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16988 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16989 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16990 return;
16991 }
16992 else
16993 {
16994 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16995 fputs ("\t.long ", file);
16996 else
16997 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16998 k[0] & 0xffffffff, k[1] & 0xffffffff,
16999 k[2] & 0xffffffff, k[3] & 0xffffffff);
17000 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17001 k[0] & 0xffffffff, k[1] & 0xffffffff,
17002 k[2] & 0xffffffff, k[3] & 0xffffffff);
17003 return;
17004 }
17005 }
17006 else if (CONST_DOUBLE_P (x)
17007 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17008 {
17009 long k[2];
17010
17011 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17012 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17013 else
17014 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17015
17016 if (TARGET_64BIT)
17017 {
17018 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17019 fputs (DOUBLE_INT_ASM_OP, file);
17020 else
17021 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17022 k[0] & 0xffffffff, k[1] & 0xffffffff);
17023 fprintf (file, "0x%lx%08lx\n",
17024 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17025 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17026 return;
17027 }
17028 else
17029 {
17030 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17031 fputs ("\t.long ", file);
17032 else
17033 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17034 k[0] & 0xffffffff, k[1] & 0xffffffff);
17035 fprintf (file, "0x%lx,0x%lx\n",
17036 k[0] & 0xffffffff, k[1] & 0xffffffff);
17037 return;
17038 }
17039 }
17040 else if (CONST_DOUBLE_P (x)
17041 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17042 {
17043 long l;
17044
17045 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17046 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17047 else
17048 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17049
17050 if (TARGET_64BIT)
17051 {
17052 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17053 fputs (DOUBLE_INT_ASM_OP, file);
17054 else
17055 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17056 if (WORDS_BIG_ENDIAN)
17057 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17058 else
17059 fprintf (file, "0x%lx\n", l & 0xffffffff);
17060 return;
17061 }
17062 else
17063 {
17064 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17065 fputs ("\t.long ", file);
17066 else
17067 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17068 fprintf (file, "0x%lx\n", l & 0xffffffff);
17069 return;
17070 }
17071 }
17072 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17073 {
17074 unsigned HOST_WIDE_INT low;
17075 HOST_WIDE_INT high;
17076
17077 low = INTVAL (x) & 0xffffffff;
17078 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17079
17080 /* TOC entries are always Pmode-sized, so when big-endian
17081 smaller integer constants in the TOC need to be padded.
17082 (This is still a win over putting the constants in
17083 a separate constant pool, because then we'd have
17084 to have both a TOC entry _and_ the actual constant.)
17085
17086 For a 32-bit target, CONST_INT values are loaded and shifted
17087 entirely within `low' and can be stored in one TOC entry. */
17088
17089 /* It would be easy to make this work, but it doesn't now. */
17090 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17091
17092 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17093 {
17094 low |= high << 32;
17095 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17096 high = (HOST_WIDE_INT) low >> 32;
17097 low &= 0xffffffff;
17098 }
17099
17100 if (TARGET_64BIT)
17101 {
17102 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17103 fputs (DOUBLE_INT_ASM_OP, file);
17104 else
17105 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17106 (long) high & 0xffffffff, (long) low & 0xffffffff);
17107 fprintf (file, "0x%lx%08lx\n",
17108 (long) high & 0xffffffff, (long) low & 0xffffffff);
17109 return;
17110 }
17111 else
17112 {
17113 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17114 {
17115 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17116 fputs ("\t.long ", file);
17117 else
17118 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17119 (long) high & 0xffffffff, (long) low & 0xffffffff);
17120 fprintf (file, "0x%lx,0x%lx\n",
17121 (long) high & 0xffffffff, (long) low & 0xffffffff);
17122 }
17123 else
17124 {
17125 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17126 fputs ("\t.long ", file);
17127 else
17128 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17129 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17130 }
17131 return;
17132 }
17133 }
17134
17135 if (GET_CODE (x) == CONST)
17136 {
17137 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17138 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17139
17140 base = XEXP (XEXP (x, 0), 0);
17141 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17142 }
17143
17144 switch (GET_CODE (base))
17145 {
17146 case SYMBOL_REF:
17147 name = XSTR (base, 0);
17148 break;
17149
17150 case LABEL_REF:
17151 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17152 CODE_LABEL_NUMBER (XEXP (base, 0)));
17153 break;
17154
17155 case CODE_LABEL:
17156 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17157 break;
17158
17159 default:
17160 gcc_unreachable ();
17161 }
17162
17163 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17164 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17165 else
17166 {
17167 fputs ("\t.tc ", file);
17168 RS6000_OUTPUT_BASENAME (file, name);
17169
17170 if (offset < 0)
17171 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17172 else if (offset)
17173 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17174
17175 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17176 after other TOC symbols, reducing overflow of small TOC access
17177 to [TC] symbols. */
17178 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17179 ? "[TE]," : "[TC],", file);
17180 }
17181
17182 /* Currently C++ toc references to vtables can be emitted before it
17183 is decided whether the vtable is public or private. If this is
17184 the case, then the linker will eventually complain that there is
17185 a TOC reference to an unknown section. Thus, for vtables only,
17186 we emit the TOC reference to reference the symbol and not the
17187 section. */
17188 if (VTABLE_NAME_P (name))
17189 {
17190 RS6000_OUTPUT_BASENAME (file, name);
17191 if (offset < 0)
17192 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17193 else if (offset > 0)
17194 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17195 }
17196 else
17197 output_addr_const (file, x);
17198
17199 #if HAVE_AS_TLS
17200 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17201 {
17202 switch (SYMBOL_REF_TLS_MODEL (base))
17203 {
17204 case 0:
17205 break;
17206 case TLS_MODEL_LOCAL_EXEC:
17207 fputs ("@le", file);
17208 break;
17209 case TLS_MODEL_INITIAL_EXEC:
17210 fputs ("@ie", file);
17211 break;
17212 /* Use global-dynamic for local-dynamic. */
17213 case TLS_MODEL_GLOBAL_DYNAMIC:
17214 case TLS_MODEL_LOCAL_DYNAMIC:
17215 putc ('\n', file);
17216 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17217 fputs ("\t.tc .", file);
17218 RS6000_OUTPUT_BASENAME (file, name);
17219 fputs ("[TC],", file);
17220 output_addr_const (file, x);
17221 fputs ("@m", file);
17222 break;
17223 default:
17224 gcc_unreachable ();
17225 }
17226 }
17227 #endif
17228
17229 putc ('\n', file);
17230 }
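/* As an illustration (values worked out by hand, so treat as a sketch):
   for the DFmode constant 1.0 the code above emits

	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   on 64-bit XCOFF with a full TOC, and just

	<DOUBLE_INT_ASM_OP> 0x3ff0000000000000

   on 64-bit ELF or with -mminimal-toc, in each case preceded by the
   internal LC label for LABELNO.  */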
17231 \f
17232 /* Output an assembler pseudo-op to write an ASCII string of N characters
17233 starting at P to FILE.
17234
17235 On the RS/6000, we have to do this using the .byte operation and
17236 write out special characters outside the quoted string.
17237 Also, the assembler is broken; very long strings are truncated,
17238 so we must artificially break them up early. */
17239
17240 void
17241 output_ascii (FILE *file, const char *p, int n)
17242 {
17243 char c;
17244 int i, count_string;
17245 const char *for_string = "\t.byte \"";
17246 const char *for_decimal = "\t.byte ";
17247 const char *to_close = NULL;
17248
17249 count_string = 0;
17250 for (i = 0; i < n; i++)
17251 {
17252 c = *p++;
17253 if (c >= ' ' && c < 0177)
17254 {
17255 if (for_string)
17256 fputs (for_string, file);
17257 putc (c, file);
17258
17259 /* Write two quotes to get one. */
17260 if (c == '"')
17261 {
17262 putc (c, file);
17263 ++count_string;
17264 }
17265
17266 for_string = NULL;
17267 for_decimal = "\"\n\t.byte ";
17268 to_close = "\"\n";
17269 ++count_string;
17270
17271 if (count_string >= 512)
17272 {
17273 fputs (to_close, file);
17274
17275 for_string = "\t.byte \"";
17276 for_decimal = "\t.byte ";
17277 to_close = NULL;
17278 count_string = 0;
17279 }
17280 }
17281 else
17282 {
17283 if (for_decimal)
17284 fputs (for_decimal, file);
17285 fprintf (file, "%d", c);
17286
17287 for_string = "\n\t.byte \"";
17288 for_decimal = ", ";
17289 to_close = "\n";
17290 count_string = 0;
17291 }
17292 }
17293
17294 /* Now close the string if we have written one. Then end the line. */
17295 if (to_close)
17296 fputs (to_close, file);
17297 }
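/* For example, output_ascii (file, "OK\n", 3) produces

	.byte "OK"
	.byte 10

   printable characters stay inside one quoted .byte directive and
   everything else drops out to decimal .byte values.  */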
17298 \f
17299 /* Generate a unique section name for FILENAME for a section type
17300 represented by SECTION_DESC. Output goes into BUF.
17301
17302 SECTION_DESC can be any string, as long as it is different for each
17303 possible section type.
17304
17305 We name the section in the same manner as xlc. The name begins with an
17306 underscore followed by the filename (after stripping any leading directory
17307 names) with the last period replaced by the string SECTION_DESC. If
17308 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17309 the name. */
17310
17311 void
17312 rs6000_gen_section_name (char **buf, const char *filename,
17313 const char *section_desc)
17314 {
17315 const char *q, *after_last_slash, *last_period = 0;
17316 char *p;
17317 int len;
17318
17319 after_last_slash = filename;
17320 for (q = filename; *q; q++)
17321 {
17322 if (*q == '/')
17323 after_last_slash = q + 1;
17324 else if (*q == '.')
17325 last_period = q;
17326 }
17327
17328 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17329 *buf = (char *) xmalloc (len);
17330
17331 p = *buf;
17332 *p++ = '_';
17333
17334 for (q = after_last_slash; *q; q++)
17335 {
17336 if (q == last_period)
17337 {
17338 strcpy (p, section_desc);
17339 p += strlen (section_desc);
17340 break;
17341 }
17342
17343 else if (ISALNUM (*q))
17344 *p++ = *q;
17345 }
17346
17347 if (last_period == 0)
17348 strcpy (p, section_desc);
17349 else
17350 *p = '\0';
17351 }
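/* For example (hypothetical arguments), rs6000_gen_section_name (&buf,
   "dir/foo.c", "_rw") produces "_foo_rw": the directory prefix is
   dropped, the name gains a leading underscore, and everything from the
   last period onwards is replaced by SECTION_DESC.  */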
17352 \f
17353 /* Emit profile function. */
17354
17355 void
17356 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17357 {
17358 /* Non-standard profiling for kernels, which just saves LR then calls
17359 _mcount without worrying about arg saves. The idea is to change
17360 the function prologue as little as possible as it isn't easy to
17361 account for arg save/restore code added just for _mcount. */
17362 if (TARGET_PROFILE_KERNEL)
17363 return;
17364
17365 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17366 {
17367 #ifndef NO_PROFILE_COUNTERS
17368 # define NO_PROFILE_COUNTERS 0
17369 #endif
17370 if (NO_PROFILE_COUNTERS)
17371 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17372 LCT_NORMAL, VOIDmode);
17373 else
17374 {
17375 char buf[30];
17376 const char *label_name;
17377 rtx fun;
17378
17379 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17380 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17381 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17382
17383 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17384 LCT_NORMAL, VOIDmode, fun, Pmode);
17385 }
17386 }
17387 else if (DEFAULT_ABI == ABI_DARWIN)
17388 {
17389 const char *mcount_name = RS6000_MCOUNT;
17390 int caller_addr_regno = LR_REGNO;
17391
17392 /* Be conservative and always set this, at least for now. */
17393 crtl->uses_pic_offset_table = 1;
17394
17395 #if TARGET_MACHO
17396 /* For PIC code, set up a stub and collect the caller's address
17397 from r0, which is where the prologue puts it. */
17398 if (MACHOPIC_INDIRECT
17399 && crtl->uses_pic_offset_table)
17400 caller_addr_regno = 0;
17401 #endif
17402 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17403 LCT_NORMAL, VOIDmode,
17404 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17405 }
17406 }
17407
17408 /* Write function profiler code. */
17409
17410 void
17411 output_function_profiler (FILE *file, int labelno)
17412 {
17413 char buf[100];
17414
17415 switch (DEFAULT_ABI)
17416 {
17417 default:
17418 gcc_unreachable ();
17419
17420 case ABI_V4:
17421 if (!TARGET_32BIT)
17422 {
17423 warning (0, "no profiling of 64-bit code for this ABI");
17424 return;
17425 }
17426 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17427 fprintf (file, "\tmflr %s\n", reg_names[0]);
17428 if (NO_PROFILE_COUNTERS)
17429 {
17430 asm_fprintf (file, "\tstw %s,4(%s)\n",
17431 reg_names[0], reg_names[1]);
17432 }
17433 else if (TARGET_SECURE_PLT && flag_pic)
17434 {
17435 if (TARGET_LINK_STACK)
17436 {
17437 char name[32];
17438 get_ppc476_thunk_name (name);
17439 asm_fprintf (file, "\tbl %s\n", name);
17440 }
17441 else
17442 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17443 asm_fprintf (file, "\tstw %s,4(%s)\n",
17444 reg_names[0], reg_names[1]);
17445 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17446 asm_fprintf (file, "\taddis %s,%s,",
17447 reg_names[12], reg_names[12]);
17448 assemble_name (file, buf);
17449 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17450 assemble_name (file, buf);
17451 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17452 }
17453 else if (flag_pic == 1)
17454 {
17455 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17456 asm_fprintf (file, "\tstw %s,4(%s)\n",
17457 reg_names[0], reg_names[1]);
17458 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17459 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17460 assemble_name (file, buf);
17461 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17462 }
17463 else if (flag_pic > 1)
17464 {
17465 asm_fprintf (file, "\tstw %s,4(%s)\n",
17466 reg_names[0], reg_names[1]);
17467 /* Now, we need to get the address of the label. */
17468 if (TARGET_LINK_STACK)
17469 {
17470 char name[32];
17471 get_ppc476_thunk_name (name);
17472 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17473 assemble_name (file, buf);
17474 fputs ("-.\n1:", file);
17475 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17476 asm_fprintf (file, "\taddi %s,%s,4\n",
17477 reg_names[11], reg_names[11]);
17478 }
17479 else
17480 {
17481 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17482 assemble_name (file, buf);
17483 fputs ("-.\n1:", file);
17484 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17485 }
17486 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17487 reg_names[0], reg_names[11]);
17488 asm_fprintf (file, "\tadd %s,%s,%s\n",
17489 reg_names[0], reg_names[0], reg_names[11]);
17490 }
17491 else
17492 {
17493 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17494 assemble_name (file, buf);
17495 fputs ("@ha\n", file);
17496 asm_fprintf (file, "\tstw %s,4(%s)\n",
17497 reg_names[0], reg_names[1]);
17498 asm_fprintf (file, "\tla %s,", reg_names[0]);
17499 assemble_name (file, buf);
17500 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17501 }
17502
17503 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17504 fprintf (file, "\tbl %s%s\n",
17505 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17506 break;
17507
17508 case ABI_AIX:
17509 case ABI_ELFv2:
17510 case ABI_DARWIN:
17511 /* Don't do anything, done in output_profile_hook (). */
17512 break;
17513 }
17514 }
17515
17516 \f
17517
17518 /* The following variable value is the last issued insn. */
17519
17520 static rtx_insn *last_scheduled_insn;
17521
17522 /* The following variable helps to balance issuing of load and
17523 store instructions */
17524
17525 static int load_store_pendulum;
17526
17527 /* The following variable helps pair divide insns during scheduling. */
17528 static int divide_cnt;
17529 /* The following variable helps pair and alternate vector and vector load
17530 insns during scheduling. */
17531 static int vec_pairing;
17532
17533
17534 /* Power4 load update and store update instructions are cracked into a
17535 load or store and an integer insn which are executed in the same cycle.
17536 Branches have their own dispatch slot which does not count against the
17537 GCC issue rate, but it changes the program flow so there are no other
17538 instructions to issue in this cycle. */
17539
17540 static int
17541 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17542 {
17543 last_scheduled_insn = insn;
17544 if (GET_CODE (PATTERN (insn)) == USE
17545 || GET_CODE (PATTERN (insn)) == CLOBBER)
17546 {
17547 cached_can_issue_more = more;
17548 return cached_can_issue_more;
17549 }
17550
17551 if (insn_terminates_group_p (insn, current_group))
17552 {
17553 cached_can_issue_more = 0;
17554 return cached_can_issue_more;
17555 }
17556
17557 /* If the insn has no reservation but we reach here anyway, leave the issue count unchanged. */
17558 if (recog_memoized (insn) < 0)
17559 return more;
17560
17561 if (rs6000_sched_groups)
17562 {
17563 if (is_microcoded_insn (insn))
17564 cached_can_issue_more = 0;
17565 else if (is_cracked_insn (insn))
17566 cached_can_issue_more = more > 2 ? more - 2 : 0;
17567 else
17568 cached_can_issue_more = more - 1;
17569
17570 return cached_can_issue_more;
17571 }
17572
17573 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17574 return 0;
17575
17576 cached_can_issue_more = more - 1;
17577 return cached_can_issue_more;
17578 }
17579
17580 static int
17581 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17582 {
17583 int r = rs6000_variable_issue_1 (insn, more);
17584 if (verbose)
17585 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17586 return r;
17587 }
17588
17589 /* Adjust the cost of a scheduling dependency. Return the new cost of
17590 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17591
17592 static int
17593 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17594 unsigned int)
17595 {
17596 enum attr_type attr_type;
17597
17598 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17599 return cost;
17600
17601 switch (dep_type)
17602 {
17603 case REG_DEP_TRUE:
17604 {
17605 /* Data dependency; DEP_INSN writes a register that INSN reads
17606 some cycles later. */
17607
17608 /* Separate a load from a narrower, dependent store. */
17609 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17610 || rs6000_tune == PROCESSOR_POWER10)
17611 && GET_CODE (PATTERN (insn)) == SET
17612 && GET_CODE (PATTERN (dep_insn)) == SET
17613 && MEM_P (XEXP (PATTERN (insn), 1))
17614 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17615 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17616 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17617 return cost + 14;
17618
17619 attr_type = get_attr_type (insn);
17620
17621 switch (attr_type)
17622 {
17623 case TYPE_JMPREG:
17624 /* Tell the first scheduling pass about the latency between
17625 a mtctr and bctr (and mtlr and br/blr). The first
17626 scheduling pass will not know about this latency since
17627 the mtctr instruction, which has the latency associated
17628 to it, will be generated by reload. */
17629 return 4;
17630 case TYPE_BRANCH:
17631 /* Leave some extra cycles between a compare and its
17632 dependent branch, to inhibit expensive mispredicts. */
17633 if ((rs6000_tune == PROCESSOR_PPC603
17634 || rs6000_tune == PROCESSOR_PPC604
17635 || rs6000_tune == PROCESSOR_PPC604e
17636 || rs6000_tune == PROCESSOR_PPC620
17637 || rs6000_tune == PROCESSOR_PPC630
17638 || rs6000_tune == PROCESSOR_PPC750
17639 || rs6000_tune == PROCESSOR_PPC7400
17640 || rs6000_tune == PROCESSOR_PPC7450
17641 || rs6000_tune == PROCESSOR_PPCE5500
17642 || rs6000_tune == PROCESSOR_PPCE6500
17643 || rs6000_tune == PROCESSOR_POWER4
17644 || rs6000_tune == PROCESSOR_POWER5
17645 || rs6000_tune == PROCESSOR_POWER7
17646 || rs6000_tune == PROCESSOR_POWER8
17647 || rs6000_tune == PROCESSOR_POWER9
17648 || rs6000_tune == PROCESSOR_POWER10
17649 || rs6000_tune == PROCESSOR_CELL)
17650 && recog_memoized (dep_insn)
17651 && (INSN_CODE (dep_insn) >= 0))
17652
17653 switch (get_attr_type (dep_insn))
17654 {
17655 case TYPE_CMP:
17656 case TYPE_FPCOMPARE:
17657 case TYPE_CR_LOGICAL:
17658 return cost + 2;
17659 case TYPE_EXTS:
17660 case TYPE_MUL:
17661 if (get_attr_dot (dep_insn) == DOT_YES)
17662 return cost + 2;
17663 else
17664 break;
17665 case TYPE_SHIFT:
17666 if (get_attr_dot (dep_insn) == DOT_YES
17667 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17668 return cost + 2;
17669 else
17670 break;
17671 default:
17672 break;
17673 }
17674 break;
17675
17676 case TYPE_STORE:
17677 case TYPE_FPSTORE:
17678 if ((rs6000_tune == PROCESSOR_POWER6)
17679 && recog_memoized (dep_insn)
17680 && (INSN_CODE (dep_insn) >= 0))
17681 {
17682
17683 if (GET_CODE (PATTERN (insn)) != SET)
17684 /* If this happens, we have to extend this to schedule
17685 optimally. Return default for now. */
17686 return cost;
17687
17688 /* Adjust the cost for the case where the value written
17689 by a fixed point operation is used as the address
17690 gen value on a store. */
17691 switch (get_attr_type (dep_insn))
17692 {
17693 case TYPE_LOAD:
17694 case TYPE_CNTLZ:
17695 {
17696 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17697 return get_attr_sign_extend (dep_insn)
17698 == SIGN_EXTEND_YES ? 6 : 4;
17699 break;
17700 }
17701 case TYPE_SHIFT:
17702 {
17703 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17704 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17705 6 : 3;
17706 break;
17707 }
17708 case TYPE_INTEGER:
17709 case TYPE_ADD:
17710 case TYPE_LOGICAL:
17711 case TYPE_EXTS:
17712 case TYPE_INSERT:
17713 {
17714 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17715 return 3;
17716 break;
17717 }
17718 case TYPE_STORE:
17719 case TYPE_FPLOAD:
17720 case TYPE_FPSTORE:
17721 {
17722 if (get_attr_update (dep_insn) == UPDATE_YES
17723 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17724 return 3;
17725 break;
17726 }
17727 case TYPE_MUL:
17728 {
17729 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17730 return 17;
17731 break;
17732 }
17733 case TYPE_DIV:
17734 {
17735 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17736 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17737 break;
17738 }
17739 default:
17740 break;
17741 }
17742 }
17743 break;
17744
17745 case TYPE_LOAD:
17746 if ((rs6000_tune == PROCESSOR_POWER6)
17747 && recog_memoized (dep_insn)
17748 && (INSN_CODE (dep_insn) >= 0))
17749 {
17750
17751 /* Adjust the cost for the case where the value written
17752 by a fixed point instruction is used within the address
17753 gen portion of a subsequent load(u)(x) */
17754 switch (get_attr_type (dep_insn))
17755 {
17756 case TYPE_LOAD:
17757 case TYPE_CNTLZ:
17758 {
17759 if (set_to_load_agen (dep_insn, insn))
17760 return get_attr_sign_extend (dep_insn)
17761 == SIGN_EXTEND_YES ? 6 : 4;
17762 break;
17763 }
17764 case TYPE_SHIFT:
17765 {
17766 if (set_to_load_agen (dep_insn, insn))
17767 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17768 6 : 3;
17769 break;
17770 }
17771 case TYPE_INTEGER:
17772 case TYPE_ADD:
17773 case TYPE_LOGICAL:
17774 case TYPE_EXTS:
17775 case TYPE_INSERT:
17776 {
17777 if (set_to_load_agen (dep_insn, insn))
17778 return 3;
17779 break;
17780 }
17781 case TYPE_STORE:
17782 case TYPE_FPLOAD:
17783 case TYPE_FPSTORE:
17784 {
17785 if (get_attr_update (dep_insn) == UPDATE_YES
17786 && set_to_load_agen (dep_insn, insn))
17787 return 3;
17788 break;
17789 }
17790 case TYPE_MUL:
17791 {
17792 if (set_to_load_agen (dep_insn, insn))
17793 return 17;
17794 break;
17795 }
17796 case TYPE_DIV:
17797 {
17798 if (set_to_load_agen (dep_insn, insn))
17799 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17800 break;
17801 }
17802 default:
17803 break;
17804 }
17805 }
17806 break;
17807
17808 default:
17809 break;
17810 }
17811
17812 /* Fall out to return default cost. */
17813 }
17814 break;
17815
17816 case REG_DEP_OUTPUT:
17817 /* Output dependency; DEP_INSN writes a register that INSN writes some
17818 cycles later. */
17819 if ((rs6000_tune == PROCESSOR_POWER6)
17820 && recog_memoized (dep_insn)
17821 && (INSN_CODE (dep_insn) >= 0))
17822 {
17823 attr_type = get_attr_type (insn);
17824
17825 switch (attr_type)
17826 {
17827 case TYPE_FP:
17828 case TYPE_FPSIMPLE:
17829 if (get_attr_type (dep_insn) == TYPE_FP
17830 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17831 return 1;
17832 break;
17833 default:
17834 break;
17835 }
17836 }
17837 /* Fall through, no cost for output dependency. */
17838 /* FALLTHRU */
17839
17840 case REG_DEP_ANTI:
17841 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17842 cycles later. */
17843 return 0;
17844
17845 default:
17846 gcc_unreachable ();
17847 }
17848
17849 return cost;
17850 }
17851
17852 /* Debug version of rs6000_adjust_cost. */
17853
17854 static int
17855 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17856 int cost, unsigned int dw)
17857 {
17858 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17859
17860 if (ret != cost)
17861 {
17862 const char *dep;
17863
17864 switch (dep_type)
17865 {
17866 default: dep = "unknown dependency"; break;
17867 case REG_DEP_TRUE: dep = "data dependency"; break;
17868 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17869 case REG_DEP_ANTI: dep = "anti dependency"; break;
17870 }
17871
17872 fprintf (stderr,
17873 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17874 "%s, insn:\n", ret, cost, dep);
17875
17876 debug_rtx (insn);
17877 }
17878
17879 return ret;
17880 }
17881
17882 /* The function returns true if INSN is microcoded.
17883 Return false otherwise. */
17884
17885 static bool
17886 is_microcoded_insn (rtx_insn *insn)
17887 {
17888 if (!insn || !NONDEBUG_INSN_P (insn)
17889 || GET_CODE (PATTERN (insn)) == USE
17890 || GET_CODE (PATTERN (insn)) == CLOBBER)
17891 return false;
17892
17893 if (rs6000_tune == PROCESSOR_CELL)
17894 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17895
17896 if (rs6000_sched_groups
17897 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17898 {
17899 enum attr_type type = get_attr_type (insn);
17900 if ((type == TYPE_LOAD
17901 && get_attr_update (insn) == UPDATE_YES
17902 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17903 || ((type == TYPE_LOAD || type == TYPE_STORE)
17904 && get_attr_update (insn) == UPDATE_YES
17905 && get_attr_indexed (insn) == INDEXED_YES)
17906 || type == TYPE_MFCR)
17907 return true;
17908 }
17909
17910 return false;
17911 }
17912
17913 /* The function returns true if INSN is cracked into 2 instructions
17914 by the processor (and therefore occupies 2 issue slots). */
17915
17916 static bool
17917 is_cracked_insn (rtx_insn *insn)
17918 {
17919 if (!insn || !NONDEBUG_INSN_P (insn)
17920 || GET_CODE (PATTERN (insn)) == USE
17921 || GET_CODE (PATTERN (insn)) == CLOBBER)
17922 return false;
17923
17924 if (rs6000_sched_groups
17925 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17926 {
17927 enum attr_type type = get_attr_type (insn);
17928 if ((type == TYPE_LOAD
17929 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17930 && get_attr_update (insn) == UPDATE_NO)
17931 || (type == TYPE_LOAD
17932 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17933 && get_attr_update (insn) == UPDATE_YES
17934 && get_attr_indexed (insn) == INDEXED_NO)
17935 || (type == TYPE_STORE
17936 && get_attr_update (insn) == UPDATE_YES
17937 && get_attr_indexed (insn) == INDEXED_NO)
17938 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17939 && get_attr_update (insn) == UPDATE_YES)
17940 || (type == TYPE_CR_LOGICAL
17941 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17942 || (type == TYPE_EXTS
17943 && get_attr_dot (insn) == DOT_YES)
17944 || (type == TYPE_SHIFT
17945 && get_attr_dot (insn) == DOT_YES
17946 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17947 || (type == TYPE_MUL
17948 && get_attr_dot (insn) == DOT_YES)
17949 || type == TYPE_DIV
17950 || (type == TYPE_INSERT
17951 && get_attr_size (insn) == SIZE_32))
17952 return true;
17953 }
17954
17955 return false;
17956 }
17957
17958 /* The function returns true if INSN can be issued only from
17959 the branch slot. */
17960
17961 static bool
17962 is_branch_slot_insn (rtx_insn *insn)
17963 {
17964 if (!insn || !NONDEBUG_INSN_P (insn)
17965 || GET_CODE (PATTERN (insn)) == USE
17966 || GET_CODE (PATTERN (insn)) == CLOBBER)
17967 return false;
17968
17969 if (rs6000_sched_groups)
17970 {
17971 enum attr_type type = get_attr_type (insn);
17972 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17973 return true;
17974 return false;
17975 }
17976
17977 return false;
17978 }
17979
17980 /* The function returns true if OUT_INSN sets a value that is
17981 used in the address generation computation of IN_INSN. */
17982 static bool
17983 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17984 {
17985 rtx out_set, in_set;
17986
17987 /* For performance reasons, only handle the simple case where
17988 both loads are a single_set. */
17989 out_set = single_set (out_insn);
17990 if (out_set)
17991 {
17992 in_set = single_set (in_insn);
17993 if (in_set)
17994 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17995 }
17996
17997 return false;
17998 }
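
/* A minimal standalone sketch of the check above, kept under #if 0 and
   using toy types that are assumptions rather than GCC's rtx: does the
   register defined by OUT feed the address computation of IN, as in
   "addi 9,3,16" followed by "lwz 10,0(9)"? */
#if 0
struct toy_set { int dest_reg; int addr_regs[2]; int n_addr_regs; };

static int
toy_set_to_load_agen (const struct toy_set *out, const struct toy_set *in)
{
  /* reg_mentioned_p scans the whole SET_SRC, so both reg and
     reg+offset address forms are caught; this mimics that scan. */
  for (int i = 0; i < in->n_addr_regs; i++)
    if (in->addr_regs[i] == out->dest_reg)
      return 1;
  return 0;
}
#endif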
17999
18000 /* Try to determine base/offset/size parts of the given MEM.
18001 Return true if successful, false if any of the values couldn't
18002 be determined.
18003
18004 This function only looks for REG or REG+CONST address forms.
18005 REG+REG address form will return false. */
18006
18007 static bool
18008 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18009 HOST_WIDE_INT *size)
18010 {
18011 rtx addr_rtx;
18012 if (MEM_SIZE_KNOWN_P (mem))
18013 *size = MEM_SIZE (mem);
18014 else
18015 return false;
18016
18017 addr_rtx = XEXP (mem, 0);
18018 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18019 addr_rtx = XEXP (addr_rtx, 1);
18020
18021 *offset = 0;
18022 while (GET_CODE (addr_rtx) == PLUS
18023 && CONST_INT_P (XEXP (addr_rtx, 1)))
18024 {
18025 *offset += INTVAL (XEXP (addr_rtx, 1));
18026 addr_rtx = XEXP (addr_rtx, 0);
18027 }
18028 if (!REG_P (addr_rtx))
18029 return false;
18030
18031 *base = addr_rtx;
18032 return true;
18033 }
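
/* A standalone sketch of the offset folding above, under #if 0; the toy
   address tree is an assumption standing in for rtx. A chain like
   (plus (plus (reg 9) (const_int 32)) (const_int 8)) folds to base r9
   with offset 40, while anything that does not bottom out in a single
   REG makes the walk fail, matching the REG+REG restriction above. */
#if 0
struct toy_addr
{
  int is_reg;             /* leaf REG node */
  int regno;
  long const_term;        /* constant operand of a PLUS node */
  struct toy_addr *inner; /* non-constant operand of a PLUS node */
};

static int
toy_memref_parts (struct toy_addr *a, int *base_regno, long *offset)
{
  *offset = 0;
  while (!a->is_reg && a->inner)
    {
      *offset += a->const_term;
      a = a->inner;
    }
  if (!a->is_reg)
    return 0;
  *base_regno = a->regno;
  return 1;
}
#endif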
18034
18035 /* Return true if the target storage location of MEM1 is adjacent
18036 to the target storage location of MEM2. */
18038
18039 static bool
18040 adjacent_mem_locations (rtx mem1, rtx mem2)
18041 {
18042 rtx reg1, reg2;
18043 HOST_WIDE_INT off1, size1, off2, size2;
18044
18045 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18046 && get_memref_parts (mem2, &reg2, &off2, &size2))
18047 return ((REGNO (reg1) == REGNO (reg2))
18048 && ((off1 + size1 == off2)
18049 || (off2 + size2 == off1)));
18050
18051 return false;
18052 }
18053
18054 /* This function returns true if it can be determined that the two MEM
18055 locations overlap by at least 1 byte based on base reg/offset/size. */
18056
18057 static bool
18058 mem_locations_overlap (rtx mem1, rtx mem2)
18059 {
18060 rtx reg1, reg2;
18061 HOST_WIDE_INT off1, size1, off2, size2;
18062
18063 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18064 && get_memref_parts (mem2, &reg2, &off2, &size2))
18065 return ((REGNO (reg1) == REGNO (reg2))
18066 && (((off1 <= off2) && (off1 + size1 > off2))
18067 || ((off2 <= off1) && (off2 + size2 > off1))));
18068
18069 return false;
18070 }
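
/* A standalone sketch, under #if 0, of the interval arithmetic the two
   predicates above share (plain integers are assumptions): each access
   is treated as the half-open byte range [off, off + size). For
   example, 4-byte stores at offsets 0 and 4 from the same base register
   are adjacent but do not overlap. */
#if 0
static int
toy_adjacent (long off1, long size1, long off2, long size2)
{
  return off1 + size1 == off2 || off2 + size2 == off1;
}

static int
toy_overlap (long off1, long size1, long off2, long size2)
{
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}
#endif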
18071
18072 /* Update the integer scheduling priority INSN_PRIORITY (INSN) and
18073 return the new priority. Increasing the priority executes INSN
18074 earlier; reducing it executes INSN later. */
18077
18078 static int
18079 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18080 {
18081 rtx load_mem, str_mem;
18082 /* On machines (like the 750) which have asymmetric integer units,
18083 where one integer unit can do multiply and divides and the other
18084 can't, reduce the priority of multiply/divide so it is scheduled
18085 before other integer operations. */
18086
18087 #if 0
18088 if (! INSN_P (insn))
18089 return priority;
18090
18091 if (GET_CODE (PATTERN (insn)) == USE)
18092 return priority;
18093
18094 switch (rs6000_tune) {
18095 case PROCESSOR_PPC750:
18096 switch (get_attr_type (insn))
18097 {
18098 default:
18099 break;
18100
18101 case TYPE_MUL:
18102 case TYPE_DIV:
18103 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18104 priority, priority);
18105 if (priority >= 0 && priority < 0x01000000)
18106 priority >>= 3;
18107 break;
18108 }
18109 }
18110 #endif
18111
18112 if (insn_must_be_first_in_group (insn)
18113 && reload_completed
18114 && current_sched_info->sched_max_insns_priority
18115 && rs6000_sched_restricted_insns_priority)
18116 {
18117
18118 /* Prioritize insns that can be dispatched only in the first
18119 dispatch slot. */
18120 if (rs6000_sched_restricted_insns_priority == 1)
18121 /* Attach highest priority to insn. This means that in
18122 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
18123 precede 'priority' (critical path) considerations. */
18124 return current_sched_info->sched_max_insns_priority;
18125 else if (rs6000_sched_restricted_insns_priority == 2)
18126 /* Increase priority of insn by a minimal amount. This means that in
18127 haifa-sched.c:ready_sort(), only 'priority' (critical path)
18128 considerations precede dispatch-slot restriction considerations. */
18129 return (priority + 1);
18130 }
18131
18132 if (rs6000_tune == PROCESSOR_POWER6
18133 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18134 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18135 /* Attach highest priority to insn if the scheduler has just issued two
18136 stores and this instruction is a load, or two loads and this instruction
18137 is a store. Power6 wants loads and stores scheduled alternately
18138 when possible. */
18139 return current_sched_info->sched_max_insns_priority;
18140
18141 return priority;
18142 }
18143
18144 /* Return true if the instruction is nonpipelined on the Cell. */
18145 static bool
18146 is_nonpipeline_insn (rtx_insn *insn)
18147 {
18148 enum attr_type type;
18149 if (!insn || !NONDEBUG_INSN_P (insn)
18150 || GET_CODE (PATTERN (insn)) == USE
18151 || GET_CODE (PATTERN (insn)) == CLOBBER)
18152 return false;
18153
18154 type = get_attr_type (insn);
18155 if (type == TYPE_MUL
18156 || type == TYPE_DIV
18157 || type == TYPE_SDIV
18158 || type == TYPE_DDIV
18159 || type == TYPE_SSQRT
18160 || type == TYPE_DSQRT
18161 || type == TYPE_MFCR
18162 || type == TYPE_MFCRF
18163 || type == TYPE_MFJMPR)
18164 {
18165 return true;
18166 }
18167 return false;
18168 }
18169
18170
18171 /* Return how many instructions the machine can issue per cycle. */
18172
18173 static int
18174 rs6000_issue_rate (void)
18175 {
18176 /* Unless scheduling for register pressure, use issue rate of 1 for
18177 first scheduling pass to decrease degradation. */
18178 if (!reload_completed && !flag_sched_pressure)
18179 return 1;
18180
18181 switch (rs6000_tune) {
18182 case PROCESSOR_RS64A:
18183 case PROCESSOR_PPC601: /* ? */
18184 case PROCESSOR_PPC7450:
18185 return 3;
18186 case PROCESSOR_PPC440:
18187 case PROCESSOR_PPC603:
18188 case PROCESSOR_PPC750:
18189 case PROCESSOR_PPC7400:
18190 case PROCESSOR_PPC8540:
18191 case PROCESSOR_PPC8548:
18192 case PROCESSOR_CELL:
18193 case PROCESSOR_PPCE300C2:
18194 case PROCESSOR_PPCE300C3:
18195 case PROCESSOR_PPCE500MC:
18196 case PROCESSOR_PPCE500MC64:
18197 case PROCESSOR_PPCE5500:
18198 case PROCESSOR_PPCE6500:
18199 case PROCESSOR_TITAN:
18200 return 2;
18201 case PROCESSOR_PPC476:
18202 case PROCESSOR_PPC604:
18203 case PROCESSOR_PPC604e:
18204 case PROCESSOR_PPC620:
18205 case PROCESSOR_PPC630:
18206 return 4;
18207 case PROCESSOR_POWER4:
18208 case PROCESSOR_POWER5:
18209 case PROCESSOR_POWER6:
18210 case PROCESSOR_POWER7:
18211 return 5;
18212 case PROCESSOR_POWER8:
18213 return 7;
18214 case PROCESSOR_POWER9:
18215 case PROCESSOR_POWER10:
18216 return 6;
18217 default:
18218 return 1;
18219 }
18220 }
18221
18222 /* Return how many instructions to look ahead for better insn
18223 scheduling. */
18224
18225 static int
18226 rs6000_use_sched_lookahead (void)
18227 {
18228 switch (rs6000_tune)
18229 {
18230 case PROCESSOR_PPC8540:
18231 case PROCESSOR_PPC8548:
18232 return 4;
18233
18234 case PROCESSOR_CELL:
18235 return (reload_completed ? 8 : 0);
18236
18237 default:
18238 return 0;
18239 }
18240 }
18241
18242 /* We are choosing an insn from the ready queue. Return zero if INSN can be
18243 chosen. */
18244 static int
18245 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18246 {
18247 if (ready_index == 0)
18248 return 0;
18249
18250 if (rs6000_tune != PROCESSOR_CELL)
18251 return 0;
18252
18253 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18254
18255 if (!reload_completed
18256 || is_nonpipeline_insn (insn)
18257 || is_microcoded_insn (insn))
18258 return 1;
18259
18260 return 0;
18261 }
18262
18263 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18264 and return true. */
18265
18266 static bool
18267 find_mem_ref (rtx pat, rtx *mem_ref)
18268 {
18269 const char * fmt;
18270 int i, j;
18271
18272 /* stack_tie does not produce any real memory traffic. */
18273 if (tie_operand (pat, VOIDmode))
18274 return false;
18275
18276 if (MEM_P (pat))
18277 {
18278 *mem_ref = pat;
18279 return true;
18280 }
18281
18282 /* Recursively process the pattern. */
18283 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18284
18285 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18286 {
18287 if (fmt[i] == 'e')
18288 {
18289 if (find_mem_ref (XEXP (pat, i), mem_ref))
18290 return true;
18291 }
18292 else if (fmt[i] == 'E')
18293 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18294 {
18295 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18296 return true;
18297 }
18298 }
18299
18300 return false;
18301 }
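
/* A standalone sketch of the recursive walk in find_mem_ref, under
   #if 0 with a toy tree type that is an assumption: visit the node
   itself, then recurse into every sub-expression, which is what the
   'e' (expression) and 'E' (vector) format cases above do over rtx. */
#if 0
struct toy_rtx { int is_mem; int n_ops; struct toy_rtx *ops[4]; };

static struct toy_rtx *
toy_find_mem (struct toy_rtx *x)
{
  if (x->is_mem)
    return x;
  for (int i = 0; i < x->n_ops; i++)
    {
      struct toy_rtx *found = toy_find_mem (x->ops[i]);
      if (found)
	return found;
    }
  return 0;
}
#endif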
18302
18303 /* Determine if PAT is a PATTERN of a load insn. */
18304
18305 static bool
18306 is_load_insn1 (rtx pat, rtx *load_mem)
18307 {
18308 if (!pat)
18309 return false;
18310
18311 if (GET_CODE (pat) == SET)
18312 return find_mem_ref (SET_SRC (pat), load_mem);
18313
18314 if (GET_CODE (pat) == PARALLEL)
18315 {
18316 int i;
18317
18318 for (i = 0; i < XVECLEN (pat, 0); i++)
18319 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18320 return true;
18321 }
18322
18323 return false;
18324 }
18325
18326 /* Determine if INSN loads from memory. */
18327
18328 static bool
18329 is_load_insn (rtx insn, rtx *load_mem)
18330 {
18331 if (!insn || !INSN_P (insn))
18332 return false;
18333
18334 if (CALL_P (insn))
18335 return false;
18336
18337 return is_load_insn1 (PATTERN (insn), load_mem);
18338 }
18339
18340 /* Determine if PAT is a PATTERN of a store insn. */
18341
18342 static bool
18343 is_store_insn1 (rtx pat, rtx *str_mem)
18344 {
18345 if (!pat)
18346 return false;
18347
18348 if (GET_CODE (pat) == SET)
18349 return find_mem_ref (SET_DEST (pat), str_mem);
18350
18351 if (GET_CODE (pat) == PARALLEL)
18352 {
18353 int i;
18354
18355 for (i = 0; i < XVECLEN (pat, 0); i++)
18356 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18357 return true;
18358 }
18359
18360 return false;
18361 }
18362
18363 /* Determine if INSN stores to memory. */
18364
18365 static bool
18366 is_store_insn (rtx insn, rtx *str_mem)
18367 {
18368 if (!insn || !INSN_P (insn))
18369 return false;
18370
18371 return is_store_insn1 (PATTERN (insn), str_mem);
18372 }
18373
18374 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18375
18376 static bool
18377 is_power9_pairable_vec_type (enum attr_type type)
18378 {
18379 switch (type)
18380 {
18381 case TYPE_VECSIMPLE:
18382 case TYPE_VECCOMPLEX:
18383 case TYPE_VECDIV:
18384 case TYPE_VECCMP:
18385 case TYPE_VECPERM:
18386 case TYPE_VECFLOAT:
18387 case TYPE_VECFDIV:
18388 case TYPE_VECDOUBLE:
18389 return true;
18390 default:
18391 break;
18392 }
18393 return false;
18394 }
18395
18396 /* Returns whether the dependence between INSN and NEXT is considered
18397 costly by the given target. */
18398
18399 static bool
18400 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18401 {
18402 rtx insn;
18403 rtx next;
18404 rtx load_mem, str_mem;
18405
18406 /* If the flag is not enabled - no dependence is considered costly;
18407 allow all dependent insns in the same group.
18408 This is the most aggressive option. */
18409 if (rs6000_sched_costly_dep == no_dep_costly)
18410 return false;
18411
18412 /* If the flag is set to 1 - a dependence is always considered costly;
18413 do not allow dependent instructions in the same group.
18414 This is the most conservative option. */
18415 if (rs6000_sched_costly_dep == all_deps_costly)
18416 return true;
18417
18418 insn = DEP_PRO (dep);
18419 next = DEP_CON (dep);
18420
18421 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18422 && is_load_insn (next, &load_mem)
18423 && is_store_insn (insn, &str_mem))
18424 /* Prevent load after store in the same group. */
18425 return true;
18426
18427 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18428 && is_load_insn (next, &load_mem)
18429 && is_store_insn (insn, &str_mem)
18430 && DEP_TYPE (dep) == REG_DEP_TRUE
18431 && mem_locations_overlap (str_mem, load_mem))
18432 /* Prevent load after store in the same group if it is a true
18433 dependence. */
18434 return true;
18435
18436 /* The flag is set to X; dependences with latency >= X are considered costly,
18437 and will not be scheduled in the same group. */
18438 if (rs6000_sched_costly_dep <= max_dep_latency
18439 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18440 return true;
18441
18442 return false;
18443 }
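
/* A worked example of the max_dep_latency scheme above, under #if 0;
   the numbers are assumptions. With -msched-costly-dep=3, a dependence
   of cost 5 between insns already 1 insn apart gives 5 - 1 = 4 >= 3,
   so the consumer is kept out of the producer's dispatch group. */
#if 0
static int
toy_costly_p (int threshold, int cost, int distance)
{
  return cost - distance >= threshold;
}
#endif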
18444
18445 /* Return the next insn after INSN that is found before TAIL is reached,
18446 skipping any "non-active" insns - insns that will not actually occupy
18447 an issue slot. Return NULL_RTX if such an insn is not found. */
18448
18449 static rtx_insn *
18450 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18451 {
18452 if (insn == NULL_RTX || insn == tail)
18453 return NULL;
18454
18455 while (1)
18456 {
18457 insn = NEXT_INSN (insn);
18458 if (insn == NULL_RTX || insn == tail)
18459 return NULL;
18460
18461 if (CALL_P (insn)
18462 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18463 || (NONJUMP_INSN_P (insn)
18464 && GET_CODE (PATTERN (insn)) != USE
18465 && GET_CODE (PATTERN (insn)) != CLOBBER
18466 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18467 break;
18468 }
18469 return insn;
18470 }
18471
18472 /* Move instruction at POS to the end of the READY list. */
18473
18474 static void
18475 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18476 {
18477 rtx_insn *tmp;
18478 int i;
18479
18480 tmp = ready[pos];
18481 for (i = pos; i < lastpos; i++)
18482 ready[i] = ready[i + 1];
18483 ready[lastpos] = tmp;
18484 }
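
/* A standalone sketch of the rotation above on a toy int array (an
   assumption), under #if 0: the element at POS moves to the last slot,
   which in the haifa scheduler's ready list is the next insn issued,
   and everything after POS slides down one place. For example,
   { 10, 20, 30, 40 } with pos == 1, lastpos == 3 becomes
   { 10, 30, 40, 20 }. */
#if 0
static void
toy_move_to_end (int *ready, int pos, int lastpos)
{
  int tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
#endif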
18485
18486 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18487
18488 static int
18489 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18490 {
18491 /* For Power6, we need to handle some special cases to try and keep the
18492 store queue from overflowing and triggering expensive flushes.
18493
18494 This code monitors how load and store instructions are being issued
18495 and skews the ready list one way or the other to increase the likelihood
18496 that a desired instruction is issued at the proper time.
18497
18498 A couple of things are done. First, we maintain a "load_store_pendulum"
18499 to track the current state of load/store issue.
18500
18501 - If the pendulum is at zero, then no loads or stores have been
18502 issued in the current cycle so we do nothing.
18503
18504 - If the pendulum is 1, then a single load has been issued in this
18505 cycle and we attempt to locate another load in the ready list to
18506 issue with it.
18507
18508 - If the pendulum is -2, then two stores have already been
18509 issued in this cycle, so we increase the priority of the first load
18510 in the ready list to increase its likelihood of being chosen first
18511 in the next cycle.
18512
18513 - If the pendulum is -1, then a single store has been issued in this
18514 cycle and we attempt to locate another store in the ready list to
18515 issue with it, preferring a store to an adjacent memory location to
18516 facilitate store pairing in the store queue.
18517
18518 - If the pendulum is 2, then two loads have already been
18519 issued in this cycle, so we increase the priority of the first store
18520 in the ready list to increase its likelihood of being chosen first
18521 in the next cycle.
18522
18523 - If the pendulum < -2 or > 2, then do nothing.
18524
18525 Note: This code covers the most common scenarios. There exist
18526 non-load/store instructions which make use of the LSU and which
18527 would need to be accounted for to strictly model the behavior
18528 of the machine. Those instructions are currently unaccounted
18529 for to help minimize compile time overhead of this code.
18530 */
18531 int pos;
18532 rtx load_mem, str_mem;
18533
18534 if (is_store_insn (last_scheduled_insn, &str_mem))
18535 /* Issuing a store, swing the load_store_pendulum to the left */
18536 load_store_pendulum--;
18537 else if (is_load_insn (last_scheduled_insn, &load_mem))
18538 /* Issuing a load, swing the load_store_pendulum to the right */
18539 load_store_pendulum++;
18540 else
18541 return cached_can_issue_more;
18542
18543 /* If the pendulum is balanced, or there is only one instruction on
18544 the ready list, then all is well, so return. */
18545 if ((load_store_pendulum == 0) || (lastpos <= 0))
18546 return cached_can_issue_more;
18547
18548 if (load_store_pendulum == 1)
18549 {
18550 /* A load has been issued in this cycle. Scan the ready list
18551 for another load to issue with it */
18552 pos = lastpos;
18553
18554 while (pos >= 0)
18555 {
18556 if (is_load_insn (ready[pos], &load_mem))
18557 {
18558 /* Found a load. Move it to the head of the ready list,
18559 and adjust its priority so that it is more likely to
18560 stay there. */
18561 move_to_end_of_ready (ready, pos, lastpos);
18562
18563 if (!sel_sched_p ()
18564 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18565 INSN_PRIORITY (ready[lastpos])++;
18566 break;
18567 }
18568 pos--;
18569 }
18570 }
18571 else if (load_store_pendulum == -2)
18572 {
18573 /* Two stores have been issued in this cycle. Increase the
18574 priority of the first load in the ready list to favor it for
18575 issuing in the next cycle. */
18576 pos = lastpos;
18577
18578 while (pos >= 0)
18579 {
18580 if (is_load_insn (ready[pos], &load_mem)
18581 && !sel_sched_p ()
18582 && INSN_PRIORITY_KNOWN (ready[pos]))
18583 {
18584 INSN_PRIORITY (ready[pos])++;
18585
18586 /* Adjust the pendulum to account for the fact that a load
18587 was found and increased in priority. This is to prevent
18588 increasing the priority of multiple loads */
18589 load_store_pendulum--;
18590
18591 break;
18592 }
18593 pos--;
18594 }
18595 }
18596 else if (load_store_pendulum == -1)
18597 {
18598 /* A store has been issued in this cycle. Scan the ready list for
18599 another store to issue with it, preferring a store to an adjacent
18600 memory location */
18601 int first_store_pos = -1;
18602
18603 pos = lastpos;
18604
18605 while (pos >= 0)
18606 {
18607 if (is_store_insn (ready[pos], &str_mem))
18608 {
18609 rtx str_mem2;
18610 /* Maintain the index of the first store found on the
18611 list */
18612 if (first_store_pos == -1)
18613 first_store_pos = pos;
18614
18615 if (is_store_insn (last_scheduled_insn, &str_mem2)
18616 && adjacent_mem_locations (str_mem, str_mem2))
18617 {
18618 /* Found an adjacent store. Move it to the head of the
18619 ready list, and adjust its priority so that it is
18620 more likely to stay there. */
18621 move_to_end_of_ready (ready, pos, lastpos);
18622
18623 if (!sel_sched_p ()
18624 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18625 INSN_PRIORITY (ready[lastpos])++;
18626
18627 first_store_pos = -1;
18628
18629 break;
18630 }
18631 }
18632 pos--;
18633 }
18634
18635 if (first_store_pos >= 0)
18636 {
18637 /* An adjacent store wasn't found, but a non-adjacent store was,
18638 so move the non-adjacent store to the front of the ready
18639 list, and adjust its priority so that it is more likely to
18640 stay there. */
18641 move_to_end_of_ready (ready, first_store_pos, lastpos);
18642 if (!sel_sched_p ()
18643 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18644 INSN_PRIORITY (ready[lastpos])++;
18645 }
18646 }
18647 else if (load_store_pendulum == 2)
18648 {
18649 /* Two loads have been issued in this cycle. Increase the priority
18650 of the first store in the ready list to favor it for issuing in
18651 the next cycle. */
18652 pos = lastpos;
18653
18654 while (pos >= 0)
18655 {
18656 if (is_store_insn (ready[pos], &str_mem)
18657 && !sel_sched_p ()
18658 && INSN_PRIORITY_KNOWN (ready[pos]))
18659 {
18660 INSN_PRIORITY (ready[pos])++;
18661
18662 /* Adjust the pendulum to account for the fact that a store
18663 was found and increased in priority. This is to prevent
18664 increasing the priority of multiple stores */
18665 load_store_pendulum++;
18666
18667 break;
18668 }
18669 pos--;
18670 }
18671 }
18672
18673 return cached_can_issue_more;
18674 }
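
/* A standalone sketch, under #if 0, of how the load_store_pendulum
   described above evolves (the toy encoding is an assumption): stores
   swing it left, loads swing it right, and the code above reacts at
   +/-1 by pairing and at +/-2 by boosting the opposite kind. Issuing
   two stores from a balanced pendulum reaches -2, at which point the
   first load on the ready list gets a priority boost. */
#if 0
static int
toy_pendulum_step (int pendulum, char issued)
{
  if (issued == 'S')      /* store issued */
    pendulum--;
  else if (issued == 'L') /* load issued */
    pendulum++;
  return pendulum;
}
#endif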
18675
18676 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18677
18678 static int
18679 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18680 {
18681 int pos;
18682 enum attr_type type, type2;
18683
18684 type = get_attr_type (last_scheduled_insn);
18685
18686 /* Try to issue fixed point divides back-to-back in pairs so they will be
18687 routed to separate execution units and execute in parallel. */
18688 if (type == TYPE_DIV && divide_cnt == 0)
18689 {
18690 /* First divide has been scheduled. */
18691 divide_cnt = 1;
18692
18693 /* Scan the ready list looking for another divide, if found move it
18694 to the end of the list so it is chosen next. */
18695 pos = lastpos;
18696 while (pos >= 0)
18697 {
18698 if (recog_memoized (ready[pos]) >= 0
18699 && get_attr_type (ready[pos]) == TYPE_DIV)
18700 {
18701 move_to_end_of_ready (ready, pos, lastpos);
18702 break;
18703 }
18704 pos--;
18705 }
18706 }
18707 else
18708 {
18709 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18710 divide_cnt = 0;
18711
18712 /* The best dispatch throughput for vector and vector load insns can be
18713 achieved by interleaving a vector and vector load such that they'll
18714 dispatch to the same superslice. If this pairing cannot be achieved
18715 then it is best to pair vector insns together and vector load insns
18716 together.
18717
18718 To aid in this pairing, vec_pairing maintains the current state with
18719 the following values:
18720
18721 0 : Initial state, no vecload/vector pairing has been started.
18722
18723 1 : A vecload or vector insn has been issued and a candidate for
18724 pairing has been found and moved to the end of the ready
18725 list. */
18726 if (type == TYPE_VECLOAD)
18727 {
18728 /* Issued a vecload. */
18729 if (vec_pairing == 0)
18730 {
18731 int vecload_pos = -1;
18732 /* We issued a single vecload, look for a vector insn to pair it
18733 with. If one isn't found, try to pair another vecload. */
18734 pos = lastpos;
18735 while (pos >= 0)
18736 {
18737 if (recog_memoized (ready[pos]) >= 0)
18738 {
18739 type2 = get_attr_type (ready[pos]);
18740 if (is_power9_pairable_vec_type (type2))
18741 {
18742 /* Found a vector insn to pair with, move it to the
18743 end of the ready list so it is scheduled next. */
18744 move_to_end_of_ready (ready, pos, lastpos);
18745 vec_pairing = 1;
18746 return cached_can_issue_more;
18747 }
18748 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18749 /* Remember position of first vecload seen. */
18750 vecload_pos = pos;
18751 }
18752 pos--;
18753 }
18754 if (vecload_pos >= 0)
18755 {
18756 /* Didn't find a vector to pair with but did find a vecload,
18757 move it to the end of the ready list. */
18758 move_to_end_of_ready (ready, vecload_pos, lastpos);
18759 vec_pairing = 1;
18760 return cached_can_issue_more;
18761 }
18762 }
18763 }
18764 else if (is_power9_pairable_vec_type (type))
18765 {
18766 /* Issued a vector operation. */
18767 if (vec_pairing == 0)
18768 {
18769 int vec_pos = -1;
18770 /* We issued a single vector insn, look for a vecload to pair it
18771 with. If one isn't found, try to pair another vector. */
18772 pos = lastpos;
18773 while (pos >= 0)
18774 {
18775 if (recog_memoized (ready[pos]) >= 0)
18776 {
18777 type2 = get_attr_type (ready[pos]);
18778 if (type2 == TYPE_VECLOAD)
18779 {
18780 /* Found a vecload insn to pair with, move it to the
18781 end of the ready list so it is scheduled next. */
18782 move_to_end_of_ready (ready, pos, lastpos);
18783 vec_pairing = 1;
18784 return cached_can_issue_more;
18785 }
18786 else if (is_power9_pairable_vec_type (type2)
18787 && vec_pos == -1)
18788 /* Remember position of first vector insn seen. */
18789 vec_pos = pos;
18790 }
18791 pos--;
18792 }
18793 if (vec_pos >= 0)
18794 {
18795 /* Didn't find a vecload to pair with but did find a vector
18796 insn, move it to the end of the ready list. */
18797 move_to_end_of_ready (ready, vec_pos, lastpos);
18798 vec_pairing = 1;
18799 return cached_can_issue_more;
18800 }
18801 }
18802 }
18803
18804 /* We've either finished a vec/vecload pair, couldn't find an insn to
18805 continue the current pair, or the last insn had nothing to do
18806 with pairing. In any case, reset the state. */
18807 vec_pairing = 0;
18808 }
18809
18810 return cached_can_issue_more;
18811 }
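
/* A standalone sketch, under #if 0, of the two-state vec_pairing
   machine above (toy flags are assumptions): in state 0, issuing a
   vector or vecload insn for which a partner can be moved to the end
   of the ready list enters state 1; any other outcome, including
   completing a pair, resets to state 0. */
#if 0
static int
toy_vec_pairing_step (int state, int issued_vec_or_vecload,
		      int partner_found)
{
  if (state == 0 && issued_vec_or_vecload && partner_found)
    return 1;
  return 0;
}
#endif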
18812
18813 /* We are about to begin issuing insns for this clock cycle. */
18814
18815 static int
18816 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18817 rtx_insn **ready ATTRIBUTE_UNUSED,
18818 int *pn_ready ATTRIBUTE_UNUSED,
18819 int clock_var ATTRIBUTE_UNUSED)
18820 {
18821 int n_ready = *pn_ready;
18822
18823 if (sched_verbose)
18824 fprintf (dump, "// rs6000_sched_reorder :\n");
18825
18826 /* Reorder the ready list if the next insn to issue
18827 is a nonpipelined insn. */
18828 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18829 {
18830 if (is_nonpipeline_insn (ready[n_ready - 1])
18831 && (recog_memoized (ready[n_ready - 2]) > 0))
18832 /* Simply swap first two insns. */
18833 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18834 }
18835
18836 if (rs6000_tune == PROCESSOR_POWER6)
18837 load_store_pendulum = 0;
18838
18839 return rs6000_issue_rate ();
18840 }
18841
18842 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18843
18844 static int
18845 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18846 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18847 {
18848 if (sched_verbose)
18849 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18850
18851 /* Do Power6 dependent reordering if necessary. */
18852 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18853 return power6_sched_reorder2 (ready, *pn_ready - 1);
18854
18855 /* Do Power9 dependent reordering if necessary. */
18856 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18857 && recog_memoized (last_scheduled_insn) >= 0)
18858 return power9_sched_reorder2 (ready, *pn_ready - 1);
18859
18860 return cached_can_issue_more;
18861 }
18862
18863 /* Return whether the presence of INSN causes a dispatch group termination
18864 of group WHICH_GROUP.
18865
18866 If WHICH_GROUP == current_group, this function will return true if INSN
18867 causes the termination of the current group (i.e., the dispatch group to
18868 which INSN belongs). This means that INSN will be the last insn in the
18869 group it belongs to.
18870
18871 If WHICH_GROUP == previous_group, this function will return true if INSN
18872 causes the termination of the previous group (i.e., the dispatch group that
18873 precedes the group to which INSN belongs). This means that INSN will be
18874 the first insn in the group it belongs to. */
18875
18876 static bool
18877 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18878 {
18879 bool first, last;
18880
18881 if (! insn)
18882 return false;
18883
18884 first = insn_must_be_first_in_group (insn);
18885 last = insn_must_be_last_in_group (insn);
18886
18887 if (first && last)
18888 return true;
18889
18890 if (which_group == current_group)
18891 return last;
18892 else if (which_group == previous_group)
18893 return first;
18894
18895 return false;
18896 }
18897
18898
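/* Return true if INSN must be the first insn in a dispatch group on
   the current tuning target. */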
18899 static bool
18900 insn_must_be_first_in_group (rtx_insn *insn)
18901 {
18902 enum attr_type type;
18903
18904 if (!insn
18905 || NOTE_P (insn)
18906 || DEBUG_INSN_P (insn)
18907 || GET_CODE (PATTERN (insn)) == USE
18908 || GET_CODE (PATTERN (insn)) == CLOBBER)
18909 return false;
18910
18911 switch (rs6000_tune)
18912 {
18913 case PROCESSOR_POWER5:
18914 if (is_cracked_insn (insn))
18915 return true;
18916 /* FALLTHRU */
18917 case PROCESSOR_POWER4:
18918 if (is_microcoded_insn (insn))
18919 return true;
18920
18921 if (!rs6000_sched_groups)
18922 return false;
18923
18924 type = get_attr_type (insn);
18925
18926 switch (type)
18927 {
18928 case TYPE_MFCR:
18929 case TYPE_MFCRF:
18930 case TYPE_MTCR:
18931 case TYPE_CR_LOGICAL:
18932 case TYPE_MTJMPR:
18933 case TYPE_MFJMPR:
18934 case TYPE_DIV:
18935 case TYPE_LOAD_L:
18936 case TYPE_STORE_C:
18937 case TYPE_ISYNC:
18938 case TYPE_SYNC:
18939 return true;
18940 default:
18941 break;
18942 }
18943 break;
18944 case PROCESSOR_POWER6:
18945 type = get_attr_type (insn);
18946
18947 switch (type)
18948 {
18949 case TYPE_EXTS:
18950 case TYPE_CNTLZ:
18951 case TYPE_TRAP:
18952 case TYPE_MUL:
18953 case TYPE_INSERT:
18954 case TYPE_FPCOMPARE:
18955 case TYPE_MFCR:
18956 case TYPE_MTCR:
18957 case TYPE_MFJMPR:
18958 case TYPE_MTJMPR:
18959 case TYPE_ISYNC:
18960 case TYPE_SYNC:
18961 case TYPE_LOAD_L:
18962 case TYPE_STORE_C:
18963 return true;
18964 case TYPE_SHIFT:
18965 if (get_attr_dot (insn) == DOT_NO
18966 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18967 return true;
18968 else
18969 break;
18970 case TYPE_DIV:
18971 if (get_attr_size (insn) == SIZE_32)
18972 return true;
18973 else
18974 break;
18975 case TYPE_LOAD:
18976 case TYPE_STORE:
18977 case TYPE_FPLOAD:
18978 case TYPE_FPSTORE:
18979 if (get_attr_update (insn) == UPDATE_YES)
18980 return true;
18981 else
18982 break;
18983 default:
18984 break;
18985 }
18986 break;
18987 case PROCESSOR_POWER7:
18988 type = get_attr_type (insn);
18989
18990 switch (type)
18991 {
18992 case TYPE_CR_LOGICAL:
18993 case TYPE_MFCR:
18994 case TYPE_MFCRF:
18995 case TYPE_MTCR:
18996 case TYPE_DIV:
18997 case TYPE_ISYNC:
18998 case TYPE_LOAD_L:
18999 case TYPE_STORE_C:
19000 case TYPE_MFJMPR:
19001 case TYPE_MTJMPR:
19002 return true;
19003 case TYPE_MUL:
19004 case TYPE_SHIFT:
19005 case TYPE_EXTS:
19006 if (get_attr_dot (insn) == DOT_YES)
19007 return true;
19008 else
19009 break;
19010 case TYPE_LOAD:
19011 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19012 || get_attr_update (insn) == UPDATE_YES)
19013 return true;
19014 else
19015 break;
19016 case TYPE_STORE:
19017 case TYPE_FPLOAD:
19018 case TYPE_FPSTORE:
19019 if (get_attr_update (insn) == UPDATE_YES)
19020 return true;
19021 else
19022 break;
19023 default:
19024 break;
19025 }
19026 break;
19027 case PROCESSOR_POWER8:
19028 type = get_attr_type (insn);
19029
19030 switch (type)
19031 {
19032 case TYPE_CR_LOGICAL:
19033 case TYPE_MFCR:
19034 case TYPE_MFCRF:
19035 case TYPE_MTCR:
19036 case TYPE_SYNC:
19037 case TYPE_ISYNC:
19038 case TYPE_LOAD_L:
19039 case TYPE_STORE_C:
19040 case TYPE_VECSTORE:
19041 case TYPE_MFJMPR:
19042 case TYPE_MTJMPR:
19043 return true;
19044 case TYPE_SHIFT:
19045 case TYPE_EXTS:
19046 case TYPE_MUL:
19047 if (get_attr_dot (insn) == DOT_YES)
19048 return true;
19049 else
19050 break;
19051 case TYPE_LOAD:
19052 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19053 || get_attr_update (insn) == UPDATE_YES)
19054 return true;
19055 else
19056 break;
19057 case TYPE_STORE:
19058 if (get_attr_update (insn) == UPDATE_YES
19059 && get_attr_indexed (insn) == INDEXED_YES)
19060 return true;
19061 else
19062 break;
19063 default:
19064 break;
19065 }
19066 break;
19067 default:
19068 break;
19069 }
19070
19071 return false;
19072 }
19073
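/* Return true if INSN must be the last insn in a dispatch group on
   the current tuning target. */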
19074 static bool
19075 insn_must_be_last_in_group (rtx_insn *insn)
19076 {
19077 enum attr_type type;
19078
19079 if (!insn
19080 || NOTE_P (insn)
19081 || DEBUG_INSN_P (insn)
19082 || GET_CODE (PATTERN (insn)) == USE
19083 || GET_CODE (PATTERN (insn)) == CLOBBER)
19084 return false;
19085
19086 switch (rs6000_tune) {
19087 case PROCESSOR_POWER4:
19088 case PROCESSOR_POWER5:
19089 if (is_microcoded_insn (insn))
19090 return true;
19091
19092 if (is_branch_slot_insn (insn))
19093 return true;
19094
19095 break;
19096 case PROCESSOR_POWER6:
19097 type = get_attr_type (insn);
19098
19099 switch (type)
19100 {
19101 case TYPE_EXTS:
19102 case TYPE_CNTLZ:
19103 case TYPE_TRAP:
19104 case TYPE_MUL:
19105 case TYPE_FPCOMPARE:
19106 case TYPE_MFCR:
19107 case TYPE_MTCR:
19108 case TYPE_MFJMPR:
19109 case TYPE_MTJMPR:
19110 case TYPE_ISYNC:
19111 case TYPE_SYNC:
19112 case TYPE_LOAD_L:
19113 case TYPE_STORE_C:
19114 return true;
19115 case TYPE_SHIFT:
19116 if (get_attr_dot (insn) == DOT_NO
19117 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19118 return true;
19119 else
19120 break;
19121 case TYPE_DIV:
19122 if (get_attr_size (insn) == SIZE_32)
19123 return true;
19124 else
19125 break;
19126 default:
19127 break;
19128 }
19129 break;
19130 case PROCESSOR_POWER7:
19131 type = get_attr_type (insn);
19132
19133 switch (type)
19134 {
19135 case TYPE_ISYNC:
19136 case TYPE_SYNC:
19137 case TYPE_LOAD_L:
19138 case TYPE_STORE_C:
19139 return true;
19140 case TYPE_LOAD:
19141 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19142 && get_attr_update (insn) == UPDATE_YES)
19143 return true;
19144 else
19145 break;
19146 case TYPE_STORE:
19147 if (get_attr_update (insn) == UPDATE_YES
19148 && get_attr_indexed (insn) == INDEXED_YES)
19149 return true;
19150 else
19151 break;
19152 default:
19153 break;
19154 }
19155 break;
19156 case PROCESSOR_POWER8:
19157 type = get_attr_type (insn);
19158
19159 switch (type)
19160 {
19161 case TYPE_MFCR:
19162 case TYPE_MTCR:
19163 case TYPE_ISYNC:
19164 case TYPE_SYNC:
19165 case TYPE_LOAD_L:
19166 case TYPE_STORE_C:
19167 return true;
19168 case TYPE_LOAD:
19169 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19170 && get_attr_update (insn) == UPDATE_YES)
19171 return true;
19172 else
19173 break;
19174 case TYPE_STORE:
19175 if (get_attr_update (insn) == UPDATE_YES
19176 && get_attr_indexed (insn) == INDEXED_YES)
19177 return true;
19178 else
19179 break;
19180 default:
19181 break;
19182 }
19183 break;
19184 default:
19185 break;
19186 }
19187
19188 return false;
19189 }
19190
19191 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19192 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19193
19194 static bool
19195 is_costly_group (rtx *group_insns, rtx next_insn)
19196 {
19197 int i;
19198 int issue_rate = rs6000_issue_rate ();
19199
19200 for (i = 0; i < issue_rate; i++)
19201 {
19202 sd_iterator_def sd_it;
19203 dep_t dep;
19204 rtx insn = group_insns[i];
19205
19206 if (!insn)
19207 continue;
19208
19209 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19210 {
19211 rtx next = DEP_CON (dep);
19212
19213 if (next == next_insn
19214 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19215 return true;
19216 }
19217 }
19218
19219 return false;
19220 }
19221
19222 /* Utility function used by redefine_groups.
19223 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19224 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19225 to keep it "far" (in a separate group) from GROUP_INSNS, following
19226 one of the following schemes, depending on the value of the flag
19227 -minsert-sched-nops = X:
19228 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19229 in order to force NEXT_INSN into a separate group.
19230 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19231 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19232 insertion (has a group just ended, how many vacant issue slots remain in the
19233 last group, and how many dispatch groups were encountered so far). */
19234
19235 static int
19236 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19237 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19238 int *group_count)
19239 {
19240 rtx nop;
19241 bool force;
19242 int issue_rate = rs6000_issue_rate ();
19243 bool end = *group_end;
19244 int i;
19245
19246 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19247 return can_issue_more;
19248
19249 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19250 return can_issue_more;
19251
19252 force = is_costly_group (group_insns, next_insn);
19253 if (!force)
19254 return can_issue_more;
19255
19256 if (sched_verbose > 6)
19257 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
19258 *group_count ,can_issue_more);
19259
19260 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19261 {
19262 if (*group_end)
19263 can_issue_more = 0;
19264
19265 /* Since only a branch can be issued in the last issue_slot, it is
19266 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19267 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19268 in this case the last nop will start a new group and the branch
19269 will be forced to the new group. */
19270 if (can_issue_more && !is_branch_slot_insn (next_insn))
19271 can_issue_more--;
19272
19273 /* Do we have a special group ending nop? */
19274 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19275 || rs6000_tune == PROCESSOR_POWER8)
19276 {
19277 nop = gen_group_ending_nop ();
19278 emit_insn_before (nop, next_insn);
19279 can_issue_more = 0;
19280 }
19281 else
19282 while (can_issue_more > 0)
19283 {
19284 nop = gen_nop ();
19285 emit_insn_before (nop, next_insn);
19286 can_issue_more--;
19287 }
19288
19289 *group_end = true;
19290 return 0;
19291 }
19292
19293 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19294 {
19295 int n_nops = rs6000_sched_insert_nops;
19296
19297 /* Nops can't be issued from the branch slot, so the effective
19298 issue_rate for nops is 'issue_rate - 1'. */
19299 if (can_issue_more == 0)
19300 can_issue_more = issue_rate;
19301 can_issue_more--;
19302 if (can_issue_more == 0)
19303 {
19304 can_issue_more = issue_rate - 1;
19305 (*group_count)++;
19306 end = true;
19307 for (i = 0; i < issue_rate; i++)
19308 {
19309 group_insns[i] = 0;
19310 }
19311 }
19312
19313 while (n_nops > 0)
19314 {
19315 nop = gen_nop ();
19316 emit_insn_before (nop, next_insn);
19317 if (can_issue_more == issue_rate - 1) /* new group begins */
19318 end = false;
19319 can_issue_more--;
19320 if (can_issue_more == 0)
19321 {
19322 can_issue_more = issue_rate - 1;
19323 (*group_count)++;
19324 end = true;
19325 for (i = 0; i < issue_rate; i++)
19326 {
19327 group_insns[i] = 0;
19328 }
19329 }
19330 n_nops--;
19331 }
19332
19333 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19334 can_issue_more++;
19335
19336 /* Is next_insn going to start a new group? */
19337 *group_end
19338 = (end
19339 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19340 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19341 || (can_issue_more < issue_rate &&
19342 insn_terminates_group_p (next_insn, previous_group)));
19343 if (*group_end && end)
19344 (*group_count)--;
19345
19346 if (sched_verbose > 6)
19347 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19348 *group_count, can_issue_more);
19349 return can_issue_more;
19350 }
19351
19352 return can_issue_more;
19353 }
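
/* A standalone sketch, under #if 0, of the slot accounting in the
   fixed-nop branch above, stripped of the rtl emission and group-end
   bookkeeping (that simplification is an assumption): because nops
   cannot use the branch slot, a full group of nops holds only
   issue_rate - 1 of them before a new group starts. */
#if 0
static int
toy_insert_nops (int n_nops, int can_issue_more, int issue_rate,
		 int *group_count)
{
  while (n_nops-- > 0)
    {
      can_issue_more--;
      if (can_issue_more == 0)
	{
	  can_issue_more = issue_rate - 1;
	  (*group_count)++;
	}
    }
  return can_issue_more;
}
#endif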
19354
19355 /* This function tries to synch the dispatch groups that the compiler "sees"
19356 with the dispatch groups that the processor dispatcher is expected to
19357 form in practice. It tries to achieve this synchronization by forcing the
19358 estimated processor grouping on the compiler (as opposed to the function
19359 'pad_groups' which tries to force the scheduler's grouping on the processor).
19360
19361 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19362 examines the (estimated) dispatch groups that will be formed by the processor
19363 dispatcher. It marks these group boundaries to reflect the estimated
19364 processor grouping, overriding the grouping that the scheduler had marked.
19365 Depending on the value of the flag '-minsert-sched-nops' this function can
19366 force certain insns into separate groups or force a certain distance between
19367 them by inserting nops, for example, if there exists a "costly dependence"
19368 between the insns.
19369
19370 The function estimates the group boundaries that the processor will form as
19371 follows: It keeps track of how many vacant issue slots are available after
19372 each insn. A subsequent insn will start a new group if one of the following
19373 4 cases applies:
19374 - no more vacant issue slots remain in the current dispatch group.
19375 - only the last issue slot, which is the branch slot, is vacant, but the next
19376 insn is not a branch.
19377 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19378 which means that a cracked insn (which occupies two issue slots) can't be
19379 issued in this group.
19380 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19381 start a new group. */
19382
19383 static int
19384 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19385 rtx_insn *tail)
19386 {
19387 rtx_insn *insn, *next_insn;
19388 int issue_rate;
19389 int can_issue_more;
19390 int slot, i;
19391 bool group_end;
19392 int group_count = 0;
19393 rtx *group_insns;
19394
19395 /* Initialize. */
19396 issue_rate = rs6000_issue_rate ();
19397 group_insns = XALLOCAVEC (rtx, issue_rate);
19398 for (i = 0; i < issue_rate; i++)
19399 {
19400 group_insns[i] = 0;
19401 }
19402 can_issue_more = issue_rate;
19403 slot = 0;
19404 insn = get_next_active_insn (prev_head_insn, tail);
19405 group_end = false;
19406
19407 while (insn != NULL_RTX)
19408 {
19409 slot = (issue_rate - can_issue_more);
19410 group_insns[slot] = insn;
19411 can_issue_more =
19412 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19413 if (insn_terminates_group_p (insn, current_group))
19414 can_issue_more = 0;
19415
19416 next_insn = get_next_active_insn (insn, tail);
19417 if (next_insn == NULL_RTX)
19418 return group_count + 1;
19419
19420 /* Is next_insn going to start a new group? */
19421 group_end
19422 = (can_issue_more == 0
19423 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19424 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19425 || (can_issue_more < issue_rate &&
19426 insn_terminates_group_p (next_insn, previous_group)));
19427
19428 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19429 next_insn, &group_end, can_issue_more,
19430 &group_count);
19431
19432 if (group_end)
19433 {
19434 group_count++;
19435 can_issue_more = 0;
19436 for (i = 0; i < issue_rate; i++)
19437 {
19438 group_insns[i] = 0;
19439 }
19440 }
19441
19442 if (GET_MODE (next_insn) == TImode && can_issue_more)
19443 PUT_MODE (next_insn, VOIDmode);
19444 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19445 PUT_MODE (next_insn, TImode);
19446
19447 insn = next_insn;
19448 if (can_issue_more == 0)
19449 can_issue_more = issue_rate;
19450 } /* while */
19451
19452 return group_count;
19453 }
19454
19455 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19456 dispatch group boundaries that the scheduler had marked. Pad with nops
19457 any dispatch groups which have vacant issue slots, in order to force the
19458 scheduler's grouping on the processor dispatcher. The function
19459 returns the number of dispatch groups found. */
19460
19461 static int
19462 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19463 rtx_insn *tail)
19464 {
19465 rtx_insn *insn, *next_insn;
19466 rtx nop;
19467 int issue_rate;
19468 int can_issue_more;
19469 int group_end;
19470 int group_count = 0;
19471
19472 /* Initialize issue_rate. */
19473 issue_rate = rs6000_issue_rate ();
19474 can_issue_more = issue_rate;
19475
19476 insn = get_next_active_insn (prev_head_insn, tail);
19477 next_insn = get_next_active_insn (insn, tail);
19478
19479 while (insn != NULL_RTX)
19480 {
19481 can_issue_more =
19482 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19483
19484 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19485
19486 if (next_insn == NULL_RTX)
19487 break;
19488
19489 if (group_end)
19490 {
19491 /* If the scheduler had marked group termination at this location
19492 (between insn and next_insn), and neither insn nor next_insn will
19493 force group termination, pad the group with nops to force group
19494 termination. */
19495 if (can_issue_more
19496 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19497 && !insn_terminates_group_p (insn, current_group)
19498 && !insn_terminates_group_p (next_insn, previous_group))
19499 {
19500 if (!is_branch_slot_insn (next_insn))
19501 can_issue_more--;
19502
19503 while (can_issue_more)
19504 {
19505 nop = gen_nop ();
19506 emit_insn_before (nop, next_insn);
19507 can_issue_more--;
19508 }
19509 }
19510
19511 can_issue_more = issue_rate;
19512 group_count++;
19513 }
19514
19515 insn = next_insn;
19516 next_insn = get_next_active_insn (insn, tail);
19517 }
19518
19519 return group_count;
19520 }
19521
19522 /* We're beginning a new block. Initialize data structures as necessary. */
19523
19524 static void
19525 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19526 int sched_verbose ATTRIBUTE_UNUSED,
19527 int max_ready ATTRIBUTE_UNUSED)
19528 {
19529 last_scheduled_insn = NULL;
19530 load_store_pendulum = 0;
19531 divide_cnt = 0;
19532 vec_pairing = 0;
19533 }
19534
19535 /* The following function is called at the end of scheduling BB.
19536 After reload, it inserts nops to enforce the insn group bundling. */
19537
19538 static void
19539 rs6000_sched_finish (FILE *dump, int sched_verbose)
19540 {
19541 int n_groups;
19542
19543 if (sched_verbose)
19544 fprintf (dump, "=== Finishing schedule.\n");
19545
19546 if (reload_completed && rs6000_sched_groups)
19547 {
19548 /* Do not run sched_finish hook when selective scheduling enabled. */
19549 if (sel_sched_p ())
19550 return;
19551
19552 if (rs6000_sched_insert_nops == sched_finish_none)
19553 return;
19554
19555 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19556 n_groups = pad_groups (dump, sched_verbose,
19557 current_sched_info->prev_head,
19558 current_sched_info->next_tail);
19559 else
19560 n_groups = redefine_groups (dump, sched_verbose,
19561 current_sched_info->prev_head,
19562 current_sched_info->next_tail);
19563
19564 if (sched_verbose >= 6)
19565 {
19566 fprintf (dump, "ngroups = %d\n", n_groups);
19567 print_rtl (dump, current_sched_info->prev_head);
19568 fprintf (dump, "Done finish_sched\n");
19569 }
19570 }
19571 }
19572
19573 struct rs6000_sched_context
19574 {
19575 short cached_can_issue_more;
19576 rtx_insn *last_scheduled_insn;
19577 int load_store_pendulum;
19578 int divide_cnt;
19579 int vec_pairing;
19580 };
19581
19582 typedef struct rs6000_sched_context rs6000_sched_context_def;
19583 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19584
19585 /* Allocate store for new scheduling context. */
19586 static void *
19587 rs6000_alloc_sched_context (void)
19588 {
19589 return xmalloc (sizeof (rs6000_sched_context_def));
19590 }
19591
19592 /* If CLEAN_P is true, initialize _SC with clean data;
19593 otherwise initialize it from the global context. */
19594 static void
19595 rs6000_init_sched_context (void *_sc, bool clean_p)
19596 {
19597 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19598
19599 if (clean_p)
19600 {
19601 sc->cached_can_issue_more = 0;
19602 sc->last_scheduled_insn = NULL;
19603 sc->load_store_pendulum = 0;
19604 sc->divide_cnt = 0;
19605 sc->vec_pairing = 0;
19606 }
19607 else
19608 {
19609 sc->cached_can_issue_more = cached_can_issue_more;
19610 sc->last_scheduled_insn = last_scheduled_insn;
19611 sc->load_store_pendulum = load_store_pendulum;
19612 sc->divide_cnt = divide_cnt;
19613 sc->vec_pairing = vec_pairing;
19614 }
19615 }
19616
19617 /* Sets the global scheduling context to the one pointed to by _SC. */
19618 static void
19619 rs6000_set_sched_context (void *_sc)
19620 {
19621 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19622
19623 gcc_assert (sc != NULL);
19624
19625 cached_can_issue_more = sc->cached_can_issue_more;
19626 last_scheduled_insn = sc->last_scheduled_insn;
19627 load_store_pendulum = sc->load_store_pendulum;
19628 divide_cnt = sc->divide_cnt;
19629 vec_pairing = sc->vec_pairing;
19630 }
19631
19632 /* Free _SC. */
19633 static void
19634 rs6000_free_sched_context (void *_sc)
19635 {
19636 gcc_assert (_sc != NULL);
19637
19638 free (_sc);
19639 }
19640
19641 static bool
19642 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19643 {
19644 switch (get_attr_type (insn))
19645 {
19646 case TYPE_DIV:
19647 case TYPE_SDIV:
19648 case TYPE_DDIV:
19649 case TYPE_VECDIV:
19650 case TYPE_SSQRT:
19651 case TYPE_DSQRT:
19652 return false;
19653
19654 default:
19655 return true;
19656 }
19657 }
19658 \f
19659 /* Length in units of the trampoline for entering a nested function. */
19660
19661 int
19662 rs6000_trampoline_size (void)
19663 {
19664 int ret = 0;
19665
19666 switch (DEFAULT_ABI)
19667 {
19668 default:
19669 gcc_unreachable ();
19670
19671 case ABI_AIX:
19672 ret = (TARGET_32BIT) ? 12 : 24;
19673 break;
19674
19675 case ABI_ELFv2:
19676 gcc_assert (!TARGET_32BIT);
19677 ret = 32;
19678 break;
19679
19680 case ABI_DARWIN:
19681 case ABI_V4:
19682 ret = (TARGET_32BIT) ? 40 : 48;
19683 break;
19684 }
19685
19686 return ret;
19687 }
19688
19689 /* Emit RTL insns to initialize the variable parts of a trampoline.
19690 FNDECL is the declaration of the nested function, from which the
19691 code address is taken. CXT is an RTX for the static chain value. */
19692
19693 static void
19694 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19695 {
19696 int regsize = (TARGET_32BIT) ? 4 : 8;
19697 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19698 rtx ctx_reg = force_reg (Pmode, cxt);
19699 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19700
19701 switch (DEFAULT_ABI)
19702 {
19703 default:
19704 gcc_unreachable ();
19705
19706 /* Under AIX, just build the 3-word function descriptor. */
19707 case ABI_AIX:
19708 {
19709 rtx fnmem, fn_reg, toc_reg;
19710
19711 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19712 error ("you cannot take the address of a nested function if you use "
19713 "the %qs option", "-mno-pointers-to-nested-functions");
19714
19715 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19716 fn_reg = gen_reg_rtx (Pmode);
19717 toc_reg = gen_reg_rtx (Pmode);
19718
19719 /* Macro to shorten the code expansions below. */
19720 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19721
19722 m_tramp = replace_equiv_address (m_tramp, addr);
19723
19724 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19725 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19726 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19727 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19728 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19729
19730 # undef MEM_PLUS
19731 }
19732 break;
19733
19734 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19735 case ABI_ELFv2:
19736 case ABI_DARWIN:
19737 case ABI_V4:
19738 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19739 LCT_NORMAL, VOIDmode,
19740 addr, Pmode,
19741 GEN_INT (rs6000_trampoline_size ()), SImode,
19742 fnaddr, Pmode,
19743 ctx_reg, Pmode);
19744 break;
19745 }
19746 }
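
/* Illustration (a sketch, not compiled): under ABI_AIX the moves above
   simply copy the target's function descriptor and append the static
   chain, so the initialized trampoline looks roughly like

	struct aix_trampoline
	{
	  void *entry;		(code address, from FNADDR's descriptor)
	  void *toc;		(TOC pointer, from FNADDR's descriptor)
	  void *static_chain;	(CXT)
	};

   with each field REGSIZE (4 or 8) bytes wide, matching the 12- and
   24-byte sizes returned by rs6000_trampoline_size.  The struct and
   field names are hypothetical.  */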
19747
19748 \f
19749 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19750 identifier as an argument, so the front end shouldn't look it up. */
19751
19752 static bool
19753 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19754 {
19755 return is_attribute_p ("altivec", attr_id);
19756 }
19757
19758 /* Handle the "altivec" attribute. The attribute may have
19759 arguments as follows:
19760
19761 __attribute__((altivec(vector__)))
19762 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19763 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19764
19765 and may appear more than once (e.g., 'vector bool char') in a
19766 given declaration. */
19767
19768 static tree
19769 rs6000_handle_altivec_attribute (tree *node,
19770 tree name ATTRIBUTE_UNUSED,
19771 tree args,
19772 int flags ATTRIBUTE_UNUSED,
19773 bool *no_add_attrs)
19774 {
19775 tree type = *node, result = NULL_TREE;
19776 machine_mode mode;
19777 int unsigned_p;
19778 char altivec_type
19779 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19780 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19781 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19782 : '?');
19783
19784 while (POINTER_TYPE_P (type)
19785 || TREE_CODE (type) == FUNCTION_TYPE
19786 || TREE_CODE (type) == METHOD_TYPE
19787 || TREE_CODE (type) == ARRAY_TYPE)
19788 type = TREE_TYPE (type);
19789
19790 mode = TYPE_MODE (type);
19791
19792 /* Check for invalid AltiVec type qualifiers. */
19793 if (type == long_double_type_node)
19794 error ("use of %<long double%> in AltiVec types is invalid");
19795 else if (type == boolean_type_node)
19796 error ("use of boolean types in AltiVec types is invalid");
19797 else if (TREE_CODE (type) == COMPLEX_TYPE)
19798 error ("use of %<complex%> in AltiVec types is invalid");
19799 else if (DECIMAL_FLOAT_MODE_P (mode))
19800 error ("use of decimal floating point types in AltiVec types is invalid");
19801 else if (!TARGET_VSX)
19802 {
19803 if (type == long_unsigned_type_node || type == long_integer_type_node)
19804 {
19805 if (TARGET_64BIT)
19806 error ("use of %<long%> in AltiVec types is invalid for "
19807 "64-bit code without %qs", "-mvsx");
19808 else if (rs6000_warn_altivec_long)
19809 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19810 "use %<int%>");
19811 }
19812 else if (type == long_long_unsigned_type_node
19813 || type == long_long_integer_type_node)
19814 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19815 "-mvsx");
19816 else if (type == double_type_node)
19817 error ("use of %<double%> in AltiVec types is invalid without %qs",
19818 "-mvsx");
19819 }
19820
19821 switch (altivec_type)
19822 {
19823 case 'v':
19824 unsigned_p = TYPE_UNSIGNED (type);
19825 switch (mode)
19826 {
19827 case E_TImode:
19828 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19829 break;
19830 case E_DImode:
19831 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19832 break;
19833 case E_SImode:
19834 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19835 break;
19836 case E_HImode:
19837 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19838 break;
19839 case E_QImode:
19840 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19841 break;
19842 case E_SFmode: result = V4SF_type_node; break;
19843 case E_DFmode: result = V2DF_type_node; break;
19844 /* If the user says 'vector int bool', we may be handed the 'bool'
19845 attribute _before_ the 'vector' attribute, and so select the
19846 proper type in the 'b' case below. */
19847 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19848 case E_V2DImode: case E_V2DFmode:
19849 result = type;
19850 default: break;
19851 }
19852 break;
19853 case 'b':
19854 switch (mode)
19855 {
19856 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19857 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19858 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19859 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19860 default: break;
19861 }
19862 break;
19863 case 'p':
19864 switch (mode)
19865 {
19866 case E_V8HImode: result = pixel_V8HI_type_node;
19867 default: break;
19868 }
19869 default: break;
19870 }
19871
19872 /* Propagate qualifiers attached to the element type
19873 onto the vector type. */
19874 if (result && result != type && TYPE_QUALS (type))
19875 result = build_qualified_type (result, TYPE_QUALS (type));
19876
19877 *no_add_attrs = true; /* No need to hang on to the attribute. */
19878
19879 if (result)
19880 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19881
19882 return NULL_TREE;
19883 }
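
/* Illustration (a sketch): the context-sensitive keywords accepted by
   the front ends expand to the attribute handled above, so

	vector unsigned int v;

   is treated like

	__attribute__ ((altivec (vector__))) unsigned int v;

   which this handler rewrites to unsigned_V4SI_type_node (V4SI, since
   unsigned int has SImode).  */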
19884
19885 /* AltiVec defines five built-in scalar types that serve as vector
19886 elements; we must teach the compiler how to mangle them. The 128-bit
19887 floating point mangling is target-specific as well. MMA defines
19888 two built-in types to be used as opaque vector types. */
19889
19890 static const char *
19891 rs6000_mangle_type (const_tree type)
19892 {
19893 type = TYPE_MAIN_VARIANT (type);
19894
19895 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19896 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
19897 && TREE_CODE (type) != OPAQUE_TYPE)
19898 return NULL;
19899
19900 if (type == bool_char_type_node) return "U6__boolc";
19901 if (type == bool_short_type_node) return "U6__bools";
19902 if (type == pixel_type_node) return "u7__pixel";
19903 if (type == bool_int_type_node) return "U6__booli";
19904 if (type == bool_long_long_type_node) return "U6__boolx";
19905
19906 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19907 return "g";
19908 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19909 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19910
19911 if (type == vector_pair_type_node)
19912 return "u13__vector_pair";
19913 if (type == vector_quad_type_node)
19914 return "u13__vector_quad";
19915
19916 /* For all other types, use the default mangling. */
19917 return NULL;
19918 }
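
/* Illustration (a sketch; assumes the usual Dv vector mangling): with
   the manglings above, a C++ declaration such as

	void f (vector bool int);

   mangles the element type as "U6__booli", giving a name along the
   lines of _Z1fDv4_U6__booli.  */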
19919
19920 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19921 struct attribute_spec.handler. */
19922
19923 static tree
19924 rs6000_handle_longcall_attribute (tree *node, tree name,
19925 tree args ATTRIBUTE_UNUSED,
19926 int flags ATTRIBUTE_UNUSED,
19927 bool *no_add_attrs)
19928 {
19929 if (TREE_CODE (*node) != FUNCTION_TYPE
19930 && TREE_CODE (*node) != FIELD_DECL
19931 && TREE_CODE (*node) != TYPE_DECL)
19932 {
19933 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19934 name);
19935 *no_add_attrs = true;
19936 }
19937
19938 return NULL_TREE;
19939 }
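
/* Illustration (usage sketch): the attribute validated above is
   written on a function declaration, e.g.

	void far_away (void) __attribute__ ((longcall));

   making calls to FAR_AWAY go through a register instead of a direct
   "bl", so they are not limited to the +-32MB branch displacement.  */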
19940
19941 /* Set longcall attributes on all functions declared when
19942 rs6000_default_long_calls is true. */
19943 static void
19944 rs6000_set_default_type_attributes (tree type)
19945 {
19946 if (rs6000_default_long_calls
19947 && (TREE_CODE (type) == FUNCTION_TYPE
19948 || TREE_CODE (type) == METHOD_TYPE))
19949 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19950 NULL_TREE,
19951 TYPE_ATTRIBUTES (type));
19952
19953 #if TARGET_MACHO
19954 darwin_set_default_type_attributes (type);
19955 #endif
19956 }
19957
19958 /* Return a reference suitable for calling a function with the
19959 longcall attribute. */
19960
19961 static rtx
19962 rs6000_longcall_ref (rtx call_ref, rtx arg)
19963 {
19964 /* System V adds '.' to the internal name, so skip any leading dots. */
19965 const char *call_name = XSTR (call_ref, 0);
19966 if (*call_name == '.')
19967 {
19968 while (*call_name == '.')
19969 call_name++;
19970
19971 tree node = get_identifier (call_name);
19972 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19973 }
19974
19975 if (TARGET_PLTSEQ)
19976 {
19977 rtx base = const0_rtx;
19978 int regno = 12;
19979 if (rs6000_pcrel_p ())
19980 {
19981 rtx reg = gen_rtx_REG (Pmode, regno);
19982 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
19983 gen_rtvec (3, base, call_ref, arg),
19984 UNSPECV_PLT_PCREL);
19985 emit_insn (gen_rtx_SET (reg, u));
19986 return reg;
19987 }
19988
19989 if (DEFAULT_ABI == ABI_ELFv2)
19990 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19991 else
19992 {
19993 if (flag_pic)
19994 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19995 regno = 11;
19996 }
19997 /* The register must match that used by linker PLT stubs. For ELFv2,
19998 r12 may be used by a function's global entry point. For SysV4,
19999 r11 is used by the __glink_PLTresolve lazy resolver entry. */
20000 rtx reg = gen_rtx_REG (Pmode, regno);
20001 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20002 UNSPEC_PLT16_HA);
20003 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20004 gen_rtvec (3, reg, call_ref, arg),
20005 UNSPECV_PLT16_LO);
20006 emit_insn (gen_rtx_SET (reg, hi));
20007 emit_insn (gen_rtx_SET (reg, lo));
20008 return reg;
20009 }
20010
20011 return force_reg (Pmode, call_ref);
20012 }
20013 \f
20014 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20015 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20016 #endif
20017
20018 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20019 struct attribute_spec.handler. */
20020 static tree
20021 rs6000_handle_struct_attribute (tree *node, tree name,
20022 tree args ATTRIBUTE_UNUSED,
20023 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20024 {
20025 tree *type = NULL;
20026 if (DECL_P (*node))
20027 {
20028 if (TREE_CODE (*node) == TYPE_DECL)
20029 type = &TREE_TYPE (*node);
20030 }
20031 else
20032 type = node;
20033
20034 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20035 || TREE_CODE (*type) == UNION_TYPE)))
20036 {
20037 warning (OPT_Wattributes, "%qE attribute ignored", name);
20038 *no_add_attrs = true;
20039 }
20040
20041 else if ((is_attribute_p ("ms_struct", name)
20042 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20043 || ((is_attribute_p ("gcc_struct", name)
20044 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20045 {
20046 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20047 name);
20048 *no_add_attrs = true;
20049 }
20050
20051 return NULL_TREE;
20052 }
20053
20054 static bool
20055 rs6000_ms_bitfield_layout_p (const_tree record_type)
20056 {
20057 return (TARGET_USE_MS_BITFIELD_LAYOUT
20058 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20059 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20060 }
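
/* Illustration (a sketch, assuming a 32-bit int): the layout choice
   made above shows up in bitfield allocation.  Given

	struct s { char c; int b : 8; };

   gcc_struct layout places B in the bits directly after C, so
   sizeof (struct s) == 4, while ms_struct layout starts a fresh
   int-sized unit for B, giving sizeof (struct s) == 8.  */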
20061 \f
20062 #ifdef USING_ELFOS_H
20063
20064 /* A get_unnamed_section callback, used for switching to toc_section. */
20065
20066 static void
20067 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20068 {
20069 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20070 && TARGET_MINIMAL_TOC)
20071 {
20072 if (!toc_initialized)
20073 {
20074 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20075 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20076 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20077 fprintf (asm_out_file, "\t.tc ");
20078 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20079 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20080 fprintf (asm_out_file, "\n");
20081
20082 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20083 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20084 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20085 fprintf (asm_out_file, " = .+32768\n");
20086 toc_initialized = 1;
20087 }
20088 else
20089 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20090 }
20091 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20092 {
20093 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20094 if (!toc_initialized)
20095 {
20096 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20097 toc_initialized = 1;
20098 }
20099 }
20100 else
20101 {
20102 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20103 if (!toc_initialized)
20104 {
20105 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20106 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20107 fprintf (asm_out_file, " = .+32768\n");
20108 toc_initialized = 1;
20109 }
20110 }
20111 }
20112
20113 /* Implement TARGET_ASM_INIT_SECTIONS. */
20114
20115 static void
20116 rs6000_elf_asm_init_sections (void)
20117 {
20118 toc_section
20119 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20120
20121 sdata2_section
20122 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20123 SDATA2_SECTION_ASM_OP);
20124 }
20125
20126 /* Implement TARGET_SELECT_RTX_SECTION. */
20127
20128 static section *
20129 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20130 unsigned HOST_WIDE_INT align)
20131 {
20132 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20133 return toc_section;
20134 else
20135 return default_elf_select_rtx_section (mode, x, align);
20136 }
20137 \f
20138 /* For a SYMBOL_REF, set generic flags and then perform some
20139 target-specific processing.
20140
20141 When the AIX ABI is requested on a non-AIX system, replace the
20142 function name with the real name (with a leading .) rather than the
20143 function descriptor name. This avoids a lot of overriding code that
20144 would otherwise be needed to handle the name prefixes. */
20145
20146 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20147 static void
20148 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20149 {
20150 default_encode_section_info (decl, rtl, first);
20151
20152 if (first
20153 && TREE_CODE (decl) == FUNCTION_DECL
20154 && !TARGET_AIX
20155 && DEFAULT_ABI == ABI_AIX)
20156 {
20157 rtx sym_ref = XEXP (rtl, 0);
20158 size_t len = strlen (XSTR (sym_ref, 0));
20159 char *str = XALLOCAVEC (char, len + 2);
20160 str[0] = '.';
20161 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20162 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20163 }
20164 }
20165
20166 static inline bool
20167 compare_section_name (const char *section, const char *templ)
20168 {
20169 int len;
20170
20171 len = strlen (templ);
20172 return (strncmp (section, templ, len) == 0
20173 && (section[len] == 0 || section[len] == '.'));
20174 }
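
/* Illustration: compare_section_name matches a section name or one of
   its dot-separated subsections, but not a mere prefix.  With TEMPL
   ".sdata", the names ".sdata" and ".sdata.foo" match, while ".sdata2"
   does not (its next character is '2', not '.' or NUL).  */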
20175
20176 bool
20177 rs6000_elf_in_small_data_p (const_tree decl)
20178 {
20179 if (rs6000_sdata == SDATA_NONE)
20180 return false;
20181
20182 /* We want to merge strings, so we never consider them small data. */
20183 if (TREE_CODE (decl) == STRING_CST)
20184 return false;
20185
20186 /* Functions are never in the small data area. */
20187 if (TREE_CODE (decl) == FUNCTION_DECL)
20188 return false;
20189
20190 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20191 {
20192 const char *section = DECL_SECTION_NAME (decl);
20193 if (compare_section_name (section, ".sdata")
20194 || compare_section_name (section, ".sdata2")
20195 || compare_section_name (section, ".gnu.linkonce.s")
20196 || compare_section_name (section, ".sbss")
20197 || compare_section_name (section, ".sbss2")
20198 || compare_section_name (section, ".gnu.linkonce.sb")
20199 || strcmp (section, ".PPC.EMB.sdata0") == 0
20200 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20201 return true;
20202 }
20203 else
20204 {
20205 /* If we are told not to put readonly data in sdata, then don't. */
20206 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20207 && !rs6000_readonly_in_sdata)
20208 return false;
20209
20210 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20211
20212 if (size > 0
20213 && size <= g_switch_value
20214 /* If it's not public, and we're not going to reference it there,
20215 there's no need to put it in the small data section. */
20216 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20217 return true;
20218 }
20219
20220 return false;
20221 }
20222
20223 #endif /* USING_ELFOS_H */
20224 \f
20225 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20226
20227 static bool
20228 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20229 {
20230 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20231 }
20232
20233 /* Do not place thread-local symbol refs in the object blocks. */
20234
20235 static bool
20236 rs6000_use_blocks_for_decl_p (const_tree decl)
20237 {
20238 return !DECL_THREAD_LOCAL_P (decl);
20239 }
20240 \f
20241 /* Return a REG that occurs in ADDR with coefficient 1.
20242 ADDR can be effectively incremented by incrementing REG.
20243
20244 r0 is special and we must not select it as an address
20245 register by this routine since our caller will try to
20246 increment the returned register via an "la" instruction. */
20247
20248 rtx
20249 find_addr_reg (rtx addr)
20250 {
20251 while (GET_CODE (addr) == PLUS)
20252 {
20253 if (REG_P (XEXP (addr, 0))
20254 && REGNO (XEXP (addr, 0)) != 0)
20255 addr = XEXP (addr, 0);
20256 else if (REG_P (XEXP (addr, 1))
20257 && REGNO (XEXP (addr, 1)) != 0)
20258 addr = XEXP (addr, 1);
20259 else if (CONSTANT_P (XEXP (addr, 0)))
20260 addr = XEXP (addr, 1);
20261 else if (CONSTANT_P (XEXP (addr, 1)))
20262 addr = XEXP (addr, 0);
20263 else
20264 gcc_unreachable ();
20265 }
20266 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20267 return addr;
20268 }
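
/* Illustration: for ADDR of the form (plus (reg 9) (const_int 16)),
   find_addr_reg returns (reg 9); for (plus (plus (reg 9) (reg 10))
   (const_int 16)) it strips the constant first and then returns the
   first suitable register, again (reg 9).  */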
20269
20270 void
20271 rs6000_fatal_bad_address (rtx op)
20272 {
20273 fatal_insn ("bad address", op);
20274 }
20275
20276 #if TARGET_MACHO
20277
20278 vec<branch_island, va_gc> *branch_islands;
20279
20280 /* Remember to generate a branch island for far calls to the given
20281 function. */
20282
20283 static void
20284 add_compiler_branch_island (tree label_name, tree function_name,
20285 int line_number)
20286 {
20287 branch_island bi = {function_name, label_name, line_number};
20288 vec_safe_push (branch_islands, bi);
20289 }
20290
20291 /* NO_PREVIOUS_DEF checks whether the function name is already in the
20292 list of branch islands. */
20293
20294 static int
20295 no_previous_def (tree function_name)
20296 {
20297 branch_island *bi;
20298 unsigned ix;
20299
20300 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20301 if (function_name == bi->function_name)
20302 return 0;
20303 return 1;
20304 }
20305
20306 /* GET_PREV_LABEL gets the label name from the previous definition of
20307 the function. */
20308
20309 static tree
20310 get_prev_label (tree function_name)
20311 {
20312 branch_island *bi;
20313 unsigned ix;
20314
20315 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20316 if (function_name == bi->function_name)
20317 return bi->label_name;
20318 return NULL_TREE;
20319 }
20320
20321 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20322
20323 void
20324 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20325 {
20326 unsigned int length;
20327 char *symbol_name, *lazy_ptr_name;
20328 char *local_label_0;
20329 static unsigned label = 0;
20330
20331 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20332 symb = (*targetm.strip_name_encoding) (symb);
20333
20334 length = strlen (symb);
20335 symbol_name = XALLOCAVEC (char, length + 32);
20336 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20337
20338 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20339 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20340
20341 if (MACHOPIC_PURE)
20342 {
20343 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20344 fprintf (file, "\t.align 5\n");
20345
20346 fprintf (file, "%s:\n", stub);
20347 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20348
20349 label++;
20350 local_label_0 = XALLOCAVEC (char, 16);
20351 sprintf (local_label_0, "L%u$spb", label);
20352
20353 fprintf (file, "\tmflr r0\n");
20354 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20355 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20356 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20357 lazy_ptr_name, local_label_0);
20358 fprintf (file, "\tmtlr r0\n");
20359 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20360 (TARGET_64BIT ? "ldu" : "lwzu"),
20361 lazy_ptr_name, local_label_0);
20362 fprintf (file, "\tmtctr r12\n");
20363 fprintf (file, "\tbctr\n");
20364 }
20365 else /* mdynamic-no-pic or mkernel. */
20366 {
20367 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20368 fprintf (file, "\t.align 4\n");
20369
20370 fprintf (file, "%s:\n", stub);
20371 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20372
20373 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20374 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20375 (TARGET_64BIT ? "ldu" : "lwzu"),
20376 lazy_ptr_name);
20377 fprintf (file, "\tmtctr r12\n");
20378 fprintf (file, "\tbctr\n");
20379 }
20380
20381 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20382 fprintf (file, "%s:\n", lazy_ptr_name);
20383 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20384 fprintf (file, "%sdyld_stub_binding_helper\n",
20385 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20386 }
20387
20388 /* Legitimize PIC addresses. If the address is already
20389 position-independent, we return ORIG. Newly generated
20390 position-independent addresses go into a reg. This is REG if
20391 nonzero; otherwise we allocate register(s) as necessary. */
20392
20393 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20394
20395 rtx
20396 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20397 rtx reg)
20398 {
20399 rtx base, offset;
20400
20401 if (reg == NULL && !reload_completed)
20402 reg = gen_reg_rtx (Pmode);
20403
20404 if (GET_CODE (orig) == CONST)
20405 {
20406 rtx reg_temp;
20407
20408 if (GET_CODE (XEXP (orig, 0)) == PLUS
20409 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20410 return orig;
20411
20412 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20413
20414 /* Use a different reg for the intermediate value, as
20415 it will be marked UNCHANGING. */
20416 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20417 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20418 Pmode, reg_temp);
20419 offset =
20420 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20421 Pmode, reg);
20422
20423 if (CONST_INT_P (offset))
20424 {
20425 if (SMALL_INT (offset))
20426 return plus_constant (Pmode, base, INTVAL (offset));
20427 else if (!reload_completed)
20428 offset = force_reg (Pmode, offset);
20429 else
20430 {
20431 rtx mem = force_const_mem (Pmode, orig);
20432 return machopic_legitimize_pic_address (mem, Pmode, reg);
20433 }
20434 }
20435 return gen_rtx_PLUS (Pmode, base, offset);
20436 }
20437
20438 /* Fall back on generic machopic code. */
20439 return machopic_legitimize_pic_address (orig, mode, reg);
20440 }
20441
20442 /* Output a .machine directive for the Darwin assembler, and call
20443 the generic start_file routine. */
20444
20445 static void
20446 rs6000_darwin_file_start (void)
20447 {
20448 static const struct
20449 {
20450 const char *arg;
20451 const char *name;
20452 HOST_WIDE_INT if_set;
20453 } mapping[] = {
20454 { "ppc64", "ppc64", MASK_64BIT },
20455 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20456 { "power4", "ppc970", 0 },
20457 { "G5", "ppc970", 0 },
20458 { "7450", "ppc7450", 0 },
20459 { "7400", "ppc7400", MASK_ALTIVEC },
20460 { "G4", "ppc7400", 0 },
20461 { "750", "ppc750", 0 },
20462 { "740", "ppc750", 0 },
20463 { "G3", "ppc750", 0 },
20464 { "604e", "ppc604e", 0 },
20465 { "604", "ppc604", 0 },
20466 { "603e", "ppc603", 0 },
20467 { "603", "ppc603", 0 },
20468 { "601", "ppc601", 0 },
20469 { NULL, "ppc", 0 } };
20470 const char *cpu_id = "";
20471 size_t i;
20472
20473 rs6000_file_start ();
20474 darwin_file_start ();
20475
20476 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20477
20478 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20479 cpu_id = rs6000_default_cpu;
20480
20481 if (global_options_set.x_rs6000_cpu_index)
20482 cpu_id = processor_target_table[rs6000_cpu_index].name;
20483
20484 /* Look through the mapping array. Pick the first name that either
20485 matches the argument, has a bit set in IF_SET that is also set
20486 in the target flags, or has a NULL name. */
20487
20488 i = 0;
20489 while (mapping[i].arg != NULL
20490 && strcmp (mapping[i].arg, cpu_id) != 0
20491 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20492 i++;
20493
20494 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20495 }
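
/* Illustration: compiling with -mcpu=750 (a G3), the loop above skips
   every earlier entry (no name match and no IF_SET bit present) and
   stops at "750", so the file begins with

	.machine ppc750

   A CPU name absent from the table falls through to the final "ppc"
   entry.  */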
20496
20497 #endif /* TARGET_MACHO */
20498
20499 #if TARGET_ELF
20500 static int
20501 rs6000_elf_reloc_rw_mask (void)
20502 {
20503 if (flag_pic)
20504 return 3;
20505 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20506 return 2;
20507 else
20508 return 0;
20509 }
20510
20511 /* Record an element in the table of global constructors. SYMBOL is
20512 a SYMBOL_REF of the function to be called; PRIORITY is a number
20513 between 0 and MAX_INIT_PRIORITY.
20514
20515 This differs from default_named_section_asm_out_constructor in
20516 that we have special handling for -mrelocatable. */
20517
20518 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20519 static void
20520 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20521 {
20522 const char *section = ".ctors";
20523 char buf[18];
20524
20525 if (priority != DEFAULT_INIT_PRIORITY)
20526 {
20527 sprintf (buf, ".ctors.%.5u",
20528 /* Invert the numbering so the linker puts us in the proper
20529 order; constructors are run from right to left, and the
20530 linker sorts in increasing order. */
20531 MAX_INIT_PRIORITY - priority);
20532 section = buf;
20533 }
20534
20535 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20536 assemble_align (POINTER_SIZE);
20537
20538 if (DEFAULT_ABI == ABI_V4
20539 && (TARGET_RELOCATABLE || flag_pic > 1))
20540 {
20541 fputs ("\t.long (", asm_out_file);
20542 output_addr_const (asm_out_file, symbol);
20543 fputs (")@fixup\n", asm_out_file);
20544 }
20545 else
20546 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20547 }
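
/* Illustration: MAX_INIT_PRIORITY is 65535, so a constructor declared
   with __attribute__ ((constructor (101))) lands in section
   ".ctors.65434" (65535 - 101); the linker's increasing name sort then
   produces the required reverse execution order.  */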
20548
20549 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20550 static void
20551 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20552 {
20553 const char *section = ".dtors";
20554 char buf[18];
20555
20556 if (priority != DEFAULT_INIT_PRIORITY)
20557 {
20558 sprintf (buf, ".dtors.%.5u",
20559 /* Invert the numbering so the linker puts us in the proper
20560 order; constructors are run from right to left, and the
20561 linker sorts in increasing order. */
20562 MAX_INIT_PRIORITY - priority);
20563 section = buf;
20564 }
20565
20566 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20567 assemble_align (POINTER_SIZE);
20568
20569 if (DEFAULT_ABI == ABI_V4
20570 && (TARGET_RELOCATABLE || flag_pic > 1))
20571 {
20572 fputs ("\t.long (", asm_out_file);
20573 output_addr_const (asm_out_file, symbol);
20574 fputs (")@fixup\n", asm_out_file);
20575 }
20576 else
20577 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20578 }
20579
20580 void
20581 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20582 {
20583 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20584 {
20585 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20586 ASM_OUTPUT_LABEL (file, name);
20587 fputs (DOUBLE_INT_ASM_OP, file);
20588 rs6000_output_function_entry (file, name);
20589 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20590 if (DOT_SYMBOLS)
20591 {
20592 fputs ("\t.size\t", file);
20593 assemble_name (file, name);
20594 fputs (",24\n\t.type\t.", file);
20595 assemble_name (file, name);
20596 fputs (",@function\n", file);
20597 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20598 {
20599 fputs ("\t.globl\t.", file);
20600 assemble_name (file, name);
20601 putc ('\n', file);
20602 }
20603 }
20604 else
20605 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20606 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20607 rs6000_output_function_entry (file, name);
20608 fputs (":\n", file);
20609 return;
20610 }
20611
20612 int uses_toc;
20613 if (DEFAULT_ABI == ABI_V4
20614 && (TARGET_RELOCATABLE || flag_pic > 1)
20615 && !TARGET_SECURE_PLT
20616 && (!constant_pool_empty_p () || crtl->profile)
20617 && (uses_toc = uses_TOC ()))
20618 {
20619 char buf[256];
20620
20621 if (uses_toc == 2)
20622 switch_to_other_text_partition ();
20623 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20624
20625 fprintf (file, "\t.long ");
20626 assemble_name (file, toc_label_name);
20627 need_toc_init = 1;
20628 putc ('-', file);
20629 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20630 assemble_name (file, buf);
20631 putc ('\n', file);
20632 if (uses_toc == 2)
20633 switch_to_other_text_partition ();
20634 }
20635
20636 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20637 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20638
20639 if (TARGET_CMODEL == CMODEL_LARGE
20640 && rs6000_global_entry_point_prologue_needed_p ())
20641 {
20642 char buf[256];
20643
20644 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20645
20646 fprintf (file, "\t.quad .TOC.-");
20647 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20648 assemble_name (file, buf);
20649 putc ('\n', file);
20650 }
20651
20652 if (DEFAULT_ABI == ABI_AIX)
20653 {
20654 const char *desc_name, *orig_name;
20655
20656 orig_name = (*targetm.strip_name_encoding) (name);
20657 desc_name = orig_name;
20658 while (*desc_name == '.')
20659 desc_name++;
20660
20661 if (TREE_PUBLIC (decl))
20662 fprintf (file, "\t.globl %s\n", desc_name);
20663
20664 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20665 fprintf (file, "%s:\n", desc_name);
20666 fprintf (file, "\t.long %s\n", orig_name);
20667 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20668 fputs ("\t.long 0\n", file);
20669 fprintf (file, "\t.previous\n");
20670 }
20671 ASM_OUTPUT_LABEL (file, name);
20672 }
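
/* Illustration (a sketch): for a public 64-bit ELFv1 function "foo"
   with dot-symbols, the code above emits roughly

	.section ".opd","aw"
	.align 3
   foo:
	.quad .foo,.TOC.@tocbase,0
	.previous
	.size foo,24
	.type .foo,@function
	.globl .foo
   .foo:

   i.e. FOO names the function descriptor and .FOO the code entry.  */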
20673
20674 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20675 static void
20676 rs6000_elf_file_end (void)
20677 {
20678 #ifdef HAVE_AS_GNU_ATTRIBUTE
20679 /* ??? The value emitted depends on options active at file end.
20680 Assume anyone using #pragma or attributes that might change
20681 options knows what they are doing. */
20682 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20683 && rs6000_passes_float)
20684 {
20685 int fp;
20686
20687 if (TARGET_HARD_FLOAT)
20688 fp = 1;
20689 else
20690 fp = 2;
20691 if (rs6000_passes_long_double)
20692 {
20693 if (!TARGET_LONG_DOUBLE_128)
20694 fp |= 2 * 4;
20695 else if (TARGET_IEEEQUAD)
20696 fp |= 3 * 4;
20697 else
20698 fp |= 1 * 4;
20699 }
20700 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20701 }
20702 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20703 {
20704 if (rs6000_passes_vector)
20705 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20706 (TARGET_ALTIVEC_ABI ? 2 : 1));
20707 if (rs6000_returns_struct)
20708 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20709 aix_struct_return ? 2 : 1);
20710 }
20711 #endif
20712 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20713 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20714 file_end_indicate_exec_stack ();
20715 #endif
20716
20717 if (flag_split_stack)
20718 file_end_indicate_split_stack ();
20719
20720 if (cpu_builtin_p)
20721 {
20722 /* We have expanded a CPU builtin, so we need to emit a reference to
20723 the special symbol that libc uses to declare that it supports
20724 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2-in-the-TCB feature. */
20725 switch_to_section (data_section);
20726 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20727 fprintf (asm_out_file, "\t%s %s\n",
20728 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20729 }
20730 }
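
/* Illustration: the FP value above packs two 2-bit fields.  A
   hard-float compile passing a 128-bit IBM long double computes
   fp = 1 | 1*4 = 5 and emits ".gnu_attribute 4, 5"; with an IEEE
   128-bit long double it is 1 | 3*4 = 13 instead.  */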
20731 #endif
20732
20733 #if TARGET_XCOFF
20734
20735 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20736 #define HAVE_XCOFF_DWARF_EXTRAS 0
20737 #endif
20738
20739 static enum unwind_info_type
20740 rs6000_xcoff_debug_unwind_info (void)
20741 {
20742 return UI_NONE;
20743 }
20744
20745 static void
20746 rs6000_xcoff_asm_output_anchor (rtx symbol)
20747 {
20748 char buffer[100];
20749
20750 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20751 SYMBOL_REF_BLOCK_OFFSET (symbol));
20752 fprintf (asm_out_file, "%s", SET_ASM_OP);
20753 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20754 fprintf (asm_out_file, ",");
20755 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20756 fprintf (asm_out_file, "\n");
20757 }
20758
20759 static void
20760 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20761 {
20762 fputs (GLOBAL_ASM_OP, stream);
20763 RS6000_OUTPUT_BASENAME (stream, name);
20764 putc ('\n', stream);
20765 }
20766
20767 /* A get_unnamed_section callback, used for read-only sections.
20768 DIRECTIVE points to the section string variable. */
20769
20770 static void
20771 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20772 {
20773 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20774 *(const char *const *) directive,
20775 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20776 }
20777
20778 /* Likewise for read-write sections. */
20779
20780 static void
20781 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20782 {
20783 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20784 *(const char *const *) directive,
20785 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20786 }
20787
20788 static void
20789 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20790 {
20791 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20792 *(const char *const *) directive,
20793 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20794 }
20795
20796 /* A get_unnamed_section callback, used for switching to toc_section. */
20797
20798 static void
20799 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20800 {
20801 if (TARGET_MINIMAL_TOC)
20802 {
20803 /* toc_section is always selected at least once from
20804 rs6000_xcoff_file_start, so this is guaranteed to be
20805 emitted exactly once in each file. */
20806 if (!toc_initialized)
20807 {
20808 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20809 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20810 toc_initialized = 1;
20811 }
20812 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20813 (TARGET_32BIT ? "" : ",3"));
20814 }
20815 else
20816 fputs ("\t.toc\n", asm_out_file);
20817 }
20818
20819 /* Implement TARGET_ASM_INIT_SECTIONS. */
20820
20821 static void
20822 rs6000_xcoff_asm_init_sections (void)
20823 {
20824 read_only_data_section
20825 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20826 &xcoff_read_only_section_name);
20827
20828 private_data_section
20829 = get_unnamed_section (SECTION_WRITE,
20830 rs6000_xcoff_output_readwrite_section_asm_op,
20831 &xcoff_private_data_section_name);
20832
20833 read_only_private_data_section
20834 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20835 &xcoff_private_rodata_section_name);
20836
20837 tls_data_section
20838 = get_unnamed_section (SECTION_TLS,
20839 rs6000_xcoff_output_tls_section_asm_op,
20840 &xcoff_tls_data_section_name);
20841
20842 tls_private_data_section
20843 = get_unnamed_section (SECTION_TLS,
20844 rs6000_xcoff_output_tls_section_asm_op,
20845 &xcoff_private_data_section_name);
20846
20847 toc_section
20848 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20849
20850 readonly_data_section = read_only_data_section;
20851 }
20852
20853 static int
20854 rs6000_xcoff_reloc_rw_mask (void)
20855 {
20856 return 3;
20857 }
20858
20859 static void
20860 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20861 tree decl ATTRIBUTE_UNUSED)
20862 {
20863 int smclass;
20864 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20865
20866 if (flags & SECTION_EXCLUDE)
20867 smclass = 4;
20868 else if (flags & SECTION_DEBUG)
20869 {
20870 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20871 return;
20872 }
20873 else if (flags & SECTION_CODE)
20874 smclass = 0;
20875 else if (flags & SECTION_TLS)
20876 smclass = 3;
20877 else if (flags & SECTION_WRITE)
20878 smclass = 2;
20879 else
20880 smclass = 1;
20881
20882 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20883 (flags & SECTION_CODE) ? "." : "",
20884 name, suffix[smclass], flags & SECTION_ENTSIZE);
20885 }
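
/* Illustration: a writable named section "mydata" whose flags encode
   an 8-byte alignment (exact_log2 (8) == 3 in the SECTION_ENTSIZE
   bits) selects storage mapping class "RW" and is emitted as

	.csect mydata[RW],3

   while a code section "hot" would come out as ".csect .hot[PR],..."
   with a leading dot.  */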
20886
20887 #define IN_NAMED_SECTION(DECL) \
20888 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20889 && DECL_SECTION_NAME (DECL) != NULL)
20890
20891 static section *
20892 rs6000_xcoff_select_section (tree decl, int reloc,
20893 unsigned HOST_WIDE_INT align)
20894 {
20895 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20896 named section. */
20897 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
20898 {
20899 resolve_unique_section (decl, reloc, true);
20900 if (IN_NAMED_SECTION (decl))
20901 return get_named_section (decl, NULL, reloc);
20902 }
20903
20904 if (decl_readonly_section (decl, reloc))
20905 {
20906 if (TREE_PUBLIC (decl))
20907 return read_only_data_section;
20908 else
20909 return read_only_private_data_section;
20910 }
20911 else
20912 {
20913 #if HAVE_AS_TLS
20914 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20915 {
20916 if (TREE_PUBLIC (decl))
20917 return tls_data_section;
20918 else if (bss_initializer_p (decl))
20919 {
20920 /* Convert to COMMON to emit in BSS. */
20921 DECL_COMMON (decl) = 1;
20922 return tls_comm_section;
20923 }
20924 else
20925 return tls_private_data_section;
20926 }
20927 else
20928 #endif
20929 if (TREE_PUBLIC (decl))
20930 return data_section;
20931 else
20932 return private_data_section;
20933 }
20934 }
20935
20936 static void
20937 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20938 {
20939 const char *name;
20940
20941 /* Use select_section for private data and uninitialized data with
20942 alignment <= BIGGEST_ALIGNMENT. */
20943 if (!TREE_PUBLIC (decl)
20944 || DECL_COMMON (decl)
20945 || (DECL_INITIAL (decl) == NULL_TREE
20946 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20947 || DECL_INITIAL (decl) == error_mark_node
20948 || (flag_zero_initialized_in_bss
20949 && initializer_zerop (DECL_INITIAL (decl))))
20950 return;
20951
20952 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20953 name = (*targetm.strip_name_encoding) (name);
20954 set_decl_section_name (decl, name);
20955 }
20956
20957 /* Select section for constant in constant pool.
20958
20959 On RS/6000, all constants are in the private read-only data area.
20960 However, if this is being placed in the TOC it must be output as a
20961 toc entry. */
20962
20963 static section *
20964 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20965 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20966 {
20967 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20968 return toc_section;
20969 else
20970 return read_only_private_data_section;
20971 }
20972
20973 /* Remove any trailing [DS] or the like from the symbol name. */
20974
20975 static const char *
20976 rs6000_xcoff_strip_name_encoding (const char *name)
20977 {
20978 size_t len;
20979 if (*name == '*')
20980 name++;
20981 len = strlen (name);
20982 if (name[len - 1] == ']')
20983 return ggc_alloc_string (name, len - 4);
20984 else
20985 return name;
20986 }
20987
20988 /* Section attributes. AIX is always PIC. */
20989
20990 static unsigned int
20991 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20992 {
20993 unsigned int align;
20994 unsigned int flags = default_section_type_flags (decl, name, reloc);
20995
20996 /* Align to at least the minimum unit (word) size. */
20997 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20998 align = MIN_UNITS_PER_WORD;
20999 else
21000 /* Increase alignment of large objects if not already stricter. */
21001 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21002 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21003 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21004
21005 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21006 }
21007
21008 /* Output at beginning of assembler file.
21009
21010 Initialize the section names for the RS/6000 at this point.
21011
21012 Specify filename, including full path, to assembler.
21013
21014 We want to go into the TOC section so at least one .toc will be emitted.
21015 Also, in order to output proper .bs/.es pairs, we need at least one static
21016 [RW] section emitted.
21017
21018 Finally, declare mcount when profiling to make the assembler happy. */
21019
21020 static void
21021 rs6000_xcoff_file_start (void)
21022 {
21023 rs6000_gen_section_name (&xcoff_bss_section_name,
21024 main_input_filename, ".bss_");
21025 rs6000_gen_section_name (&xcoff_private_data_section_name,
21026 main_input_filename, ".rw_");
21027 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21028 main_input_filename, ".rop_");
21029 rs6000_gen_section_name (&xcoff_read_only_section_name,
21030 main_input_filename, ".ro_");
21031 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21032 main_input_filename, ".tls_");
21033 rs6000_gen_section_name (&xcoff_tbss_section_name,
21034 main_input_filename, ".tbss_[UL]");
21035
21036 fputs ("\t.file\t", asm_out_file);
21037 output_quoted_string (asm_out_file, main_input_filename);
21038 fputc ('\n', asm_out_file);
21039 if (write_symbols != NO_DEBUG)
21040 switch_to_section (private_data_section);
21041 switch_to_section (toc_section);
21042 switch_to_section (text_section);
21043 if (profile_flag)
21044 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21045 rs6000_file_start ();
21046 }
21047
21048 /* Output at end of assembler file.
21049 On the RS/6000, referencing data should automatically pull in text. */
21050
21051 static void
21052 rs6000_xcoff_file_end (void)
21053 {
21054 switch_to_section (text_section);
21055 fputs ("_section_.text:\n", asm_out_file);
21056 switch_to_section (data_section);
21057 fputs (TARGET_32BIT
21058 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21059 asm_out_file);
21060 }
21061
21062 struct declare_alias_data
21063 {
21064 FILE *file;
21065 bool function_descriptor;
21066 };
21067
21068 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
21069
21070 static bool
21071 rs6000_declare_alias (struct symtab_node *n, void *d)
21072 {
21073 struct declare_alias_data *data = (struct declare_alias_data *)d;
21074 /* The main symbol is output specially, because the varasm machinery does
21075 part of the job for us - we do not need to emit .globl/.lglobl and such. */
21076 if (!n->alias || n->weakref)
21077 return false;
21078
21079 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21080 return false;
21081
21082 /* Prevent assemble_alias from trying to use the .set pseudo-op,
21083 which does not behave as the middle-end expects. */
21084 TREE_ASM_WRITTEN (n->decl) = true;
21085
21086 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21087 char *buffer = (char *) alloca (strlen (name) + 2);
21088 char *p;
21089 int dollar_inside = 0;
21090
21091 strcpy (buffer, name);
21092 p = strchr (buffer, '$');
21093 while (p) {
21094 *p = '_';
21095 dollar_inside++;
21096 p = strchr (p + 1, '$');
21097 }
21098 if (TREE_PUBLIC (n->decl))
21099 {
21100 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21101 {
21102 if (dollar_inside) {
21103 if (data->function_descriptor)
21104 fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21105 fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21106 }
21107 if (data->function_descriptor)
21108 {
21109 fputs ("\t.globl .", data->file);
21110 RS6000_OUTPUT_BASENAME (data->file, buffer);
21111 putc ('\n', data->file);
21112 }
21113 fputs ("\t.globl ", data->file);
21114 RS6000_OUTPUT_BASENAME (data->file, buffer);
21115 putc ('\n', data->file);
21116 }
21117 #ifdef ASM_WEAKEN_DECL
21118 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21119 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21120 #endif
21121 }
21122 else
21123 {
21124 if (dollar_inside)
21125 {
21126 if (data->function_descriptor)
21127 fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21128 fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21129 }
21130 if (data->function_descriptor)
21131 {
21132 fputs ("\t.lglobl .", data->file);
21133 RS6000_OUTPUT_BASENAME (data->file, buffer);
21134 putc ('\n', data->file);
21135 }
21136 fputs ("\t.lglobl ", data->file);
21137 RS6000_OUTPUT_BASENAME (data->file, buffer);
21138 putc ('\n', data->file);
21139 }
21140 if (data->function_descriptor)
21141 fputs (".", data->file);
21142 RS6000_OUTPUT_BASENAME (data->file, buffer);
21143 fputs (":\n", data->file);
21144 return false;
21145 }
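
/* Illustration: the '$' rewriting above is needed because the AIX
   assembler does not accept '$' in symbol names directly.  An alias
   named "a$b" is therefore emitted under the label "a_b" together
   with

	.rename a_b,"a$b"

   so the object file still records the original name.  */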
21146
21147
21148 #ifdef HAVE_GAS_HIDDEN
21149 /* Helper function to calculate visibility of a DECL
21150 and return the value as a const string. */
21151
21152 static const char *
21153 rs6000_xcoff_visibility (tree decl)
21154 {
21155 static const char * const visibility_types[] = {
21156 "", ",protected", ",hidden", ",internal"
21157 };
21158
21159 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21160 return visibility_types[vis];
21161 }
21162 #endif
21163
21164
21165 /* This macro produces the initial definition of a function name.
21166 On the RS/6000, we need to place an extra '.' in the function name and
21167 output the function descriptor.
21168 Dollar signs are converted to underscores.
21169
21170 The csect for the function will have already been created when
21171 text_section was selected. We do have to go back to that csect, however.
21172
21173 The third and fourth parameters to the .function pseudo-op (16 and 044)
21174 are placeholders which no longer have any use.
21175
21176 Because the AIX assembler's .set command has unexpected semantics, we output
21177 all aliases as alternative labels in front of the definition. */
21178
21179 void
21180 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21181 {
21182 char *buffer = (char *) alloca (strlen (name) + 1);
21183 char *p;
21184 int dollar_inside = 0;
21185 struct declare_alias_data data = {file, false};
21186
21187 strcpy (buffer, name);
21188 p = strchr (buffer, '$');
21189 while (p) {
21190 *p = '_';
21191 dollar_inside++;
21192 p = strchr (p + 1, '$');
21193 }
21194 if (TREE_PUBLIC (decl))
21195 {
21196 if (!RS6000_WEAK || !DECL_WEAK (decl))
21197 {
21198 if (dollar_inside) {
21199 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
21200 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
21201 }
21202 fputs ("\t.globl .", file);
21203 RS6000_OUTPUT_BASENAME (file, buffer);
21204 #ifdef HAVE_GAS_HIDDEN
21205 fputs (rs6000_xcoff_visibility (decl), file);
21206 #endif
21207 putc ('\n', file);
21208 }
21209 }
21210 else
21211 {
21212 if (dollar_inside) {
21213 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
21214 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
21215 }
21216 fputs ("\t.lglobl .", file);
21217 RS6000_OUTPUT_BASENAME (file, buffer);
21218 putc ('\n', file);
21219 }
21220 fputs ("\t.csect ", file);
21221 RS6000_OUTPUT_BASENAME (file, buffer);
21222 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
21223 RS6000_OUTPUT_BASENAME (file, buffer);
21224 fputs (":\n", file);
21225 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21226 &data, true);
21227 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21228 RS6000_OUTPUT_BASENAME (file, buffer);
21229 fputs (", TOC[tc0], 0\n", file);
21230 in_section = NULL;
21231 switch_to_section (function_section (decl));
21232 putc ('.', file);
21233 RS6000_OUTPUT_BASENAME (file, buffer);
21234 fputs (":\n", file);
21235 data.function_descriptor = true;
21236 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21237 &data, true);
21238 if (!DECL_IGNORED_P (decl))
21239 {
21240 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21241 xcoffout_declare_function (file, decl, buffer);
21242 else if (write_symbols == DWARF2_DEBUG)
21243 {
21244 name = (*targetm.strip_name_encoding) (name);
21245 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21246 }
21247 }
21248 return;
21249 }
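
/* Illustration (a sketch): for a public 64-bit function "foo" the
   code above emits roughly

	.globl .foo
	.csect foo[DS],3
   foo:
	.llong .foo, TOC[tc0], 0
	.csect .text[PR]
   .foo:

   so FOO labels the function descriptor csect and .FOO the actual
   code.  */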
21250
21251
21252 /* Output assembly language to globalize a symbol from a DECL,
21253 possibly with visibility. */
21254
21255 void
21256 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21257 {
21258 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21259 fputs (GLOBAL_ASM_OP, stream);
21260 RS6000_OUTPUT_BASENAME (stream, name);
21261 #ifdef HAVE_GAS_HIDDEN
21262 fputs (rs6000_xcoff_visibility (decl), stream);
21263 #endif
21264 putc ('\n', stream);
21265 }
21266
21267 /* Output assembly language to define a symbol as COMMON from a DECL,
21268 possibly with visibility. */
21269
21270 void
21271 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21272 tree decl ATTRIBUTE_UNUSED,
21273 const char *name,
21274 unsigned HOST_WIDE_INT size,
21275 unsigned HOST_WIDE_INT align)
21276 {
21277 unsigned HOST_WIDE_INT align2 = 2;
21278
21279 if (align > 32)
21280 align2 = floor_log2 (align / BITS_PER_UNIT);
21281 else if (size > 4)
21282 align2 = 3;
21283
21284 fputs (COMMON_ASM_OP, stream);
21285 RS6000_OUTPUT_BASENAME (stream, name);
21286
21287 fprintf (stream,
21288 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
21289 size, align2);
21290
21291 #ifdef HAVE_GAS_HIDDEN
21292 if (decl != NULL)
21293 fputs (rs6000_xcoff_visibility (decl), stream);
21294 #endif
21295 putc ('\n', stream);
21296 }
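
/* Illustration: for a 16-byte object with 64-bit alignment, ALIGN is
   64 > 32, so align2 = floor_log2 (64 / 8) = 3 and the directive is

	.comm buf,16,3

   (BUF being a stand-in name); a 4-byte, word-aligned object keeps
   the default align2 of 2.  */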
21297
21298 /* This macro produces the initial definition of an object (variable) name.
21299 Because the AIX assembler's .set command has unexpected semantics, we output
21300 all aliases as alternative labels in front of the definition. */
21301
21302 void
21303 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21304 {
21305 struct declare_alias_data data = {file, false};
21306 RS6000_OUTPUT_BASENAME (file, name);
21307 fputs (":\n", file);
21308 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21309 &data, true);
21310 }
21311
21312 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
21313
21314 void
21315 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21316 {
21317 fputs (integer_asm_op (size, FALSE), file);
21318 assemble_name (file, label);
21319 fputs ("-$", file);
21320 }
21321
21322 /* Output a symbol offset relative to the dbase for the current object.
21323 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21324 signed offsets.
21325
21326 __gcc_unwind_dbase is embedded in all executables/libraries through
21327 libgcc/config/rs6000/crtdbase.S. */
21328
21329 void
21330 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21331 {
21332 fputs (integer_asm_op (size, FALSE), file);
21333 assemble_name (file, label);
21334 fputs("-__gcc_unwind_dbase", file);
21335 }
21336
21337 #ifdef HAVE_AS_TLS
21338 static void
21339 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21340 {
21341 rtx symbol;
21342 int flags;
21343 const char *symname;
21344
21345 default_encode_section_info (decl, rtl, first);
21346
21347 /* Careful not to prod global register variables. */
21348 if (!MEM_P (rtl))
21349 return;
21350 symbol = XEXP (rtl, 0);
21351 if (!SYMBOL_REF_P (symbol))
21352 return;
21353
21354 flags = SYMBOL_REF_FLAGS (symbol);
21355
21356 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21357 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21358
21359 SYMBOL_REF_FLAGS (symbol) = flags;
21360
21361 /* Append mapping class to extern decls. */
21362 symname = XSTR (symbol, 0);
21363 if (decl /* sync condition with assemble_external () */
21364 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
21365 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
21366 || TREE_CODE (decl) == FUNCTION_DECL)
21367 && symname[strlen (symname) - 1] != ']')
21368 {
21369 char *newname = (char *) alloca (strlen (symname) + 5);
21370 strcpy (newname, symname);
21371 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
21372 ? "[DS]" : "[UA]"));
21373 XSTR (symbol, 0) = ggc_strdup (newname);
21374 }
21375 }
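
/* Illustration: after the mapping-class rewriting above, a reference
   to an extern function "bar" uses the symbol "bar[DS]" (its
   descriptor csect) while an extern variable "var" becomes "var[UA]";
   TLS variables and already-suffixed names are left alone.  */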
21376 #endif /* HAVE_AS_TLS */
21377 #endif /* TARGET_XCOFF */
21378
21379 void
21380 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21381 const char *name, const char *val)
21382 {
21383 fputs ("\t.weak\t", stream);
21384 RS6000_OUTPUT_BASENAME (stream, name);
21385 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21386 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21387 {
21388 if (TARGET_XCOFF)
21389 fputs ("[DS]", stream);
21390 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21391 if (TARGET_XCOFF)
21392 fputs (rs6000_xcoff_visibility (decl), stream);
21393 #endif
21394 fputs ("\n\t.weak\t.", stream);
21395 RS6000_OUTPUT_BASENAME (stream, name);
21396 }
21397 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21398 if (TARGET_XCOFF)
21399 fputs (rs6000_xcoff_visibility (decl), stream);
21400 #endif
21401 fputc ('\n', stream);
21402 if (val)
21403 {
21404 #ifdef ASM_OUTPUT_DEF
21405 ASM_OUTPUT_DEF (stream, name, val);
21406 #endif
21407 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21408 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21409 {
21410 fputs ("\t.set\t.", stream);
21411 RS6000_OUTPUT_BASENAME (stream, name);
21412 fputs (",.", stream);
21413 RS6000_OUTPUT_BASENAME (stream, val);
21414 fputc ('\n', stream);
21415 }
21416 }
21417 }
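
/* Illustration (a sketch): on AIX with dot-symbols, weakening
   function "foo" with definition "bar" emits roughly

	.weak foo[DS]
	.weak .foo
	.set foo,bar
	.set .foo,.bar

   covering both the descriptor and the code entry point.  */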
21418
21419
21420 /* Return true if INSN should not be copied. */
21421
21422 static bool
21423 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21424 {
21425 return recog_memoized (insn) >= 0
21426 && get_attr_cannot_copy (insn);
21427 }
21428
21429 /* Compute a (partial) cost for rtx X. Return true if the complete
21430 cost has been computed, and false if subexpressions should be
21431 scanned. In either case, *TOTAL contains the cost result. */
21432
21433 static bool
21434 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21435 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21436 {
21437 int code = GET_CODE (x);
21438
21439 switch (code)
21440 {
21441 /* On the RS/6000, if it is valid in the insn, it is free. */
21442 case CONST_INT:
21443 if (((outer_code == SET
21444 || outer_code == PLUS
21445 || outer_code == MINUS)
21446 && (satisfies_constraint_I (x)
21447 || satisfies_constraint_L (x)))
21448 || (outer_code == AND
21449 && (satisfies_constraint_K (x)
21450 || (mode == SImode
21451 ? satisfies_constraint_L (x)
21452 : satisfies_constraint_J (x))))
21453 || ((outer_code == IOR || outer_code == XOR)
21454 && (satisfies_constraint_K (x)
21455 || (mode == SImode
21456 ? satisfies_constraint_L (x)
21457 : satisfies_constraint_J (x))))
21458 || outer_code == ASHIFT
21459 || outer_code == ASHIFTRT
21460 || outer_code == LSHIFTRT
21461 || outer_code == ROTATE
21462 || outer_code == ROTATERT
21463 || outer_code == ZERO_EXTRACT
21464 || (outer_code == MULT
21465 && satisfies_constraint_I (x))
21466 || ((outer_code == DIV || outer_code == UDIV
21467 || outer_code == MOD || outer_code == UMOD)
21468 && exact_log2 (INTVAL (x)) >= 0)
21469 || (outer_code == COMPARE
21470 && (satisfies_constraint_I (x)
21471 || satisfies_constraint_K (x)))
21472 || ((outer_code == EQ || outer_code == NE)
21473 && (satisfies_constraint_I (x)
21474 || satisfies_constraint_K (x)
21475 || (mode == SImode
21476 ? satisfies_constraint_L (x)
21477 : satisfies_constraint_J (x))))
21478 || (outer_code == GTU
21479 && satisfies_constraint_I (x))
21480 || (outer_code == LTU
21481 && satisfies_constraint_P (x)))
21482 {
21483 *total = 0;
21484 return true;
21485 }
21486 else if ((outer_code == PLUS
21487 && reg_or_add_cint_operand (x, mode))
21488 || (outer_code == MINUS
21489 && reg_or_sub_cint_operand (x, mode))
21490 || ((outer_code == SET
21491 || outer_code == IOR
21492 || outer_code == XOR)
21493 && (INTVAL (x)
21494 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21495 {
21496 *total = COSTS_N_INSNS (1);
21497 return true;
21498 }
21499 /* FALLTHRU */
21500
21501 case CONST_DOUBLE:
21502 case CONST_WIDE_INT:
21503 case CONST:
21504 case HIGH:
21505 case SYMBOL_REF:
21506 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21507 return true;
21508
21509 case MEM:
21510 /* When optimizing for size, MEM should be slightly more expensive
21511 than generating the address, e.g., (plus (reg) (const)).
21512 L1 cache latency is about two instructions. */
21513 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21514 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21515 *total += COSTS_N_INSNS (100);
21516 return true;
21517
21518 case LABEL_REF:
21519 *total = 0;
21520 return true;
21521
21522 case PLUS:
21523 case MINUS:
21524 if (FLOAT_MODE_P (mode))
21525 *total = rs6000_cost->fp;
21526 else
21527 *total = COSTS_N_INSNS (1);
21528 return false;
21529
21530 case MULT:
21531 if (CONST_INT_P (XEXP (x, 1))
21532 && satisfies_constraint_I (XEXP (x, 1)))
21533 {
21534 if (INTVAL (XEXP (x, 1)) >= -256
21535 && INTVAL (XEXP (x, 1)) <= 255)
21536 *total = rs6000_cost->mulsi_const9;
21537 else
21538 *total = rs6000_cost->mulsi_const;
21539 }
21540 else if (mode == SFmode)
21541 *total = rs6000_cost->fp;
21542 else if (FLOAT_MODE_P (mode))
21543 *total = rs6000_cost->dmul;
21544 else if (mode == DImode)
21545 *total = rs6000_cost->muldi;
21546 else
21547 *total = rs6000_cost->mulsi;
21548 return false;
21549
21550 case FMA:
21551 if (mode == SFmode)
21552 *total = rs6000_cost->fp;
21553 else
21554 *total = rs6000_cost->dmul;
21555 break;
21556
21557 case DIV:
21558 case MOD:
21559 if (FLOAT_MODE_P (mode))
21560 {
21561 *total = mode == DFmode ? rs6000_cost->ddiv
21562 : rs6000_cost->sdiv;
21563 return false;
21564 }
21565 /* FALLTHRU */
21566
21567 case UDIV:
21568 case UMOD:
21569 if (CONST_INT_P (XEXP (x, 1))
21570 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21571 {
21572 if (code == DIV || code == MOD)
21573 /* Shift, addze */
21574 *total = COSTS_N_INSNS (2);
21575 else
21576 /* Shift */
21577 *total = COSTS_N_INSNS (1);
21578 }
21579 else
21580 {
21581 if (GET_MODE (XEXP (x, 1)) == DImode)
21582 *total = rs6000_cost->divdi;
21583 else
21584 *total = rs6000_cost->divsi;
21585 }
21586 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21587 if (!TARGET_MODULO && (code == MOD || code == UMOD))
21588 *total += COSTS_N_INSNS (2);
21589 return false;
21590
21591 case CTZ:
21592 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21593 return false;
21594
21595 case FFS:
21596 *total = COSTS_N_INSNS (4);
21597 return false;
21598
21599 case POPCOUNT:
21600 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21601 return false;
21602
21603 case PARITY:
21604 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21605 return false;
21606
21607 case NOT:
21608 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21609 *total = 0;
21610 else
21611 *total = COSTS_N_INSNS (1);
21612 return false;
21613
21614 case AND:
21615 if (CONST_INT_P (XEXP (x, 1)))
21616 {
21617 rtx left = XEXP (x, 0);
21618 rtx_code left_code = GET_CODE (left);
21619
21620 /* rotate-and-mask: 1 insn. */
21621 if ((left_code == ROTATE
21622 || left_code == ASHIFT
21623 || left_code == LSHIFTRT)
21624 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21625 {
21626 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21627 if (!CONST_INT_P (XEXP (left, 1)))
21628 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21629 *total += COSTS_N_INSNS (1);
21630 return true;
21631 }
21632
21633 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21634 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21635 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21636 || (val & 0xffff) == val
21637 || (val & 0xffff0000) == val
21638 || ((val & 0xffff) == 0 && mode == SImode))
21639 {
21640 *total = rtx_cost (left, mode, AND, 0, speed);
21641 *total += COSTS_N_INSNS (1);
21642 return true;
21643 }
21644
21645 /* 2 insns. */
21646 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21647 {
21648 *total = rtx_cost (left, mode, AND, 0, speed);
21649 *total += COSTS_N_INSNS (2);
21650 return true;
21651 }
21652 }
21653
21654 *total = COSTS_N_INSNS (1);
21655 return false;
21656
21657 case IOR:
21658 /* FIXME */
21659 *total = COSTS_N_INSNS (1);
21660 return true;
21661
21662 case CLZ:
21663 case XOR:
21664 case ZERO_EXTRACT:
21665 *total = COSTS_N_INSNS (1);
21666 return false;
21667
21668 case ASHIFT:
21669       /* The EXTSWSLI instruction combines a sign extend and a shift.  Don't
21670 	 count the sign extension and the shift separately within the insn. */
21671 if (TARGET_EXTSWSLI && mode == DImode
21672 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21673 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21674 {
21675 *total = 0;
21676 return false;
21677 }
21678 /* fall through */
21679
21680 case ASHIFTRT:
21681 case LSHIFTRT:
21682 case ROTATE:
21683 case ROTATERT:
21684 /* Handle mul_highpart. */
21685 if (outer_code == TRUNCATE
21686 && GET_CODE (XEXP (x, 0)) == MULT)
21687 {
21688 if (mode == DImode)
21689 *total = rs6000_cost->muldi;
21690 else
21691 *total = rs6000_cost->mulsi;
21692 return true;
21693 }
21694 else if (outer_code == AND)
21695 *total = 0;
21696 else
21697 *total = COSTS_N_INSNS (1);
21698 return false;
21699
21700 case SIGN_EXTEND:
21701 case ZERO_EXTEND:
21702 if (MEM_P (XEXP (x, 0)))
21703 *total = 0;
21704 else
21705 *total = COSTS_N_INSNS (1);
21706 return false;
21707
21708 case COMPARE:
21709 case NEG:
21710 case ABS:
21711 if (!FLOAT_MODE_P (mode))
21712 {
21713 *total = COSTS_N_INSNS (1);
21714 return false;
21715 }
21716 /* FALLTHRU */
21717
21718 case FLOAT:
21719 case UNSIGNED_FLOAT:
21720 case FIX:
21721 case UNSIGNED_FIX:
21722 case FLOAT_TRUNCATE:
21723 *total = rs6000_cost->fp;
21724 return false;
21725
21726 case FLOAT_EXTEND:
21727 if (mode == DFmode)
21728 *total = rs6000_cost->sfdf_convert;
21729 else
21730 *total = rs6000_cost->fp;
21731 return false;
21732
21733 case CALL:
21734 case IF_THEN_ELSE:
21735 if (!speed)
21736 {
21737 *total = COSTS_N_INSNS (1);
21738 return true;
21739 }
21740 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21741 {
21742 *total = rs6000_cost->fp;
21743 return false;
21744 }
21745 break;
21746
21747 case NE:
21748 case EQ:
21749 case GTU:
21750 case LTU:
21751 /* Carry bit requires mode == Pmode.
21752 	 NEG or PLUS already counted, so only add one. */
21753 if (mode == Pmode
21754 && (outer_code == NEG || outer_code == PLUS))
21755 {
21756 *total = COSTS_N_INSNS (1);
21757 return true;
21758 }
21759 /* FALLTHRU */
21760
21761 case GT:
21762 case LT:
21763 case UNORDERED:
21764 if (outer_code == SET)
21765 {
21766 if (XEXP (x, 1) == const0_rtx)
21767 {
21768 *total = COSTS_N_INSNS (2);
21769 return true;
21770 }
21771 else
21772 {
21773 *total = COSTS_N_INSNS (3);
21774 return false;
21775 }
21776 }
21777 /* CC COMPARE. */
21778 if (outer_code == COMPARE)
21779 {
21780 *total = 0;
21781 return true;
21782 }
21783 break;
21784
21785 case UNSPEC:
21786 if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
21787 {
21788 *total = 0;
21789 return true;
21790 }
21791 break;
21792
21793 default:
21794 break;
21795 }
21796
21797 return false;
21798 }
21799
21800 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21801
21802 static bool
21803 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21804 int opno, int *total, bool speed)
21805 {
21806 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21807
21808 fprintf (stderr,
21809 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21810 "opno = %d, total = %d, speed = %s, x:\n",
21811 ret ? "complete" : "scan inner",
21812 GET_MODE_NAME (mode),
21813 GET_RTX_NAME (outer_code),
21814 opno,
21815 *total,
21816 speed ? "true" : "false");
21817
21818 debug_rtx (x);
21819
21820 return ret;
21821 }
21822
21823 static int
21824 rs6000_insn_cost (rtx_insn *insn, bool speed)
21825 {
21826 if (recog_memoized (insn) < 0)
21827 return 0;
21828
21829 /* If we are optimizing for size, just use the length. */
21830 if (!speed)
21831 return get_attr_length (insn);
21832
21833 /* Use the cost if provided. */
21834 int cost = get_attr_cost (insn);
21835 if (cost > 0)
21836 return cost;
21837
21838 /* If the insn tells us how many insns there are, use that. Otherwise use
21839 the length/4. Adjust the insn length to remove the extra size that
21840 prefixed instructions take. */
21841 int n = get_attr_num_insns (insn);
21842 if (n == 0)
21843 {
21844 int length = get_attr_length (insn);
21845 if (get_attr_prefixed (insn) == PREFIXED_YES)
21846 {
21847 int adjust = 0;
21848 ADJUST_INSN_LENGTH (insn, adjust);
21849 length -= adjust;
21850 }
21851
21852 n = length / 4;
21853 }
21854
21855 enum attr_type type = get_attr_type (insn);
21856
21857 switch (type)
21858 {
21859 case TYPE_LOAD:
21860 case TYPE_FPLOAD:
21861 case TYPE_VECLOAD:
21862 cost = COSTS_N_INSNS (n + 1);
21863 break;
21864
21865 case TYPE_MUL:
21866 switch (get_attr_size (insn))
21867 {
21868 case SIZE_8:
21869 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21870 break;
21871 case SIZE_16:
21872 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21873 break;
21874 case SIZE_32:
21875 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21876 break;
21877 case SIZE_64:
21878 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21879 break;
21880 default:
21881 gcc_unreachable ();
21882 }
21883 break;
21884 case TYPE_DIV:
21885 switch (get_attr_size (insn))
21886 {
21887 case SIZE_32:
21888 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21889 break;
21890 case SIZE_64:
21891 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21892 break;
21893 default:
21894 gcc_unreachable ();
21895 }
21896 break;
21897
21898 case TYPE_FP:
21899 cost = n * rs6000_cost->fp;
21900 break;
21901 case TYPE_DMUL:
21902 cost = n * rs6000_cost->dmul;
21903 break;
21904 case TYPE_SDIV:
21905 cost = n * rs6000_cost->sdiv;
21906 break;
21907 case TYPE_DDIV:
21908 cost = n * rs6000_cost->ddiv;
21909 break;
21910
21911 case TYPE_SYNC:
21912 case TYPE_LOAD_L:
21913 case TYPE_MFCR:
21914 case TYPE_MFCRF:
21915 cost = COSTS_N_INSNS (n + 2);
21916 break;
21917
21918 default:
21919 cost = COSTS_N_INSNS (n);
21920 }
21921
21922 return cost;
21923 }
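
/* A minimal model of the default arm above, for illustration only.  It
   assumes the usual COSTS_N_INSNS (N) == (N) * 4 scaling from rtl.h: the
   insn's byte length, less any prefixed-instruction padding, divided by
   4 approximates the machine instruction count.  The helper name and
   parameters are hypothetical, not part of rs6000.c.  */

static int
insn_cost_from_length (int length_bytes, int prefix_padding)
{
  int n = (length_bytes - prefix_padding) / 4;	/* 4 bytes per insn */
  return n * 4;					/* COSTS_N_INSNS (n) */
}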
21924
21925 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21926
21927 static int
21928 rs6000_debug_address_cost (rtx x, machine_mode mode,
21929 addr_space_t as, bool speed)
21930 {
21931 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21932
21933 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21934 ret, speed ? "true" : "false");
21935 debug_rtx (x);
21936
21937 return ret;
21938 }
21939
21940
21941 /* A C expression returning the cost of moving data from a register of class
21942 CLASS1 to one of CLASS2. */
21943
21944 static int
21945 rs6000_register_move_cost (machine_mode mode,
21946 reg_class_t from, reg_class_t to)
21947 {
21948 int ret;
21949 reg_class_t rclass;
21950
21951 if (TARGET_DEBUG_COST)
21952 dbg_cost_ctrl++;
21953
21954 /* If we have VSX, we can easily move between FPR or Altivec registers,
21955 otherwise we can only easily move within classes.
21956 Do this first so we give best-case answers for union classes
21957 containing both gprs and vsx regs. */
21958 HARD_REG_SET to_vsx, from_vsx;
21959 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21960 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21961 if (!hard_reg_set_empty_p (to_vsx)
21962 && !hard_reg_set_empty_p (from_vsx)
21963 && (TARGET_VSX
21964 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21965 {
21966 int reg = FIRST_FPR_REGNO;
21967 if (TARGET_VSX
21968 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21969 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21970 reg = FIRST_ALTIVEC_REGNO;
21971 ret = 2 * hard_regno_nregs (reg, mode);
21972 }
21973
21974 /* Moves from/to GENERAL_REGS. */
21975 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21976 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21977 {
21978 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21979 {
21980 if (TARGET_DIRECT_MOVE)
21981 {
21982 /* Keep the cost for direct moves above that for within
21983 a register class even if the actual processor cost is
21984 comparable. We do this because a direct move insn
21985 can't be a nop, whereas with ideal register
21986 allocation a move within the same class might turn
21987 out to be a nop. */
21988 if (rs6000_tune == PROCESSOR_POWER9
21989 || rs6000_tune == PROCESSOR_POWER10)
21990 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21991 else
21992 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21993 /* SFmode requires a conversion when moving between gprs
21994 and vsx. */
21995 if (mode == SFmode)
21996 ret += 2;
21997 }
21998 else
21999 ret = (rs6000_memory_move_cost (mode, rclass, false)
22000 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22001 }
22002
22003 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22004 shift. */
22005 else if (rclass == CR_REGS)
22006 ret = 4;
22007
22008 /* For those processors that have slow LR/CTR moves, make them more
22009 	 expensive than memory in order to bias spills to memory.  */
22010 else if ((rs6000_tune == PROCESSOR_POWER6
22011 || rs6000_tune == PROCESSOR_POWER7
22012 || rs6000_tune == PROCESSOR_POWER8
22013 || rs6000_tune == PROCESSOR_POWER9)
22014 && reg_class_subset_p (rclass, SPECIAL_REGS))
22015 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22016
22017 else
22018 /* A move will cost one instruction per GPR moved. */
22019 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22020 }
22021
22022 /* Everything else has to go through GENERAL_REGS. */
22023 else
22024 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22025 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22026
22027 if (TARGET_DEBUG_COST)
22028 {
22029 if (dbg_cost_ctrl == 1)
22030 fprintf (stderr,
22031 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22032 ret, GET_MODE_NAME (mode), reg_class_names[from],
22033 reg_class_names[to]);
22034 dbg_cost_ctrl--;
22035 }
22036
22037 return ret;
22038 }
22039
22040 /* A C expression returning the cost of moving data of MODE from a register to
22041 or from memory. */
22042
22043 static int
22044 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22045 bool in ATTRIBUTE_UNUSED)
22046 {
22047 int ret;
22048
22049 if (TARGET_DEBUG_COST)
22050 dbg_cost_ctrl++;
22051
22052 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22053 ret = 4 * hard_regno_nregs (0, mode);
22054 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22055 || reg_classes_intersect_p (rclass, VSX_REGS)))
22056 ret = 4 * hard_regno_nregs (32, mode);
22057 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22058 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22059 else
22060 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22061
22062 if (TARGET_DEBUG_COST)
22063 {
22064 if (dbg_cost_ctrl == 1)
22065 fprintf (stderr,
22066 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22067 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22068 dbg_cost_ctrl--;
22069 }
22070
22071 return ret;
22072 }
22073
22074 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22075
22076 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22077 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22078 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22079 move cost between GENERAL_REGS and VSX_REGS low.
22080
22081 It might seem reasonable to use a union class. After all, if usage
22082 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22083 rather than memory. However, in cases where register pressure of
22084 both is high, like the cactus_adm spec test, allowing
22085 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22086 the first scheduling pass. This is partly due to an allocno of
22087 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22088 class, which gives too high a pressure for GENERAL_REGS and too low
22089 for VSX_REGS. So, force a choice of the subclass here.
22090
22091 The best class is also the union if GENERAL_REGS and VSX_REGS have
22092 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22093 allocno class, since trying to narrow down the class by regno mode
22094 is prone to error. For example, SImode is allowed in VSX regs and
22095    in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22096 it would be wrong to choose an allocno of GENERAL_REGS based on
22097 SImode. */
22098
22099 static reg_class_t
22100 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22101 reg_class_t allocno_class,
22102 reg_class_t best_class)
22103 {
22104 switch (allocno_class)
22105 {
22106 case GEN_OR_VSX_REGS:
22107 /* best_class must be a subset of allocno_class. */
22108 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22109 || best_class == GEN_OR_FLOAT_REGS
22110 || best_class == VSX_REGS
22111 || best_class == ALTIVEC_REGS
22112 || best_class == FLOAT_REGS
22113 || best_class == GENERAL_REGS
22114 || best_class == BASE_REGS);
22115 /* Use best_class but choose wider classes when copying from the
22116 wider class to best_class is cheap. This mimics IRA choice
22117 of allocno class. */
22118 if (best_class == BASE_REGS)
22119 return GENERAL_REGS;
22120 if (TARGET_VSX
22121 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
22122 return VSX_REGS;
22123 return best_class;
22124
22125 default:
22126 break;
22127 }
22128
22129 return allocno_class;
22130 }
22131
22132 /* Returns a code for a target-specific builtin that implements
22133 reciprocal of the function, or NULL_TREE if not available. */
22134
22135 static tree
22136 rs6000_builtin_reciprocal (tree fndecl)
22137 {
22138 switch (DECL_MD_FUNCTION_CODE (fndecl))
22139 {
22140 case VSX_BUILTIN_XVSQRTDP:
22141 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
22142 return NULL_TREE;
22143
22144 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
22145
22146 case VSX_BUILTIN_XVSQRTSP:
22147 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
22148 return NULL_TREE;
22149
22150 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
22151
22152 default:
22153 return NULL_TREE;
22154 }
22155 }
22156
22157 /* Load up a constant. If the mode is a vector mode, splat the value across
22158 all of the vector elements. */
22159
22160 static rtx
22161 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22162 {
22163 rtx reg;
22164
22165 if (mode == SFmode || mode == DFmode)
22166 {
22167 rtx d = const_double_from_real_value (dconst, mode);
22168 reg = force_reg (mode, d);
22169 }
22170 else if (mode == V4SFmode)
22171 {
22172 rtx d = const_double_from_real_value (dconst, SFmode);
22173 rtvec v = gen_rtvec (4, d, d, d, d);
22174 reg = gen_reg_rtx (mode);
22175 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22176 }
22177 else if (mode == V2DFmode)
22178 {
22179 rtx d = const_double_from_real_value (dconst, DFmode);
22180 rtvec v = gen_rtvec (2, d, d);
22181 reg = gen_reg_rtx (mode);
22182 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22183 }
22184 else
22185 gcc_unreachable ();
22186
22187 return reg;
22188 }
22189
22190 /* Generate an FMA instruction. */
22191
22192 static void
22193 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22194 {
22195 machine_mode mode = GET_MODE (target);
22196 rtx dst;
22197
22198 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22199 gcc_assert (dst != NULL);
22200
22201 if (dst != target)
22202 emit_move_insn (target, dst);
22203 }
22204
22205 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22206
22207 static void
22208 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22209 {
22210 machine_mode mode = GET_MODE (dst);
22211 rtx r;
22212
22213 /* This is a tad more complicated, since the fnma_optab is for
22214 a different expression: fma(-m1, m2, a), which is the same
22215 thing except in the case of signed zeros.
22216
22217      Fortunately we know that if FMA is supported, FNMSUB is
22218 also supported in the ISA. Just expand it directly. */
22219
22220 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22221
22222 r = gen_rtx_NEG (mode, a);
22223 r = gen_rtx_FMA (mode, m1, m2, r);
22224 r = gen_rtx_NEG (mode, r);
22225 emit_insn (gen_rtx_SET (dst, r));
22226 }
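
/* A minimal unfused model of the two expressions discussed above (the
   fused forms behave the same on this point): when m1 * m2 == a the two
   differ only in the sign of zero.  For m1 = 1.0, m2 = 2.0, a = 2.0,
   fnmsub_model returns -0.0 while fnma_model returns +0.0 under
   round-to-nearest, which is why the fnma_optab form cannot be used
   here.  These helpers are hypothetical, for illustration only.  */

static double
fnmsub_model (double m1, double m2, double a)
{
  return -(m1 * m2 - a);	/* FNMSUB: -fma (m1, m2, -a) */
}

static double
fnma_model (double m1, double m2, double a)
{
  return -m1 * m2 + a;		/* fnma_optab: fma (-m1, m2, a) */
}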
22227
22228 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22229 add a reg_note saying that this was a division. Support both scalar and
22230 vector divide. Assumes no trapping math and finite arguments. */
22231
22232 void
22233 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22234 {
22235 machine_mode mode = GET_MODE (dst);
22236 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22237 int i;
22238
22239 /* Low precision estimates guarantee 5 bits of accuracy. High
22240 precision estimates guarantee 14 bits of accuracy. SFmode
22241 requires 23 bits of accuracy. DFmode requires 52 bits of
22242 accuracy. Each pass at least doubles the accuracy, leading
22243 to the following. */
22244 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22245 if (mode == DFmode || mode == V2DFmode)
22246 passes++;
22247
22248 enum insn_code code = optab_handler (smul_optab, mode);
22249 insn_gen_fn gen_mul = GEN_FCN (code);
22250
22251 gcc_assert (code != CODE_FOR_nothing);
22252
22253 one = rs6000_load_constant_and_splat (mode, dconst1);
22254
22255 /* x0 = 1./d estimate */
22256 x0 = gen_reg_rtx (mode);
22257 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22258 UNSPEC_FRES)));
22259
22260 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22261 if (passes > 1) {
22262
22263 /* e0 = 1. - d * x0 */
22264 e0 = gen_reg_rtx (mode);
22265 rs6000_emit_nmsub (e0, d, x0, one);
22266
22267 /* x1 = x0 + e0 * x0 */
22268 x1 = gen_reg_rtx (mode);
22269 rs6000_emit_madd (x1, e0, x0, x0);
22270
22271 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22272 ++i, xprev = xnext, eprev = enext) {
22273
22274 /* enext = eprev * eprev */
22275 enext = gen_reg_rtx (mode);
22276 emit_insn (gen_mul (enext, eprev, eprev));
22277
22278 /* xnext = xprev + enext * xprev */
22279 xnext = gen_reg_rtx (mode);
22280 rs6000_emit_madd (xnext, enext, xprev, xprev);
22281 }
22282
22283 } else
22284 xprev = x0;
22285
22286 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22287
22288 /* u = n * xprev */
22289 u = gen_reg_rtx (mode);
22290 emit_insn (gen_mul (u, n, xprev));
22291
22292 /* v = n - (d * u) */
22293 v = gen_reg_rtx (mode);
22294 rs6000_emit_nmsub (v, d, u, n);
22295
22296 /* dst = (v * xprev) + u */
22297 rs6000_emit_madd (dst, v, xprev, u);
22298
22299 if (note_p)
22300 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22301 }
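
/* A minimal scalar model of the sequence emitted above, where X0 stands
   in for the hardware fres/xsredp reciprocal estimate.  Each of the
   first PASSES - 1 refinements computes x <- x + (1 - d*x) * x; the
   emitted code instead squares the previous error term (enext = eprev *
   eprev), which is algebraically identical since 1 - d*x1 = e0*e0.
   The helper is hypothetical, for illustration only.  */

static double
swdiv_model (double n, double d, double x0, int passes)
{
  double x = x0;			/* x0 ~= 1/d */
  for (int i = 0; i < passes - 1; i++)
    {
      double e = 1.0 - d * x;		/* one FNMSUB */
      x = x + e * x;			/* one FMADD */
    }
  /* The last iteration folds in the numerator: n/d ~= u + (n - d*u)*x.  */
  double u = n * x;
  double v = n - d * u;			/* FNMSUB */
  return v * x + u;			/* FMADD */
}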
22302
22303 /* Goldschmidt's Algorithm for single/double-precision floating point
22304 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22305
22306 void
22307 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22308 {
22309 machine_mode mode = GET_MODE (src);
22310 rtx e = gen_reg_rtx (mode);
22311 rtx g = gen_reg_rtx (mode);
22312 rtx h = gen_reg_rtx (mode);
22313
22314 /* Low precision estimates guarantee 5 bits of accuracy. High
22315 precision estimates guarantee 14 bits of accuracy. SFmode
22316 requires 23 bits of accuracy. DFmode requires 52 bits of
22317 accuracy. Each pass at least doubles the accuracy, leading
22318 to the following. */
22319 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22320 if (mode == DFmode || mode == V2DFmode)
22321 passes++;
22322
22323 int i;
22324 rtx mhalf;
22325 enum insn_code code = optab_handler (smul_optab, mode);
22326 insn_gen_fn gen_mul = GEN_FCN (code);
22327
22328 gcc_assert (code != CODE_FOR_nothing);
22329
22330 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22331
22332 /* e = rsqrt estimate */
22333 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22334 UNSPEC_RSQRT)));
22335
22336 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22337 if (!recip)
22338 {
22339 rtx zero = force_reg (mode, CONST0_RTX (mode));
22340
22341 if (mode == SFmode)
22342 {
22343 rtx target = emit_conditional_move (e, GT, src, zero, mode,
22344 e, zero, mode, 0);
22345 if (target != e)
22346 emit_move_insn (e, target);
22347 }
22348 else
22349 {
22350 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22351 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22352 }
22353 }
22354
22355 /* g = sqrt estimate. */
22356 emit_insn (gen_mul (g, e, src));
22357 /* h = 1/(2*sqrt) estimate. */
22358 emit_insn (gen_mul (h, e, mhalf));
22359
22360 if (recip)
22361 {
22362 if (passes == 1)
22363 {
22364 rtx t = gen_reg_rtx (mode);
22365 rs6000_emit_nmsub (t, g, h, mhalf);
22366 /* Apply correction directly to 1/rsqrt estimate. */
22367 rs6000_emit_madd (dst, e, t, e);
22368 }
22369 else
22370 {
22371 for (i = 0; i < passes; i++)
22372 {
22373 rtx t1 = gen_reg_rtx (mode);
22374 rtx g1 = gen_reg_rtx (mode);
22375 rtx h1 = gen_reg_rtx (mode);
22376
22377 rs6000_emit_nmsub (t1, g, h, mhalf);
22378 rs6000_emit_madd (g1, g, t1, g);
22379 rs6000_emit_madd (h1, h, t1, h);
22380
22381 g = g1;
22382 h = h1;
22383 }
22384 /* Multiply by 2 for 1/rsqrt. */
22385 emit_insn (gen_add3_insn (dst, h, h));
22386 }
22387 }
22388 else
22389 {
22390 rtx t = gen_reg_rtx (mode);
22391 rs6000_emit_nmsub (t, g, h, mhalf);
22392 rs6000_emit_madd (dst, g, t, g);
22393 }
22394
22395 return;
22396 }
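
/* A minimal scalar model of the Goldschmidt refinement step emitted
   above, where E stands in for the hardware rsqrt estimate.  Each step
   amounts to one Newton-Raphson iteration: it drives g toward
   sqrt(src) and h toward 1/(2*sqrt(src)), and 2*h then gives rsqrt.
   (The passes == 1 recip case above applies the correction to E
   directly instead.)  The helper is hypothetical, for illustration.  */

static double
swsqrt_model (double src, double e, int steps, int recip)
{
  double g = e * src;			/* g ~= sqrt(src) */
  double h = e * 0.5;			/* h ~= 1/(2*sqrt(src)) */
  for (int i = 0; i < steps; i++)
    {
      double t = 0.5 - g * h;		/* residual; one FNMSUB */
      g = g + g * t;			/* one FMADD */
      h = h + h * t;			/* one FMADD */
    }
  return recip ? h + h : g;
}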
22397
22398 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22399 (Power7) targets. DST is the target, and SRC is the argument operand. */
22400
22401 void
22402 rs6000_emit_popcount (rtx dst, rtx src)
22403 {
22404 machine_mode mode = GET_MODE (dst);
22405 rtx tmp1, tmp2;
22406
22407 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22408 if (TARGET_POPCNTD)
22409 {
22410 if (mode == SImode)
22411 emit_insn (gen_popcntdsi2 (dst, src));
22412 else
22413 emit_insn (gen_popcntddi2 (dst, src));
22414 return;
22415 }
22416
22417 tmp1 = gen_reg_rtx (mode);
22418
22419 if (mode == SImode)
22420 {
22421 emit_insn (gen_popcntbsi2 (tmp1, src));
22422 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22423 NULL_RTX, 0);
22424 tmp2 = force_reg (SImode, tmp2);
22425 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22426 }
22427 else
22428 {
22429 emit_insn (gen_popcntbdi2 (tmp1, src));
22430 tmp2 = expand_mult (DImode, tmp1,
22431 GEN_INT ((HOST_WIDE_INT)
22432 0x01010101 << 32 | 0x01010101),
22433 NULL_RTX, 0);
22434 tmp2 = force_reg (DImode, tmp2);
22435 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22436 }
22437 }
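
/* A minimal scalar model of the non-popcntd SImode path above.  The
   first three steps stand in for the popcntb instruction, which leaves
   the population count of each byte in that byte; the 0x01010101
   multiply then accumulates every byte count into the most significant
   byte, which the final shift extracts.  The helper is hypothetical,
   for illustration only.  */

static unsigned int
popcount_model (unsigned int src)
{
  unsigned int b = src;
  b = b - ((b >> 1) & 0x55555555u);			/* 2-bit counts */
  b = (b & 0x33333333u) + ((b >> 2) & 0x33333333u);	/* 4-bit counts */
  b = (b + (b >> 4)) & 0x0f0f0f0fu;	/* per-byte counts, as popcntb */
  return (b * 0x01010101u) >> 24;	/* sum of all bytes, shifted down */
}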
22438
22439
22440 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22441 target, and SRC is the argument operand. */
22442
22443 void
22444 rs6000_emit_parity (rtx dst, rtx src)
22445 {
22446 machine_mode mode = GET_MODE (dst);
22447 rtx tmp;
22448
22449 tmp = gen_reg_rtx (mode);
22450
22451 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22452 if (TARGET_CMPB)
22453 {
22454 if (mode == SImode)
22455 {
22456 emit_insn (gen_popcntbsi2 (tmp, src));
22457 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22458 }
22459 else
22460 {
22461 emit_insn (gen_popcntbdi2 (tmp, src));
22462 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22463 }
22464 return;
22465 }
22466
22467 if (mode == SImode)
22468 {
22469 /* Is mult+shift >= shift+xor+shift+xor? */
22470 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22471 {
22472 rtx tmp1, tmp2, tmp3, tmp4;
22473
22474 tmp1 = gen_reg_rtx (SImode);
22475 emit_insn (gen_popcntbsi2 (tmp1, src));
22476
22477 tmp2 = gen_reg_rtx (SImode);
22478 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22479 tmp3 = gen_reg_rtx (SImode);
22480 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22481
22482 tmp4 = gen_reg_rtx (SImode);
22483 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22484 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22485 }
22486 else
22487 rs6000_emit_popcount (tmp, src);
22488 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22489 }
22490 else
22491 {
22492 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22493 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22494 {
22495 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22496
22497 tmp1 = gen_reg_rtx (DImode);
22498 emit_insn (gen_popcntbdi2 (tmp1, src));
22499
22500 tmp2 = gen_reg_rtx (DImode);
22501 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22502 tmp3 = gen_reg_rtx (DImode);
22503 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22504
22505 tmp4 = gen_reg_rtx (DImode);
22506 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22507 tmp5 = gen_reg_rtx (DImode);
22508 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22509
22510 tmp6 = gen_reg_rtx (DImode);
22511 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22512 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22513 }
22514 else
22515 rs6000_emit_popcount (tmp, src);
22516 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22517 }
22518 }
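
/* A minimal scalar model of the SImode shift+xor path above, reusing
   the popcntb stand-in from the popcount sketch: xor-folding the
   per-byte counts leaves the parity of the whole word in bit 0.  The
   helper is hypothetical, for illustration only.  */

static unsigned int
parity_model (unsigned int src)
{
  unsigned int b = src;
  b = b - ((b >> 1) & 0x55555555u);
  b = (b & 0x33333333u) + ((b >> 2) & 0x33333333u);
  b = (b + (b >> 4)) & 0x0f0f0f0fu;	/* per-byte counts, as popcntb */
  b ^= b >> 16;				/* fold the byte counts together */
  b ^= b >> 8;
  return b & 1;				/* parity of all 32 bits */
}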
22519
22520 /* Expand an Altivec constant permutation for little endian mode.
22521 OP0 and OP1 are the input vectors and TARGET is the output vector.
22522 SEL specifies the constant permutation vector.
22523
22524 There are two issues: First, the two input operands must be
22525 swapped so that together they form a double-wide array in LE
22526 order. Second, the vperm instruction has surprising behavior
22527 in LE mode: it interprets the elements of the source vectors
22528 in BE mode ("left to right") and interprets the elements of
22529 the destination vector in LE mode ("right to left"). To
22530 correct for this, we must subtract each element of the permute
22531 control vector from 31.
22532
22533 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22534 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22535 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22536 serve as the permute control vector. Then, in BE mode,
22537
22538 vperm 9,10,11,12
22539
22540 places the desired result in vr9. However, in LE mode the
22541 vector contents will be
22542
22543 vr10 = 00000003 00000002 00000001 00000000
22544 vr11 = 00000007 00000006 00000005 00000004
22545
22546 The result of the vperm using the same permute control vector is
22547
22548 vr9 = 05000000 07000000 01000000 03000000
22549
22550 That is, the leftmost 4 bytes of vr10 are interpreted as the
22551 source for the rightmost 4 bytes of vr9, and so on.
22552
22553 If we change the permute control vector to
22554
22555    vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22556
22557 and issue
22558
22559 vperm 9,11,10,12
22560
22561 we get the desired
22562
22563 vr9 = 00000006 00000004 00000002 00000000. */
22564
22565 static void
22566 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22567 const vec_perm_indices &sel)
22568 {
22569 unsigned int i;
22570 rtx perm[16];
22571 rtx constv, unspec;
22572
22573 /* Unpack and adjust the constant selector. */
22574 for (i = 0; i < 16; ++i)
22575 {
22576 unsigned int elt = 31 - (sel[i] & 31);
22577 perm[i] = GEN_INT (elt);
22578 }
22579
22580 /* Expand to a permute, swapping the inputs and using the
22581 adjusted selector. */
22582 if (!REG_P (op0))
22583 op0 = force_reg (V16QImode, op0);
22584 if (!REG_P (op1))
22585 op1 = force_reg (V16QImode, op1);
22586
22587 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22588 constv = force_reg (V16QImode, constv);
22589 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22590 UNSPEC_VPERM);
22591 if (!REG_P (target))
22592 {
22593 rtx tmp = gen_reg_rtx (V16QImode);
22594 emit_move_insn (tmp, unspec);
22595 unspec = tmp;
22596 }
22597
22598 emit_move_insn (target, unspec);
22599 }
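
/* A minimal model of the selector adjustment above, as a hypothetical
   helper: every element of the constant selector is replaced by 31
   minus it, and the two inputs are then handed to vperm in swapped
   order (op1 before op0), exactly as in the UNSPEC_VPERM rtvec.  */

static void
le_adjust_vperm_sel (const unsigned char sel[16], unsigned char adjusted[16])
{
  for (int i = 0; i < 16; i++)
    adjusted[i] = 31 - (sel[i] & 31);
}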
22600
22601 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22602 permute control vector. But here it's not a constant, so we must
22603 generate a vector NAND or NOR to do the adjustment. */
22604
22605 void
22606 altivec_expand_vec_perm_le (rtx operands[4])
22607 {
22608 rtx notx, iorx, unspec;
22609 rtx target = operands[0];
22610 rtx op0 = operands[1];
22611 rtx op1 = operands[2];
22612 rtx sel = operands[3];
22613 rtx tmp = target;
22614 rtx norreg = gen_reg_rtx (V16QImode);
22615 machine_mode mode = GET_MODE (target);
22616
22617 /* Get everything in regs so the pattern matches. */
22618 if (!REG_P (op0))
22619 op0 = force_reg (mode, op0);
22620 if (!REG_P (op1))
22621 op1 = force_reg (mode, op1);
22622 if (!REG_P (sel))
22623 sel = force_reg (V16QImode, sel);
22624 if (!REG_P (target))
22625 tmp = gen_reg_rtx (mode);
22626
22627 if (TARGET_P9_VECTOR)
22628 {
22629 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22630 UNSPEC_VPERMR);
22631 }
22632 else
22633 {
22634 /* Invert the selector with a VNAND if available, else a VNOR.
22635 The VNAND is preferred for future fusion opportunities. */
22636 notx = gen_rtx_NOT (V16QImode, sel);
22637 iorx = (TARGET_P8_VECTOR
22638 ? gen_rtx_IOR (V16QImode, notx, notx)
22639 : gen_rtx_AND (V16QImode, notx, notx));
22640 emit_insn (gen_rtx_SET (norreg, iorx));
22641
22642 /* Permute with operands reversed and adjusted selector. */
22643 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22644 UNSPEC_VPERM);
22645 }
22646
22647 /* Copy into target, possibly by way of a register. */
22648 if (!REG_P (target))
22649 {
22650 emit_move_insn (tmp, unspec);
22651 unspec = tmp;
22652 }
22653
22654 emit_move_insn (target, unspec);
22655 }
22656
22657 /* Expand an Altivec constant permutation. Return true if we match
22658 an efficient implementation; false to fall back to VPERM.
22659
22660 OP0 and OP1 are the input vectors and TARGET is the output vector.
22661 SEL specifies the constant permutation vector. */
22662
22663 static bool
22664 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22665 const vec_perm_indices &sel)
22666 {
22667 struct altivec_perm_insn {
22668 HOST_WIDE_INT mask;
22669 enum insn_code impl;
22670 unsigned char perm[16];
22671 };
22672 static const struct altivec_perm_insn patterns[] = {
22673 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
22674 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
22675 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
22676 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
22677 { OPTION_MASK_ALTIVEC,
22678 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22679 : CODE_FOR_altivec_vmrglb_direct),
22680 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
22681 { OPTION_MASK_ALTIVEC,
22682 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22683 : CODE_FOR_altivec_vmrglh_direct),
22684 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22685 { OPTION_MASK_ALTIVEC,
22686 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22687 : CODE_FOR_altivec_vmrglw_direct),
22688 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22689 { OPTION_MASK_ALTIVEC,
22690 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22691 : CODE_FOR_altivec_vmrghb_direct),
22692 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22693 { OPTION_MASK_ALTIVEC,
22694 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22695 : CODE_FOR_altivec_vmrghh_direct),
22696 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22697 { OPTION_MASK_ALTIVEC,
22698 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22699 : CODE_FOR_altivec_vmrghw_direct),
22700 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22701 { OPTION_MASK_P8_VECTOR,
22702 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22703 : CODE_FOR_p8_vmrgow_v4sf_direct),
22704 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22705 { OPTION_MASK_P8_VECTOR,
22706 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22707 : CODE_FOR_p8_vmrgew_v4sf_direct),
22708 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22709 };
22710
22711 unsigned int i, j, elt, which;
22712 unsigned char perm[16];
22713 rtx x;
22714 bool one_vec;
22715
22716 /* Unpack the constant selector. */
22717 for (i = which = 0; i < 16; ++i)
22718 {
22719 elt = sel[i] & 31;
22720 which |= (elt < 16 ? 1 : 2);
22721 perm[i] = elt;
22722 }
22723
22724 /* Simplify the constant selector based on operands. */
22725 switch (which)
22726 {
22727 default:
22728 gcc_unreachable ();
22729
22730 case 3:
22731 one_vec = false;
22732 if (!rtx_equal_p (op0, op1))
22733 break;
22734 /* FALLTHRU */
22735
22736 case 2:
22737 for (i = 0; i < 16; ++i)
22738 perm[i] &= 15;
22739 op0 = op1;
22740 one_vec = true;
22741 break;
22742
22743 case 1:
22744 op1 = op0;
22745 one_vec = true;
22746 break;
22747 }
22748
22749 /* Look for splat patterns. */
22750 if (one_vec)
22751 {
22752 elt = perm[0];
22753
22754 for (i = 0; i < 16; ++i)
22755 if (perm[i] != elt)
22756 break;
22757 if (i == 16)
22758 {
22759 if (!BYTES_BIG_ENDIAN)
22760 elt = 15 - elt;
22761 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22762 return true;
22763 }
22764
22765 if (elt % 2 == 0)
22766 {
22767 for (i = 0; i < 16; i += 2)
22768 if (perm[i] != elt || perm[i + 1] != elt + 1)
22769 break;
22770 if (i == 16)
22771 {
22772 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22773 x = gen_reg_rtx (V8HImode);
22774 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22775 GEN_INT (field)));
22776 emit_move_insn (target, gen_lowpart (V16QImode, x));
22777 return true;
22778 }
22779 }
22780
22781 if (elt % 4 == 0)
22782 {
22783 for (i = 0; i < 16; i += 4)
22784 if (perm[i] != elt
22785 || perm[i + 1] != elt + 1
22786 || perm[i + 2] != elt + 2
22787 || perm[i + 3] != elt + 3)
22788 break;
22789 if (i == 16)
22790 {
22791 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22792 x = gen_reg_rtx (V4SImode);
22793 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22794 GEN_INT (field)));
22795 emit_move_insn (target, gen_lowpart (V16QImode, x));
22796 return true;
22797 }
22798 }
22799 }
22800
22801 /* Look for merge and pack patterns. */
22802 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22803 {
22804 bool swapped;
22805
22806 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22807 continue;
22808
22809 elt = patterns[j].perm[0];
22810 if (perm[0] == elt)
22811 swapped = false;
22812 else if (perm[0] == elt + 16)
22813 swapped = true;
22814 else
22815 continue;
22816 for (i = 1; i < 16; ++i)
22817 {
22818 elt = patterns[j].perm[i];
22819 if (swapped)
22820 elt = (elt >= 16 ? elt - 16 : elt + 16);
22821 else if (one_vec && elt >= 16)
22822 elt -= 16;
22823 if (perm[i] != elt)
22824 break;
22825 }
22826 if (i == 16)
22827 {
22828 enum insn_code icode = patterns[j].impl;
22829 machine_mode omode = insn_data[icode].operand[0].mode;
22830 machine_mode imode = insn_data[icode].operand[1].mode;
22831
22832 /* For little-endian, don't use vpkuwum and vpkuhum if the
22833 underlying vector type is not V4SI and V8HI, respectively.
22834 For example, using vpkuwum with a V8HI picks up the even
22835 halfwords (BE numbering) when the even halfwords (LE
22836 numbering) are what we need. */
22837 if (!BYTES_BIG_ENDIAN
22838 && icode == CODE_FOR_altivec_vpkuwum_direct
22839 && ((REG_P (op0)
22840 && GET_MODE (op0) != V4SImode)
22841 || (SUBREG_P (op0)
22842 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22843 continue;
22844 if (!BYTES_BIG_ENDIAN
22845 && icode == CODE_FOR_altivec_vpkuhum_direct
22846 && ((REG_P (op0)
22847 && GET_MODE (op0) != V8HImode)
22848 || (SUBREG_P (op0)
22849 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22850 continue;
22851
22852 /* For little-endian, the two input operands must be swapped
22853 (or swapped back) to ensure proper right-to-left numbering
22854 from 0 to 2N-1. */
22855 if (swapped ^ !BYTES_BIG_ENDIAN)
22856 std::swap (op0, op1);
22857 if (imode != V16QImode)
22858 {
22859 op0 = gen_lowpart (imode, op0);
22860 op1 = gen_lowpart (imode, op1);
22861 }
22862 if (omode == V16QImode)
22863 x = target;
22864 else
22865 x = gen_reg_rtx (omode);
22866 emit_insn (GEN_FCN (icode) (x, op0, op1));
22867 if (omode != V16QImode)
22868 emit_move_insn (target, gen_lowpart (V16QImode, x));
22869 return true;
22870 }
22871 }
22872
22873 if (!BYTES_BIG_ENDIAN)
22874 {
22875 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22876 return true;
22877 }
22878
22879 return false;
22880 }
22881
22882 /* Expand a VSX Permute Doubleword constant permutation.
22883 Return true if we match an efficient implementation. */
22884
22885 static bool
22886 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22887 unsigned char perm0, unsigned char perm1)
22888 {
22889 rtx x;
22890
22891 /* If both selectors come from the same operand, fold to single op. */
22892 if ((perm0 & 2) == (perm1 & 2))
22893 {
22894 if (perm0 & 2)
22895 op0 = op1;
22896 else
22897 op1 = op0;
22898 }
22899 /* If both operands are equal, fold to simpler permutation. */
22900 if (rtx_equal_p (op0, op1))
22901 {
22902 perm0 = perm0 & 1;
22903 perm1 = (perm1 & 1) + 2;
22904 }
22905 /* If the first selector comes from the second operand, swap. */
22906 else if (perm0 & 2)
22907 {
22908 if (perm1 & 2)
22909 return false;
22910 perm0 -= 2;
22911 perm1 += 2;
22912 std::swap (op0, op1);
22913 }
22914 /* If the second selector does not come from the second operand, fail. */
22915 else if ((perm1 & 2) == 0)
22916 return false;
22917
22918 /* Success! */
22919 if (target != NULL)
22920 {
22921 machine_mode vmode, dmode;
22922 rtvec v;
22923
22924 vmode = GET_MODE (target);
22925 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22926 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22927 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22928 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22929 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22930 emit_insn (gen_rtx_SET (target, x));
22931 }
22932 return true;
22933 }
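
/* A minimal scalar model of the selection implemented above, as a
   hypothetical helper: PERM0 and PERM1 index the four doublewords of
   the concatenation of OP0 and OP1, so bit 1 of each index picks the
   operand and bit 0 picks the doubleword within it.  The rewrites above
   canonicalize the pair so that PERM0 selects from OP0 and PERM1 from
   OP1.  */

static void
xxpermdi_model (const unsigned long long op0[2],
		const unsigned long long op1[2],
		unsigned int perm0, unsigned int perm1,
		unsigned long long out[2])
{
  const unsigned long long cat[4] = { op0[0], op0[1], op1[0], op1[1] };
  out[0] = cat[perm0 & 3];
  out[1] = cat[perm1 & 3];
}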
22934
22935 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22936
22937 static bool
22938 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22939 rtx op1, const vec_perm_indices &sel)
22940 {
22941 bool testing_p = !target;
22942
22943 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22944 if (TARGET_ALTIVEC && testing_p)
22945 return true;
22946
22947 if (op0)
22948 {
22949 rtx nop0 = force_reg (vmode, op0);
22950 if (op0 == op1)
22951 op1 = nop0;
22952 op0 = nop0;
22953 }
22954 if (op1)
22955 op1 = force_reg (vmode, op1);
22956
22957 /* Check for ps_merge* or xxpermdi insns. */
22958 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22959 {
22960 if (testing_p)
22961 {
22962 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22963 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22964 }
22965 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22966 return true;
22967 }
22968
22969 if (TARGET_ALTIVEC)
22970 {
22971 /* Force the target-independent code to lower to V16QImode. */
22972 if (vmode != V16QImode)
22973 return false;
22974 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22975 return true;
22976 }
22977
22978 return false;
22979 }
22980
22981 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22982 OP0 and OP1 are the input vectors and TARGET is the output vector.
22983 PERM specifies the constant permutation vector. */
22984
22985 static void
22986 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22987 machine_mode vmode, const vec_perm_builder &perm)
22988 {
22989 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22990 if (x != target)
22991 emit_move_insn (target, x);
22992 }
22993
22994 /* Expand an extract even operation. */
22995
22996 void
22997 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22998 {
22999 machine_mode vmode = GET_MODE (target);
23000 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23001 vec_perm_builder perm (nelt, nelt, 1);
23002
23003 for (i = 0; i < nelt; i++)
23004 perm.quick_push (i * 2);
23005
23006 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23007 }
23008
23009 /* Expand a vector interleave operation. */
23010
23011 void
23012 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23013 {
23014 machine_mode vmode = GET_MODE (target);
23015 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23016 vec_perm_builder perm (nelt, nelt, 1);
23017
23018 high = (highp ? 0 : nelt / 2);
23019 for (i = 0; i < nelt / 2; i++)
23020 {
23021 perm.quick_push (i + high);
23022 perm.quick_push (i + nelt + high);
23023 }
23024
23025 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23026 }
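
/* Minimal models of the selectors built by the two expanders above, as
   hypothetical helpers: extract-even keeps the even-numbered elements
   of the concatenated inputs, and interleave pairs element I of OP0
   with element I of OP1, starting from the high or low half.  For
   NELT == 4, interleave yields {0, 4, 1, 5} when HIGHP and
   {2, 6, 3, 7} otherwise.  */

static void
extract_even_sel (unsigned int nelt, unsigned int *sel)
{
  for (unsigned int i = 0; i < nelt; i++)
    sel[i] = i * 2;
}

static void
interleave_sel (unsigned int nelt, int highp, unsigned int *sel)
{
  unsigned int base = highp ? 0 : nelt / 2;
  for (unsigned int i = 0, k = 0; i < nelt / 2; i++)
    {
      sel[k++] = i + base;		/* element from op0 */
      sel[k++] = i + nelt + base;	/* element from op1 */
    }
}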
23027
23028 /* Scale a V2DF vector SRC by two raised to the power SCALE, placing the result in TGT. */
23029 void
23030 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23031 {
23032 HOST_WIDE_INT hwi_scale (scale);
23033 REAL_VALUE_TYPE r_pow;
23034 rtvec v = rtvec_alloc (2);
23035 rtx elt;
23036 rtx scale_vec = gen_reg_rtx (V2DFmode);
23037 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23038 elt = const_double_from_real_value (r_pow, DFmode);
23039 RTVEC_ELT (v, 0) = elt;
23040 RTVEC_ELT (v, 1) = elt;
23041 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23042 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23043 }
23044
23045 /* Return an RTX representing where to find the function value of a
23046 function returning MODE. */
23047 static rtx
23048 rs6000_complex_function_value (machine_mode mode)
23049 {
23050 unsigned int regno;
23051 rtx r1, r2;
23052 machine_mode inner = GET_MODE_INNER (mode);
23053 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23054
23055 if (TARGET_FLOAT128_TYPE
23056 && (mode == KCmode
23057 || (mode == TCmode && TARGET_IEEEQUAD)))
23058 regno = ALTIVEC_ARG_RETURN;
23059
23060 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23061 regno = FP_ARG_RETURN;
23062
23063 else
23064 {
23065 regno = GP_ARG_RETURN;
23066
23067 /* 32-bit is OK since it'll go in r3/r4. */
23068 if (TARGET_32BIT && inner_bytes >= 4)
23069 return gen_rtx_REG (mode, regno);
23070 }
23071
23072 if (inner_bytes >= 8)
23073 return gen_rtx_REG (mode, regno);
23074
23075 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23076 const0_rtx);
23077 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23078 GEN_INT (inner_bytes));
23079 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23080 }
23081
23082 /* Return an rtx describing a return value of MODE as a PARALLEL
23083 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23084 stride REG_STRIDE. */
23085
23086 static rtx
23087 rs6000_parallel_return (machine_mode mode,
23088 int n_elts, machine_mode elt_mode,
23089 unsigned int regno, unsigned int reg_stride)
23090 {
23091 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23092
23093 int i;
23094 for (i = 0; i < n_elts; i++)
23095 {
23096 rtx r = gen_rtx_REG (elt_mode, regno);
23097 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23098 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23099 regno += reg_stride;
23100 }
23101
23102 return par;
23103 }
23104
23105 /* Target hook for TARGET_FUNCTION_VALUE.
23106
23107 An integer value is in r3 and a floating-point value is in fp1,
23108 unless -msoft-float. */
23109
23110 static rtx
23111 rs6000_function_value (const_tree valtype,
23112 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23113 bool outgoing ATTRIBUTE_UNUSED)
23114 {
23115 machine_mode mode;
23116 unsigned int regno;
23117 machine_mode elt_mode;
23118 int n_elts;
23119
23120 /* Special handling for structs in darwin64. */
23121 if (TARGET_MACHO
23122 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23123 {
23124 CUMULATIVE_ARGS valcum;
23125 rtx valret;
23126
23127 valcum.words = 0;
23128 valcum.fregno = FP_ARG_MIN_REG;
23129 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23130 /* Do a trial code generation as if this were going to be passed as
23131 an argument; if any part goes in memory, we return NULL. */
23132 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23133 if (valret)
23134 return valret;
23135 /* Otherwise fall through to standard ABI rules. */
23136 }
23137
23138 mode = TYPE_MODE (valtype);
23139
23140 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23141 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23142 {
23143 int first_reg, n_regs;
23144
23145 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23146 {
23147 /* _Decimal128 must use even/odd register pairs. */
23148 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23149 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23150 }
23151 else
23152 {
23153 first_reg = ALTIVEC_ARG_RETURN;
23154 n_regs = 1;
23155 }
23156
23157 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23158 }
23159
23160   /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
23161 if (TARGET_32BIT && TARGET_POWERPC64)
23162 switch (mode)
23163 {
23164 default:
23165 break;
23166 case E_DImode:
23167 case E_SCmode:
23168 case E_DCmode:
23169 case E_TCmode:
23170 int count = GET_MODE_SIZE (mode) / 4;
23171 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23172 }
23173
23174 if ((INTEGRAL_TYPE_P (valtype)
23175 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23176 || POINTER_TYPE_P (valtype))
23177 mode = TARGET_32BIT ? SImode : DImode;
23178
23179 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23180 /* _Decimal128 must use an even/odd register pair. */
23181 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23182 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23183 && !FLOAT128_VECTOR_P (mode))
23184 regno = FP_ARG_RETURN;
23185 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23186 && targetm.calls.split_complex_arg)
23187 return rs6000_complex_function_value (mode);
23188 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23189 return register is used in both cases, and we won't see V2DImode/V2DFmode
23190 for pure altivec, combine the two cases. */
23191 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23192 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23193 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23194 regno = ALTIVEC_ARG_RETURN;
23195 else
23196 regno = GP_ARG_RETURN;
23197
23198 return gen_rtx_REG (mode, regno);
23199 }
23200
23201 /* Define how to find the value returned by a library function
23202 assuming the value has mode MODE. */
23203 rtx
23204 rs6000_libcall_value (machine_mode mode)
23205 {
23206 unsigned int regno;
23207
23208   /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
23209 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23210 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23211
23212 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23213 /* _Decimal128 must use an even/odd register pair. */
23214 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23215 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23216 regno = FP_ARG_RETURN;
23217 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23218 return register is used in both cases, and we won't see V2DImode/V2DFmode
23219 for pure altivec, combine the two cases. */
23220 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23221 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23222 regno = ALTIVEC_ARG_RETURN;
23223 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23224 return rs6000_complex_function_value (mode);
23225 else
23226 regno = GP_ARG_RETURN;
23227
23228 return gen_rtx_REG (mode, regno);
23229 }
23230
23231 /* Compute register pressure classes. We implement the target hook to avoid
23232 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23233    lead to incorrect estimates of the number of available registers and therefore
23234 increased register pressure/spill. */
23235 static int
23236 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23237 {
23238 int n;
23239
23240 n = 0;
23241 pressure_classes[n++] = GENERAL_REGS;
23242 if (TARGET_VSX)
23243 pressure_classes[n++] = VSX_REGS;
23244 else
23245 {
23246 if (TARGET_ALTIVEC)
23247 pressure_classes[n++] = ALTIVEC_REGS;
23248 if (TARGET_HARD_FLOAT)
23249 pressure_classes[n++] = FLOAT_REGS;
23250 }
23251 pressure_classes[n++] = CR_REGS;
23252 pressure_classes[n++] = SPECIAL_REGS;
23253
23254 return n;
23255 }
23256
23257 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23258 Frame pointer elimination is automatically handled.
23259
23260 For the RS/6000, if frame pointer elimination is being done, we would like
23261 to convert ap into fp, not sp.
23262
23263 We need r30 if -mminimal-toc was specified, and there are constant pool
23264 references. */
23265
23266 static bool
23267 rs6000_can_eliminate (const int from, const int to)
23268 {
23269 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23270 ? ! frame_pointer_needed
23271 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23272 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23273 || constant_pool_empty_p ()
23274 : true);
23275 }
23276
23277 /* Define the offset between two registers, FROM to be eliminated and its
23278 replacement TO, at the start of a routine. */
23279 HOST_WIDE_INT
23280 rs6000_initial_elimination_offset (int from, int to)
23281 {
23282 rs6000_stack_t *info = rs6000_stack_info ();
23283 HOST_WIDE_INT offset;
23284
23285 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23286 offset = info->push_p ? 0 : -info->total_size;
23287 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23288 {
23289 offset = info->push_p ? 0 : -info->total_size;
23290 if (FRAME_GROWS_DOWNWARD)
23291 offset += info->fixed_size + info->vars_size + info->parm_size;
23292 }
23293 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23294 offset = FRAME_GROWS_DOWNWARD
23295 ? info->fixed_size + info->vars_size + info->parm_size
23296 : 0;
23297 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23298 offset = info->total_size;
23299 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23300 offset = info->push_p ? info->total_size : 0;
23301 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23302 offset = 0;
23303 else
23304 gcc_unreachable ();
23305
23306 return offset;
23307 }
23308
23309 /* Fill in the sizes of registers used by the unwinder. */
23310
23311 static void
23312 rs6000_init_dwarf_reg_sizes_extra (tree address)
23313 {
23314 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23315 {
23316 int i;
23317 machine_mode mode = TYPE_MODE (char_type_node);
23318 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23319 rtx mem = gen_rtx_MEM (BLKmode, addr);
23320 rtx value = gen_int_mode (16, mode);
23321
23322 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23323 The unwinder still needs to know the size of Altivec registers. */
23324
23325 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23326 {
23327 int column = DWARF_REG_TO_UNWIND_COLUMN
23328 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23329 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23330
23331 emit_move_insn (adjust_address (mem, mode, offset), value);
23332 }
23333 }
23334 }
23335
23336 /* Map internal gcc register numbers to debug format register numbers.
23337 FORMAT specifies the type of debug register number to use:
23338 0 -- debug information, except for frame-related sections
23339 1 -- DWARF .debug_frame section
23340 2 -- DWARF .eh_frame section */
23341
23342 unsigned int
23343 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23344 {
23345 /* On some platforms, we use the standard DWARF register
23346 numbering for .debug_info and .debug_frame. */
23347 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
23348 {
23349 #ifdef RS6000_USE_DWARF_NUMBERING
23350 if (regno <= 31)
23351 return regno;
23352 if (FP_REGNO_P (regno))
23353 return regno - FIRST_FPR_REGNO + 32;
23354 if (ALTIVEC_REGNO_P (regno))
23355 return regno - FIRST_ALTIVEC_REGNO + 1124;
23356 if (regno == LR_REGNO)
23357 return 108;
23358 if (regno == CTR_REGNO)
23359 return 109;
23360 if (regno == CA_REGNO)
23361 return 101; /* XER */
23362 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23363 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23364 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23365 to the DWARF reg for CR. */
23366 if (format == 1 && regno == CR2_REGNO)
23367 return 64;
23368 if (CR_REGNO_P (regno))
23369 return regno - CR0_REGNO + 86;
23370 if (regno == VRSAVE_REGNO)
23371 return 356;
23372 if (regno == VSCR_REGNO)
23373 return 67;
23374
23375 /* These do not make much sense. */
23376 if (regno == FRAME_POINTER_REGNUM)
23377 return 111;
23378 if (regno == ARG_POINTER_REGNUM)
23379 return 67;
23380 if (regno == 64)
23381 return 100;
23382
23383 gcc_unreachable ();
23384 #endif
23385 }
23386
23387 /* We use the GCC 7 (and before) internal register numbering for
23388 non-DWARF debug information, and also for .eh_frame, so translate
23389 the regnos to their GCC 7 numbers. */
23390 if (regno <= 31)
23391 return regno;
23392 if (FP_REGNO_P (regno))
23393 return regno - FIRST_FPR_REGNO + 32;
23394 if (ALTIVEC_REGNO_P (regno))
23395 return regno - FIRST_ALTIVEC_REGNO + 77;
23396 if (regno == LR_REGNO)
23397 return 65;
23398 if (regno == CTR_REGNO)
23399 return 66;
23400 if (regno == CA_REGNO)
23401 return 76; /* XER */
23402 if (CR_REGNO_P (regno))
23403 return regno - CR0_REGNO + 68;
23404 if (regno == VRSAVE_REGNO)
23405 return 109;
23406 if (regno == VSCR_REGNO)
23407 return 110;
23408
23409 if (regno == FRAME_POINTER_REGNUM)
23410 return 111;
23411 if (regno == ARG_POINTER_REGNUM)
23412 return 67;
23413 if (regno == 64)
23414 return 64;
23415
23416 gcc_unreachable ();
23417 }
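
/* Illustrative examples of the mapping above: with
   RS6000_USE_DWARF_NUMBERING, .debug_frame uses the standard DWARF
   numbers, so LR -> 108, CTR -> 109 and VR0 -> 1124, whereas
   .eh_frame keeps the historical GCC 7 numbers, so LR -> 65,
   CTR -> 66 and VR0 -> 77.  */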
23418
23419 /* target hook eh_return_filter_mode */
23420 static scalar_int_mode
23421 rs6000_eh_return_filter_mode (void)
23422 {
23423 return TARGET_32BIT ? SImode : word_mode;
23424 }
23425
23426 /* Target hook for translate_mode_attribute. */
23427 static machine_mode
23428 rs6000_translate_mode_attribute (machine_mode mode)
23429 {
23430 if ((FLOAT128_IEEE_P (mode)
23431 && ieee128_float_type_node == long_double_type_node)
23432 || (FLOAT128_IBM_P (mode)
23433 && ibm128_float_type_node == long_double_type_node))
23434 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23435 return mode;
23436 }
23437
23438 /* Target hook for scalar_mode_supported_p. */
23439 static bool
23440 rs6000_scalar_mode_supported_p (scalar_mode mode)
23441 {
23442 /* -m32 does not support TImode. This is the default, from
23443 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23444 same ABI as for -m32. But default_scalar_mode_supported_p allows
23445 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23446 for -mpowerpc64. */
23447 if (TARGET_32BIT && mode == TImode)
23448 return false;
23449
23450 if (DECIMAL_FLOAT_MODE_P (mode))
23451 return default_decimal_float_supported_p ();
23452 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23453 return true;
23454 else
23455 return default_scalar_mode_supported_p (mode);
23456 }
23457
23458 /* Target hook for vector_mode_supported_p. */
23459 static bool
23460 rs6000_vector_mode_supported_p (machine_mode mode)
23461 {
23462 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23463 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23464 double-double. */
23465 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23466 return true;
23467
23468 else
23469 return false;
23470 }
23471
23472 /* Target hook for floatn_mode. */
23473 static opt_scalar_float_mode
23474 rs6000_floatn_mode (int n, bool extended)
23475 {
23476 if (extended)
23477 {
23478 switch (n)
23479 {
23480 case 32:
23481 return DFmode;
23482
23483 case 64:
23484 if (TARGET_FLOAT128_TYPE)
23485 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23486 else
23487 return opt_scalar_float_mode ();
23488
23489 case 128:
23490 return opt_scalar_float_mode ();
23491
23492 default:
23493 /* Those are the only valid _FloatNx types. */
23494 gcc_unreachable ();
23495 }
23496 }
23497 else
23498 {
23499 switch (n)
23500 {
23501 case 32:
23502 return SFmode;
23503
23504 case 64:
23505 return DFmode;
23506
23507 case 128:
23508 if (TARGET_FLOAT128_TYPE)
23509 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23510 else
23511 return opt_scalar_float_mode ();
23512
23513 default:
23514 return opt_scalar_float_mode ();
23515 }
23516 }
23517
23518 }
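
/* A sketch of what the hook above yields (illustrative): _Float32 is
   SFmode and _Float32x is DFmode unconditionally; with -mfloat128,
   _Float128 and _Float64x are KFmode under the default IBM long
   double, but TFmode under -mabi=ieeelongdouble, where
   FLOAT128_IEEE_P (TFmode) holds.  _Float128x is never supported.  */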
23519
23520 /* Target hook for c_mode_for_suffix. */
23521 static machine_mode
23522 rs6000_c_mode_for_suffix (char suffix)
23523 {
23524 if (TARGET_FLOAT128_TYPE)
23525 {
23526 if (suffix == 'q' || suffix == 'Q')
23527 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23528
23529 /* At the moment, we are not defining a suffix for IBM extended double.
23530 If/when the default for -mabi=ieeelongdouble is changed, and we want
23531 to support __ibm128 constants in legacy library code, we may need to
23532 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
23533 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
23534 __float80 constants. */
23535 }
23536
23537 return VOIDmode;
23538 }
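
/* Example (illustrative): with -mfloat128, a constant written as

     __float128 pi = 3.14159265358979323846q;

   reaches here with suffix 'q' and is given KFmode (or TFmode when
   long double is IEEE 128-bit); without -mfloat128, VOIDmode is
   returned and the front end rejects the suffix.  */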
23539
23540 /* Target hook for invalid_arg_for_unprototyped_fn. */
23541 static const char *
23542 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23543 {
23544 return (!rs6000_darwin64_abi
23545 && typelist == 0
23546 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23547 && (funcdecl == NULL_TREE
23548 || (TREE_CODE (funcdecl) == FUNCTION_DECL
23549 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23550 ? N_("AltiVec argument passed to unprototyped function")
23551 : NULL;
23552 }
23553
23554 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
23555 setup by using the __stack_chk_fail_local hidden function instead of
23556 calling __stack_chk_fail directly. Otherwise it is better to call
23557 __stack_chk_fail directly. */
23558
23559 static tree ATTRIBUTE_UNUSED
23560 rs6000_stack_protect_fail (void)
23561 {
23562 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23563 ? default_hidden_stack_protect_fail ()
23564 : default_external_stack_protect_fail ();
23565 }
23566
23567 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23568
23569 #if TARGET_ELF
23570 static unsigned HOST_WIDE_INT
23571 rs6000_asan_shadow_offset (void)
23572 {
23573 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23574 }
23575 #endif
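
/* For reference (illustrative): libsanitizer uses a scale-3 direct
   shadow mapping, so on 64-bit targets a shadow address is computed
   roughly as

     shadow = (addr >> 3) + ((unsigned HOST_WIDE_INT) 1 << 41);

   with 1 << 29 as the corresponding 32-bit offset above.  */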
23576 \f
23577 /* Mask options that we want to support inside attribute((target)) and
23578 #pragma GCC target operations. Note, we do not include things like
23579 64/32-bit, endianness, hard/soft floating point, etc. that would have
23580 different calling sequences. */
23581
23582 struct rs6000_opt_mask {
23583 const char *name; /* option name */
23584 HOST_WIDE_INT mask; /* mask to set */
23585 bool invert; /* invert sense of mask */
23586 bool valid_target; /* option is a target option */
23587 };
23588
23589 static struct rs6000_opt_mask const rs6000_opt_masks[] =
23590 {
23591 { "altivec", OPTION_MASK_ALTIVEC, false, true },
23592 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23593 false, true },
23594 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23595 false, true },
23596 { "cmpb", OPTION_MASK_CMPB, false, true },
23597 { "crypto", OPTION_MASK_CRYPTO, false, true },
23598 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
23599 { "dlmzb", OPTION_MASK_DLMZB, false, true },
23600 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23601 false, true },
23602 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
23603 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
23604 { "fprnd", OPTION_MASK_FPRND, false, true },
23605 { "power10", OPTION_MASK_POWER10, false, true },
23606 { "hard-dfp", OPTION_MASK_DFP, false, true },
23607 { "htm", OPTION_MASK_HTM, false, true },
23608 { "isel", OPTION_MASK_ISEL, false, true },
23609 { "mfcrf", OPTION_MASK_MFCRF, false, true },
23610 { "mfpgpr", 0, false, true },
23611 { "mma", OPTION_MASK_MMA, false, true },
23612 { "modulo", OPTION_MASK_MODULO, false, true },
23613 { "mulhw", OPTION_MASK_MULHW, false, true },
23614 { "multiple", OPTION_MASK_MULTIPLE, false, true },
23615 { "pcrel", OPTION_MASK_PCREL, false, true },
23616 { "popcntb", OPTION_MASK_POPCNTB, false, true },
23617 { "popcntd", OPTION_MASK_POPCNTD, false, true },
23618 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
23619 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
23620 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
23621 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
23622 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
23623 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
23624 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
23625 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
23626 { "prefixed", OPTION_MASK_PREFIXED, false, true },
23627 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
23628 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
23629 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
23630 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
23631 { "string", 0, false, true },
23632 { "update", OPTION_MASK_NO_UPDATE, true , true },
23633 { "vsx", OPTION_MASK_VSX, false, true },
23634 #ifdef OPTION_MASK_64BIT
23635 #if TARGET_AIX_OS
23636 { "aix64", OPTION_MASK_64BIT, false, false },
23637 { "aix32", OPTION_MASK_64BIT, true, false },
23638 #else
23639 { "64", OPTION_MASK_64BIT, false, false },
23640 { "32", OPTION_MASK_64BIT, true, false },
23641 #endif
23642 #endif
23643 #ifdef OPTION_MASK_EABI
23644 { "eabi", OPTION_MASK_EABI, false, false },
23645 #endif
23646 #ifdef OPTION_MASK_LITTLE_ENDIAN
23647 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
23648 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
23649 #endif
23650 #ifdef OPTION_MASK_RELOCATABLE
23651 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
23652 #endif
23653 #ifdef OPTION_MASK_STRICT_ALIGN
23654 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
23655 #endif
23656 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
23657 { "string", 0, false, false },
23658 };
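
/* Usage sketch (illustrative): the names above are what users write,
   optionally prefixed with "no-" to invert the mask, e.g.

     __attribute__((__target__("vsx,no-multiple"))) void f (void);
     #pragma GCC target ("power9-vector,isel")

   Entries whose valid_target field is false are only recognized for
   printing the flags (-mdebug=target); rs6000_inner_target_options
   below rejects them with "is not allowed".  */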
23659
23660 /* Builtin mask mapping for printing the flags. */
23661 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
23662 {
23663 { "altivec", RS6000_BTM_ALTIVEC, false, false },
23664 { "vsx", RS6000_BTM_VSX, false, false },
23665 { "fre", RS6000_BTM_FRE, false, false },
23666 { "fres", RS6000_BTM_FRES, false, false },
23667 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
23668 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
23669 { "popcntd", RS6000_BTM_POPCNTD, false, false },
23670 { "cell", RS6000_BTM_CELL, false, false },
23671 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
23672 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
23673 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
23674 { "crypto", RS6000_BTM_CRYPTO, false, false },
23675 { "htm", RS6000_BTM_HTM, false, false },
23676 { "hard-dfp", RS6000_BTM_DFP, false, false },
23677 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
23678 { "long-double-128", RS6000_BTM_LDBL128, false, false },
23679 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
23680 { "float128", RS6000_BTM_FLOAT128, false, false },
23681 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
23682 { "mma", RS6000_BTM_MMA, false, false },
23683 { "power10", RS6000_BTM_P10, false, false },
23684 };
23685
23686 /* Option variables that we want to support inside attribute((target)) and
23687 #pragma GCC target operations. */
23688
23689 struct rs6000_opt_var {
23690 const char *name; /* option name */
23691 size_t global_offset; /* offset of the option in global_options. */
23692 size_t target_offset; /* offset of the option in target options. */
23693 };
23694
23695 static struct rs6000_opt_var const rs6000_opt_vars[] =
23696 {
23697 { "friz",
23698 offsetof (struct gcc_options, x_TARGET_FRIZ),
23699 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
23700 { "avoid-indexed-addresses",
23701 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
23702 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
23703 { "longcall",
23704 offsetof (struct gcc_options, x_rs6000_default_long_calls),
23705 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
23706 { "optimize-swaps",
23707 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
23708 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
23709 { "allow-movmisalign",
23710 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
23711 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
23712 { "sched-groups",
23713 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
23714 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
23715 { "always-hint",
23716 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
23717 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
23718 { "align-branch-targets",
23719 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
23720 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
23721 { "sched-prolog",
23722 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23723 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23724 { "sched-epilog",
23725 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23726 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23727 { "speculate-indirect-jumps",
23728 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
23729 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
23730 };
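
/* Sketch of how this table is consumed (illustrative): when
   rs6000_inner_target_options matches e.g. "optimize-swaps", it
   stores the (possibly inverted) value directly through
   global_offset, roughly

     *(int *) ((char *) &global_options + opt->global_offset) = !invert;

   so target("no-optimize-swaps") clears x_rs6000_optimize_swaps.  */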
23731
23732 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23733 parsing. Return true if there were no errors. */
23734
23735 static bool
23736 rs6000_inner_target_options (tree args, bool attr_p)
23737 {
23738 bool ret = true;
23739
23740 if (args == NULL_TREE)
23741 ;
23742
23743 else if (TREE_CODE (args) == STRING_CST)
23744 {
23745 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23746 char *q;
23747
23748 while ((q = strtok (p, ",")) != NULL)
23749 {
23750 bool error_p = false;
23751 bool not_valid_p = false;
23752 const char *cpu_opt = NULL;
23753
23754 p = NULL;
23755 if (strncmp (q, "cpu=", 4) == 0)
23756 {
23757 int cpu_index = rs6000_cpu_name_lookup (q+4);
23758 if (cpu_index >= 0)
23759 rs6000_cpu_index = cpu_index;
23760 else
23761 {
23762 error_p = true;
23763 cpu_opt = q+4;
23764 }
23765 }
23766 else if (strncmp (q, "tune=", 5) == 0)
23767 {
23768 int tune_index = rs6000_cpu_name_lookup (q+5);
23769 if (tune_index >= 0)
23770 rs6000_tune_index = tune_index;
23771 else
23772 {
23773 error_p = true;
23774 cpu_opt = q+5;
23775 }
23776 }
23777 else
23778 {
23779 size_t i;
23780 bool invert = false;
23781 char *r = q;
23782
23783 error_p = true;
23784 if (strncmp (r, "no-", 3) == 0)
23785 {
23786 invert = true;
23787 r += 3;
23788 }
23789
23790 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23791 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23792 {
23793 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23794
23795 if (!rs6000_opt_masks[i].valid_target)
23796 not_valid_p = true;
23797 else
23798 {
23799 error_p = false;
23800 rs6000_isa_flags_explicit |= mask;
23801
23802 /* VSX needs altivec, so -mvsx automagically sets
23803 altivec and disables -mavoid-indexed-addresses. */
23804 if (!invert)
23805 {
23806 if (mask == OPTION_MASK_VSX)
23807 {
23808 mask |= OPTION_MASK_ALTIVEC;
23809 TARGET_AVOID_XFORM = 0;
23810 }
23811 }
23812
23813 if (rs6000_opt_masks[i].invert)
23814 invert = !invert;
23815
23816 if (invert)
23817 rs6000_isa_flags &= ~mask;
23818 else
23819 rs6000_isa_flags |= mask;
23820 }
23821 break;
23822 }
23823
23824 if (error_p && !not_valid_p)
23825 {
23826 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23827 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23828 {
23829 size_t j = rs6000_opt_vars[i].global_offset;
23830 *((int *) ((char *)&global_options + j)) = !invert;
23831 error_p = false;
23832 not_valid_p = false;
23833 break;
23834 }
23835 }
23836 }
23837
23838 if (error_p)
23839 {
23840 const char *eprefix, *esuffix;
23841
23842 ret = false;
23843 if (attr_p)
23844 {
23845 eprefix = "__attribute__((__target__(";
23846 esuffix = ")))";
23847 }
23848 else
23849 {
23850 eprefix = "#pragma GCC target ";
23851 esuffix = "";
23852 }
23853
23854 if (cpu_opt)
23855 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23856 q, esuffix);
23857 else if (not_valid_p)
23858 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23859 else
23860 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23861 }
23862 }
23863 }
23864
23865 else if (TREE_CODE (args) == TREE_LIST)
23866 {
23867 do
23868 {
23869 tree value = TREE_VALUE (args);
23870 if (value)
23871 {
23872 bool ret2 = rs6000_inner_target_options (value, attr_p);
23873 if (!ret2)
23874 ret = false;
23875 }
23876 args = TREE_CHAIN (args);
23877 }
23878 while (args != NULL_TREE);
23879 }
23880
23881 else
23882 {
23883 error ("attribute %<target%> argument not a string");
23884 return false;
23885 }
23886
23887 return ret;
23888 }
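
/* Worked example of the parser above (illustrative): for
   __attribute__((target("no-vsx,cpu=power9"))), strtok splits the
   string at the comma; "no-vsx" has its "no-" prefix stripped,
   matches the "vsx" entry in rs6000_opt_masks, records the mask in
   rs6000_isa_flags_explicit and clears it from rs6000_isa_flags;
   "cpu=power9" sets rs6000_cpu_index via rs6000_cpu_name_lookup.  */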
23889
23890 /* Print out the target options as a list for -mdebug=target. */
23891
23892 static void
23893 rs6000_debug_target_options (tree args, const char *prefix)
23894 {
23895 if (args == NULL_TREE)
23896 fprintf (stderr, "%s<NULL>", prefix);
23897
23898 else if (TREE_CODE (args) == STRING_CST)
23899 {
23900 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23901 char *q;
23902
23903 while ((q = strtok (p, ",")) != NULL)
23904 {
23905 p = NULL;
23906 fprintf (stderr, "%s\"%s\"", prefix, q);
23907 prefix = ", ";
23908 }
23909 }
23910
23911 else if (TREE_CODE (args) == TREE_LIST)
23912 {
23913 do
23914 {
23915 tree value = TREE_VALUE (args);
23916 if (value)
23917 {
23918 rs6000_debug_target_options (value, prefix);
23919 prefix = ", ";
23920 }
23921 args = TREE_CHAIN (args);
23922 }
23923 while (args != NULL_TREE);
23924 }
23925
23926 else
23927 gcc_unreachable ();
23928
23929 return;
23930 }
23931
23932 \f
23933 /* Hook to validate attribute((target("..."))). */
23934
23935 static bool
23936 rs6000_valid_attribute_p (tree fndecl,
23937 tree ARG_UNUSED (name),
23938 tree args,
23939 int flags)
23940 {
23941 struct cl_target_option cur_target;
23942 bool ret;
23943 tree old_optimize;
23944 tree new_target, new_optimize;
23945 tree func_optimize;
23946
23947 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23948
23949 if (TARGET_DEBUG_TARGET)
23950 {
23951 tree tname = DECL_NAME (fndecl);
23952 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23953 if (tname)
23954 fprintf (stderr, "function: %.*s\n",
23955 (int) IDENTIFIER_LENGTH (tname),
23956 IDENTIFIER_POINTER (tname));
23957 else
23958 fprintf (stderr, "function: unknown\n");
23959
23960 fprintf (stderr, "args:");
23961 rs6000_debug_target_options (args, " ");
23962 fprintf (stderr, "\n");
23963
23964 if (flags)
23965 fprintf (stderr, "flags: 0x%x\n", flags);
23966
23967 fprintf (stderr, "--------------------\n");
23968 }
23969
23970 /* attribute((target("default"))) does nothing, beyond
23971 affecting multi-versioning. */
23972 if (TREE_VALUE (args)
23973 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23974 && TREE_CHAIN (args) == NULL_TREE
23975 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23976 return true;
23977
23978 old_optimize = build_optimization_node (&global_options,
23979 &global_options_set);
23980 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23981
23982 /* If the function changed the optimization levels as well as setting target
23983 options, start with the optimizations specified. */
23984 if (func_optimize && func_optimize != old_optimize)
23985 cl_optimization_restore (&global_options, &global_options_set,
23986 TREE_OPTIMIZATION (func_optimize));
23987
23988 /* The target attributes may also change some optimization flags, so update
23989 the optimization options if necessary. */
23990 cl_target_option_save (&cur_target, &global_options, &global_options_set);
23991 rs6000_cpu_index = rs6000_tune_index = -1;
23992 ret = rs6000_inner_target_options (args, true);
23993
23994 /* Set up any additional state. */
23995 if (ret)
23996 {
23997 ret = rs6000_option_override_internal (false);
23998 new_target = build_target_option_node (&global_options,
23999 &global_options_set);
24000 }
24001 else
24002 new_target = NULL;
24003
24004 new_optimize = build_optimization_node (&global_options,
24005 &global_options_set);
24006
24007 if (!new_target)
24008 ret = false;
24009
24010 else if (fndecl)
24011 {
24012 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24013
24014 if (old_optimize != new_optimize)
24015 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24016 }
24017
24018 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24019
24020 if (old_optimize != new_optimize)
24021 cl_optimization_restore (&global_options, &global_options_set,
24022 TREE_OPTIMIZATION (old_optimize));
24023
24024 return ret;
24025 }
24026
24027 \f
24028 /* Hook to validate the current #pragma GCC target and set the state, and
24029 update the macros based on what was changed. If ARGS is NULL, then
24030 POP_TARGET is used to reset the options. */
24031
24032 bool
24033 rs6000_pragma_target_parse (tree args, tree pop_target)
24034 {
24035 tree prev_tree = build_target_option_node (&global_options,
24036 &global_options_set);
24037 tree cur_tree;
24038 struct cl_target_option *prev_opt, *cur_opt;
24039 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24040 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24041
24042 if (TARGET_DEBUG_TARGET)
24043 {
24044 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24045 fprintf (stderr, "args:");
24046 rs6000_debug_target_options (args, " ");
24047 fprintf (stderr, "\n");
24048
24049 if (pop_target)
24050 {
24051 fprintf (stderr, "pop_target:\n");
24052 debug_tree (pop_target);
24053 }
24054 else
24055 fprintf (stderr, "pop_target: <NULL>\n");
24056
24057 fprintf (stderr, "--------------------\n");
24058 }
24059
24060 if (! args)
24061 {
24062 cur_tree = ((pop_target)
24063 ? pop_target
24064 : target_option_default_node);
24065 cl_target_option_restore (&global_options, &global_options_set,
24066 TREE_TARGET_OPTION (cur_tree));
24067 }
24068 else
24069 {
24070 rs6000_cpu_index = rs6000_tune_index = -1;
24071 if (!rs6000_inner_target_options (args, false)
24072 || !rs6000_option_override_internal (false)
24073 || (cur_tree = build_target_option_node (&global_options,
24074 &global_options_set))
24075 == NULL_TREE)
24076 {
24077 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24078 fprintf (stderr, "invalid pragma\n");
24079
24080 return false;
24081 }
24082 }
24083
24084 target_option_current_node = cur_tree;
24085 rs6000_activate_target_options (target_option_current_node);
24086
24087 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24088 change the macros that are defined. */
24089 if (rs6000_target_modify_macros_ptr)
24090 {
24091 prev_opt = TREE_TARGET_OPTION (prev_tree);
24092 prev_bumask = prev_opt->x_rs6000_builtin_mask;
24093 prev_flags = prev_opt->x_rs6000_isa_flags;
24094
24095 cur_opt = TREE_TARGET_OPTION (cur_tree);
24096 cur_flags = cur_opt->x_rs6000_isa_flags;
24097 cur_bumask = cur_opt->x_rs6000_builtin_mask;
24098
24099 diff_bumask = (prev_bumask ^ cur_bumask);
24100 diff_flags = (prev_flags ^ cur_flags);
24101
24102 if ((diff_flags != 0) || (diff_bumask != 0))
24103 {
24104 /* Delete old macros. */
24105 rs6000_target_modify_macros_ptr (false,
24106 prev_flags & diff_flags,
24107 prev_bumask & diff_bumask);
24108
24109 /* Define new macros. */
24110 rs6000_target_modify_macros_ptr (true,
24111 cur_flags & diff_flags,
24112 cur_bumask & diff_bumask);
24113 }
24114 }
24115
24116 return true;
24117 }
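
/* Usage sketch (illustrative): for C/C++ input such as

     #pragma GCC push_options
     #pragma GCC target ("vsx")
     vector double f (vector double x) { return x; }
     #pragma GCC pop_options

   this hook is first called with the parsed string, then with the
   saved node as POP_TARGET; the XOR of the old and new ISA flags
   feeds rs6000_target_modify_macros_ptr, so predefined macros such
   as __VSX__ track the pragma.  */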
24118
24119 \f
24120 /* Remember the last target of rs6000_set_current_function. */
24121 static GTY(()) tree rs6000_previous_fndecl;
24122
24123 /* Restore target's globals from NEW_TREE and invalidate the
24124 rs6000_previous_fndecl cache. */
24125
24126 void
24127 rs6000_activate_target_options (tree new_tree)
24128 {
24129 cl_target_option_restore (&global_options, &global_options_set,
24130 TREE_TARGET_OPTION (new_tree));
24131 if (TREE_TARGET_GLOBALS (new_tree))
24132 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24133 else if (new_tree == target_option_default_node)
24134 restore_target_globals (&default_target_globals);
24135 else
24136 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24137 rs6000_previous_fndecl = NULL_TREE;
24138 }
24139
24140 /* Establish appropriate back-end context for processing the function
24141 FNDECL. The argument might be NULL to indicate processing at top
24142 level, outside of any function scope. */
24143 static void
24144 rs6000_set_current_function (tree fndecl)
24145 {
24146 if (TARGET_DEBUG_TARGET)
24147 {
24148 fprintf (stderr, "\n==================== rs6000_set_current_function");
24149
24150 if (fndecl)
24151 fprintf (stderr, ", fndecl %s (%p)",
24152 (DECL_NAME (fndecl)
24153 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24154 : "<unknown>"), (void *)fndecl);
24155
24156 if (rs6000_previous_fndecl)
24157 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24158
24159 fprintf (stderr, "\n");
24160 }
24161
24162 /* Only change the context if the function changes. This hook is called
24163 several times in the course of compiling a function, and we don't want to
24164 slow things down too much or call target_reinit when it isn't safe. */
24165 if (fndecl == rs6000_previous_fndecl)
24166 return;
24167
24168 tree old_tree;
24169 if (rs6000_previous_fndecl == NULL_TREE)
24170 old_tree = target_option_current_node;
24171 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24172 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24173 else
24174 old_tree = target_option_default_node;
24175
24176 tree new_tree;
24177 if (fndecl == NULL_TREE)
24178 {
24179 if (old_tree != target_option_current_node)
24180 new_tree = target_option_current_node;
24181 else
24182 new_tree = NULL_TREE;
24183 }
24184 else
24185 {
24186 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24187 if (new_tree == NULL_TREE)
24188 new_tree = target_option_default_node;
24189 }
24190
24191 if (TARGET_DEBUG_TARGET)
24192 {
24193 if (new_tree)
24194 {
24195 fprintf (stderr, "\nnew fndecl target specific options:\n");
24196 debug_tree (new_tree);
24197 }
24198
24199 if (old_tree)
24200 {
24201 fprintf (stderr, "\nold fndecl target specific options:\n");
24202 debug_tree (old_tree);
24203 }
24204
24205 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24206 fprintf (stderr, "--------------------\n");
24207 }
24208
24209 if (new_tree && old_tree != new_tree)
24210 rs6000_activate_target_options (new_tree);
24211
24212 if (fndecl)
24213 rs6000_previous_fndecl = fndecl;
24214 }
24215
24216 \f
24217 /* Save the current options */
24218
24219 static void
24220 rs6000_function_specific_save (struct cl_target_option *ptr,
24221 struct gcc_options *opts,
24222 struct gcc_options * /* opts_set */)
24223 {
24224 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24225 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24226 }
24227
24228 /* Restore the current options */
24229
24230 static void
24231 rs6000_function_specific_restore (struct gcc_options *opts,
24232 struct gcc_options * /* opts_set */,
24233 struct cl_target_option *ptr)
24234
24235 {
24236 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24237 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24238 (void) rs6000_option_override_internal (false);
24239 }
24240
24241 /* Print the current options */
24242
24243 static void
24244 rs6000_function_specific_print (FILE *file, int indent,
24245 struct cl_target_option *ptr)
24246 {
24247 rs6000_print_isa_options (file, indent, "Isa options set",
24248 ptr->x_rs6000_isa_flags);
24249
24250 rs6000_print_isa_options (file, indent, "Isa options explicit",
24251 ptr->x_rs6000_isa_flags_explicit);
24252 }
24253
24254 /* Helper function to print the current isa or misc options on a line. */
24255
24256 static void
24257 rs6000_print_options_internal (FILE *file,
24258 int indent,
24259 const char *string,
24260 HOST_WIDE_INT flags,
24261 const char *prefix,
24262 const struct rs6000_opt_mask *opts,
24263 size_t num_elements)
24264 {
24265 size_t i;
24266 size_t start_column = 0;
24267 size_t cur_column;
24268 size_t max_column = 120;
24269 size_t prefix_len = strlen (prefix);
24270 size_t comma_len = 0;
24271 const char *comma = "";
24272
24273 if (indent)
24274 start_column += fprintf (file, "%*s", indent, "");
24275
24276 if (!flags)
24277 {
24278 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
24279 return;
24280 }
24281
24282 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
24283
24284 /* Print the various mask options. */
24285 cur_column = start_column;
24286 for (i = 0; i < num_elements; i++)
24287 {
24288 bool invert = opts[i].invert;
24289 const char *name = opts[i].name;
24290 const char *no_str = "";
24291 HOST_WIDE_INT mask = opts[i].mask;
24292 size_t len = comma_len + prefix_len + strlen (name);
24293
24294 if (!invert)
24295 {
24296 if ((flags & mask) == 0)
24297 {
24298 no_str = "no-";
24299 len += strlen ("no-");
24300 }
24301
24302 flags &= ~mask;
24303 }
24304
24305 else
24306 {
24307 if ((flags & mask) != 0)
24308 {
24309 no_str = "no-";
24310 len += strlen ("no-");
24311 }
24312
24313 flags |= mask;
24314 }
24315
24316 cur_column += len;
24317 if (cur_column > max_column)
24318 {
24319 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
24320 cur_column = start_column + len;
24321 comma = "";
24322 }
24323
24324 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24325 comma = ", ";
24326 comma_len = strlen (", ");
24327 }
24328
24329 fputs ("\n", file);
24330 }
24331
24332 /* Helper function to print the current isa options on a line. */
24333
24334 static void
24335 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24336 HOST_WIDE_INT flags)
24337 {
24338 rs6000_print_options_internal (file, indent, string, flags, "-m",
24339 &rs6000_opt_masks[0],
24340 ARRAY_SIZE (rs6000_opt_masks));
24341 }
24342
24343 static void
24344 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24345 HOST_WIDE_INT flags)
24346 {
24347 rs6000_print_options_internal (file, indent, string, flags, "",
24348 &rs6000_builtin_mask_names[0],
24349 ARRAY_SIZE (rs6000_builtin_mask_names));
24350 }
24351
24352 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
24353 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24354 -mupper-regs-df, etc.).
24355
24356 If the user used -mno-power8-vector, we need to turn off all of the implicit
24357 ISA 2.07 and 3.0 options that relate to the vector unit.
24358
24359 If the user used -mno-power9-vector, we need to turn off all of the implicit
24360 ISA 3.0 options that relate to the vector unit.
24361
24362 This function does not handle explicit options such as the user specifying
24363 -mdirect-move. These are handled in rs6000_option_override_internal, and
24364 the appropriate error is given if needed.
24365
24366 We return a mask of all of the implicit options that should not be enabled
24367 by default. */
24368
24369 static HOST_WIDE_INT
24370 rs6000_disable_incompatible_switches (void)
24371 {
24372 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24373 size_t i, j;
24374
24375 static const struct {
24376 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24377 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24378 const char *const name; /* name of the switch. */
24379 } flags[] = {
24380 { OPTION_MASK_POWER10, OTHER_POWER10_MASKS, "power10" },
24381 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24382 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24383 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24384 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24385 };
24386
24387 for (i = 0; i < ARRAY_SIZE (flags); i++)
24388 {
24389 HOST_WIDE_INT no_flag = flags[i].no_flag;
24390
24391 if ((rs6000_isa_flags & no_flag) == 0
24392 && (rs6000_isa_flags_explicit & no_flag) != 0)
24393 {
24394 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24395 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24396 & rs6000_isa_flags
24397 & dep_flags);
24398
24399 if (set_flags)
24400 {
24401 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24402 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24403 {
24404 set_flags &= ~rs6000_opt_masks[j].mask;
24405 error ("%<-mno-%s%> turns off %<-m%s%>",
24406 flags[i].name,
24407 rs6000_opt_masks[j].name);
24408 }
24409
24410 gcc_assert (!set_flags);
24411 }
24412
24413 rs6000_isa_flags &= ~dep_flags;
24414 ignore_masks |= no_flag | dep_flags;
24415 }
24416 }
24417
24418 return ignore_masks;
24419 }
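
/* Example (illustrative): "-mno-vsx -mpower8-vector" is diagnosed as
   "-mno-vsx turns off -mpower8-vector", since OPTION_MASK_P8_VECTOR
   is among OTHER_VSX_VECTOR_MASKS and was set explicitly; an
   implicitly enabled -mpower8-vector is instead cleared silently and
   folded into the returned ignore_masks.  */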
24420
24421 \f
24422 /* Helper function for printing the function name when debugging. */
24423
24424 static const char *
24425 get_decl_name (tree fn)
24426 {
24427 tree name;
24428
24429 if (!fn)
24430 return "<null>";
24431
24432 name = DECL_NAME (fn);
24433 if (!name)
24434 return "<no-name>";
24435
24436 return IDENTIFIER_POINTER (name);
24437 }
24438
24439 /* Return the clone id of the target we are compiling code for in a target
24440 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24441 the priority list for the target clones (ordered from lowest to
24442 highest). */
24443
24444 static int
24445 rs6000_clone_priority (tree fndecl)
24446 {
24447 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24448 HOST_WIDE_INT isa_masks;
24449 int ret = CLONE_DEFAULT;
24450 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24451 const char *attrs_str = NULL;
24452
24453 attrs = TREE_VALUE (TREE_VALUE (attrs));
24454 attrs_str = TREE_STRING_POINTER (attrs);
24455
24456 /* Return priority zero for default function. Return the ISA needed for the
24457 function if it is not the default. */
24458 if (strcmp (attrs_str, "default") != 0)
24459 {
24460 if (fn_opts == NULL_TREE)
24461 fn_opts = target_option_default_node;
24462
24463 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24464 isa_masks = rs6000_isa_flags;
24465 else
24466 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24467
24468 for (ret = CLONE_MAX - 1; ret != 0; ret--)
24469 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24470 break;
24471 }
24472
24473 if (TARGET_DEBUG_TARGET)
24474 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
24475 get_decl_name (fndecl), ret);
24476
24477 return ret;
24478 }
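
/* Usage sketch (illustrative): given

     __attribute__((target_clones ("cpu=power9,default")))
     long mod (long a, long b) { return a % b; }

   the "default" version gets priority CLONE_DEFAULT (0) while the
   power9 version gets the highest CLONE_* index whose isa_mask is
   satisfied, letting the dispatcher prefer it at run time.  */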
24479
24480 /* This compares the priority of target features in function DECL1 and DECL2.
24481 It returns positive value if DECL1 is higher priority, negative value if
24482 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24483 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24484
24485 static int
24486 rs6000_compare_version_priority (tree decl1, tree decl2)
24487 {
24488 int priority1 = rs6000_clone_priority (decl1);
24489 int priority2 = rs6000_clone_priority (decl2);
24490 int ret = priority1 - priority2;
24491
24492 if (TARGET_DEBUG_TARGET)
24493 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24494 get_decl_name (decl1), get_decl_name (decl2), ret);
24495
24496 return ret;
24497 }
24498
24499 /* Make a dispatcher declaration for the multi-versioned function DECL.
24500 Calls to DECL function will be replaced with calls to the dispatcher
24501 by the front-end. Returns the decl of the dispatcher function. */
24502
24503 static tree
24504 rs6000_get_function_versions_dispatcher (void *decl)
24505 {
24506 tree fn = (tree) decl;
24507 struct cgraph_node *node = NULL;
24508 struct cgraph_node *default_node = NULL;
24509 struct cgraph_function_version_info *node_v = NULL;
24510 struct cgraph_function_version_info *first_v = NULL;
24511
24512 tree dispatch_decl = NULL;
24513
24514 struct cgraph_function_version_info *default_version_info = NULL;
24515 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24516
24517 if (TARGET_DEBUG_TARGET)
24518 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24519 get_decl_name (fn));
24520
24521 node = cgraph_node::get (fn);
24522 gcc_assert (node != NULL);
24523
24524 node_v = node->function_version ();
24525 gcc_assert (node_v != NULL);
24526
24527 if (node_v->dispatcher_resolver != NULL)
24528 return node_v->dispatcher_resolver;
24529
24530 /* Find the default version and make it the first node. */
24531 first_v = node_v;
24532 /* Go to the beginning of the chain. */
24533 while (first_v->prev != NULL)
24534 first_v = first_v->prev;
24535
24536 default_version_info = first_v;
24537 while (default_version_info != NULL)
24538 {
24539 const tree decl2 = default_version_info->this_node->decl;
24540 if (is_function_default_version (decl2))
24541 break;
24542 default_version_info = default_version_info->next;
24543 }
24544
24545 /* If there is no default node, just return NULL. */
24546 if (default_version_info == NULL)
24547 return NULL;
24548
24549 /* Make default info the first node. */
24550 if (first_v != default_version_info)
24551 {
24552 default_version_info->prev->next = default_version_info->next;
24553 if (default_version_info->next)
24554 default_version_info->next->prev = default_version_info->prev;
24555 first_v->prev = default_version_info;
24556 default_version_info->next = first_v;
24557 default_version_info->prev = NULL;
24558 }
24559
24560 default_node = default_version_info->this_node;
24561
24562 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24563 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24564 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24565 "exports hardware capability bits");
24566 #else
24567
24568 if (targetm.has_ifunc_p ())
24569 {
24570 struct cgraph_function_version_info *it_v = NULL;
24571 struct cgraph_node *dispatcher_node = NULL;
24572 struct cgraph_function_version_info *dispatcher_version_info = NULL;
24573
24574 /* Right now, the dispatching is done via ifunc. */
24575 dispatch_decl = make_dispatcher_decl (default_node->decl);
24576
24577 dispatcher_node = cgraph_node::get_create (dispatch_decl);
24578 gcc_assert (dispatcher_node != NULL);
24579 dispatcher_node->dispatcher_function = 1;
24580 dispatcher_version_info
24581 = dispatcher_node->insert_new_function_version ();
24582 dispatcher_version_info->next = default_version_info;
24583 dispatcher_node->definition = 1;
24584
24585 /* Set the dispatcher for all the versions. */
24586 it_v = default_version_info;
24587 while (it_v != NULL)
24588 {
24589 it_v->dispatcher_resolver = dispatch_decl;
24590 it_v = it_v->next;
24591 }
24592 }
24593 else
24594 {
24595 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24596 "multiversioning needs ifunc which is not supported "
24597 "on this target");
24598 }
24599 #endif
24600
24601 return dispatch_decl;
24602 }
24603
24604 /* Make the resolver function decl to dispatch the versions of a multi-
24605 versioned function, DEFAULT_DECL. Create an empty basic block in the
24606 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
24607 function. */
24608
24609 static tree
24610 make_resolver_func (const tree default_decl,
24611 const tree dispatch_decl,
24612 basic_block *empty_bb)
24613 {
24614 /* Make the resolver function static. The resolver function returns
24615 void *. */
24616 tree decl_name = clone_function_name (default_decl, "resolver");
24617 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
24618 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
24619 tree decl = build_fn_decl (resolver_name, type);
24620 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
24621
24622 DECL_NAME (decl) = decl_name;
24623 TREE_USED (decl) = 1;
24624 DECL_ARTIFICIAL (decl) = 1;
24625 DECL_IGNORED_P (decl) = 0;
24626 TREE_PUBLIC (decl) = 0;
24627 DECL_UNINLINABLE (decl) = 1;
24628
24629 /* Resolver is not external, body is generated. */
24630 DECL_EXTERNAL (decl) = 0;
24631 DECL_EXTERNAL (dispatch_decl) = 0;
24632
24633 DECL_CONTEXT (decl) = NULL_TREE;
24634 DECL_INITIAL (decl) = make_node (BLOCK);
24635 DECL_STATIC_CONSTRUCTOR (decl) = 0;
24636
24637 if (DECL_COMDAT_GROUP (default_decl)
24638 || TREE_PUBLIC (default_decl))
24639 {
24640 /* In this case, each translation unit with a call to this
24641 versioned function will put out a resolver. Ensure it
24642 is comdat to keep just one copy. */
24643 DECL_COMDAT (decl) = 1;
24644 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
24645 }
24646 else
24647 TREE_PUBLIC (dispatch_decl) = 0;
24648
24649 /* Build result decl and add to function_decl. */
24650 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
24651 DECL_CONTEXT (t) = decl;
24652 DECL_ARTIFICIAL (t) = 1;
24653 DECL_IGNORED_P (t) = 1;
24654 DECL_RESULT (decl) = t;
24655
24656 gimplify_function_tree (decl);
24657 push_cfun (DECL_STRUCT_FUNCTION (decl));
24658 *empty_bb = init_lowered_empty_function (decl, false,
24659 profile_count::uninitialized ());
24660
24661 cgraph_node::add_new_function (decl, true);
24662 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
24663
24664 pop_cfun ();
24665
24666 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
24667 DECL_ATTRIBUTES (dispatch_decl)
24668 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
24669
24670 cgraph_node::create_same_body_alias (dispatch_decl, decl);
24671
24672 return decl;
24673 }
24674
24675 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
24676 return a pointer to VERSION_DECL if we are running on a machine that
24677 supports the hardware architecture bits indexed by CLONE_ISA. This function will
24678 be called during version dispatch to decide which function version to
24679 execute. It returns the basic block at the end, to which more conditions
24680 can be added. */
24681
24682 static basic_block
24683 add_condition_to_bb (tree function_decl, tree version_decl,
24684 int clone_isa, basic_block new_bb)
24685 {
24686 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
24687
24688 gcc_assert (new_bb != NULL);
24689 gimple_seq gseq = bb_seq (new_bb);
24690
24692 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
24693 build_fold_addr_expr (version_decl));
24694 tree result_var = create_tmp_var (ptr_type_node);
24695 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
24696 gimple *return_stmt = gimple_build_return (result_var);
24697
24698 if (clone_isa == CLONE_DEFAULT)
24699 {
24700 gimple_seq_add_stmt (&gseq, convert_stmt);
24701 gimple_seq_add_stmt (&gseq, return_stmt);
24702 set_bb_seq (new_bb, gseq);
24703 gimple_set_bb (convert_stmt, new_bb);
24704 gimple_set_bb (return_stmt, new_bb);
24705 pop_cfun ();
24706 return new_bb;
24707 }
24708
24709 tree bool_zero = build_int_cst (bool_int_type_node, 0);
24710 tree cond_var = create_tmp_var (bool_int_type_node);
24711 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
24712 const char *arg_str = rs6000_clone_map[clone_isa].name;
24713 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24714 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24715 gimple_call_set_lhs (call_cond_stmt, cond_var);
24716
24717 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
24718 gimple_set_bb (call_cond_stmt, new_bb);
24719 gimple_seq_add_stmt (&gseq, call_cond_stmt);
24720
24721 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
24722 NULL_TREE, NULL_TREE);
24723 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
24724 gimple_set_bb (if_else_stmt, new_bb);
24725 gimple_seq_add_stmt (&gseq, if_else_stmt);
24726
24727 gimple_seq_add_stmt (&gseq, convert_stmt);
24728 gimple_seq_add_stmt (&gseq, return_stmt);
24729 set_bb_seq (new_bb, gseq);
24730
24731 basic_block bb1 = new_bb;
24732 edge e12 = split_block (bb1, if_else_stmt);
24733 basic_block bb2 = e12->dest;
24734 e12->flags &= ~EDGE_FALLTHRU;
24735 e12->flags |= EDGE_TRUE_VALUE;
24736
24737 edge e23 = split_block (bb2, return_stmt);
24738 gimple_set_bb (convert_stmt, bb2);
24739 gimple_set_bb (return_stmt, bb2);
24740
24741 basic_block bb3 = e23->dest;
24742 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24743
24744 remove_edge (e23);
24745 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24746
24747 pop_cfun ();
24748 return bb3;
24749 }
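
/* The resolver built from these blocks behaves roughly like the
   following pseudo-C (illustrative; the ISA names come from
   rs6000_clone_map):

     if (__builtin_cpu_supports ("<isa-name>"))
       return (void *) f_highest_version;
     ...
     return (void *) f_default;
*/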
24750
24751 /* This function generates the dispatch function for multi-versioned functions.
24752 DISPATCH_DECL is the function which will contain the dispatch logic.
24753 FNDECLS_P points to the vector of function choices for dispatch.
24754 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24755 code is generated. */
24756
24757 static int
24758 dispatch_function_versions (tree dispatch_decl,
24759 void *fndecls_p,
24760 basic_block *empty_bb)
24761 {
24762 int ix;
24763 tree ele;
24764 vec<tree> *fndecls;
24765 tree clones[CLONE_MAX];
24766
24767 if (TARGET_DEBUG_TARGET)
24768 fputs ("dispatch_function_versions, top\n", stderr);
24769
24770 gcc_assert (dispatch_decl != NULL
24771 && fndecls_p != NULL
24772 && empty_bb != NULL);
24773
24774 /* fndecls_p is actually a vector. */
24775 fndecls = static_cast<vec<tree> *> (fndecls_p);
24776
24777 /* At least one more version other than the default. */
24778 gcc_assert (fndecls->length () >= 2);
24779
24780 /* The first version in the vector is the default decl. */
24781 memset ((void *) clones, '\0', sizeof (clones));
24782 clones[CLONE_DEFAULT] = (*fndecls)[0];
24783
24784 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24785 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
24786 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
24787 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24788 to insert the code here to do the call. */
24789
24790 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24791 {
24792 int priority = rs6000_clone_priority (ele);
24793 if (!clones[priority])
24794 clones[priority] = ele;
24795 }
24796
24797 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24798 if (clones[ix])
24799 {
24800 if (TARGET_DEBUG_TARGET)
24801 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24802 ix, get_decl_name (clones[ix]));
24803
24804 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24805 *empty_bb);
24806 }
24807
24808 return 0;
24809 }
24810
24811 /* Generate the dispatching code body to dispatch multi-versioned function
24812 DECL. The target hook is called to process the "target" attributes and
24813 provide the code to dispatch the right function at run-time. NODE points
24814 to the dispatcher decl whose body will be created. */
24815
24816 static tree
24817 rs6000_generate_version_dispatcher_body (void *node_p)
24818 {
24819 tree resolver;
24820 basic_block empty_bb;
24821 struct cgraph_node *node = (cgraph_node *) node_p;
24822 struct cgraph_function_version_info *ninfo = node->function_version ();
24823
24824 if (ninfo->dispatcher_resolver)
24825 return ninfo->dispatcher_resolver;
24826
24827 /* node is going to be an alias, so remove the finalized bit. */
24828 node->definition = false;
24829
24830 /* The first version in the chain corresponds to the default version. */
24831 ninfo->dispatcher_resolver = resolver
24832 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24833
24834 if (TARGET_DEBUG_TARGET)
24835 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
24836 get_decl_name (resolver));
24837
24838 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24839 auto_vec<tree, 2> fn_ver_vec;
24840
24841 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24842 vinfo;
24843 vinfo = vinfo->next)
24844 {
24845 struct cgraph_node *version = vinfo->this_node;
24846 /* Check for virtual functions here again, as by this time it should
24847 have been determined if this function needs a vtable index or
24848 not. This happens for methods in derived classes that override
24849 virtual methods in base classes but are not explicitly marked as
24850 virtual. */
24851 if (DECL_VINDEX (version->decl))
24852 sorry ("Virtual function multiversioning not supported");
24853
24854 fn_ver_vec.safe_push (version->decl);
24855 }
24856
24857 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24858 cgraph_edge::rebuild_edges ();
24859 pop_cfun ();
24860 return resolver;
24861 }
24862
24863 \f
24864 /* Hook to determine if one function can safely inline another. */
24865
24866 static bool
24867 rs6000_can_inline_p (tree caller, tree callee)
24868 {
24869 bool ret = false;
24870 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24871 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24872
24873 /* If the callee has no option attributes, then it is ok to inline. */
24874 if (!callee_tree)
24875 ret = true;
24876
24877 else
24878 {
24879 HOST_WIDE_INT caller_isa;
24880 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24881 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24882 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24883
24884 /* If the caller has option attributes, then use them.
24885 Otherwise, use the command line options. */
24886 if (caller_tree)
24887 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24888 else
24889 caller_isa = rs6000_isa_flags;
24890
24891 /* The callee's options must be a subset of the caller's options, i.e.
24892 a vsx function may inline an altivec function, but a no-vsx function
24893 must not inline a vsx function. However, for those options that the
24894 callee has explicitly enabled or disabled, then we must enforce that
24895 the callee's and caller's options match exactly; see PR70010. */
24896 if (((caller_isa & callee_isa) == callee_isa)
24897 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24898 ret = true;
24899 }
24900
24901 if (TARGET_DEBUG_TARGET)
24902 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
24903 get_decl_name (caller), get_decl_name (callee),
24904 (ret ? "can" : "cannot"));
24905
24906 return ret;
24907 }
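
/* Examples (illustrative): a caller built with -mvsx may inline a
   callee marked target("altivec"), because the callee's ISA flags
   are a subset of the caller's; but a callee marked
   target("no-vsx") cannot be inlined into a VSX caller, since the
   explicitly-set VSX bit no longer matches on both sides.  */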
24908 \f
24909 /* Allocate a stack temp and fixup the address so it meets the particular
24910 memory requirements (either offsettable or REG+REG addressing). */
24911
24912 rtx
24913 rs6000_allocate_stack_temp (machine_mode mode,
24914 bool offsettable_p,
24915 bool reg_reg_p)
24916 {
24917 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24918 rtx addr = XEXP (stack, 0);
24919 int strict_p = reload_completed;
24920
24921 if (!legitimate_indirect_address_p (addr, strict_p))
24922 {
24923 if (offsettable_p
24924 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24925 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24926
24927 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24928 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24929 }
24930
24931 return stack;
24932 }
24933
24934 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24935 convert to such a form to deal with memory reference instructions
24936 like STFIWX and LDBRX that only take reg+reg addressing. */
24937
24938 rtx
24939 rs6000_force_indexed_or_indirect_mem (rtx x)
24940 {
24941 machine_mode mode = GET_MODE (x);
24942
24943 gcc_assert (MEM_P (x));
24944 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24945 {
24946 rtx addr = XEXP (x, 0);
24947 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24948 {
24949 rtx reg = XEXP (addr, 0);
24950 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24951 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24952 gcc_assert (REG_P (reg));
24953 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24954 addr = reg;
24955 }
24956 else if (GET_CODE (addr) == PRE_MODIFY)
24957 {
24958 rtx reg = XEXP (addr, 0);
24959 rtx expr = XEXP (addr, 1);
24960 gcc_assert (REG_P (reg));
24961 gcc_assert (GET_CODE (expr) == PLUS);
24962 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24963 addr = reg;
24964 }
24965
24966 if (GET_CODE (addr) == PLUS)
24967 {
24968 rtx op0 = XEXP (addr, 0);
24969 rtx op1 = XEXP (addr, 1);
24970 op0 = force_reg (Pmode, op0);
24971 op1 = force_reg (Pmode, op1);
24972 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24973 }
24974 else
24975 x = replace_equiv_address (x, force_reg (Pmode, addr));
24976 }
24977
24978 return x;
24979 }
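
/* Example transformation (illustrative): a reference such as
   (mem (plus (reg sp) (const_int 4096))) has both PLUS operands
   forced into registers, producing the reg+reg form that STFIWX and
   LDBRX require; a PRE_INC/PRE_DEC address is first lowered to an
   explicit add to the base register.  */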
24980
24981 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24982
24983 On the RS/6000, all integer constants are acceptable, most won't be valid
24984 for particular insns, though. Only easy FP constants are acceptable. */
24985
24986 static bool
24987 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24988 {
24989 if (TARGET_ELF && tls_referenced_p (x))
24990 return false;
24991
24992 if (CONST_DOUBLE_P (x))
24993 return easy_fp_constant (x, mode);
24994
24995 if (GET_CODE (x) == CONST_VECTOR)
24996 return easy_vector_constant (x, mode);
24997
24998 return true;
24999 }
25000
25001 \f
25002 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25003
25004 static bool
25005 chain_already_loaded (rtx_insn *last)
25006 {
25007 for (; last != NULL; last = PREV_INSN (last))
25008 {
25009 if (NONJUMP_INSN_P (last))
25010 {
25011 rtx patt = PATTERN (last);
25012
25013 if (GET_CODE (patt) == SET)
25014 {
25015 rtx lhs = XEXP (patt, 0);
25016
25017 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25018 return true;
25019 }
25020 }
25021 }
25022 return false;
25023 }
25024
25025 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25026
25027 void
25028 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25029 {
25030 rtx func = func_desc;
25031 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25032 rtx toc_load = NULL_RTX;
25033 rtx toc_restore = NULL_RTX;
25034 rtx func_addr;
25035 rtx abi_reg = NULL_RTX;
25036 rtx call[5];
25037 int n_call;
25038 rtx insn;
25039 bool is_pltseq_longcall;
25040
25041 if (global_tlsarg)
25042 tlsarg = global_tlsarg;
25043
25044 /* Handle longcall attributes. */
25045 is_pltseq_longcall = false;
25046 if ((INTVAL (cookie) & CALL_LONG) != 0
25047 && GET_CODE (func_desc) == SYMBOL_REF)
25048 {
25049 func = rs6000_longcall_ref (func_desc, tlsarg);
25050 if (TARGET_PLTSEQ)
25051 is_pltseq_longcall = true;
25052 }
25053
25054 /* Handle indirect calls. */
25055 if (!SYMBOL_REF_P (func)
25056 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25057 {
25058 if (!rs6000_pcrel_p ())
25059 {
25060 /* Save the TOC into its reserved slot before the call,
25061 and prepare to restore it after the call. */
25062 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25063 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25064 gen_rtvec (1, stack_toc_offset),
25065 UNSPEC_TOCSLOT);
25066 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25067
25068 /* Can we optimize saving the TOC in the prologue or
25069 do we need to do it at every call? */
25070 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25071 cfun->machine->save_toc_in_prologue = true;
25072 else
25073 {
25074 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25075 rtx stack_toc_mem = gen_frame_mem (Pmode,
25076 gen_rtx_PLUS (Pmode, stack_ptr,
25077 stack_toc_offset));
25078 MEM_VOLATILE_P (stack_toc_mem) = 1;
25079 if (is_pltseq_longcall)
25080 {
25081 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25082 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25083 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25084 }
25085 else
25086 emit_move_insn (stack_toc_mem, toc_reg);
25087 }
25088 }
25089
25090 if (DEFAULT_ABI == ABI_ELFv2)
25091 {
25092 /* A function pointer in the ELFv2 ABI is just a plain address, but
25093 the ABI requires it to be loaded into r12 before the call. */
25094 func_addr = gen_rtx_REG (Pmode, 12);
25095 emit_move_insn (func_addr, func);
25096 abi_reg = func_addr;
25097 /* Indirect calls via CTR are strongly preferred over indirect
25098 calls via LR, so move the address there. Needed to mark
25099 this insn for linker plt sequence editing too. */
25100 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25101 if (is_pltseq_longcall)
25102 {
25103 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25104 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25105 emit_insn (gen_rtx_SET (func_addr, mark_func));
25106 v = gen_rtvec (2, func_addr, func_desc);
25107 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25108 }
25109 else
25110 emit_move_insn (func_addr, abi_reg);
25111 }
25112 else
25113 {
25114 /* A function pointer under AIX is a pointer to a data area whose
25115 first word contains the actual address of the function, whose
25116 second word contains a pointer to its TOC, and whose third word
25117 contains a value to place in the static chain register (r11).
25118 Note that if we load the static chain, our "trampoline" need
25119 not have any executable code. */
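
/* For illustration only (a sketch of the layout described above, not a
declaration GCC actually uses), the descriptor can be pictured as:

struct aix_func_desc
{
void *entry; word 0: address of the function's code
void *toc; word 1: the function's TOC pointer
void *static_chain; word 2: value to load into r11
}; */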
25120
25121 /* Load up address of the actual function. */
25122 func = force_reg (Pmode, func);
25123 func_addr = gen_reg_rtx (Pmode);
25124 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25125
25126 /* Indirect calls via CTR are strongly preferred over indirect
25127 calls via LR, so move the address there. */
25128 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25129 emit_move_insn (ctr_reg, func_addr);
25130 func_addr = ctr_reg;
25131
25132 /* Prepare to load the TOC of the called function. Note that the
25133 TOC load must happen immediately before the actual call so
25134 that unwinding the TOC registers works correctly. See the
25135 comment in frob_update_context. */
25136 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25137 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25138 gen_rtx_PLUS (Pmode, func,
25139 func_toc_offset));
25140 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25141
25142 /* If we have a static chain, load it up. But, if the call was
25143 originally direct, the 3rd word has not been written since no
25144 trampoline has been built, so we ought not to load it, lest we
25145 overwrite a static chain value. */
25146 if (!(GET_CODE (func_desc) == SYMBOL_REF
25147 && SYMBOL_REF_FUNCTION_P (func_desc))
25148 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25149 && !chain_already_loaded (get_current_sequence ()->next->last))
25150 {
25151 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25152 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25153 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25154 gen_rtx_PLUS (Pmode, func,
25155 func_sc_offset));
25156 emit_move_insn (sc_reg, func_sc_mem);
25157 abi_reg = sc_reg;
25158 }
25159 }
25160 }
25161 else
25162 {
25163 /* No TOC register needed for calls from PC-relative callers. */
25164 if (!rs6000_pcrel_p ())
25165 /* Direct calls use the TOC: for local calls, the callee will
25166 assume the TOC register is set; for non-local calls, the
25167 PLT stub needs the TOC register. */
25168 abi_reg = toc_reg;
25169 func_addr = func;
25170 }
25171
25172 /* Create the call. */
25173 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25174 if (value != NULL_RTX)
25175 call[0] = gen_rtx_SET (value, call[0]);
25176 call[1] = gen_rtx_USE (VOIDmode, cookie);
25177 n_call = 2;
25178
25179 if (toc_load)
25180 call[n_call++] = toc_load;
25181 if (toc_restore)
25182 call[n_call++] = toc_restore;
25183
25184 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25185
25186 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25187 insn = emit_call_insn (insn);
25188
25189 /* Mention all registers defined by the ABI to hold information
25190 as uses in CALL_INSN_FUNCTION_USAGE. */
25191 if (abi_reg)
25192 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25193 }
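
/* For illustration (a hedged sketch, not literal compiler output), an
ELFv2 indirect call through a function pointer held in, say, r9 would
expand along the lines of:

std r2,24(r1) save our TOC in its reserved stack slot
mr r12,r9 ELFv2 wants the function address in r12
mtctr r12 indirect calls go via CTR
bctrl
ld r2,24(r1) restore our TOC after the call

The exact save slot offset (RS6000_TOC_SAVE_SLOT) and whether the save
happens here or in the prologue depend on the ABI and
TARGET_SAVE_TOC_INDIRECT; see rs6000_call_aix above. */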
25194
25195 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25196
25197 void
25198 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25199 {
25200 rtx call[2];
25201 rtx insn;
25202 rtx r12 = NULL_RTX;
25203 rtx func_addr = func_desc;
25204
25205 gcc_assert (INTVAL (cookie) == 0);
25206
25207 if (global_tlsarg)
25208 tlsarg = global_tlsarg;
25209
25210 /* For ELFv2, r12 and CTR need to hold the function address
25211 for an indirect call. */
25212 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25213 {
25214 r12 = gen_rtx_REG (Pmode, 12);
25215 emit_move_insn (r12, func_desc);
25216 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25217 emit_move_insn (func_addr, r12);
25218 }
25219
25220 /* Create the call. */
25221 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25222 if (value != NULL_RTX)
25223 call[0] = gen_rtx_SET (value, call[0]);
25224
25225 call[1] = simple_return_rtx;
25226
25227 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25228 insn = emit_call_insn (insn);
25229
25230 /* Note use of the TOC register. */
25231 if (!rs6000_pcrel_p ())
25232 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25233 gen_rtx_REG (Pmode, TOC_REGNUM));
25234
25235 /* Note use of r12. */
25236 if (r12)
25237 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25238 }
25239
25240 /* Expand code to perform a call under the SYSV4 ABI. */
25241
25242 void
25243 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25244 {
25245 rtx func = func_desc;
25246 rtx func_addr;
25247 rtx call[4];
25248 rtx insn;
25249 rtx abi_reg = NULL_RTX;
25250 int n;
25251
25252 if (global_tlsarg)
25253 tlsarg = global_tlsarg;
25254
25255 /* Handle longcall attributes. */
25256 if ((INTVAL (cookie) & CALL_LONG) != 0
25257 && GET_CODE (func_desc) == SYMBOL_REF)
25258 {
25259 func = rs6000_longcall_ref (func_desc, tlsarg);
25260 /* If the longcall was implemented as an inline PLT call using
25261 PLT unspecs then func will be REG:r11. If not, func will be
25262 a pseudo reg. The inline PLT call sequence supports lazy
25263 linking (and longcalls to functions in dlopen'd libraries).
25264 The other style of longcall doesn't. The lazy linking entry
25265 to the dynamic symbol resolver requires r11 be the function
25266 address (as it is for linker generated PLT stubs). Ensure
25267 r11 stays valid to the bctrl by marking r11 used by the call. */
25268 if (TARGET_PLTSEQ)
25269 abi_reg = func;
25270 }
25271
25272 /* Handle indirect calls. */
25273 if (GET_CODE (func) != SYMBOL_REF)
25274 {
25275 func = force_reg (Pmode, func);
25276
25277 /* Indirect calls via CTR are strongly preferred over indirect
25278 calls via LR, so move the address there. That can't be left
25279 to reload because we want to mark every instruction in an
25280 inline PLT call sequence with a reloc, enabling the linker to
25281 edit the sequence back to a direct call when that makes sense. */
25282 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25283 if (abi_reg)
25284 {
25285 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25286 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25287 emit_insn (gen_rtx_SET (func_addr, mark_func));
25288 v = gen_rtvec (2, func_addr, func_desc);
25289 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25290 }
25291 else
25292 emit_move_insn (func_addr, func);
25293 }
25294 else
25295 func_addr = func;
25296
25297 /* Create the call. */
25298 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25299 if (value != NULL_RTX)
25300 call[0] = gen_rtx_SET (value, call[0]);
25301
25302 call[1] = gen_rtx_USE (VOIDmode, cookie);
25303 n = 2;
25304 if (TARGET_SECURE_PLT
25305 && flag_pic
25306 && GET_CODE (func_addr) == SYMBOL_REF
25307 && !SYMBOL_REF_LOCAL_P (func_addr))
25308 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25309
25310 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25311
25312 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25313 insn = emit_call_insn (insn);
25314 if (abi_reg)
25315 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25316 }
25317
25318 /* Expand code to perform a sibling call under the SysV4 ABI. */
25319
25320 void
25321 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25322 {
25323 rtx func = func_desc;
25324 rtx func_addr;
25325 rtx call[3];
25326 rtx insn;
25327 rtx abi_reg = NULL_RTX;
25328
25329 if (global_tlsarg)
25330 tlsarg = global_tlsarg;
25331
25332 /* Handle longcall attributes. */
25333 if ((INTVAL (cookie) & CALL_LONG) != 0
25334 && GET_CODE (func_desc) == SYMBOL_REF)
25335 {
25336 func = rs6000_longcall_ref (func_desc, tlsarg);
25337 /* If the longcall was implemented as an inline PLT call using
25338 PLT unspecs then func will be REG:r11. If not, func will be
25339 a pseudo reg. The inline PLT call sequence supports lazy
25340 linking (and longcalls to functions in dlopen'd libraries).
25341 The other style of longcall doesn't. The lazy linking entry
25342 to the dynamic symbol resolver requires r11 be the function
25343 address (as it is for linker generated PLT stubs). Ensure
25344 r11 stays valid to the bctr by marking r11 used by the call. */
25345 if (TARGET_PLTSEQ)
25346 abi_reg = func;
25347 }
25348
25349 /* Handle indirect calls. */
25350 if (GET_CODE (func) != SYMBOL_REF)
25351 {
25352 func = force_reg (Pmode, func);
25353
25354 /* Indirect sibcalls must go via CTR. That can't be left to
25355 reload because we want to mark every instruction in an inline
25356 PLT call sequence with a reloc, enabling the linker to edit
25357 the sequence back to a direct call when that makes sense. */
25358 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25359 if (abi_reg)
25360 {
25361 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25362 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25363 emit_insn (gen_rtx_SET (func_addr, mark_func));
25364 v = gen_rtvec (2, func_addr, func_desc);
25365 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25366 }
25367 else
25368 emit_move_insn (func_addr, func);
25369 }
25370 else
25371 func_addr = func;
25372
25373 /* Create the call. */
25374 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25375 if (value != NULL_RTX)
25376 call[0] = gen_rtx_SET (value, call[0]);
25377
25378 call[1] = gen_rtx_USE (VOIDmode, cookie);
25379 call[2] = simple_return_rtx;
25380
25381 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25382 insn = emit_call_insn (insn);
25383 if (abi_reg)
25384 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25385 }
25386
25387 #if TARGET_MACHO
25388
25389 /* Expand code to perform a call under the Darwin ABI.
25390 Modulo handling of mlongcall, this is much the same as sysv.
25391 If/when the longcall optimisation is removed, we could drop this
25392 code and use the sysv case (taking care to avoid the TLS handling).
25393
25394 We can use this for sibcalls too, if needed. */
25395
25396 void
25397 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25398 rtx cookie, bool sibcall)
25399 {
25400 rtx func = func_desc;
25401 rtx func_addr;
25402 rtx call[3];
25403 rtx insn;
25404 int cookie_val = INTVAL (cookie);
25405 bool make_island = false;
25406
25407 /* Handle longcall attributes; there are two cases for Darwin:
25408 1) Newer linkers are capable of synthesising any branch islands needed.
25409 2) We need a helper branch island synthesised by the compiler.
25410 The second case has mostly been retired and we don't use it for m64.
25411 In fact, it is only an optimisation; we could just make an indirect
25412 call as sysv does, but we keep it for backwards compatibility for now.
25413 If we're going to use this, then we need to keep the CALL_LONG bit set,
25414 so that we can pick up the special insn form later. */
25415 if ((cookie_val & CALL_LONG) != 0
25416 && GET_CODE (func_desc) == SYMBOL_REF)
25417 {
25418 /* FIXME: the longcall opt should not hang off this flag, it is most
25419 likely incorrect for kernel-mode code-generation. */
25420 if (darwin_symbol_stubs && TARGET_32BIT)
25421 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25422 else
25423 {
25424 /* The linker is capable of doing this, but the user explicitly
25425 asked for -mlongcall, so we'll do the 'normal' version. */
25426 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25427 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25428 }
25429 }
25430
25431 /* Handle indirect calls. */
25432 if (GET_CODE (func) != SYMBOL_REF)
25433 {
25434 func = force_reg (Pmode, func);
25435
25436 /* Indirect calls via CTR are strongly preferred over indirect
25437 calls via LR, and are required for indirect sibcalls, so move
25438 the address there. */
25439 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25440 emit_move_insn (func_addr, func);
25441 }
25442 else
25443 func_addr = func;
25444
25445 /* Create the call. */
25446 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25447 if (value != NULL_RTX)
25448 call[0] = gen_rtx_SET (value, call[0]);
25449
25450 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25451
25452 if (sibcall)
25453 call[2] = simple_return_rtx;
25454 else
25455 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25456
25457 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25458 insn = emit_call_insn (insn);
25459 /* Now we have the debug info in the insn, we can set up the branch island
25460 if we're using one. */
25461 if (make_island)
25462 {
25463 tree funname = get_identifier (XSTR (func_desc, 0));
25464
25465 if (no_previous_def (funname))
25466 {
25467 rtx label_rtx = gen_label_rtx ();
25468 char *label_buf, temp_buf[256];
25469 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25470 CODE_LABEL_NUMBER (label_rtx));
25471 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25472 tree labelname = get_identifier (label_buf);
25473 add_compiler_branch_island (labelname, funname,
25474 insn_line ((const rtx_insn*)insn));
25475 }
25476 }
25477 }
25478 #endif
25479
25480 void
25481 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25482 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25483 {
25484 #if TARGET_MACHO
25485 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25486 #else
25487 gcc_unreachable();
25488 #endif
25489 }
25490
25491
25492 void
25493 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25494 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25495 {
25496 #if TARGET_MACHO
25497 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25498 #else
25499 gcc_unreachable();
25500 #endif
25501 }
25502
25503 /* Return whether we should generate PC-relative code for FNDECL. */
25504 bool
25505 rs6000_fndecl_pcrel_p (const_tree fndecl)
25506 {
25507 if (DEFAULT_ABI != ABI_ELFv2)
25508 return false;
25509
25510 struct cl_target_option *opts = target_opts_for_fn (fndecl);
25511
25512 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25513 && TARGET_CMODEL == CMODEL_MEDIUM);
25514 }
25515
25516 /* Return whether we should generate PC-relative code for *FN. */
25517 bool
25518 rs6000_function_pcrel_p (struct function *fn)
25519 {
25520 if (DEFAULT_ABI != ABI_ELFv2)
25521 return false;
25522
25523 /* Optimize usual case. */
25524 if (fn == cfun)
25525 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25526 && TARGET_CMODEL == CMODEL_MEDIUM);
25527
25528 return rs6000_fndecl_pcrel_p (fn->decl);
25529 }
25530
25531 /* Return whether we should generate PC-relative code for the current
25532 function. */
25533 bool
25534 rs6000_pcrel_p ()
25535 {
25536 return (DEFAULT_ABI == ABI_ELFv2
25537 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25538 && TARGET_CMODEL == CMODEL_MEDIUM);
25539 }
25540
25541 \f
25542 /* Given an address (ADDR), a mode (MODE), and what the format of the
25543 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
25544 for the address. */
25545
25546 enum insn_form
25547 address_to_insn_form (rtx addr,
25548 machine_mode mode,
25549 enum non_prefixed_form non_prefixed_format)
25550 {
25551 /* Single register is easy. */
25552 if (REG_P (addr) || SUBREG_P (addr))
25553 return INSN_FORM_BASE_REG;
25554
25555 /* If the non-prefixed instruction format doesn't support offset addressing,
25556 make sure only indexed addressing is allowed.
25557
25558 We special case SDmode so that the register allocator does not try to move
25559 SDmode through GPR registers, but instead uses the 32-bit integer load and
25560 store instructions for the floating point registers. */
25561 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
25562 {
25563 if (GET_CODE (addr) != PLUS)
25564 return INSN_FORM_BAD;
25565
25566 rtx op0 = XEXP (addr, 0);
25567 rtx op1 = XEXP (addr, 1);
25568 if (!REG_P (op0) && !SUBREG_P (op0))
25569 return INSN_FORM_BAD;
25570
25571 if (!REG_P (op1) && !SUBREG_P (op1))
25572 return INSN_FORM_BAD;
25573
25574 return INSN_FORM_X;
25575 }
25576
25577 /* Deal with update forms. */
25578 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
25579 return INSN_FORM_UPDATE;
25580
25581 /* Handle PC-relative symbols and labels. Check for both local and
25582 external symbols. Assume labels are always local. TLS symbols
25583 are not PC-relative for rs6000. */
25584 if (TARGET_PCREL)
25585 {
25586 if (LABEL_REF_P (addr))
25587 return INSN_FORM_PCREL_LOCAL;
25588
25589 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
25590 {
25591 if (!SYMBOL_REF_LOCAL_P (addr))
25592 return INSN_FORM_PCREL_EXTERNAL;
25593 else
25594 return INSN_FORM_PCREL_LOCAL;
25595 }
25596 }
25597
25598 if (GET_CODE (addr) == CONST)
25599 addr = XEXP (addr, 0);
25600
25601 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
25602 if (GET_CODE (addr) == LO_SUM)
25603 return INSN_FORM_LO_SUM;
25604
25605 /* Everything below must be an offset address of some form. */
25606 if (GET_CODE (addr) != PLUS)
25607 return INSN_FORM_BAD;
25608
25609 rtx op0 = XEXP (addr, 0);
25610 rtx op1 = XEXP (addr, 1);
25611
25612 /* Check for indexed addresses. */
25613 if (REG_P (op1) || SUBREG_P (op1))
25614 {
25615 if (REG_P (op0) || SUBREG_P (op0))
25616 return INSN_FORM_X;
25617
25618 return INSN_FORM_BAD;
25619 }
25620
25621 if (!CONST_INT_P (op1))
25622 return INSN_FORM_BAD;
25623
25624 HOST_WIDE_INT offset = INTVAL (op1);
25625 if (!SIGNED_INTEGER_34BIT_P (offset))
25626 return INSN_FORM_BAD;
25627
25628 /* Check for local and external PC-relative addresses. Labels are always
25629 local. TLS symbols are not PC-relative for rs6000. */
25630 if (TARGET_PCREL)
25631 {
25632 if (LABEL_REF_P (op0))
25633 return INSN_FORM_PCREL_LOCAL;
25634
25635 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
25636 {
25637 if (!SYMBOL_REF_LOCAL_P (op0))
25638 return INSN_FORM_PCREL_EXTERNAL;
25639 else
25640 return INSN_FORM_PCREL_LOCAL;
25641 }
25642 }
25643
25644 /* If it isn't PC-relative, the address must use a base register. */
25645 if (!REG_P (op0) && !SUBREG_P (op0))
25646 return INSN_FORM_BAD;
25647
25648 /* Large offsets must be prefixed. */
25649 if (!SIGNED_INTEGER_16BIT_P (offset))
25650 {
25651 if (TARGET_PREFIXED)
25652 return INSN_FORM_PREFIXED_NUMERIC;
25653
25654 return INSN_FORM_BAD;
25655 }
25656
25657 /* We have a 16-bit offset, see what default instruction format to use. */
25658 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
25659 {
25660 unsigned size = GET_MODE_SIZE (mode);
25661
25662 /* On 64-bit systems, assume 64-bit integers need to use DS form
25663 addresses (for LD/STD). VSX vectors need to use DQ form addresses
25664 (for LXV and STXV). TImode is problematic in that its normal usage
25665 is expected to be GPRs where it wants a DS instruction format, but if
25666 it goes into the vector registers, it wants a DQ instruction
25667 format. */
25668 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
25669 non_prefixed_format = NON_PREFIXED_DS;
25670
25671 else if (TARGET_VSX && size >= 16
25672 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
25673 non_prefixed_format = NON_PREFIXED_DQ;
25674
25675 else
25676 non_prefixed_format = NON_PREFIXED_D;
25677 }
25678
25679 /* Classify the D/DS/DQ-form addresses. */
25680 switch (non_prefixed_format)
25681 {
25682 /* Instruction format D, all 16 bits are valid. */
25683 case NON_PREFIXED_D:
25684 return INSN_FORM_D;
25685
25686 /* Instruction format DS, bottom 2 bits must be 0. */
25687 case NON_PREFIXED_DS:
25688 if ((offset & 3) == 0)
25689 return INSN_FORM_DS;
25690
25691 else if (TARGET_PREFIXED)
25692 return INSN_FORM_PREFIXED_NUMERIC;
25693
25694 else
25695 return INSN_FORM_BAD;
25696
25697 /* Instruction format DQ, bottom 4 bits must be 0. */
25698 case NON_PREFIXED_DQ:
25699 if ((offset & 15) == 0)
25700 return INSN_FORM_DQ;
25701
25702 else if (TARGET_PREFIXED)
25703 return INSN_FORM_PREFIXED_NUMERIC;
25704
25705 else
25706 return INSN_FORM_BAD;
25707
25708 default:
25709 break;
25710 }
25711
25712 return INSN_FORM_BAD;
25713 }
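
/* Worked examples (illustrative only; the exact outcome also depends on
the target flags assumed here):

(plus (reg) (const_int 8)), DImode, 64-bit
-> defaults to DS form; (8 & 3) == 0, so INSN_FORM_DS.

(plus (reg) (const_int 10)), DImode, 64-bit
-> (10 & 3) != 0, so INSN_FORM_PREFIXED_NUMERIC with -mprefixed,
otherwise INSN_FORM_BAD.

(plus (reg) (const_int 100000)), SImode
-> not a signed 16-bit offset, so INSN_FORM_PREFIXED_NUMERIC with
-mprefixed, otherwise INSN_FORM_BAD. */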
25714
25715 /* Helper function to see if we're potentially looking at lfs/stfs.
25716 - PARALLEL containing a SET and a CLOBBER
25717 - stfs:
25718 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
25719 - CLOBBER is a V4SF
25720 - lfs:
25721 - SET is from UNSPEC_SF_FROM_SI to REG:SF
25722 - CLOBBER is a DI
25723 */
25724
25725 static bool
25726 is_lfs_stfs_insn (rtx_insn *insn)
25727 {
25728 rtx pattern = PATTERN (insn);
25729 if (GET_CODE (pattern) != PARALLEL)
25730 return false;
25731
25732 /* This should be a parallel with exactly one set and one clobber. */
25733 if (XVECLEN (pattern, 0) != 2)
25734 return false;
25735
25736 rtx set = XVECEXP (pattern, 0, 0);
25737 if (GET_CODE (set) != SET)
25738 return false;
25739
25740 rtx clobber = XVECEXP (pattern, 0, 1);
25741 if (GET_CODE (clobber) != CLOBBER)
25742 return false;
25743
25744 /* All we care about is that the destination of the SET is a mem:SI,
25745 the source should be an UNSPEC_SI_FROM_SF, and the clobber
25746 should be a scratch:V4SF. */
25747
25748 rtx dest = SET_DEST (set);
25749 rtx src = SET_SRC (set);
25750 rtx scratch = SET_DEST (clobber);
25751
25752 if (GET_CODE (src) != UNSPEC)
25753 return false;
25754
25755 /* stfs case. */
25756 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
25757 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
25758 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
25759 return true;
25760
25761 /* lfs case. */
25762 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
25763 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
25764 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
25765 return true;
25766
25767 return false;
25768 }
25769
25770 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
25771 instruction format (D/DS/DQ) used for offset memory. */
25772
25773 static enum non_prefixed_form
25774 reg_to_non_prefixed (rtx reg, machine_mode mode)
25775 {
25776 /* If it isn't a register, use the defaults. */
25777 if (!REG_P (reg) && !SUBREG_P (reg))
25778 return NON_PREFIXED_DEFAULT;
25779
25780 unsigned int r = reg_or_subregno (reg);
25781
25782 /* If we have a pseudo, use the default instruction format. */
25783 if (!HARD_REGISTER_NUM_P (r))
25784 return NON_PREFIXED_DEFAULT;
25785
25786 unsigned size = GET_MODE_SIZE (mode);
25787
25788 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
25789 128-bit floating point, and 128-bit integers. Before power9, only indexed
25790 addressing was available for vectors. */
25791 if (FP_REGNO_P (r))
25792 {
25793 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25794 return NON_PREFIXED_D;
25795
25796 else if (size < 8)
25797 return NON_PREFIXED_X;
25798
25799 else if (TARGET_VSX && size >= 16
25800 && (VECTOR_MODE_P (mode)
25801 || VECTOR_ALIGNMENT_P (mode)
25802 || mode == TImode || mode == CTImode))
25803 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
25804
25805 else
25806 return NON_PREFIXED_DEFAULT;
25807 }
25808
25809 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
25810 128-bit floating point, and 128-bit integers. Before power9, only indexed
25811 addressing was available. */
25812 else if (ALTIVEC_REGNO_P (r))
25813 {
25814 if (!TARGET_P9_VECTOR)
25815 return NON_PREFIXED_X;
25816
25817 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25818 return NON_PREFIXED_DS;
25819
25820 else if (size < 8)
25821 return NON_PREFIXED_X;
25822
25823 else if (TARGET_VSX && size >= 16
25824 && (VECTOR_MODE_P (mode)
25825 || VECTOR_ALIGNMENT_P (mode)
25826 || mode == TImode || mode == CTImode))
25827 return NON_PREFIXED_DQ;
25828
25829 else
25830 return NON_PREFIXED_DEFAULT;
25831 }
25832
25833 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
25834 otherwise. Assume that any other register, such as LR, CRs, etc. will go
25835 through the GPR registers for memory operations. */
25836 else if (TARGET_POWERPC64 && size >= 8)
25837 return NON_PREFIXED_DS;
25838
25839 return NON_PREFIXED_D;
25840 }
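
/* Illustrative examples of the mapping above (assuming the usual
instruction choices for each register class):

DFmode in an FPR -> NON_PREFIXED_D (lfd/stfd)
DImode in an Altivec reg, power9 -> NON_PREFIXED_DS
V2DImode in a VSX reg, power9 -> NON_PREFIXED_DQ (lxv/stxv)
any mode in a pseudo register -> NON_PREFIXED_DEFAULT. */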
25841
25842 \f
25843 /* Whether a load instruction is a prefixed instruction. This is called from
25844 the prefixed attribute processing. */
25845
25846 bool
25847 prefixed_load_p (rtx_insn *insn)
25848 {
25849 /* Validate the insn to make sure it is a normal load insn. */
25850 extract_insn_cached (insn);
25851 if (recog_data.n_operands < 2)
25852 return false;
25853
25854 rtx reg = recog_data.operand[0];
25855 rtx mem = recog_data.operand[1];
25856
25857 if (!REG_P (reg) && !SUBREG_P (reg))
25858 return false;
25859
25860 if (!MEM_P (mem))
25861 return false;
25862
25863 /* Prefixed load instructions do not support update or indexed forms. */
25864 if (get_attr_indexed (insn) == INDEXED_YES
25865 || get_attr_update (insn) == UPDATE_YES)
25866 return false;
25867
25868 /* LWA uses the DS format instead of the D format that LWZ uses. */
25869 enum non_prefixed_form non_prefixed;
25870 machine_mode reg_mode = GET_MODE (reg);
25871 machine_mode mem_mode = GET_MODE (mem);
25872
25873 if (mem_mode == SImode && reg_mode == DImode
25874 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25875 non_prefixed = NON_PREFIXED_DS;
25876
25877 else
25878 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25879
25880 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25881 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
25882 else
25883 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25884 }
25885
25886 /* Whether a store instruction is a prefixed instruction. This is called from
25887 the prefixed attribute processing. */
25888
25889 bool
25890 prefixed_store_p (rtx_insn *insn)
25891 {
25892 /* Validate the insn to make sure it is a normal store insn. */
25893 extract_insn_cached (insn);
25894 if (recog_data.n_operands < 2)
25895 return false;
25896
25897 rtx mem = recog_data.operand[0];
25898 rtx reg = recog_data.operand[1];
25899
25900 if (!REG_P (reg) && !SUBREG_P (reg))
25901 return false;
25902
25903 if (!MEM_P (mem))
25904 return false;
25905
25906 /* Prefixed store instructions do not support update or indexed forms. */
25907 if (get_attr_indexed (insn) == INDEXED_YES
25908 || get_attr_update (insn) == UPDATE_YES)
25909 return false;
25910
25911 machine_mode mem_mode = GET_MODE (mem);
25912 rtx addr = XEXP (mem, 0);
25913 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25914
25915 /* Need to make sure we aren't looking at a stfs which doesn't look
25916 like the other things reg_to_non_prefixed/address_is_prefixed
25917 looks for. */
25918 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25919 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
25920 else
25921 return address_is_prefixed (addr, mem_mode, non_prefixed);
25922 }
25923
25924 /* Whether a load immediate or add instruction is a prefixed instruction. This
25925 is called from the prefixed attribute processing. */
25926
25927 bool
25928 prefixed_paddi_p (rtx_insn *insn)
25929 {
25930 rtx set = single_set (insn);
25931 if (!set)
25932 return false;
25933
25934 rtx dest = SET_DEST (set);
25935 rtx src = SET_SRC (set);
25936
25937 if (!REG_P (dest) && !SUBREG_P (dest))
25938 return false;
25939
25940 /* Is this a load immediate that can't be done with a simple ADDI or
25941 ADDIS? */
25942 if (CONST_INT_P (src))
25943 return (satisfies_constraint_eI (src)
25944 && !satisfies_constraint_I (src)
25945 && !satisfies_constraint_L (src));
25946
25947 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25948 ADDIS? */
25949 if (GET_CODE (src) == PLUS)
25950 {
25951 rtx op1 = XEXP (src, 1);
25952
25953 return (CONST_INT_P (op1)
25954 && satisfies_constraint_eI (op1)
25955 && !satisfies_constraint_I (op1)
25956 && !satisfies_constraint_L (op1));
25957 }
25958
25959 /* If not, is it a load of a PC-relative address? */
25960 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25961 return false;
25962
25963 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25964 return false;
25965
25966 enum insn_form iform = address_to_insn_form (src, Pmode,
25967 NON_PREFIXED_DEFAULT);
25968
25969 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25970 }
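
/* For example (a sketch, assuming the constraint meanings: I = signed
16-bit immediate, L = signed 16-bit immediate shifted left 16 bits,
eI = signed 34-bit immediate):

(set (reg:DI 3) (const_int 0x12345))

0x12345 fits in 34 bits but not in 16 bits, and it is not a shifted
16-bit value, so the constant needs PADDI rather than ADDI or ADDIS,
and prefixed_paddi_p returns true. */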
25971
25972 /* Whether the next instruction needs a 'p' prefix issued before the
25973 instruction is printed out. */
25974 static bool next_insn_prefixed_p;
25975
25976 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25977 outputting the assembler code. On the PowerPC, we remember if the current
25978 insn is a prefixed insn where we need to emit a 'p' before the insn.
25979
25980 In addition, if the insn is part of a PC-relative reference to an external
25981 label optimization, this is recorded also. */
25982 void
25983 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25984 {
25985 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25986 return;
25987 }
25988
25989 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25990 We use it to emit a 'p' for prefixed insns that is set in
25991 FINAL_PRESCAN_INSN. */
25992 void
25993 rs6000_asm_output_opcode (FILE *stream)
25994 {
25995 if (next_insn_prefixed_p)
25996 fprintf (stream, "p");
25997
25998 return;
25999 }
26000
26001 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26002 should be adjusted to reflect any required changes. This macro is used when
26003 there is some systematic length adjustment required that would be difficult
26004 to express in the length attribute.
26005
26006 On the PowerPC, we use this to adjust the length of an instruction if one or
26007 more prefixed instructions are generated, using the attribute
26008 max_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26009 hardware requires that a prefixed instruction does not cross a 64-byte
26010 boundary. This means the compiler has to assume the length of the first
26011 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26012 already set for the non-prefixed instruction, we just need to update for the
26013 difference. */
26014
26015 int
26016 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26017 {
26018 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26019 {
26020 rtx pattern = PATTERN (insn);
26021 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26022 && get_attr_prefixed (insn) == PREFIXED_YES)
26023 {
26024 int num_prefixed = get_attr_max_prefixed_insns (insn);
26025 length += 4 * (num_prefixed + 1);
26026 }
26027 }
26028
26029 return length;
26030 }
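
/* For example, an insn containing a single prefixed instruction has its
length attribute set to the non-prefixed size of 4 bytes. With
max_prefixed_insns equal to 1, the code above adds 4 * (1 + 1) = 8
bytes, giving the 12 bytes that must be assumed in case alignment
padding is needed to keep the prefixed instruction from crossing a
64-byte boundary. */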
26031
26032 \f
26033 #ifdef HAVE_GAS_HIDDEN
26034 # define USE_HIDDEN_LINKONCE 1
26035 #else
26036 # define USE_HIDDEN_LINKONCE 0
26037 #endif
26038
26039 /* Fills in the label name that should be used for a 476 link stack thunk. */
26040
26041 void
26042 get_ppc476_thunk_name (char name[32])
26043 {
26044 gcc_assert (TARGET_LINK_STACK);
26045
26046 if (USE_HIDDEN_LINKONCE)
26047 sprintf (name, "__ppc476.get_thunk");
26048 else
26049 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26050 }
26051
26052 /* This function emits the simple thunk routine that is used to preserve
26053 the link stack on the 476 cpu. */
26054
26055 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26056 static void
26057 rs6000_code_end (void)
26058 {
26059 char name[32];
26060 tree decl;
26061
26062 if (!TARGET_LINK_STACK)
26063 return;
26064
26065 get_ppc476_thunk_name (name);
26066
26067 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26068 build_function_type_list (void_type_node, NULL_TREE));
26069 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26070 NULL_TREE, void_type_node);
26071 TREE_PUBLIC (decl) = 1;
26072 TREE_STATIC (decl) = 1;
26073
26074 #if RS6000_WEAK
26075 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26076 {
26077 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26078 targetm.asm_out.unique_section (decl, 0);
26079 switch_to_section (get_named_section (decl, NULL, 0));
26080 DECL_WEAK (decl) = 1;
26081 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26082 targetm.asm_out.globalize_label (asm_out_file, name);
26083 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26084 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26085 }
26086 else
26087 #endif
26088 {
26089 switch_to_section (text_section);
26090 ASM_OUTPUT_LABEL (asm_out_file, name);
26091 }
26092
26093 DECL_INITIAL (decl) = make_node (BLOCK);
26094 current_function_decl = decl;
26095 allocate_struct_function (decl, false);
26096 init_function_start (decl);
26097 first_function_block_is_cold = false;
26098 /* Make sure unwind info is emitted for the thunk if needed. */
26099 final_start_function (emit_barrier (), asm_out_file, 1);
26100
26101 fputs ("\tblr\n", asm_out_file);
26102
26103 final_end_function ();
26104 init_insn_lengths ();
26105 free_after_compilation (cfun);
26106 set_cfun (NULL);
26107 current_function_decl = NULL;
26108 }
26109
26110 /* Add r30 to hard reg set if the prologue sets it up and it is not
26111 pic_offset_table_rtx. */
26112
26113 static void
26114 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26115 {
26116 if (!TARGET_SINGLE_PIC_BASE
26117 && TARGET_TOC
26118 && TARGET_MINIMAL_TOC
26119 && !constant_pool_empty_p ())
26120 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26121 if (cfun->machine->split_stack_argp_used)
26122 add_to_hard_reg_set (&set->set, Pmode, 12);
26123
26124 /* Make sure the hard reg set doesn't include r2, which was possibly added
26125 via PIC_OFFSET_TABLE_REGNUM. */
26126 if (TARGET_TOC)
26127 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26128 }
26129
26130 \f
26131 /* Helper function for rs6000_split_logical to emit a logical instruction after
26132 splitting the operation into single GPR registers.
26133
26134 DEST is the destination register.
26135 OP1 and OP2 are the input source registers.
26136 CODE is the base operation (AND, IOR, XOR, NOT).
26137 MODE is the machine mode.
26138 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26139 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26140 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26141
26142 static void
26143 rs6000_split_logical_inner (rtx dest,
26144 rtx op1,
26145 rtx op2,
26146 enum rtx_code code,
26147 machine_mode mode,
26148 bool complement_final_p,
26149 bool complement_op1_p,
26150 bool complement_op2_p)
26151 {
26152 rtx bool_rtx;
26153
26154 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26155 if (op2 && CONST_INT_P (op2)
26156 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26157 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26158 {
26159 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26160 HOST_WIDE_INT value = INTVAL (op2) & mask;
26161
26162 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26163 if (code == AND)
26164 {
26165 if (value == 0)
26166 {
26167 emit_insn (gen_rtx_SET (dest, const0_rtx));
26168 return;
26169 }
26170
26171 else if (value == mask)
26172 {
26173 if (!rtx_equal_p (dest, op1))
26174 emit_insn (gen_rtx_SET (dest, op1));
26175 return;
26176 }
26177 }
26178
26179 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26180 into separate ORI/ORIS or XORI/XORIS instructions. */
26181 else if (code == IOR || code == XOR)
26182 {
26183 if (value == 0)
26184 {
26185 if (!rtx_equal_p (dest, op1))
26186 emit_insn (gen_rtx_SET (dest, op1));
26187 return;
26188 }
26189 }
26190 }
26191
26192 if (code == AND && mode == SImode
26193 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26194 {
26195 emit_insn (gen_andsi3 (dest, op1, op2));
26196 return;
26197 }
26198
26199 if (complement_op1_p)
26200 op1 = gen_rtx_NOT (mode, op1);
26201
26202 if (complement_op2_p)
26203 op2 = gen_rtx_NOT (mode, op2);
26204
26205 /* For canonical RTL, if only one arm is inverted it is the first. */
26206 if (!complement_op1_p && complement_op2_p)
26207 std::swap (op1, op2);
26208
26209 bool_rtx = ((code == NOT)
26210 ? gen_rtx_NOT (mode, op1)
26211 : gen_rtx_fmt_ee (code, mode, op1, op2));
26212
26213 if (complement_final_p)
26214 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26215
26216 emit_insn (gen_rtx_SET (dest, bool_rtx));
26217 }
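
/* For illustration, with COMPLEMENT_OP1_P set the code above produces the
canonical RTL for a complemented operand, e.g. for AND:

(set (reg:SI 3) (and:SI (not:SI (reg:SI 4)) (reg:SI 5)))

which matches the ANDC pattern; IOR with a complemented operand yields
ORC in the same way, and COMPLEMENT_FINAL_P wraps the whole result in a
NOT to form the NAND/NOR/EQV style operations. */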
26218
26219 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26220 operations are split immediately during RTL generation to allow for more
26221 optimizations of the AND/IOR/XOR.
26222
26223 OPERANDS is an array containing the destination and two input operands.
26224 CODE is the base operation (AND, IOR, XOR, NOT).
26225 MODE is the machine mode.
26226 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26227 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26228 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
26229 CLOBBER_REG is either NULL or a scratch register of type CC to allow
26230 formation of the AND instructions. */
26231
26232 static void
26233 rs6000_split_logical_di (rtx operands[3],
26234 enum rtx_code code,
26235 bool complement_final_p,
26236 bool complement_op1_p,
26237 bool complement_op2_p)
26238 {
26239 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26240 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26241 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26242 enum hi_lo { hi = 0, lo = 1 };
26243 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26244 size_t i;
26245
26246 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26247 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26248 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26249 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26250
26251 if (code == NOT)
26252 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26253 else
26254 {
26255 if (!CONST_INT_P (operands[2]))
26256 {
26257 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26258 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26259 }
26260 else
26261 {
26262 HOST_WIDE_INT value = INTVAL (operands[2]);
26263 HOST_WIDE_INT value_hi_lo[2];
26264
26265 gcc_assert (!complement_final_p);
26266 gcc_assert (!complement_op1_p);
26267 gcc_assert (!complement_op2_p);
26268
26269 value_hi_lo[hi] = value >> 32;
26270 value_hi_lo[lo] = value & lower_32bits;
26271
26272 for (i = 0; i < 2; i++)
26273 {
26274 HOST_WIDE_INT sub_value = value_hi_lo[i];
26275
26276 if (sub_value & sign_bit)
26277 sub_value |= upper_32bits;
26278
26279 op2_hi_lo[i] = GEN_INT (sub_value);
26280
26281 /* If this is an AND instruction, check to see if we need to load
26282 the value in a register. */
26283 if (code == AND && sub_value != -1 && sub_value != 0
26284 && !and_operand (op2_hi_lo[i], SImode))
26285 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26286 }
26287 }
26288 }
26289
26290 for (i = 0; i < 2; i++)
26291 {
26292 /* Split large IOR/XOR operations. */
26293 if ((code == IOR || code == XOR)
26294 && CONST_INT_P (op2_hi_lo[i])
26295 && !complement_final_p
26296 && !complement_op1_p
26297 && !complement_op2_p
26298 && !logical_const_operand (op2_hi_lo[i], SImode))
26299 {
26300 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26301 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
26302 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
26303 rtx tmp = gen_reg_rtx (SImode);
26304
26305 /* Make sure the constant is sign extended. */
26306 if ((hi_16bits & sign_bit) != 0)
26307 hi_16bits |= upper_32bits;
26308
26309 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26310 code, SImode, false, false, false);
26311
26312 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26313 code, SImode, false, false, false);
26314 }
26315 else
26316 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26317 code, SImode, complement_final_p,
26318 complement_op1_p, complement_op2_p);
26319 }
26320
26321 return;
26322 }
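
/* For example (illustrative), an IOR of one 32-bit half with 0x12345678,
which is not a single valid logical constant, is split by the code
above into two operations along the lines of:

oris tmp,op1,0x1234 IOR with the high 16 bits
ori dest,tmp,0x5678 IOR with the low 16 bits */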
26323
26324 /* Split the insns that make up boolean operations operating on multiple GPR
26325 registers. The boolean MD patterns ensure that the inputs either are
26326 exactly the same as the output registers, or there is no overlap.
26327
26328 OPERANDS is an array containing the destination and two input operands.
26329 CODE is the base operation (AND, IOR, XOR, NOT).
26330 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26331 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26332 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26333
26334 void
26335 rs6000_split_logical (rtx operands[3],
26336 enum rtx_code code,
26337 bool complement_final_p,
26338 bool complement_op1_p,
26339 bool complement_op2_p)
26340 {
26341 machine_mode mode = GET_MODE (operands[0]);
26342 machine_mode sub_mode;
26343 rtx op0, op1, op2;
26344 int sub_size, regno0, regno1, nregs, i;
26345
26346 /* If this is DImode, use the specialized version that can run before
26347 register allocation. */
26348 if (mode == DImode && !TARGET_POWERPC64)
26349 {
26350 rs6000_split_logical_di (operands, code, complement_final_p,
26351 complement_op1_p, complement_op2_p);
26352 return;
26353 }
26354
26355 op0 = operands[0];
26356 op1 = operands[1];
26357 op2 = (code == NOT) ? NULL_RTX : operands[2];
26358 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26359 sub_size = GET_MODE_SIZE (sub_mode);
26360 regno0 = REGNO (op0);
26361 regno1 = REGNO (op1);
26362
26363 gcc_assert (reload_completed);
26364 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26365 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26366
26367 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26368 gcc_assert (nregs > 1);
26369
26370 if (op2 && REG_P (op2))
26371 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26372
26373 for (i = 0; i < nregs; i++)
26374 {
26375 int offset = i * sub_size;
26376 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26377 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26378 rtx sub_op2 = ((code == NOT)
26379 ? NULL_RTX
26380 : simplify_subreg (sub_mode, op2, mode, offset));
26381
26382 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26383 complement_final_p, complement_op1_p,
26384 complement_op2_p);
26385 }
26386
26387 return;
26388 }
26389
26390 \f
26391 /* Return true if the peephole2 can combine a load involving a combination of
26392 an addis instruction and a load with an offset that can be fused together on
26393 a power8. */
26394
26395 bool
26396 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
26397 rtx addis_value, /* addis value. */
26398 rtx target, /* target register that is loaded. */
26399 rtx mem) /* bottom part of the memory addr. */
26400 {
26401 rtx addr;
26402 rtx base_reg;
26403
26404 /* Validate arguments. */
26405 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
26406 return false;
26407
26408 if (!base_reg_operand (target, GET_MODE (target)))
26409 return false;
26410
26411 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
26412 return false;
26413
26414 /* Allow sign/zero extension. */
26415 if (GET_CODE (mem) == ZERO_EXTEND
26416 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
26417 mem = XEXP (mem, 0);
26418
26419 if (!MEM_P (mem))
26420 return false;
26421
26422 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
26423 return false;
26424
26425 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
26426 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
26427 return false;
26428
26429 /* Validate that the register used to load the high value is either the
26430 register being loaded, or we can safely replace its use.
26431
26432 This function is only called from the peephole2 pass and we assume that
26433 there are 2 instructions in the peephole (addis and load), so we want to
26434 check if the target register was not used in the memory address and the
26435 register to hold the addis result is dead after the peephole. */
26436 if (REGNO (addis_reg) != REGNO (target))
26437 {
26438 if (reg_mentioned_p (target, mem))
26439 return false;
26440
26441 if (!peep2_reg_dead_p (2, addis_reg))
26442 return false;
26443
26444 /* If the target register being loaded is the stack pointer, we must
26445 avoid loading any other value into it, even temporarily. */
26446 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
26447 return false;
26448 }
26449
26450 base_reg = XEXP (addr, 0);
26451 return REGNO (addis_reg) == REGNO (base_reg);
26452 }
26453
26454 /* During the peephole2 pass, adjust and expand the insns for a load fusion
26455 sequence. We adjust the addis register to use the target register. If the
26456 load sign extends, we adjust the code to do a zero-extending load followed
26457 by an explicit sign extension, since the fusion only covers zero-extending
26458 loads.
26459
26460 The operands are:
26461 operands[0] register set with addis (to be replaced with target)
26462 operands[1] value set via addis
26463 operands[2] target register being loaded
26464 operands[3] D-form memory reference using operands[0]. */
26465
26466 void
26467 expand_fusion_gpr_load (rtx *operands)
26468 {
26469 rtx addis_value = operands[1];
26470 rtx target = operands[2];
26471 rtx orig_mem = operands[3];
26472 rtx new_addr, new_mem, orig_addr, offset;
26473 enum rtx_code plus_or_lo_sum;
26474 machine_mode target_mode = GET_MODE (target);
26475 machine_mode extend_mode = target_mode;
26476 machine_mode ptr_mode = Pmode;
26477 enum rtx_code extend = UNKNOWN;
26478
26479 if (GET_CODE (orig_mem) == ZERO_EXTEND
26480 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
26481 {
26482 extend = GET_CODE (orig_mem);
26483 orig_mem = XEXP (orig_mem, 0);
26484 target_mode = GET_MODE (orig_mem);
26485 }
26486
26487 gcc_assert (MEM_P (orig_mem));
26488
26489 orig_addr = XEXP (orig_mem, 0);
26490 plus_or_lo_sum = GET_CODE (orig_addr);
26491 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
26492
26493 offset = XEXP (orig_addr, 1);
26494 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
26495 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
26496
26497 if (extend != UNKNOWN)
26498 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
26499
26500 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
26501 UNSPEC_FUSION_GPR);
26502 emit_insn (gen_rtx_SET (target, new_mem));
26503
26504 if (extend == SIGN_EXTEND)
26505 {
26506 int sub_off = ((BYTES_BIG_ENDIAN)
26507 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
26508 : 0);
26509 rtx sign_reg
26510 = simplify_subreg (target_mode, target, extend_mode, sub_off);
26511
26512 emit_insn (gen_rtx_SET (target,
26513 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
26514 }
26515
26516 return;
26517 }
26518
26519 /* Emit the addis instruction that will be part of a fused instruction
26520 sequence. */
26521
26522 void
26523 emit_fusion_addis (rtx target, rtx addis_value)
26524 {
26525 rtx fuse_ops[10];
26526 const char *addis_str = NULL;
26527
26528 /* Emit the addis instruction. */
26529 fuse_ops[0] = target;
26530 if (satisfies_constraint_L (addis_value))
26531 {
26532 fuse_ops[1] = addis_value;
26533 addis_str = "lis %0,%v1";
26534 }
26535
26536 else if (GET_CODE (addis_value) == PLUS)
26537 {
26538 rtx op0 = XEXP (addis_value, 0);
26539 rtx op1 = XEXP (addis_value, 1);
26540
26541 if (REG_P (op0) && CONST_INT_P (op1)
26542 && satisfies_constraint_L (op1))
26543 {
26544 fuse_ops[1] = op0;
26545 fuse_ops[2] = op1;
26546 addis_str = "addis %0,%1,%v2";
26547 }
26548 }
26549
26550 else if (GET_CODE (addis_value) == HIGH)
26551 {
26552 rtx value = XEXP (addis_value, 0);
26553 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
26554 {
26555 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
26556 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
26557 if (TARGET_ELF)
26558 addis_str = "addis %0,%2,%1@toc@ha";
26559
26560 else if (TARGET_XCOFF)
26561 addis_str = "addis %0,%1@u(%2)";
26562
26563 else
26564 gcc_unreachable ();
26565 }
26566
26567 else if (GET_CODE (value) == PLUS)
26568 {
26569 rtx op0 = XEXP (value, 0);
26570 rtx op1 = XEXP (value, 1);
26571
26572 if (GET_CODE (op0) == UNSPEC
26573 && XINT (op0, 1) == UNSPEC_TOCREL
26574 && CONST_INT_P (op1))
26575 {
26576 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
26577 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
26578 fuse_ops[3] = op1;
26579 if (TARGET_ELF)
26580 addis_str = "addis %0,%2,%1+%3@toc@ha";
26581
26582 else if (TARGET_XCOFF)
26583 addis_str = "addis %0,%1+%3@u(%2)";
26584
26585 else
26586 gcc_unreachable ();
26587 }
26588 }
26589
26590 else if (satisfies_constraint_L (value))
26591 {
26592 fuse_ops[1] = value;
26593 addis_str = "lis %0,%v1";
26594 }
26595
26596 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
26597 {
26598 fuse_ops[1] = value;
26599 addis_str = "lis %0,%1@ha";
26600 }
26601 }
26602
26603 if (!addis_str)
26604 fatal_insn ("Could not generate addis value for fusion", addis_value);
26605
26606 output_asm_insn (addis_str, fuse_ops);
26607 }
26608
26609 /* Emit a D-form load or store instruction that is the second instruction
26610 of a fusion sequence. */
26611
26612 static void
26613 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
26614 {
26615 rtx fuse_ops[10];
26616 char insn_template[80];
26617
26618 fuse_ops[0] = load_reg;
26619 fuse_ops[1] = addis_reg;
26620
26621 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
26622 {
26623 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
26624 fuse_ops[2] = offset;
26625 output_asm_insn (insn_template, fuse_ops);
26626 }
26627
26628 else if (GET_CODE (offset) == UNSPEC
26629 && XINT (offset, 1) == UNSPEC_TOCREL)
26630 {
26631 if (TARGET_ELF)
26632 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
26633
26634 else if (TARGET_XCOFF)
26635 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
26636
26637 else
26638 gcc_unreachable ();
26639
26640 fuse_ops[2] = XVECEXP (offset, 0, 0);
26641 output_asm_insn (insn_template, fuse_ops);
26642 }
26643
26644 else if (GET_CODE (offset) == PLUS
26645 && GET_CODE (XEXP (offset, 0)) == UNSPEC
26646 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
26647 && CONST_INT_P (XEXP (offset, 1)))
26648 {
26649 rtx tocrel_unspec = XEXP (offset, 0);
26650 if (TARGET_ELF)
26651 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
26652
26653 else if (TARGET_XCOFF)
26654 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
26655
26656 else
26657 gcc_unreachable ();
26658
26659 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
26660 fuse_ops[3] = XEXP (offset, 1);
26661 output_asm_insn (insn_template, fuse_ops);
26662 }
26663
26664 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
26665 {
26666 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
26667
26668 fuse_ops[2] = offset;
26669 output_asm_insn (insn_template, fuse_ops);
26670 }
26671
26672 else
26673 fatal_insn ("Unable to generate load/store offset for fusion", offset);
26674
26675 return;
26676 }
26677
26678 /* Given an address, convert it into the addis and load offset parts. Addresses
26679 created during the peephole2 process look like:
26680 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
26681 (unspec [(...)] UNSPEC_TOCREL)) */
26682
26683 static void
26684 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
26685 {
26686 rtx hi, lo;
26687
26688 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
26689 {
26690 hi = XEXP (addr, 0);
26691 lo = XEXP (addr, 1);
26692 }
26693 else
26694 gcc_unreachable ();
26695
26696 *p_hi = hi;
26697 *p_lo = lo;
26698 }
26699
26700 /* Return a string to fuse an addis instruction with a gpr load into the same
26701 register that the addis instruction set up. The address that is used
26702 is the logical address that was formed during peephole2:
26703 (lo_sum (high) (low-part))
26704
26705 The code is complicated, so we call output_asm_insn directly, and just
26706 return "". */
26707
26708 const char *
26709 emit_fusion_gpr_load (rtx target, rtx mem)
26710 {
26711 rtx addis_value;
26712 rtx addr;
26713 rtx load_offset;
26714 const char *load_str = NULL;
26715 machine_mode mode;
26716
26717 if (GET_CODE (mem) == ZERO_EXTEND)
26718 mem = XEXP (mem, 0);
26719
26720 gcc_assert (REG_P (target) && MEM_P (mem));
26721
26722 addr = XEXP (mem, 0);
26723 fusion_split_address (addr, &addis_value, &load_offset);
26724
26725 /* Now emit the load instruction to the same register. */
26726 mode = GET_MODE (mem);
26727 switch (mode)
26728 {
26729 case E_QImode:
26730 load_str = "lbz";
26731 break;
26732
26733 case E_HImode:
26734 load_str = "lhz";
26735 break;
26736
26737 case E_SImode:
26738 case E_SFmode:
26739 load_str = "lwz";
26740 break;
26741
26742 case E_DImode:
26743 case E_DFmode:
26744 gcc_assert (TARGET_POWERPC64);
26745 load_str = "ld";
26746 break;
26747
26748 default:
26749 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
26750 }
26751
26752 /* Emit the addis instruction. */
26753 emit_fusion_addis (target, addis_value);
26754
26755 /* Emit the D-form load instruction. */
26756 emit_fusion_load (target, target, load_offset, load_str);
26757
26758 return "";
26759 }
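
/* As an illustration (a sketch of the intended output, assuming an ELF
target and a TOC-relative symbol SYM), the fused pair loads both halves
of the address into the target register itself:

addis r3,r2,SYM@toc@ha
lwz r3,SYM@toc@l(r3)

so no scratch register is live across the pair, which is what allows
the peephole2 to form it. */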
26760 \f
26761
26762 #ifdef RS6000_GLIBC_ATOMIC_FENV
26763 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
26764 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
26765 #endif
26766
26767 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
26768
26769 static void
26770 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
26771 {
26772 if (!TARGET_HARD_FLOAT)
26773 {
26774 #ifdef RS6000_GLIBC_ATOMIC_FENV
26775 if (atomic_hold_decl == NULL_TREE)
26776 {
26777 atomic_hold_decl
26778 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26779 get_identifier ("__atomic_feholdexcept"),
26780 build_function_type_list (void_type_node,
26781 double_ptr_type_node,
26782 NULL_TREE));
26783 TREE_PUBLIC (atomic_hold_decl) = 1;
26784 DECL_EXTERNAL (atomic_hold_decl) = 1;
26785 }
26786
26787 if (atomic_clear_decl == NULL_TREE)
26788 {
26789 atomic_clear_decl
26790 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26791 get_identifier ("__atomic_feclearexcept"),
26792 build_function_type_list (void_type_node,
26793 NULL_TREE));
26794 TREE_PUBLIC (atomic_clear_decl) = 1;
26795 DECL_EXTERNAL (atomic_clear_decl) = 1;
26796 }
26797
26798 tree const_double = build_qualified_type (double_type_node,
26799 TYPE_QUAL_CONST);
26800 tree const_double_ptr = build_pointer_type (const_double);
26801 if (atomic_update_decl == NULL_TREE)
26802 {
26803 atomic_update_decl
26804 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26805 get_identifier ("__atomic_feupdateenv"),
26806 build_function_type_list (void_type_node,
26807 const_double_ptr,
26808 NULL_TREE));
26809 TREE_PUBLIC (atomic_update_decl) = 1;
26810 DECL_EXTERNAL (atomic_update_decl) = 1;
26811 }
26812
26813 tree fenv_var = create_tmp_var_raw (double_type_node);
26814 TREE_ADDRESSABLE (fenv_var) = 1;
26815 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
26816 build4 (TARGET_EXPR, double_type_node, fenv_var,
26817 void_node, NULL_TREE, NULL_TREE));
26818
26819 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
26820 *clear = build_call_expr (atomic_clear_decl, 0);
26821 *update = build_call_expr (atomic_update_decl, 1,
26822 fold_convert (const_double_ptr, fenv_addr));
26823 #endif
26824 return;
26825 }
26826
26827 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
26828 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
26829 tree call_mffs = build_call_expr (mffs, 0);
26830
26831 /* Generates the equivalent of feholdexcept (&fenv_var)
26832
26833 *fenv_var = __builtin_mffs ();
26834 double fenv_hold;
26835 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26836 __builtin_mtfsf (0xff, fenv_hold); */
26837
26838 /* Mask to clear everything except for the rounding modes and non-IEEE
26839 arithmetic flag. */
26840 const unsigned HOST_WIDE_INT hold_exception_mask
26841 = HOST_WIDE_INT_C (0xffffffff00000007);
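
/* In the FPSCR image returned by mffs, the three low-order bits kept by
this mask are (assuming the usual FPSCR layout) bits 1:0 = RN, the
binary rounding mode, and bit 2 = NI, the non-IEEE mode bit, so the
held state keeps rounding behaviour while the exception status and
enable bits are cleared. */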
26842
26843 tree fenv_var = create_tmp_var_raw (double_type_node);
26844
26845 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
26846 NULL_TREE, NULL_TREE);
26847
26848 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything in the lower word of the FPSCR, i.e. all of
     the exception status, enable, and rounding-mode bits.  */
  const unsigned HOST_WIDE_INT clear_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
			    call_mffs, NULL_TREE, NULL_TREE);

  tree fenv_clear_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node,
				fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clear_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
				| (*(uint64_t*)&fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask
    = HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask
    = HOST_WIDE_INT_C (0x1ff80fff);
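  /* The current FPSCR (read again at update time) contributes the newly
     accrued exception bits; the saved environment contributes the control
     bits being restored.  */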

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
			     call_mffs, NULL_TREE, NULL_TREE);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

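/* Generate code to convert the concatenation of the V2DF vectors SRC1 and
   SRC2 into the single V4SF vector DST, i.e. a float2-style narrowing
   conversion.  */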
void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination layout of the vmrgew instruction is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
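  /* Either way, the intended net effect is
       dst = { (float) src1[0], (float) src1[1],
	       (float) src2[0], (float) src2[1] }.  */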
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

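/* As above, but SRC1 and SRC2 are V2DI vectors of 64-bit integers;
   SIGNED_CONVERT selects a signed rather than unsigned conversion to
   float.  */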
void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination layout of the vmrgew instruction is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
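  /* Again the intended net effect is
       dst = { (float) src1[0], (float) src1[1],
	       (float) src2[0], (float) src2[1] },
     with each element converted from a 64-bit integer.  */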
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

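/* Generate code to convert the concatenation of the V2DF vectors SRC1 and
   SRC2 into a single V4SI vector DST of 32-bit integers, using signed or
   unsigned conversions as SIGNED_CONVERT directs.  */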
void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */
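/* Word-aligning string constants lets copies from them be done a word at a
   time.  */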

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}
\f

/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is
   called via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif

\f
/* On 64-bit Linux and FreeBSD systems, possibly switch the long double
   library function names from <foo>l to <foo>f128 if the default long double
   type is IEEE 128-bit.  Typically, with the C and C++ languages, the
   standard math.h include file switches the names on systems that support
   long double as IEEE 128-bit, but that doesn't work if the user uses
   __builtin_<foo>l directly.  In the future, glibc will export names like
   __ieee128_sinf128, and we can switch to using those instead of sinf128,
   which pollutes the user's namespace.

   This switches the names of the Fortran math functions as well (Fortran
   does not use math.h).  However, Fortran needs other changes to the
   compiler and library before the real*16 type can be switched at compile
   time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.
   We only do this if the default is that long double is IBM extended double,
   and the user asked for IEEE 128-bit.  */
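/* For example, the assembler name "sinl" for __builtin_sinl becomes
   "sinf128".  */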

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl))
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);

      if (name[len - 1] == 'l')
	{
	  bool uses_ieee128_p = false;
	  tree type = TREE_TYPE (decl);
	  machine_mode ret_mode = TYPE_MODE (type);

	  /* See if the function returns an IEEE 128-bit floating point type
	     or complex type.  */
	  if (ret_mode == TFmode || ret_mode == TCmode)
	    uses_ieee128_p = true;
	  else
	    {
	      function_args_iterator args_iter;
	      tree arg;

	      /* See if the function passes an IEEE 128-bit floating point
		 type or complex type.  */
	      FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		{
		  machine_mode arg_mode = TYPE_MODE (arg);
		  if (arg_mode == TFmode || arg_mode == TCmode)
		    {
		      uses_ieee128_p = true;
		      break;
		    }
		}
	    }

	  /* If we passed or returned an IEEE 128-bit floating point type,
	     change the name.  */
	  if (uses_ieee128_p)
	    {
	      char *name2 = (char *) alloca (len + 4);
	      memcpy (name2, name, len - 1);
	      strcpy (name2 + len - 1, "f128");
	      id = get_identifier (name2);
	    }
	}
    }

  return id;
}

/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
		 " loop nesting.\n");
      return false;
    }

  return true;
}

/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */

static bool
rs6000_cannot_substitute_mem_equiv_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  /* curr_insn_transform()'s handling of subregs cannot handle Altivec
     AND-style addresses (an AND masking off the low address bits), so don't
     allow MEMs with those address types to be substituted as an equivalent
     expression.  See PR93974 for details.  */
  if (GET_CODE (XEXP (mem, 0)) == AND)
    return true;

  return false;
}

/* Implement TARGET_INVALID_CONVERSION.  */

static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types.  */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types.  */
      if (frommode == XOmode)
	return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
	return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
	return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
	return N_("invalid conversion to type %<__vector_pair%>");
    }
  else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
    {
      /* We really care about the modes of the base types.  */
      frommode = TYPE_MODE (TREE_TYPE (fromtype));
      tomode = TYPE_MODE (TREE_TYPE (totype));

      /* Do not allow conversions to/from XOmode and OOmode pointer
	 types, except to/from void pointers.  */
      if (frommode != tomode
	  && frommode != VOIDmode
	  && tomode != VOIDmode)
	{
	  if (frommode == XOmode)
	    return N_("invalid conversion from type %<* __vector_quad%>");
	  if (tomode == XOmode)
	    return N_("invalid conversion to type %<* __vector_quad%>");
	  if (frommode == OOmode)
	    return N_("invalid conversion from type %<* __vector_pair%>");
	  if (tomode == OOmode)
	    return N_("invalid conversion to type %<* __vector_pair%>");
	}
    }

  /* Conversion allowed.  */
  return NULL;
}

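/* Return the target-format bit image of the SFmode constant OPERAND as an
   integer.  */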
long long
rs6000_const_f32_to_i32 (rtx operand)
{
  long long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}

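/* Emit an xxspltidp instruction that splats the 32-bit float image VALUE
   into the V2DF register DST, warning when VALUE is a nonzero subnormal,
   for which the instruction's result is undefined.  */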
void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
	    "the result for the xxspltidp instruction "
	    "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}

/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}

void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"